Example #1
0
  /**
   * Logs the outcome of the decoding.
   *
   * <p>If no format has been recorded as best, a single debug message is written and nothing
   * else happens. Otherwise the authors of the first stored list are logged at info level,
   * followed by every other stored list. Each author of such an alternative list is logged
   * together with a marker telling whether it equals the author at the same position in the
   * first list; authors at positions beyond the end of the first list get no marker.
   */
  public void displayAuthors() {
    // Guard: nothing was recognized.
    if (bestFormat == null) {
      logger.debug("Diese Eingabe entspricht keinem bekannten Format.");
      return;
    }

    final List<Author> primary = authorListList.get(0);

    logger.info("Best result (" + bestFormat.getName() + "):");
    for (Author entry : primary) {
      logger.info(entry);
    }

    // Only one interpretation available: no alternatives to report.
    if (authorListList.size() <= 1) {
      return;
    }

    boolean headerWritten = false;

    for (List<Author> candidate : authorListList) {
      // Identity comparison on purpose: skip only the very list object shown above.
      if (candidate == primary) {
        continue;
      }

      // The headline is written once, right before the first alternative.
      if (!headerWritten) {
        logger.info("There are alternative interpretations:");
        headerWritten = true;
      }

      for (int pos = 0; pos < candidate.size(); pos++) {
        final Author current = candidate.get(pos);
        logger.info(current);

        // A comparison is only possible while the first list has an author at this position.
        if (pos < primary.size()) {
          final String verdict = current.equals(primary.get(pos)) ? "(identical)" : "(differing)";
          logger.info(verdict);
        }
      }
    }
  }
Example #2
0
  /**
   * Constructor that starts the processing of a given author string.
   *
   * @param authors The author string to be parsed.
   * @param bestOnly Indicates if only the first (and best) result should be evaluated.
   * @throws Exception Any Exception.
   */
  public AuthorDecoder(String authors, boolean bestOnly) throws Exception {
    // Remove newlines that have a separator before or after it

    // replace newlines before or after commas
    authors = authors.replaceAll(",\\s*\\n\\s*", ", ");
    authors = authors.replaceAll("\\s*\\n\\s*,", ",");

    // replace newlines before or after semicolons
    authors = authors.replaceAll(";\\s*\\n\\s*", "; ");
    authors = authors.replaceAll("\\s*\\n\\s*;", ";");

    // replace newlines before or after "and"
    authors = authors.replaceAll(" and\\s*\\n\\s*", " and ");
    authors = authors.replaceAll("\\s*\\n\\s*and ", " and ");

    // replace newlines before or after "und"
    authors = authors.replaceAll(" und\\s*\\n\\s*", " und ");
    authors = authors.replaceAll("\\s*\\n\\s*und ", " und ");

    // //replace newlines before or after "et"
    // authors = authors.replaceAll(" et\\s*\\n\\s*", " et ");
    // authors = authors.replaceAll("\\s*\\n\\s*et ", " et ");
    //
    // //replace ", and"
    // authors = authors.replaceAll(",\\s*and\\s+", ", ");
    // authors = authors.replaceAll(";\\s*and\\s+", "; ");
    //
    // //normalize the string
    // //authors = authors.replaceAll("(\\s)+", " ").trim();
    // authors = authors.trim();

    logger.debug("Testing '" + authors + "'");

    // remove "et al." from the authorsString
    if (authors.contains("et al.")) {
      authors = authors.substring(0, authors.indexOf("et al.")).trim();
    }

    AuthorFormat[] authorFormats = AuthorFormatList.getFormats();

    for (AuthorFormat authorFormat : authorFormats) {

      logger.debug(authorFormat.getName());
      try {
        // Pattern pattern = Pattern.compile(authorFormat.getPattern());
        // Matcher matcher = pattern.matcher(authors);

        List<Author> authorList = authorFormat.getAuthors(authorFormat.normalize(authors));
        if (authorList != null) {
          logger.debug("Pattern found!");
          analyzeAuthors(authorList);
          authorListList.add(authorList);
          if (bestFormat == null) {
            bestFormat = authorFormat;
            if (bestOnly) {
              break;
            }
          }
        }
      } catch (StackOverflowError e) {
        logger.error("Could not apply format \"" + authorFormat.getName() + "\"", e);
      }
    }
  }