/**
 * Writes the parsing outcome to the logger.
 *
 * <p>When a best format is known, the authors of the first entry of
 * {@code authorListList} are logged at info level. If further entries exist,
 * each of them is logged as an alternative interpretation, and every author
 * that has a counterpart at the same position in the best list is marked as
 * identical to or differing from that counterpart. When no best format is
 * known, a single debug message is logged instead.
 */
public void displayAuthors() {
    // Nothing matched: report it and leave.
    if (bestFormat == null) {
        logger.debug("Diese Eingabe entspricht keinem bekannten Format.");
        return;
    }

    logger.info("Best result (" + bestFormat.getName() + "):");
    List<Author> best = authorListList.get(0);
    for (Author author : best) {
        logger.info(author);
    }

    // Only one interpretation available, so there are no alternatives to show.
    if (authorListList.size() <= 1) {
        return;
    }

    boolean headerLogged = false;
    for (List<Author> candidate : authorListList) {
        // Identity check on purpose: skip the very list object shown above.
        if (candidate == best) {
            continue;
        }
        if (!headerLogged) {
            logger.info("There are alternative interpretations:");
            headerLogged = true;
        }

        int position = 0;
        for (Author author : candidate) {
            logger.info(author);
            // Compare only where the best list has an author at this position.
            if (position < best.size()) {
                String verdict;
                if (author.equals(best.get(position))) {
                    verdict = "(identical)";
                } else {
                    verdict = "(differing)";
                }
                logger.info(verdict);
            }
            position++;
        }
    }
}
/** * Constructor that starts the processing of a given author string. * * @param authors The author string to be parsed. * @param bestOnly Indicates if only the first (and best) result should be evaluated. * @throws Exception Any Exception. */ public AuthorDecoder(String authors, boolean bestOnly) throws Exception { // Remove newlines that have a separator before or after it // replace newlines before or after commas authors = authors.replaceAll(",\\s*\\n\\s*", ", "); authors = authors.replaceAll("\\s*\\n\\s*,", ","); // replace newlines before or after semicolons authors = authors.replaceAll(";\\s*\\n\\s*", "; "); authors = authors.replaceAll("\\s*\\n\\s*;", ";"); // replace newlines before or after "and" authors = authors.replaceAll(" and\\s*\\n\\s*", " and "); authors = authors.replaceAll("\\s*\\n\\s*and ", " and "); // replace newlines before or after "und" authors = authors.replaceAll(" und\\s*\\n\\s*", " und "); authors = authors.replaceAll("\\s*\\n\\s*und ", " und "); // //replace newlines before or after "et" // authors = authors.replaceAll(" et\\s*\\n\\s*", " et "); // authors = authors.replaceAll("\\s*\\n\\s*et ", " et "); // // //replace ", and" // authors = authors.replaceAll(",\\s*and\\s+", ", "); // authors = authors.replaceAll(";\\s*and\\s+", "; "); // // //normalize the string // //authors = authors.replaceAll("(\\s)+", " ").trim(); // authors = authors.trim(); logger.debug("Testing '" + authors + "'"); // remove "et al." 
from the authorsString if (authors.contains("et al.")) { authors = authors.substring(0, authors.indexOf("et al.")).trim(); } AuthorFormat[] authorFormats = AuthorFormatList.getFormats(); for (AuthorFormat authorFormat : authorFormats) { logger.debug(authorFormat.getName()); try { // Pattern pattern = Pattern.compile(authorFormat.getPattern()); // Matcher matcher = pattern.matcher(authors); List<Author> authorList = authorFormat.getAuthors(authorFormat.normalize(authors)); if (authorList != null) { logger.debug("Pattern found!"); analyzeAuthors(authorList); authorListList.add(authorList); if (bestFormat == null) { bestFormat = authorFormat; if (bestOnly) { break; } } } } catch (StackOverflowError e) { logger.error("Could not apply format \"" + authorFormat.getName() + "\"", e); } } }