/** * Returns the text stored in the given field of the given bibtex entry which belongs to the given * database. * * <p>If a database is given, this function will try to resolve any string references in the * field-value. Also, if a database is given, this function will try to find values for unset * fields in the entry linked by the "crossref" field, if any. * * @param field The field to return the value of. * @param bibtex maybenull The bibtex entry which contains the field. * @param database maybenull The database of the bibtex entry. * @return The resolved field value or null if not found. */ public static String getResolvedField(String field, BibtexEntry bibtex, BibtexDatabase database) { if (field.equals("bibtextype")) return bibtex.getType().getName(); // TODO: Changed this to also consider alias fields, which is the expected // behavior for the preview layout and for the check whatever all fields are present. // But there might be unwanted side-effects?! Object o = bibtex.getFieldOrAlias(field); // If this field is not set, and the entry has a crossref, try to look up the // field in the referred entry: Do not do this for the bibtex key. if ((o == null) && (database != null) && database.followCrossrefs && !field.equals(BibtexFields.KEY_FIELD)) { Object crossRef = bibtex.getField("crossref"); if (crossRef != null) { BibtexEntry referred = database.getEntryByKey((String) crossRef); if (referred != null) { // Ok, we found the referred entry. Get the field value from that // entry. If it is unset there, too, stop looking: o = referred.getField(field); } } } return getText((String) o, database); }
private BibtexEntry parseNextEntry(String allText, int startIndex) { BibtexEntry entry = null; int index = allText.indexOf("<div class=\"detail", piv); int endIndex = allText.indexOf("</div>", index); if (index >= 0 && endIndex > 0) { endIndex += 6; piv = endIndex; String text = allText.substring(index, endIndex); BibtexEntryType type = null; String sourceField = null; String typeName = ""; Matcher typeMatcher = typePattern.matcher(text); if (typeMatcher.find()) { typeName = typeMatcher.group(1); if (typeName.equalsIgnoreCase("IEEE Journals & Magazines") || typeName.equalsIgnoreCase("IEEE Early Access Articles") || typeName.equalsIgnoreCase("IET Journals & Magazines") || typeName.equalsIgnoreCase("AIP Journals & Magazines") || typeName.equalsIgnoreCase("AVS Journals & Magazines") || typeName.equalsIgnoreCase("IBM Journals & Magazines") || typeName.equalsIgnoreCase("TUP Journals & Magazines") || typeName.equalsIgnoreCase("BIAI Journals & Magazines")) { type = BibtexEntryType.getType("article"); sourceField = "journal"; } else if (typeName.equalsIgnoreCase("IEEE Conference Publications") || typeName.equalsIgnoreCase("IET Conference Publications") || typeName.equalsIgnoreCase("VDE Conference Publications")) { type = BibtexEntryType.getType("inproceedings"); sourceField = "booktitle"; } else if (typeName.equalsIgnoreCase("IEEE Standards") || typeName.equalsIgnoreCase("Standards")) { type = BibtexEntryType.getType("standard"); sourceField = "number"; } else if (typeName.equalsIgnoreCase("IEEE eLearning Library Courses")) { type = BibtexEntryType.getType("Electronic"); sourceField = "note"; } else if (typeName.equalsIgnoreCase("Wiley-IEEE Press eBook Chapters") || typeName.equalsIgnoreCase("MIT Press eBook Chapters") || typeName.equalsIgnoreCase("IEEE USA Books & eBooks")) { type = BibtexEntryType.getType("inCollection"); sourceField = "booktitle"; } } if (type == null) { type = BibtexEntryType.getType("misc"); sourceField = "note"; System.err.println("Type detection failed. Use MISC instead."); unparseable++; System.err.println(text); } entry = new BibtexEntry(IdGenerator.next(), type); if (typeName.equalsIgnoreCase("IEEE Standards")) { entry.setField("organization", "IEEE"); } if (typeName.equalsIgnoreCase("Wiley-IEEE Press eBook Chapters")) { entry.setField("publisher", "Wiley-IEEE Press"); } else if (typeName.equalsIgnoreCase("MIT Press eBook Chapters")) { entry.setField("publisher", "MIT Press"); } else if (typeName.equalsIgnoreCase("IEEE USA Books & eBooks")) { entry.setField("publisher", "IEEE USA"); } if (typeName.equalsIgnoreCase("IEEE Early Access Articles")) { entry.setField("note", "Early Access"); } Set<String> fields = fieldPatterns.keySet(); for (String field : fields) { Matcher fieldMatcher = Pattern.compile(fieldPatterns.get(field)).matcher(text); if (fieldMatcher.find()) { entry.setField(field, htmlConverter.format(fieldMatcher.group(1))); if (field.equals("title") && fieldMatcher.find()) { String sec_title = htmlConverter.format(fieldMatcher.group(1)); if (entry.getType() == BibtexEntryType.getStandardType("standard")) { sec_title = sec_title.replaceAll("IEEE Std ", ""); } entry.setField(sourceField, sec_title); } if (field.equals("pages") && fieldMatcher.groupCount() == 2) { entry.setField(field, fieldMatcher.group(1) + "-" + fieldMatcher.group(2)); } } } Matcher authorMatcher = authorPattern.matcher(text); // System.out.println(text); StringBuilder authorNames = new StringBuilder(""); int authorCount = 0; while (authorMatcher.find()) { if (authorCount >= 1) { authorNames.append(" and "); } authorNames.append(htmlConverter.format(authorMatcher.group(1))); // System.out.println(authorCount + ": " + authorMatcher.group(1)); authorCount++; } entry.setField("author", authorNames.toString()); if (entry.getField("author") == null || entry.getField("author").startsWith("a href") || entry .getField("author") .startsWith("Topic(s)")) { // Fix for some documents without authors entry.setField("author", ""); } if (entry.getType() == BibtexEntryType.getStandardType("inproceedings") && entry.getField("author").equals("")) { entry.setType(BibtexEntryType.getStandardType("proceedings")); } if (includeAbstract) { index = text.indexOf("id=\"abstract"); if (index >= 0) { endIndex = text.indexOf("</div>", index) + 6; text = text.substring(index, endIndex); Matcher absMatcher = absPattern.matcher(text); if (absMatcher.find()) { // Clean-up abstract String abstr = absMatcher.group(1); abstr = abstr.replaceAll("<span class='snippet'>([\\w]+)</span>", "$1"); entry.setField("abstract", htmlConverter.format(abstr)); } } } } if (entry == null) { return null; } else { return cleanup(entry); } }
private BibtexEntry cleanup(BibtexEntry entry) { if (entry == null) { return null; } // clean up title String title = entry.getField("title"); if (title != null) { // USe the alt-text and replace image links title = title.replaceAll("[ ]?img src=[^ ]+ alt=\"([^\"]+)\">[ ]?", "\\$$1\\$"); // Try to sort out most of the /spl / conversions // Deal with this specific nested type first title = title.replaceAll("/sub /spl infin//", "\\$_\\\\infty\\$"); title = title.replaceAll("/sup /spl infin//", "\\$\\^\\\\infty\\$"); // Replace general expressions title = title.replaceAll("/[sS]pl ([^/]+)/", "\\$\\\\$1\\$"); // Deal with subscripts and superscripts if (Globals.prefs.getBoolean(JabRefPreferences.USE_CONVERT_TO_EQUATION)) { title = title.replaceAll("/sup ([^/]+)/", "\\$\\^\\{$1\\}\\$"); title = title.replaceAll("/sub ([^/]+)/", "\\$_\\{$1\\}\\$"); title = title.replaceAll("\\(sup\\)([^(]+)\\(/sup\\)", "\\$\\^\\{$1\\}\\$"); title = title.replaceAll("\\(sub\\)([^(]+)\\(/sub\\)", "\\_\\{$1\\}\\$"); } else { title = title.replaceAll("/sup ([^/]+)/", "\\\\textsuperscript\\{$1\\}"); title = title.replaceAll("/sub ([^/]+)/", "\\\\textsubscript\\{$1\\}"); title = title.replaceAll("\\(sup\\)([^(]+)\\(/sup\\)", "\\\\textsuperscript\\{$1\\}"); title = title.replaceAll("\\(sub\\)([^(]+)\\(/sub\\)", "\\\\textsubscript\\{$1\\}"); } // Replace \infin with \infty title = title.replaceAll("\\\\infin", "\\\\infty"); // Unit formatting if (Globals.prefs.getBoolean(JabRefPreferences.USE_UNIT_FORMATTER_ON_SEARCH)) { title = unitFormatter.format(title); } // Automatic case keeping if (Globals.prefs.getBoolean(JabRefPreferences.USE_CASE_KEEPER_ON_SEARCH)) { title = caseKeeper.format(title); } // Write back entry.setField("title", title); } // clean up author /* String author = (String)entry.getField("author"); if (author != null) { if (author.indexOf("a href=") >= 0) { // Author parsing failed because it was empty entry.setField("author",""); // Maybe not needed anymore due to another change } else { author = author.replaceAll("\\s+", " "); author = author.replaceAll("\\.", ". "); author = author.replaceAll("([^;]+),([^;]+),([^;]+)","$1,$3,$2"); // Change order in case of Jr. etc author = author.replaceAll(" ", " "); author = author.replaceAll("\\. -", ".-"); author = author.replaceAll("; ", " and "); author = author.replaceAll(" ,", ","); author = author.replaceAll(" ", " "); author = author.replaceAll("[ ,;]+$", ""); entry.setField("author", author); } }*/ // clean up month String month = entry.getField("month"); if (month != null && !month.isEmpty()) { month = month.replaceAll("\\.", ""); month = month.toLowerCase(); Pattern monthPattern = Pattern.compile("(\\d*+)\\s*([a-z]*+)-*(\\d*+)\\s*([a-z]*+)"); Matcher mm = monthPattern.matcher(month); String date = month; if (mm.find()) { if (mm.group(3).isEmpty()) { if (!mm.group(2).isEmpty()) { date = "#" + mm.group(2).substring(0, 3) + "#"; if (!mm.group(1).isEmpty()) { date += " " + mm.group(1) + ","; } } else { date = mm.group(1) + ","; } } else if (mm.group(2).isEmpty()) { if (!mm.group(4).isEmpty()) { date = "#" + mm.group(4).substring(0, 3) + "# " + mm.group(1) + "--" + mm.group(3) + ","; } else { date += ","; } } else { date = "#" + mm.group(2).substring(0, 3) + "# " + mm.group(1) + "--#" + mm.group(4).substring(0, 3) + "# " + mm.group(3) + ","; } } // date = date.trim(); // if (!date.isEmpty()) { entry.setField("month", date); // } } // clean up pages String field = "pages"; String pages = entry.getField(field); if (pages != null) { String[] pageNumbers = pages.split("-"); if (pageNumbers.length == 2) { if (pageNumbers[0].equals(pageNumbers[1])) { // single page entry.setField(field, pageNumbers[0]); } else { entry.setField(field, pages.replaceAll("-", "--")); } } } // clean up publication field BibtexEntryType type = entry.getType(); String sourceField = ""; if (type.getName().equals("Article")) { sourceField = "journal"; entry.clearField("booktitle"); } else if (type.getName().equals("Inproceedings")) { sourceField = "booktitle"; } String fullName = entry.getField(sourceField); if (fullName != null) { if (type.getName().equals("Article")) { int ind = fullName.indexOf(": Accepted for future publication"); if (ind > 0) { fullName = fullName.substring(0, ind); entry.setField("year", "to be published"); entry.clearField("month"); entry.clearField("pages"); entry.clearField("number"); } String[] parts = fullName.split("[\\[\\]]"); // [see also...], [legacy...] fullName = parts[0]; if (parts.length == 3) { fullName += parts[2]; } if (entry.getField("note").equals("Early Access")) { entry.setField("year", "to be published"); entry.clearField("month"); entry.clearField("pages"); entry.clearField("number"); } } else { fullName = fullName .replace("Conference Proceedings", "Proceedings") .replace("Proceedings of", "Proceedings") .replace("Proceedings.", "Proceedings"); fullName = fullName.replaceAll("International", "Int."); fullName = fullName.replaceAll("Symposium", "Symp."); fullName = fullName.replaceAll("Conference", "Conf."); fullName = fullName.replaceAll(" on", " ").replace(" ", " "); } Matcher m1 = publicationPattern.matcher(fullName); String abrvPattern = ".*[^,] '?\\d+\\)?"; if (m1.find()) { String prefix = m1.group(2).trim(); String postfix = m1.group(1).trim(); String abrv = ""; String[] parts = prefix.split("\\. ", 2); if (parts.length == 2) { if (parts[0].matches(abrvPattern)) { prefix = parts[1]; abrv = parts[0]; } else { prefix = parts[0]; abrv = parts[1]; } } if (!prefix.matches(abrvPattern)) { fullName = prefix + " " + postfix + " " + abrv; fullName = fullName.trim(); } else { fullName = postfix + " " + prefix; } } if (type.getName().equals("Article")) { fullName = fullName.replace(" - ", "-"); // IEE Proceedings- fullName = fullName.trim(); if (Globals.prefs.getBoolean(JabRefPreferences.USE_IEEE_ABRV)) { fullName = Globals.journalAbbrev.getMedlineAbbreviation(fullName).orElse(fullName); } } if (type.getName().equals("Inproceedings")) { Matcher m2 = proceedingPattern.matcher(fullName); if (m2.find()) { String prefix = m2.group(2); String postfix = m2.group(1).replaceAll("\\.$", ""); if (!prefix.matches(abrvPattern)) { String abrv = ""; String[] parts = postfix.split("\\. ", 2); if (parts.length == 2) { if (parts[0].matches(abrvPattern)) { postfix = parts[1]; abrv = parts[0]; } else { postfix = parts[0]; abrv = parts[1]; } } fullName = prefix.trim() + " " + postfix.trim() + " " + abrv; } else { fullName = postfix.trim() + " " + prefix.trim(); } } fullName = fullName.trim(); fullName = fullName.replaceAll("^[tT]he ", "").replaceAll("^\\d{4} ", "").replaceAll("[,.]$", ""); String year = entry.getField("year"); fullName = fullName.replaceAll(", " + year + "\\.?", ""); if (!fullName.contains("Abstract") && !fullName.contains("Summaries") && !fullName.contains("Conference Record")) { fullName = "Proc. " + fullName; } } entry.setField(sourceField, fullName); } // clean up abstract String abstr = entry.getField("abstract"); if (abstr != null) { // Try to sort out most of the /spl / conversions // Deal with this specific nested type first abstr = abstr.replaceAll("/sub /spl infin//", "\\$_\\\\infty\\$"); abstr = abstr.replaceAll("/sup /spl infin//", "\\$\\^\\\\infty\\$"); // Replace general expressions abstr = abstr.replaceAll("/[sS]pl ([^/]+)/", "\\$\\\\$1\\$"); // Deal with subscripts and superscripts if (Globals.prefs.getBoolean(JabRefPreferences.USE_CONVERT_TO_EQUATION)) { abstr = abstr.replaceAll("/sup ([^/]+)/", "\\$\\^\\{$1\\}\\$"); abstr = abstr.replaceAll("/sub ([^/]+)/", "\\$_\\{$1\\}\\$"); abstr = abstr.replaceAll("\\(sup\\)([^(]+)\\(/sup\\)", "\\$\\^\\{$1\\}\\$"); abstr = abstr.replaceAll("\\(sub\\)([^(]+)\\(/sub\\)", "\\_\\{$1\\}\\$"); } else { abstr = abstr.replaceAll("/sup ([^/]+)/", "\\\\textsuperscript\\{$1\\}"); abstr = abstr.replaceAll("/sub ([^/]+)/", "\\\\textsubscript\\{$1\\}"); abstr = abstr.replaceAll("\\(sup\\)([^(]+)\\(/sup\\)", "\\\\textsuperscript\\{$1\\}"); abstr = abstr.replaceAll("\\(sub\\)([^(]+)\\(/sub\\)", "\\\\textsubscript\\{$1\\}"); } // Replace \infin with \infty abstr = abstr.replaceAll("\\\\infin", "\\\\infty"); // Write back entry.setField("abstract", abstr); } // Clean up url String url = entry.getField("url"); if (url != null) { entry.setField("url", "http://ieeexplore.ieee.org" + url.replace("tp=&", "")); } return entry; }
public int compare(BibtexEntry e1, BibtexEntry e2) { Object f1, f2; if (isTypeHeader) { // Sort by type. f1 = e1.getType().getName(); f2 = e2.getType().getName(); } else { // If the field is author or editor, we rearrange names so they are // sorted according to last name. f1 = getField(e1); f2 = getField(e2); } /* * [ 1598777 ] Month sorting * * http://sourceforge.net/tracker/index.php?func=detail&aid=1598777&group_id=92314&atid=600306 */ int localMultiplier = multiplier; if (isMonthField) localMultiplier = -localMultiplier; // Catch all cases involving null: if (f1 == null) return f2 == null ? 0 : localMultiplier; if (f2 == null) return -localMultiplier; // Now we now that both f1 and f2 are != null if (isNameField) { f1 = AuthorList.fixAuthorForAlphabetization((String) f1); f2 = AuthorList.fixAuthorForAlphabetization((String) f2); } else if (isYearField) { /* * [ 1285977 ] Impossible to properly sort a numeric field * * http://sourceforge.net/tracker/index.php?func=detail&aid=1285977&group_id=92314&atid=600307 */ f1 = Util.toFourDigitYear((String) f1); f2 = Util.toFourDigitYear((String) f2); } else if (isMonthField) { /* * [ 1535044 ] Month sorting * * http://sourceforge.net/tracker/index.php?func=detail&aid=1535044&group_id=92314&atid=600306 */ f1 = new Integer(Util.getMonthNumber((String) f1)); f2 = new Integer(Util.getMonthNumber((String) f2)); } if (isNumeric) { Integer i1 = null, i2 = null; try { i1 = Integer.parseInt((String) f1); } catch (NumberFormatException ex) { // Parsing failed. } try { i2 = Integer.parseInt((String) f2); } catch (NumberFormatException ex) { // Parsing failed. } if (i2 != null && i1 != null) { // Ok, parsing was successful. Update f1 and f2: f1 = i1; f2 = i2; } else if (i1 != null) { // The first one was parseable, but not the second one. // This means we consider one < two f1 = i1; f2 = new Integer(i1.intValue() + 1); } else if (i2 != null) { // The second one was parseable, but not the first one. // This means we consider one > two f2 = i2; f1 = new Integer(i2.intValue() + 1); } // Else none of them were parseable, and we can fall back on comparing strings. } int result = 0; if ((f1 instanceof Integer) && (f2 instanceof Integer)) { result = (((Integer) f1).compareTo((Integer) f2)); } else if (f2 instanceof Integer) { Integer f1AsInteger = new Integer(f1.toString()); result = -((f1AsInteger).compareTo((Integer) f2)); } else if (f1 instanceof Integer) { Integer f2AsInteger = new Integer(f2.toString()); result = -(((Integer) f1).compareTo(f2AsInteger)); } else { String ours = ((String) f1).toLowerCase(), theirs = ((String) f2).toLowerCase(); result = collator.compare(ours, theirs); // ours.compareTo(theirs); } return result * localMultiplier; }