Пример #1
0
  /**
   * Returns the text stored in the given field of the given bibtex entry which belongs to the given
   * database.
   *
   * <p>If a database is given, this function will try to resolve any string references in the
   * field-value. Also, if a database is given, this function will try to find values for unset
   * fields in the entry linked by the "crossref" field, if any.
   *
   * @param field The field to return the value of.
   * @param bibtex maybenull The bibtex entry which contains the field.
   * @param database maybenull The database of the bibtex entry.
   * @return The resolved field value or null if not found.
   */
  public static String getResolvedField(String field, BibtexEntry bibtex, BibtexDatabase database) {

    if (field.equals("bibtextype")) return bibtex.getType().getName();

    // TODO: Changed this to also consider alias fields, which is the expected
    // behavior for the preview layout and for the check whatever all fields are present.
    // But there might be unwanted side-effects?!
    Object o = bibtex.getFieldOrAlias(field);

    // If this field is not set, and the entry has a crossref, try to look up the
    // field in the referred entry: Do not do this for the bibtex key.
    if ((o == null)
        && (database != null)
        && database.followCrossrefs
        && !field.equals(BibtexFields.KEY_FIELD)) {
      Object crossRef = bibtex.getField("crossref");
      if (crossRef != null) {
        BibtexEntry referred = database.getEntryByKey((String) crossRef);
        if (referred != null) {
          // Ok, we found the referred entry. Get the field value from that
          // entry. If it is unset there, too, stop looking:
          o = referred.getField(field);
        }
      }
    }

    return getText((String) o, database);
  }
Пример #2
0
  private BibtexEntry parseNextEntry(String allText, int startIndex) {
    BibtexEntry entry = null;

    int index = allText.indexOf("<div class=\"detail", piv);
    int endIndex = allText.indexOf("</div>", index);

    if (index >= 0 && endIndex > 0) {
      endIndex += 6;
      piv = endIndex;
      String text = allText.substring(index, endIndex);

      BibtexEntryType type = null;
      String sourceField = null;

      String typeName = "";
      Matcher typeMatcher = typePattern.matcher(text);
      if (typeMatcher.find()) {
        typeName = typeMatcher.group(1);
        if (typeName.equalsIgnoreCase("IEEE Journals &amp; Magazines")
            || typeName.equalsIgnoreCase("IEEE Early Access Articles")
            || typeName.equalsIgnoreCase("IET Journals &amp; Magazines")
            || typeName.equalsIgnoreCase("AIP Journals &amp; Magazines")
            || typeName.equalsIgnoreCase("AVS Journals &amp; Magazines")
            || typeName.equalsIgnoreCase("IBM Journals &amp; Magazines")
            || typeName.equalsIgnoreCase("TUP Journals &amp; Magazines")
            || typeName.equalsIgnoreCase("BIAI Journals &amp; Magazines")) {
          type = BibtexEntryType.getType("article");
          sourceField = "journal";
        } else if (typeName.equalsIgnoreCase("IEEE Conference Publications")
            || typeName.equalsIgnoreCase("IET Conference Publications")
            || typeName.equalsIgnoreCase("VDE Conference Publications")) {
          type = BibtexEntryType.getType("inproceedings");
          sourceField = "booktitle";
        } else if (typeName.equalsIgnoreCase("IEEE Standards")
            || typeName.equalsIgnoreCase("Standards")) {
          type = BibtexEntryType.getType("standard");
          sourceField = "number";
        } else if (typeName.equalsIgnoreCase("IEEE eLearning Library Courses")) {
          type = BibtexEntryType.getType("Electronic");
          sourceField = "note";
        } else if (typeName.equalsIgnoreCase("Wiley-IEEE Press eBook Chapters")
            || typeName.equalsIgnoreCase("MIT Press eBook Chapters")
            || typeName.equalsIgnoreCase("IEEE USA Books &amp; eBooks")) {
          type = BibtexEntryType.getType("inCollection");
          sourceField = "booktitle";
        }
      }

      if (type == null) {
        type = BibtexEntryType.getType("misc");
        sourceField = "note";
        System.err.println("Type detection failed. Use MISC instead.");
        unparseable++;
        System.err.println(text);
      }

      entry = new BibtexEntry(IdGenerator.next(), type);

      if (typeName.equalsIgnoreCase("IEEE Standards")) {
        entry.setField("organization", "IEEE");
      }

      if (typeName.equalsIgnoreCase("Wiley-IEEE Press eBook Chapters")) {
        entry.setField("publisher", "Wiley-IEEE Press");
      } else if (typeName.equalsIgnoreCase("MIT Press eBook Chapters")) {
        entry.setField("publisher", "MIT Press");
      } else if (typeName.equalsIgnoreCase("IEEE USA Books &amp; eBooks")) {
        entry.setField("publisher", "IEEE USA");
      }

      if (typeName.equalsIgnoreCase("IEEE Early Access Articles")) {
        entry.setField("note", "Early Access");
      }

      Set<String> fields = fieldPatterns.keySet();
      for (String field : fields) {
        Matcher fieldMatcher = Pattern.compile(fieldPatterns.get(field)).matcher(text);
        if (fieldMatcher.find()) {
          entry.setField(field, htmlConverter.format(fieldMatcher.group(1)));
          if (field.equals("title") && fieldMatcher.find()) {
            String sec_title = htmlConverter.format(fieldMatcher.group(1));
            if (entry.getType() == BibtexEntryType.getStandardType("standard")) {
              sec_title = sec_title.replaceAll("IEEE Std ", "");
            }
            entry.setField(sourceField, sec_title);
          }
          if (field.equals("pages") && fieldMatcher.groupCount() == 2) {
            entry.setField(field, fieldMatcher.group(1) + "-" + fieldMatcher.group(2));
          }
        }
      }

      Matcher authorMatcher = authorPattern.matcher(text);
      // System.out.println(text);
      StringBuilder authorNames = new StringBuilder("");
      int authorCount = 0;
      while (authorMatcher.find()) {
        if (authorCount >= 1) {
          authorNames.append(" and ");
        }
        authorNames.append(htmlConverter.format(authorMatcher.group(1)));
        // System.out.println(authorCount + ": " + authorMatcher.group(1));
        authorCount++;
      }
      entry.setField("author", authorNames.toString());
      if (entry.getField("author") == null
          || entry.getField("author").startsWith("a href")
          || entry
              .getField("author")
              .startsWith("Topic(s)")) { // Fix for some documents without authors
        entry.setField("author", "");
      }
      if (entry.getType() == BibtexEntryType.getStandardType("inproceedings")
          && entry.getField("author").equals("")) {
        entry.setType(BibtexEntryType.getStandardType("proceedings"));
      }

      if (includeAbstract) {
        index = text.indexOf("id=\"abstract");
        if (index >= 0) {
          endIndex = text.indexOf("</div>", index) + 6;

          text = text.substring(index, endIndex);
          Matcher absMatcher = absPattern.matcher(text);
          if (absMatcher.find()) {
            // Clean-up abstract
            String abstr = absMatcher.group(1);
            abstr = abstr.replaceAll("<span class='snippet'>([\\w]+)</span>", "$1");

            entry.setField("abstract", htmlConverter.format(abstr));
          }
        }
      }
    }

    if (entry == null) {
      return null;
    } else {
      return cleanup(entry);
    }
  }
Пример #3
0
  private BibtexEntry cleanup(BibtexEntry entry) {
    if (entry == null) {
      return null;
    }

    // clean up title
    String title = entry.getField("title");
    if (title != null) {
      // USe the alt-text and replace image links
      title = title.replaceAll("[ ]?img src=[^ ]+ alt=\"([^\"]+)\">[ ]?", "\\$$1\\$");
      // Try to sort out most of the /spl / conversions
      // Deal with this specific nested type first
      title = title.replaceAll("/sub /spl infin//", "\\$_\\\\infty\\$");
      title = title.replaceAll("/sup /spl infin//", "\\$\\^\\\\infty\\$");
      // Replace general expressions
      title = title.replaceAll("/[sS]pl ([^/]+)/", "\\$\\\\$1\\$");
      // Deal with subscripts and superscripts
      if (Globals.prefs.getBoolean(JabRefPreferences.USE_CONVERT_TO_EQUATION)) {
        title = title.replaceAll("/sup ([^/]+)/", "\\$\\^\\{$1\\}\\$");
        title = title.replaceAll("/sub ([^/]+)/", "\\$_\\{$1\\}\\$");
        title = title.replaceAll("\\(sup\\)([^(]+)\\(/sup\\)", "\\$\\^\\{$1\\}\\$");
        title = title.replaceAll("\\(sub\\)([^(]+)\\(/sub\\)", "\\_\\{$1\\}\\$");
      } else {
        title = title.replaceAll("/sup ([^/]+)/", "\\\\textsuperscript\\{$1\\}");
        title = title.replaceAll("/sub ([^/]+)/", "\\\\textsubscript\\{$1\\}");
        title = title.replaceAll("\\(sup\\)([^(]+)\\(/sup\\)", "\\\\textsuperscript\\{$1\\}");
        title = title.replaceAll("\\(sub\\)([^(]+)\\(/sub\\)", "\\\\textsubscript\\{$1\\}");
      }

      // Replace \infin with \infty
      title = title.replaceAll("\\\\infin", "\\\\infty");

      // Unit formatting
      if (Globals.prefs.getBoolean(JabRefPreferences.USE_UNIT_FORMATTER_ON_SEARCH)) {
        title = unitFormatter.format(title);
      }

      // Automatic case keeping
      if (Globals.prefs.getBoolean(JabRefPreferences.USE_CASE_KEEPER_ON_SEARCH)) {
        title = caseKeeper.format(title);
      }
      // Write back
      entry.setField("title", title);
    }

    // clean up author
    /*   	String author = (String)entry.getField("author");
       	if (author != null) {
        if (author.indexOf("a href=") >= 0) {  // Author parsing failed because it was empty
    	entry.setField("author","");  // Maybe not needed anymore due to another change
        } else {
        	author = author.replaceAll("\\s+", " ");
        	author = author.replaceAll("\\.", ". ");
        	author = author.replaceAll("([^;]+),([^;]+),([^;]+)","$1,$3,$2"); // Change order in case of Jr. etc
        	author = author.replaceAll("  ", " ");
        	author = author.replaceAll("\\. -", ".-");
                   author = author.replaceAll("; ", " and ");
        	author = author.replaceAll(" ,", ",");
        	author = author.replaceAll("  ", " ");
        	author = author.replaceAll("[ ,;]+$", "");
        	entry.setField("author", author);
        }
    }*/
    // clean up month
    String month = entry.getField("month");
    if (month != null && !month.isEmpty()) {
      month = month.replaceAll("\\.", "");
      month = month.toLowerCase();

      Pattern monthPattern = Pattern.compile("(\\d*+)\\s*([a-z]*+)-*(\\d*+)\\s*([a-z]*+)");
      Matcher mm = monthPattern.matcher(month);
      String date = month;
      if (mm.find()) {
        if (mm.group(3).isEmpty()) {
          if (!mm.group(2).isEmpty()) {
            date = "#" + mm.group(2).substring(0, 3) + "#";
            if (!mm.group(1).isEmpty()) {
              date += " " + mm.group(1) + ",";
            }
          } else {
            date = mm.group(1) + ",";
          }
        } else if (mm.group(2).isEmpty()) {
          if (!mm.group(4).isEmpty()) {
            date =
                "#" + mm.group(4).substring(0, 3) + "# " + mm.group(1) + "--" + mm.group(3) + ",";
          } else {
            date += ",";
          }
        } else {
          date =
              "#"
                  + mm.group(2).substring(0, 3)
                  + "# "
                  + mm.group(1)
                  + "--#"
                  + mm.group(4).substring(0, 3)
                  + "# "
                  + mm.group(3)
                  + ",";
        }
      }
      // date = date.trim();
      // if (!date.isEmpty()) {
      entry.setField("month", date);
      // }
    }

    // clean up pages
    String field = "pages";
    String pages = entry.getField(field);
    if (pages != null) {
      String[] pageNumbers = pages.split("-");
      if (pageNumbers.length == 2) {
        if (pageNumbers[0].equals(pageNumbers[1])) { // single page
          entry.setField(field, pageNumbers[0]);
        } else {
          entry.setField(field, pages.replaceAll("-", "--"));
        }
      }
    }

    // clean up publication field
    BibtexEntryType type = entry.getType();
    String sourceField = "";
    if (type.getName().equals("Article")) {
      sourceField = "journal";
      entry.clearField("booktitle");
    } else if (type.getName().equals("Inproceedings")) {
      sourceField = "booktitle";
    }
    String fullName = entry.getField(sourceField);
    if (fullName != null) {
      if (type.getName().equals("Article")) {
        int ind = fullName.indexOf(": Accepted for future publication");
        if (ind > 0) {
          fullName = fullName.substring(0, ind);
          entry.setField("year", "to be published");
          entry.clearField("month");
          entry.clearField("pages");
          entry.clearField("number");
        }
        String[] parts = fullName.split("[\\[\\]]"); // [see also...], [legacy...]
        fullName = parts[0];
        if (parts.length == 3) {
          fullName += parts[2];
        }
        if (entry.getField("note").equals("Early Access")) {
          entry.setField("year", "to be published");
          entry.clearField("month");
          entry.clearField("pages");
          entry.clearField("number");
        }
      } else {
        fullName =
            fullName
                .replace("Conference Proceedings", "Proceedings")
                .replace("Proceedings of", "Proceedings")
                .replace("Proceedings.", "Proceedings");
        fullName = fullName.replaceAll("International", "Int.");
        fullName = fullName.replaceAll("Symposium", "Symp.");
        fullName = fullName.replaceAll("Conference", "Conf.");
        fullName = fullName.replaceAll(" on", " ").replace("  ", " ");
      }

      Matcher m1 = publicationPattern.matcher(fullName);
      String abrvPattern = ".*[^,] '?\\d+\\)?";
      if (m1.find()) {
        String prefix = m1.group(2).trim();
        String postfix = m1.group(1).trim();
        String abrv = "";
        String[] parts = prefix.split("\\. ", 2);
        if (parts.length == 2) {
          if (parts[0].matches(abrvPattern)) {
            prefix = parts[1];
            abrv = parts[0];
          } else {
            prefix = parts[0];
            abrv = parts[1];
          }
        }
        if (!prefix.matches(abrvPattern)) {
          fullName = prefix + " " + postfix + " " + abrv;
          fullName = fullName.trim();
        } else {
          fullName = postfix + " " + prefix;
        }
      }
      if (type.getName().equals("Article")) {
        fullName = fullName.replace(" - ", "-"); // IEE Proceedings-

        fullName = fullName.trim();
        if (Globals.prefs.getBoolean(JabRefPreferences.USE_IEEE_ABRV)) {
          fullName = Globals.journalAbbrev.getMedlineAbbreviation(fullName).orElse(fullName);
        }
      }
      if (type.getName().equals("Inproceedings")) {
        Matcher m2 = proceedingPattern.matcher(fullName);
        if (m2.find()) {
          String prefix = m2.group(2);
          String postfix = m2.group(1).replaceAll("\\.$", "");
          if (!prefix.matches(abrvPattern)) {
            String abrv = "";

            String[] parts = postfix.split("\\. ", 2);
            if (parts.length == 2) {
              if (parts[0].matches(abrvPattern)) {
                postfix = parts[1];
                abrv = parts[0];
              } else {
                postfix = parts[0];
                abrv = parts[1];
              }
            }
            fullName = prefix.trim() + " " + postfix.trim() + " " + abrv;

          } else {
            fullName = postfix.trim() + " " + prefix.trim();
          }
        }

        fullName = fullName.trim();

        fullName =
            fullName.replaceAll("^[tT]he ", "").replaceAll("^\\d{4} ", "").replaceAll("[,.]$", "");
        String year = entry.getField("year");
        fullName = fullName.replaceAll(", " + year + "\\.?", "");

        if (!fullName.contains("Abstract")
            && !fullName.contains("Summaries")
            && !fullName.contains("Conference Record")) {
          fullName = "Proc. " + fullName;
        }
      }
      entry.setField(sourceField, fullName);
    }

    // clean up abstract
    String abstr = entry.getField("abstract");
    if (abstr != null) {
      // Try to sort out most of the /spl / conversions
      // Deal with this specific nested type first
      abstr = abstr.replaceAll("/sub /spl infin//", "\\$_\\\\infty\\$");
      abstr = abstr.replaceAll("/sup /spl infin//", "\\$\\^\\\\infty\\$");
      // Replace general expressions
      abstr = abstr.replaceAll("/[sS]pl ([^/]+)/", "\\$\\\\$1\\$");
      // Deal with subscripts and superscripts
      if (Globals.prefs.getBoolean(JabRefPreferences.USE_CONVERT_TO_EQUATION)) {
        abstr = abstr.replaceAll("/sup ([^/]+)/", "\\$\\^\\{$1\\}\\$");
        abstr = abstr.replaceAll("/sub ([^/]+)/", "\\$_\\{$1\\}\\$");
        abstr = abstr.replaceAll("\\(sup\\)([^(]+)\\(/sup\\)", "\\$\\^\\{$1\\}\\$");
        abstr = abstr.replaceAll("\\(sub\\)([^(]+)\\(/sub\\)", "\\_\\{$1\\}\\$");
      } else {
        abstr = abstr.replaceAll("/sup ([^/]+)/", "\\\\textsuperscript\\{$1\\}");
        abstr = abstr.replaceAll("/sub ([^/]+)/", "\\\\textsubscript\\{$1\\}");
        abstr = abstr.replaceAll("\\(sup\\)([^(]+)\\(/sup\\)", "\\\\textsuperscript\\{$1\\}");
        abstr = abstr.replaceAll("\\(sub\\)([^(]+)\\(/sub\\)", "\\\\textsubscript\\{$1\\}");
      }
      // Replace \infin with \infty
      abstr = abstr.replaceAll("\\\\infin", "\\\\infty");
      // Write back
      entry.setField("abstract", abstr);
    }

    // Clean up url
    String url = entry.getField("url");
    if (url != null) {
      entry.setField("url", "http://ieeexplore.ieee.org" + url.replace("tp=&", ""));
    }
    return entry;
  }
Пример #4
0
  public int compare(BibtexEntry e1, BibtexEntry e2) {
    Object f1, f2;

    if (isTypeHeader) {
      // Sort by type.
      f1 = e1.getType().getName();
      f2 = e2.getType().getName();
    } else {

      // If the field is author or editor, we rearrange names so they are
      // sorted according to last name.
      f1 = getField(e1);
      f2 = getField(e2);
    }

    /*
     * [ 1598777 ] Month sorting
     *
     * http://sourceforge.net/tracker/index.php?func=detail&aid=1598777&group_id=92314&atid=600306
     */
    int localMultiplier = multiplier;
    if (isMonthField) localMultiplier = -localMultiplier;

    // Catch all cases involving null:
    if (f1 == null) return f2 == null ? 0 : localMultiplier;

    if (f2 == null) return -localMultiplier;

    // Now we now that both f1 and f2 are != null
    if (isNameField) {
      f1 = AuthorList.fixAuthorForAlphabetization((String) f1);
      f2 = AuthorList.fixAuthorForAlphabetization((String) f2);
    } else if (isYearField) {
      /*
       * [ 1285977 ] Impossible to properly sort a numeric field
       *
       * http://sourceforge.net/tracker/index.php?func=detail&aid=1285977&group_id=92314&atid=600307
       */
      f1 = Util.toFourDigitYear((String) f1);
      f2 = Util.toFourDigitYear((String) f2);
    } else if (isMonthField) {
      /*
       * [ 1535044 ] Month sorting
       *
       * http://sourceforge.net/tracker/index.php?func=detail&aid=1535044&group_id=92314&atid=600306
       */
      f1 = new Integer(Util.getMonthNumber((String) f1));
      f2 = new Integer(Util.getMonthNumber((String) f2));
    }

    if (isNumeric) {
      Integer i1 = null, i2 = null;
      try {
        i1 = Integer.parseInt((String) f1);
      } catch (NumberFormatException ex) {
        // Parsing failed.
      }

      try {
        i2 = Integer.parseInt((String) f2);
      } catch (NumberFormatException ex) {
        // Parsing failed.
      }

      if (i2 != null && i1 != null) {
        // Ok, parsing was successful. Update f1 and f2:
        f1 = i1;
        f2 = i2;
      } else if (i1 != null) {
        // The first one was parseable, but not the second one.
        // This means we consider one < two
        f1 = i1;
        f2 = new Integer(i1.intValue() + 1);
      } else if (i2 != null) {
        // The second one was parseable, but not the first one.
        // This means we consider one > two
        f2 = i2;
        f1 = new Integer(i2.intValue() + 1);
      }
      // Else none of them were parseable, and we can fall back on comparing strings.
    }

    int result = 0;
    if ((f1 instanceof Integer) && (f2 instanceof Integer)) {
      result = (((Integer) f1).compareTo((Integer) f2));
    } else if (f2 instanceof Integer) {
      Integer f1AsInteger = new Integer(f1.toString());
      result = -((f1AsInteger).compareTo((Integer) f2));
    } else if (f1 instanceof Integer) {
      Integer f2AsInteger = new Integer(f2.toString());
      result = -(((Integer) f1).compareTo(f2AsInteger));
    } else {
      String ours = ((String) f1).toLowerCase(), theirs = ((String) f2).toLowerCase();
      result = collator.compare(ours, theirs); // ours.compareTo(theirs);
    }

    return result * localMultiplier;
  }