Beispiel #1
0
 /**
  * Reads one {@code name = value} field from the current parser position and stores it in the
  * given entry.
  *
  * <p>The field name is lower-cased before use. Empty content is ignored. If the entry already
  * holds a value for the field, the new content is only merged in for {@code author} and {@code
  * editor}, joined with {@code " and "}; for any other field the existing value is kept.
  *
  * @param entry the entry that receives the parsed field
  * @throws IOException propagated from the underlying token and content readers
  */
 private void parseField(BibtexEntry entry) throws IOException {
   final String fieldName = parseTextToken().toLowerCase();
   skipWhitespace();
   consume('=');
   String value = parseFieldContent(fieldName);

   // Fields configured to get braces around capitals have those braces
   // stripped again while reading.
   if (Globals.prefs.putBracesAroundCapitals(fieldName)) {
     value = Util.removeBracesAroundCapitals(value);
   }

   if (value.length() == 0) {
     return;
   }

   if (entry.getField(fieldName) == null) {
     entry.setField(fieldName, value);
     return;
   }

   // Repeated author/editor lines are not valid bibtex, but at least one
   // online database exports them, so they are strung together instead of
   // keeping just one of them.
   boolean isNameList = fieldName.equals("author") || fieldName.equals("editor");
   if (isNameList) {
     entry.setField(fieldName, entry.getField(fieldName) + " and " + value);
   }
 }
 /**
  * Builds a fixed sample entry of type INCOLLECTION with a freshly generated id.
  *
  * @return the entry with title, bibtexkey, year and author set
  */
 public BibtexEntry makeBibtexEntry() {
   final BibtexEntry sample =
       new BibtexEntry(IdGenerator.next(), BibtexEntryTypes.INCOLLECTION);

   sample.setField("title", "Marine finfish larviculture in Europe");
   sample.setField("bibtexkey", "shields01");
   sample.setField("year", "2001");
   sample.setField("author", "Kevin Shields");

   return sample;
 }
Beispiel #3
0
  /**
   * Parses one entry body (everything after the entry type) into a new {@link BibtexEntry}.
   *
   * <p>The key is read first. When key parsing reports a {@code NoLabelException}, the next
   * character decides how to recover: whitespace, <code>{</code> or {@code "} means the text read
   * so far was really a field name, so its content is parsed and stored; anything else means the
   * key contained {@code =}, so the fragment plus {@code =} is appended to the key and parsing of
   * the key continues. An empty key is stored as {@code null}. After the key, fields are parsed
   * until a closing <code>}</code> or {@code )} is seen.
   *
   * @param tp the type to give the new entry
   * @return the parsed entry
   * @throws IOException propagated from the underlying reader helpers
   */
  public BibtexEntry parseEntry(BibtexEntryType tp) throws IOException {
    BibtexEntry result = new BibtexEntry(Util.createNeutralId(), tp);
    skipWhitespace();
    consume('{', '(');
    int c = peek();
    if (c != '\n' && c != '\r') {
      skipWhitespace();
    }

    String key = null;
    boolean retry;
    do {
      retry = false;
      try {
        String part = parseKey();
        key = (key == null) ? part : key + part;
      } catch (NoLabelException ex) {
        // Thrown when the entry has no key at all, as in
        // "@article{ author = { ...", and also when a key contains '='.
        c = (char) peek();
        boolean fieldContentFollows = Character.isWhitespace(c) || c == '{' || c == '\"';
        if (fieldContentFollows) {
          String fieldName = ex.getMessage().trim().toLowerCase();
          String cont = parseFieldContent(fieldName);
          result.setField(fieldName, cont);
        } else {
          String fragment = ex.getMessage() + "=";
          key = (key == null) ? fragment : key + fragment;
          retry = true;
        }
      }
    } while (retry);

    if ("".equals(key)) {
      key = null;
    }

    result.setField(BibtexFields.KEY_FIELD, key);
    skipWhitespace();

    // Read fields until the closing delimiter; a delimiter may also directly
    // follow a trailing comma.
    for (c = peek(); c != '}' && c != ')'; c = peek()) {
      if (c == ',') {
        consume(',');
      }

      skipWhitespace();

      c = peek();
      if (c == '}' || c == ')') {
        break;
      }
      parseField(result);
    }

    consume('}', ')');
    return result;
  }
Beispiel #4
0
  /**
   * Removes matches of searchString in the entry's field. This is only possible if the search
   * expression is not a regExp.
   *
   * <p>After each removal, the run of separator characters surrounding the removed text is
   * collapsed: it becomes a single separator when text remains on both sides, and is dropped
   * entirely when the removed text was at the start or the end of the field. The field is set to
   * {@code null} when nothing but whitespace is left. Nothing happens when the field is unset or
   * the search expression is empty.
   *
   * @param entry the entry whose search field is modified in place
   */
  private void removeMatches(BibtexEntry entry) {
    String content = entry.getField(searchField);
    if (content == null) {
      return; // nothing to modify
    }
    String needle = caseSensitive ? searchExpression : searchExpression.toLowerCase();
    if (needle.isEmpty()) {
      // indexOf("") matches at position 0 forever, so the loop below would never end.
      return;
    }

    // Two buffers are edited in lock-step so that case-insensitive matching can
    // search the lower-cased text while the original casing is what gets stored.
    StringBuilder sbOrig = new StringBuilder(content);
    StringBuilder sbLower = new StringBuilder(content.toLowerCase());
    StringBuilder haystack = caseSensitive ? sbOrig : sbLower;
    final String separator = Globals.prefs.get(JabRefPreferences.GROUP_KEYWORD_SEPARATOR);

    int i;
    while ((i = haystack.indexOf(needle)) >= 0) {
      sbOrig.replace(i, i + needle.length(), "");
      sbLower.replace(i, i + needle.length(), "");

      // Widen [j, k) over all separator characters around the removal point.
      int j = i;
      int k = i;
      while (j > 0 && separator.indexOf(haystack.charAt(j - 1)) >= 0) {
        --j;
      }
      while (k < haystack.length() && separator.indexOf(haystack.charAt(k)) >= 0) {
        ++k;
      }

      // Decide the replacement once, before either buffer changes, so both
      // buffers receive the same edit. A separator is only kept between two
      // remaining pieces of text, never at the very start or end.
      String replacement = (j > 0 && k < haystack.length()) ? separator : "";
      sbOrig.replace(j, k, replacement);
      sbLower.replace(j, k, replacement);
    }

    String result = sbOrig.toString().trim();
    entry.setField(searchField, !result.isEmpty() ? result : null);
  }
Beispiel #5
0
  /**
   * Adds the given entries to this group by appending the search expression to each entry's
   * search field, preceded by the configured keyword separator when the field already has content.
   * Entries that the group's search rule already matches are left untouched.
   *
   * @param entries the entries to add; may be {@code null} or empty
   * @return a compound edit holding one field change per modified entry, or {@code null} when
   *     adding is not supported, no entries were given, or no entry was modified
   */
  @Override
  public AbstractUndoableEdit add(BibtexEntry[] entries) {
    if (!supportsAdd()) {
      return null;
    }
    if (entries == null || entries.length == 0) {
      return null;
    }

    NamedCompound ce = new NamedCompound(Globals.lang("add entries to group"));
    boolean modified = false;
    for (BibtexEntry entry : entries) {
      if (getSearchRule().applyRule(SearchRule.NULL_QUERY, entry)) {
        continue; // already matched by this group
      }

      String previous = entry.getField(searchField);
      String separator = Globals.prefs.get(JabRefPreferences.GROUP_KEYWORD_SEPARATOR);
      String prefix = (previous == null) ? "" : previous + separator;
      String updated = prefix + searchExpression;
      entry.setField(searchField, updated);

      // Store undo information.
      ce.addEdit(new UndoableFieldChange(entry, searchField, previous, updated));
      modified = true;
    }

    if (!modified) {
      return null;
    }
    ce.end();
    return ce;
  }
Beispiel #6
0
  /**
   * Stores a new value for the "journal" field through an {@code EntryEditor} and checks that the
   * auto-completer registered for "journal" has indexed that value.
   *
   * <p>The JabRef instance is closed in a {@code finally} block so that it is also shut down when
   * the assertion (or any step before it) fails.
   */
  public void testEntryEditorForFieldAnotherAutoCompleter() {
    // construct an EntryEditor ...
    JabRef jabref = TestUtils.getInitializedJabRef();
    try {
      BibtexEntry bibtexEntry = new BibtexEntry();
      bibtexEntry.setField("journal", "Testtext");
      FieldEditor journalField = new FieldTextArea("journal", "New Testtext");
      EntryEditor editor = new EntryEditor(jabref.jrf, jabref.jrf.basePanel(), bibtexEntry);
      // perform action ...
      editor.storeFieldAction.actionPerformed(new ActionEvent(journalField, 0, ""));
      // test content of stored words in autocompleter ...
      AbstractAutoCompleter autoCompleter = jabref.jrf.basePanel().getAutoCompleter("journal");
      assertTrue(autoCompleter.indexContainsWord("New Testtext"));
    } finally {
      TestUtils.closeJabRef();
    }
  }
Beispiel #7
0
 /**
  * Strips DOIs (with or without an http prefix) from the given field of an entry.
  *
  * <p>The cleaned value is trimmed; if nothing remains, the field is set to {@code null}. When the
  * value changed, the change is applied to the entry and recorded in {@code ce}. A field that is
  * not set on the entry is left alone.
  *
  * @param bes the entry to modify
  * @param fieldName the field to clean
  * @param ce the compound edit that receives the undo information
  */
 public static void removeDOIfromBibtexEntryField(
     BibtexEntry bes, String fieldName, NamedCompound ce) {
   String origValue = bes.getField(fieldName);
   if (origValue == null) {
     return; // field not set: nothing to strip, and replaceAll on null would throw
   }

   String value =
       origValue.replaceAll(REGEXP_DOI_WITH_HTTP_PREFIX, "").replaceAll(REGEXP_PLAINDOI, "").trim();
   if (value.isEmpty()) {
     value = null;
   }

   if (!origValue.equals(value)) {
     ce.addEdit(new UndoableFieldChange(bes, fieldName, origValue, value));
     bes.setField(fieldName, value);
   }
 }
  /**
   * Inserts an entry that links {@code FILE_IN_DATABASE}, crawls {@code EXISTING_FOLDER} for
   * unlinked files, selects every node of the resulting tree and checks that the collected files
   * contain {@code FILE_NOT_IN_DATABASE} but not {@code FILE_IN_DATABASE}.
   */
  @Test
  @Ignore
  public void testInsertTestData() throws Exception {

    entry1 = new BibtexEntry();
    ExternalFileType pdfType = JabRefPreferences.getInstance().getExternalFileTypeByExt("PDF");
    FileListEntry linkedFile =
        new FileListEntry("", ImportDataTest.FILE_IN_DATABASE.getAbsolutePath(), pdfType);

    FileListTableModel fileList = new FileListTableModel();
    fileList.addEntry(0, linkedFile);

    entry1.setField("file", fileList.getStringRepresentation());

    database.insertEntry(entry1);

    // #################### SETUP END ##################### //

    CheckableTreeNode root =
        new UnlinkedFilesCrawler(database)
            .searchDirectory(ImportDataTest.EXISTING_FOLDER, new EntryFromPDFCreator());

    Assert.assertNotNull(root);

    // Select all nodes manually.
    @SuppressWarnings("unchecked")
    Enumeration<CheckableTreeNode> nodes = root.breadthFirstEnumeration();
    while (nodes.hasMoreElements()) {
      nodes.nextElement().setSelected(true);
    }

    List<File> found = getFileListFromNode(root);

    Assert.assertFalse(found.isEmpty());
    Assert.assertTrue(found.contains(ImportDataTest.FILE_NOT_IN_DATABASE));
    Assert.assertFalse(found.contains(ImportDataTest.FILE_IN_DATABASE));
  }
Beispiel #9
0
  /**
   * Parse the entries in the source, and return a List of BibtexEntry objects.
   *
   * <p>The input is read line by line and flattened into one string using three markers: {@code
   * "::"} in front of every line starting with {@code "PMID- "} (start of a record), {@code " ## "}
   * in front of every line whose first six characters contain a space after position 0 (start of a
   * tagged field), and {@code "EOLEOL"} in front of every other line (continuation of the previous
   * field). Lines shorter than three characters are dropped. The flattened text is then split back
   * into records and fields, and the recognised six-character tags are mapped to bibtex fields.
   *
   * <p>A record is only returned if it yielded an author or a title.
   *
   * @param stream the source to read
   * @param status not used by this method
   * @return the parsed entries, possibly empty
   * @throws IOException if reading from the stream fails
   */
  @Override
  public List<BibtexEntry> importEntries(InputStream stream, OutputPrinter status)
      throws IOException {
    List<BibtexEntry> bibitems = new ArrayList<BibtexEntry>();
    StringBuilder sb = new StringBuilder();

    // NOTE(review): the reader (and with it the stream) is not closed here —
    // presumably the caller owns the stream; confirm.
    BufferedReader in = new BufferedReader(ImportFormatReader.getReaderDefaultEncoding(stream));

    String str;

    while ((str = in.readLine()) != null) {
      if (str.length() < 3) {
        continue;
      }

      // beginning of a new item
      if (str.startsWith("PMID- ")) {
        sb.append("::").append(str);
      } else {
        // Lines may be shorter than the six-character tag column, so clamp
        // instead of letting substring throw.
        String beg = str.substring(0, Math.min(6, str.length()));

        if (beg.indexOf(' ') > 0) {
          sb.append(" ## "); // mark the beginning of each field
          sb.append(str);
        } else {
          sb.append("EOLEOL"); // mark the end of each line
          sb.append(str.trim());
        }
      }
    }

    String[] entries = sb.toString().split("::");

    // Reused for every record; cleared at the start of each iteration.
    HashMap<String, String> hm = new HashMap<String, String>();

    for (String entry : entries) {
      String[] fields = entry.split(" ## ");

      if (fields.length == 0) {
        fields = entry.split("\n");
      }

      String entryType = "";
      String pages = "";
      String shortauthor = "";
      String fullauthor = "";
      hm.clear();

      for (String field : fields) {
        // A field needs the full six-character tag to be recognised; anything
        // shorter is empty or malformed and is skipped.
        if (field.length() < 6) {
          continue;
        }

        String beg = field.substring(0, 6);
        String value = field.substring(6).trim();

        if (beg.equals("PT  - ")) {
          entryType = "article"; // make all of them PT?
        } else if (beg.equals("TY  - ")) {
          if ("CONF".equals(value)) {
            entryType = "inproceedings";
          }
        } else if (beg.equals("JO  - ")) {
          hm.put("booktitle", value);
        } else if (beg.equals("FAU - ")) {
          String tmpauthor = value.replace("EOLEOL", " and ");

          // if there is already someone there then append with "and"
          if (!"".equals(fullauthor)) {
            fullauthor = fullauthor + " and " + tmpauthor;
          } else {
            fullauthor = tmpauthor;
          }
        } else if (beg.equals("AU  - ")) {
          String tmpauthor = value.replace("EOLEOL", " and ").replace(" ", ", ");

          // if there is already someone there then append with "and"
          if (!"".equals(shortauthor)) {
            shortauthor = shortauthor + " and " + tmpauthor;
          } else {
            shortauthor = tmpauthor;
          }
        } else if (beg.equals("TI  - ")) {
          hm.put("title", value.replace("EOLEOL", " "));
        } else if (beg.equals("TA  - ")) {
          hm.put("journal", value.replace("EOLEOL", " "));
        } else if (beg.equals("AB  - ")) {
          hm.put("abstract", value.replace("EOLEOL", " "));
        } else if (beg.equals("PG  - ")) {
          pages = value.replace("-", "--");
        } else if (beg.equals("IP  - ")) {
          hm.put("number", value);
        } else if (beg.equals("DP  - ")) {
          String[] parts = value.split(" "); // sometimes this is just year, sometimes full date
          hm.put("year", parts[0]);
        } else if (beg.equals("VI  - ")) {
          hm.put("volume", value);
        } else if (beg.equals("AID - ")) {
          // Expected shape is "<id> [doi]"; a value without a second token is ignored.
          String[] parts = value.split(" ");
          if (parts.length > 1 && "[doi]".equals(parts[1])) {
            hm.put("doi", parts[0]);
            hm.put("url", "http://dx.doi.org/" + parts[0]);
          }
        }
      }

      if (!"".equals(pages)) {
        hm.put("pages", pages);
      }
      // Full author names win over the abbreviated ones when both are present.
      if (!"".equals(fullauthor)) {
        hm.put("author", fullauthor);
      } else if (!"".equals(shortauthor)) {
        hm.put("author", shortauthor);
      }

      // id assumes an existing database so don't create one here
      BibtexEntry b =
          new BibtexEntry(BibtexFields.DEFAULT_BIBTEXENTRY_ID, Globals.getEntryType(entryType));
      b.setField(hm);

      // the first bibitem is always empty, presumably as a result of trying
      // to parse header information. So add only if we have at least author or
      // title fields.
      if ((hm.get("author") != null) || (hm.get("title") != null)) {
        bibitems.add(b);
      }
    }

    return bibitems;
  }
Beispiel #10
0
  /**
   * Handles the end of an XML element.
   *
   * <p>On {@code PubmedArticle} the values collected so far are turned into an "article" entry,
   * which is added to {@code bibitems}, and all per-article state is reset. On {@code AuthorList}
   * the collected authors are joined with {@code " and "}; on {@code Author} one author name is
   * assembled as {@code lastname[, suffix][, forename]} and added to {@code authors}; on {@code
   * MeshHeading} the current topic is added to {@code descriptors}. For every other recognised
   * element the matching "inside element" flag is cleared.
   *
   * @param uri not used
   * @param localName the element name that is dispatched on
   * @param qName not used
   */
  public void endElement(String uri, String localName, String qName) {
    if (localName.equals("PubmedArticle")) {
      // No explicit year: fall back to the leading (up to four) characters of
      // the multi-year MedlineDate. The end index is clamped so that a shorter
      // MedlineDate does not make substring throw.
      if (year.equals("") && !MedlineDate.equals("")) {
        year = MedlineDate.substring(0, Math.min(4, MedlineDate.length()));
      }

      // Build a string from the collected keywords:
      StringBuilder sb = new StringBuilder();
      for (Iterator<String> iterator = descriptors.iterator(); iterator.hasNext(); ) {
        sb.append(iterator.next());
        if (iterator.hasNext()) {
          sb.append(KEYWORD_SEPARATOR);
        }
      }
      keywords = sb.toString();

      // id assumes an existing database so don't create one here
      BibtexEntry b = new BibtexEntry(Util.createNeutralId(), Globals.getEntryType("article"));
      if (!author.equals("")) {
        b.setField(
            "author", htmlConverter.formatUnicode(ImportFormatReader.expandAuthorInitials(author)));
        author = "";
      }
      if (!title.equals("")) {
        b.setField("title", htmlConverter.formatUnicode(title));
      }
      if (!journal.equals("")) {
        b.setField("journal", journal);
      }
      if (!year.equals("")) {
        b.setField("year", year);
      }
      // Page ranges go through fixPageRange before being stored.
      if (!page.equals("")) {
        b.setField("pages", fixPageRange(page));
      }
      if (!volume.equals("")) {
        b.setField("volume", volume);
      }
      if (!language.equals("")) {
        b.setField("language", language);
      }
      if (!pst.equals("")) {
        b.setField("medline-pst", pst);
      }
      if (!abstractText.equals("")) {
        // Prefix every '%' with a backslash.
        b.setField("abstract", abstractText.replaceAll("%", "\\\\%"));
      }
      if (!keywords.equals("")) {
        b.setField("keywords", keywords);
      }
      if (!month.equals("")) {
        b.setField("month", month);
      }
      if (!number.equals("")) {
        b.setField("number", number);
      }

      if (!doi.equals("")) {
        b.setField("doi", doi);
        b.setField("url", "http://dx.doi.org/" + doi);
      }
      if (!pii.equals("")) {
        b.setField("pii", pii);
      }
      if (!affiliation.equals("")) {
        // Prefix every '#' with a backslash.
        b.setField("institution", affiliation.replaceAll("#", "\\\\#"));
      }

      // Older references do not have doi entries, but every
      // medline entry has a unique pubmed ID (aka primary ID).
      // Add a bibtex field for the pubmed ID for future use.
      if (!pubmedid.equals("")) {
        b.setField("pmid", pubmedid);
      }

      bibitems.add(b);

      // Reset all per-article state for the next PubmedArticle.
      abstractText = "";
      author = "";
      title = "";
      journal = "";
      keywords = "";
      doi = "";
      pii = "";
      year = "";
      forename = "";
      lastName = "";
      suffix = "";
      affiliation = "";
      pubmedid = "";
      majorTopic = "";
      minorTopics = "";
      month = "";
      volume = "";
      language = "";
      pst = "";
      lastname = "";
      initials = "";
      number = "";
      page = "";
      medlineID = "";
      url = "";
      MedlineDate = "";
      descriptors.clear();
    } else if (localName.equals("ArticleTitle")) {
      inTitle = false;
    } else if (localName.equals("PubDate")) {
      inPubDate = false;
    } else if (localName.equals("Year")) {
      inYear = false;
    } else if (localName.equals("PMID")) {
      inPubMedID = false;
    } else if (localName.equals("MedlineDate")) {
      inMedlineDate = false;
    } else if (localName.equals("MedlineTA")) {
      inJournal = false; // journal name
    } else if (localName.equals("Month")) {
      inMonth = false;
    } else if (localName.equals("Volume")) {
      inVolume = false;
    } else if (localName.equals("Language")) {
      inLanguage = false;
    } else if (localName.equals("PublicationStatus")) {
      inPst = false;
    } else if (localName.equals("AuthorList")) {
      author = join(authors.toArray(), " and ");
      inAuthorList = false;
    } else if (localName.equals("Author")) {
      // forename sometimes has initials with " " in middle: is pattern [A-Z] [A-Z]
      // when above is the case replace it with initials
      if (forename.length() == 3 && forename.charAt(1) == ' ') {
        forename = initials;
      }

      // Put together name with last name first, and enter suffix in between if present.
      // A last name containing a space is wrapped in braces.
      if (lastname.indexOf(" ") > 0) {
        author = "{" + lastname + "}";
      } else {
        author = lastname;
      }

      if (suffix.length() > 0) {
        author = author + ", " + suffix;
      }
      if (forename.length() > 0) {
        author = author + ", " + forename;
      }

      authors.add(author);
      inAuthor = false;
      forename = "";
      initials = "";
      lastname = "";
      suffix = "";
    } else if (localName.equals("DescriptorName")) {
      inDescriptorName = false;
    } else if (localName.equals("QualifierName")) {
      inQualifierName = false;
    } else if (localName.equals("MeshHeading")) {
      inMeshHeader = false;
      if (minorTopics.equals("")) {
        descriptors.add(majorTopic);
      } else {
        descriptors.add(majorTopic + ", " + minorTopics);
      }
    } else if (localName.equals("LastName")) {
      inLastName = false;
    } else if (localName.equals("Suffix")) {
      inSuffix = false;
    } else if (localName.equals("ForeName") || localName.equals("FirstName")) {
      inForename = false;
    } else if (localName.equals("Issue")) {
      inIssue = false;
    } else if (localName.equals("MedlinePgn")) {
      inMedlinePgn = false; // pagenumber
    } else if (localName.equals("URL")) {
      inUrl = false;
    } else if (localName.equals("Initials")) {
      inInitials = false;
    } else if (localName.equals("AbstractText")) {
      inAbstractText = false;
    } else if (localName.equals("Affiliation")) {
      inAffiliation = false;
    } else if (localName.equals("ArticleId")) {
      // Only one of the two flags is cleared per ArticleId; doi takes precedence.
      if (inDoi) {
        inDoi = false;
      } else if (inPii) {
        inPii = false;
      }
    }
  }
Beispiel #11
0
  /**
   * Extracts the next {@code <div class="detail...>} block from the page text, starting the search
   * at the {@code piv} field, and converts it into an entry. {@code piv} is advanced past the
   * block.
   *
   * <p>The entry type is derived from the type label found in the block; when no known label is
   * found the entry becomes "misc", {@code unparseable} is incremented and the block is written to
   * {@code System.err}. Fields are filled from {@code fieldPatterns}, authors from {@code
   * authorPattern}, and (if {@code includeAbstract} is set) the abstract from {@code absPattern}.
   * The finished entry is passed through {@code cleanup}.
   *
   * @param allText the complete page text
   * @param startIndex not read by this method; the search position comes from {@code piv}
   * @return the cleaned-up entry, or {@code null} if no further block was found
   */
  private BibtexEntry parseNextEntry(String allText, int startIndex) {
    int index = allText.indexOf("<div class=\"detail", piv);
    int endIndex = allText.indexOf("</div>", index);
    if (index < 0 || endIndex <= 0) {
      return null;
    }

    endIndex += 6; // include the closing "</div>" itself
    piv = endIndex;
    String text = allText.substring(index, endIndex);

    // Map the publication type label to an entry type name and to the field
    // that receives the publication title.
    String typeName = "";
    String typeKey = null;
    String sourceField = null;
    Matcher typeMatcher = typePattern.matcher(text);
    if (typeMatcher.find()) {
      typeName = typeMatcher.group(1);
      if (typeName.equalsIgnoreCase("IEEE Journals &amp; Magazines")
          || typeName.equalsIgnoreCase("IEEE Early Access Articles")
          || typeName.equalsIgnoreCase("IET Journals &amp; Magazines")
          || typeName.equalsIgnoreCase("AIP Journals &amp; Magazines")
          || typeName.equalsIgnoreCase("AVS Journals &amp; Magazines")
          || typeName.equalsIgnoreCase("IBM Journals &amp; Magazines")
          || typeName.equalsIgnoreCase("TUP Journals &amp; Magazines")
          || typeName.equalsIgnoreCase("BIAI Journals &amp; Magazines")) {
        typeKey = "article";
        sourceField = "journal";
      } else if (typeName.equalsIgnoreCase("IEEE Conference Publications")
          || typeName.equalsIgnoreCase("IET Conference Publications")
          || typeName.equalsIgnoreCase("VDE Conference Publications")) {
        typeKey = "inproceedings";
        sourceField = "booktitle";
      } else if (typeName.equalsIgnoreCase("IEEE Standards")
          || typeName.equalsIgnoreCase("Standards")) {
        typeKey = "standard";
        sourceField = "number";
      } else if (typeName.equalsIgnoreCase("IEEE eLearning Library Courses")) {
        typeKey = "Electronic";
        sourceField = "note";
      } else if (typeName.equalsIgnoreCase("Wiley-IEEE Press eBook Chapters")
          || typeName.equalsIgnoreCase("MIT Press eBook Chapters")
          || typeName.equalsIgnoreCase("IEEE USA Books &amp; eBooks")) {
        typeKey = "inCollection";
        sourceField = "booktitle";
      }
    }

    BibtexEntryType type = (typeKey == null) ? null : BibtexEntryType.getType(typeKey);
    if (type == null) {
      // Unknown label, or the type lookup returned nothing: fall back to misc.
      type = BibtexEntryType.getType("misc");
      sourceField = "note";
      System.err.println("Type detection failed. Use MISC instead.");
      unparseable++;
      System.err.println(text);
    }

    BibtexEntry entry = new BibtexEntry(IdGenerator.next(), type);

    if (typeName.equalsIgnoreCase("IEEE Standards")) {
      entry.setField("organization", "IEEE");
    }

    if (typeName.equalsIgnoreCase("Wiley-IEEE Press eBook Chapters")) {
      entry.setField("publisher", "Wiley-IEEE Press");
    } else if (typeName.equalsIgnoreCase("MIT Press eBook Chapters")) {
      entry.setField("publisher", "MIT Press");
    } else if (typeName.equalsIgnoreCase("IEEE USA Books &amp; eBooks")) {
      entry.setField("publisher", "IEEE USA");
    }

    if (typeName.equalsIgnoreCase("IEEE Early Access Articles")) {
      entry.setField("note", "Early Access");
    }

    for (String field : fieldPatterns.keySet()) {
      Matcher fieldMatcher = Pattern.compile(fieldPatterns.get(field)).matcher(text);
      if (!fieldMatcher.find()) {
        continue;
      }
      entry.setField(field, htmlConverter.format(fieldMatcher.group(1)));

      // A second match of the title pattern is stored in the source field
      // chosen above.
      if (field.equals("title") && fieldMatcher.find()) {
        String secondTitle = htmlConverter.format(fieldMatcher.group(1));
        if (entry.getType() == BibtexEntryType.getStandardType("standard")) {
          secondTitle = secondTitle.replaceAll("IEEE Std ", "");
        }
        entry.setField(sourceField, secondTitle);
      }
      if (field.equals("pages") && fieldMatcher.groupCount() == 2) {
        entry.setField(field, fieldMatcher.group(1) + "-" + fieldMatcher.group(2));
      }
    }

    // Join all author matches with " and ".
    Matcher authorMatcher = authorPattern.matcher(text);
    StringBuilder authorNames = new StringBuilder("");
    boolean firstAuthor = true;
    while (authorMatcher.find()) {
      if (!firstAuthor) {
        authorNames.append(" and ");
      }
      authorNames.append(htmlConverter.format(authorMatcher.group(1)));
      firstAuthor = false;
    }
    entry.setField("author", authorNames.toString());

    // Fix for some documents without authors
    String parsedAuthor = entry.getField("author");
    boolean authorUnusable =
        parsedAuthor == null
            || parsedAuthor.startsWith("a href")
            || parsedAuthor.startsWith("Topic(s)");
    if (authorUnusable) {
      entry.setField("author", "");
    }

    // An inproceedings entry without authors is turned into proceedings.
    if (entry.getType() == BibtexEntryType.getStandardType("inproceedings")
        && entry.getField("author").equals("")) {
      entry.setType(BibtexEntryType.getStandardType("proceedings"));
    }

    if (includeAbstract) {
      int abstractStart = text.indexOf("id=\"abstract");
      if (abstractStart >= 0) {
        int abstractEnd = text.indexOf("</div>", abstractStart) + 6;
        String abstractBlock = text.substring(abstractStart, abstractEnd);
        Matcher absMatcher = absPattern.matcher(abstractBlock);
        if (absMatcher.find()) {
          // Clean-up abstract
          String abstr =
              absMatcher.group(1).replaceAll("<span class='snippet'>([\\w]+)</span>", "$1");
          entry.setField("abstract", htmlConverter.format(abstr));
        }
      }
    }

    return cleanup(entry);
  }
Beispiel #12
0
  private BibtexEntry cleanup(BibtexEntry entry) {
    if (entry == null) {
      return null;
    }

    // clean up title
    String title = entry.getField("title");
    if (title != null) {
      // USe the alt-text and replace image links
      title = title.replaceAll("[ ]?img src=[^ ]+ alt=\"([^\"]+)\">[ ]?", "\\$$1\\$");
      // Try to sort out most of the /spl / conversions
      // Deal with this specific nested type first
      title = title.replaceAll("/sub /spl infin//", "\\$_\\\\infty\\$");
      title = title.replaceAll("/sup /spl infin//", "\\$\\^\\\\infty\\$");
      // Replace general expressions
      title = title.replaceAll("/[sS]pl ([^/]+)/", "\\$\\\\$1\\$");
      // Deal with subscripts and superscripts
      if (Globals.prefs.getBoolean(JabRefPreferences.USE_CONVERT_TO_EQUATION)) {
        title = title.replaceAll("/sup ([^/]+)/", "\\$\\^\\{$1\\}\\$");
        title = title.replaceAll("/sub ([^/]+)/", "\\$_\\{$1\\}\\$");
        title = title.replaceAll("\\(sup\\)([^(]+)\\(/sup\\)", "\\$\\^\\{$1\\}\\$");
        title = title.replaceAll("\\(sub\\)([^(]+)\\(/sub\\)", "\\_\\{$1\\}\\$");
      } else {
        title = title.replaceAll("/sup ([^/]+)/", "\\\\textsuperscript\\{$1\\}");
        title = title.replaceAll("/sub ([^/]+)/", "\\\\textsubscript\\{$1\\}");
        title = title.replaceAll("\\(sup\\)([^(]+)\\(/sup\\)", "\\\\textsuperscript\\{$1\\}");
        title = title.replaceAll("\\(sub\\)([^(]+)\\(/sub\\)", "\\\\textsubscript\\{$1\\}");
      }

      // Replace \infin with \infty
      title = title.replaceAll("\\\\infin", "\\\\infty");

      // Unit formatting
      if (Globals.prefs.getBoolean(JabRefPreferences.USE_UNIT_FORMATTER_ON_SEARCH)) {
        title = unitFormatter.format(title);
      }

      // Automatic case keeping
      if (Globals.prefs.getBoolean(JabRefPreferences.USE_CASE_KEEPER_ON_SEARCH)) {
        title = caseKeeper.format(title);
      }
      // Write back
      entry.setField("title", title);
    }

    // clean up author
    /*   	String author = (String)entry.getField("author");
       	if (author != null) {
        if (author.indexOf("a href=") >= 0) {  // Author parsing failed because it was empty
    	entry.setField("author","");  // Maybe not needed anymore due to another change
        } else {
        	author = author.replaceAll("\\s+", " ");
        	author = author.replaceAll("\\.", ". ");
        	author = author.replaceAll("([^;]+),([^;]+),([^;]+)","$1,$3,$2"); // Change order in case of Jr. etc
        	author = author.replaceAll("  ", " ");
        	author = author.replaceAll("\\. -", ".-");
                   author = author.replaceAll("; ", " and ");
        	author = author.replaceAll(" ,", ",");
        	author = author.replaceAll("  ", " ");
        	author = author.replaceAll("[ ,;]+$", "");
        	entry.setField("author", author);
        }
    }*/
    // clean up month
    String month = entry.getField("month");
    if (month != null && !month.isEmpty()) {
      month = month.replaceAll("\\.", "");
      month = month.toLowerCase();

      Pattern monthPattern = Pattern.compile("(\\d*+)\\s*([a-z]*+)-*(\\d*+)\\s*([a-z]*+)");
      Matcher mm = monthPattern.matcher(month);
      String date = month;
      if (mm.find()) {
        if (mm.group(3).isEmpty()) {
          if (!mm.group(2).isEmpty()) {
            date = "#" + mm.group(2).substring(0, 3) + "#";
            if (!mm.group(1).isEmpty()) {
              date += " " + mm.group(1) + ",";
            }
          } else {
            date = mm.group(1) + ",";
          }
        } else if (mm.group(2).isEmpty()) {
          if (!mm.group(4).isEmpty()) {
            date =
                "#" + mm.group(4).substring(0, 3) + "# " + mm.group(1) + "--" + mm.group(3) + ",";
          } else {
            date += ",";
          }
        } else {
          date =
              "#"
                  + mm.group(2).substring(0, 3)
                  + "# "
                  + mm.group(1)
                  + "--#"
                  + mm.group(4).substring(0, 3)
                  + "# "
                  + mm.group(3)
                  + ",";
        }
      }
      // date = date.trim();
      // if (!date.isEmpty()) {
      entry.setField("month", date);
      // }
    }

    // clean up pages
    String field = "pages";
    String pages = entry.getField(field);
    if (pages != null) {
      String[] pageNumbers = pages.split("-");
      if (pageNumbers.length == 2) {
        if (pageNumbers[0].equals(pageNumbers[1])) { // single page
          entry.setField(field, pageNumbers[0]);
        } else {
          entry.setField(field, pages.replaceAll("-", "--"));
        }
      }
    }

    // clean up publication field
    BibtexEntryType type = entry.getType();
    String sourceField = "";
    if (type.getName().equals("Article")) {
      sourceField = "journal";
      entry.clearField("booktitle");
    } else if (type.getName().equals("Inproceedings")) {
      sourceField = "booktitle";
    }
    String fullName = entry.getField(sourceField);
    if (fullName != null) {
      if (type.getName().equals("Article")) {
        int ind = fullName.indexOf(": Accepted for future publication");
        if (ind > 0) {
          fullName = fullName.substring(0, ind);
          entry.setField("year", "to be published");
          entry.clearField("month");
          entry.clearField("pages");
          entry.clearField("number");
        }
        String[] parts = fullName.split("[\\[\\]]"); // [see also...], [legacy...]
        fullName = parts[0];
        if (parts.length == 3) {
          fullName += parts[2];
        }
        if (entry.getField("note").equals("Early Access")) {
          entry.setField("year", "to be published");
          entry.clearField("month");
          entry.clearField("pages");
          entry.clearField("number");
        }
      } else {
        fullName =
            fullName
                .replace("Conference Proceedings", "Proceedings")
                .replace("Proceedings of", "Proceedings")
                .replace("Proceedings.", "Proceedings");
        fullName = fullName.replaceAll("International", "Int.");
        fullName = fullName.replaceAll("Symposium", "Symp.");
        fullName = fullName.replaceAll("Conference", "Conf.");
        fullName = fullName.replaceAll(" on", " ").replace("  ", " ");
      }

      Matcher m1 = publicationPattern.matcher(fullName);
      String abrvPattern = ".*[^,] '?\\d+\\)?";
      if (m1.find()) {
        String prefix = m1.group(2).trim();
        String postfix = m1.group(1).trim();
        String abrv = "";
        String[] parts = prefix.split("\\. ", 2);
        if (parts.length == 2) {
          if (parts[0].matches(abrvPattern)) {
            prefix = parts[1];
            abrv = parts[0];
          } else {
            prefix = parts[0];
            abrv = parts[1];
          }
        }
        if (!prefix.matches(abrvPattern)) {
          fullName = prefix + " " + postfix + " " + abrv;
          fullName = fullName.trim();
        } else {
          fullName = postfix + " " + prefix;
        }
      }
      if (type.getName().equals("Article")) {
        fullName = fullName.replace(" - ", "-"); // IEE Proceedings-

        fullName = fullName.trim();
        if (Globals.prefs.getBoolean(JabRefPreferences.USE_IEEE_ABRV)) {
          fullName = Globals.journalAbbrev.getMedlineAbbreviation(fullName).orElse(fullName);
        }
      }
      if (type.getName().equals("Inproceedings")) {
        Matcher m2 = proceedingPattern.matcher(fullName);
        if (m2.find()) {
          String prefix = m2.group(2);
          String postfix = m2.group(1).replaceAll("\\.$", "");
          if (!prefix.matches(abrvPattern)) {
            String abrv = "";

            String[] parts = postfix.split("\\. ", 2);
            if (parts.length == 2) {
              if (parts[0].matches(abrvPattern)) {
                postfix = parts[1];
                abrv = parts[0];
              } else {
                postfix = parts[0];
                abrv = parts[1];
              }
            }
            fullName = prefix.trim() + " " + postfix.trim() + " " + abrv;

          } else {
            fullName = postfix.trim() + " " + prefix.trim();
          }
        }

        fullName = fullName.trim();

        fullName =
            fullName.replaceAll("^[tT]he ", "").replaceAll("^\\d{4} ", "").replaceAll("[,.]$", "");
        String year = entry.getField("year");
        fullName = fullName.replaceAll(", " + year + "\\.?", "");

        if (!fullName.contains("Abstract")
            && !fullName.contains("Summaries")
            && !fullName.contains("Conference Record")) {
          fullName = "Proc. " + fullName;
        }
      }
      entry.setField(sourceField, fullName);
    }

    // clean up abstract
    String abstr = entry.getField("abstract");
    if (abstr != null) {
      // Try to sort out most of the /spl / conversions
      // Deal with this specific nested type first
      abstr = abstr.replaceAll("/sub /spl infin//", "\\$_\\\\infty\\$");
      abstr = abstr.replaceAll("/sup /spl infin//", "\\$\\^\\\\infty\\$");
      // Replace general expressions
      abstr = abstr.replaceAll("/[sS]pl ([^/]+)/", "\\$\\\\$1\\$");
      // Deal with subscripts and superscripts
      if (Globals.prefs.getBoolean(JabRefPreferences.USE_CONVERT_TO_EQUATION)) {
        abstr = abstr.replaceAll("/sup ([^/]+)/", "\\$\\^\\{$1\\}\\$");
        abstr = abstr.replaceAll("/sub ([^/]+)/", "\\$_\\{$1\\}\\$");
        abstr = abstr.replaceAll("\\(sup\\)([^(]+)\\(/sup\\)", "\\$\\^\\{$1\\}\\$");
        abstr = abstr.replaceAll("\\(sub\\)([^(]+)\\(/sub\\)", "\\_\\{$1\\}\\$");
      } else {
        abstr = abstr.replaceAll("/sup ([^/]+)/", "\\\\textsuperscript\\{$1\\}");
        abstr = abstr.replaceAll("/sub ([^/]+)/", "\\\\textsubscript\\{$1\\}");
        abstr = abstr.replaceAll("\\(sup\\)([^(]+)\\(/sup\\)", "\\\\textsuperscript\\{$1\\}");
        abstr = abstr.replaceAll("\\(sub\\)([^(]+)\\(/sub\\)", "\\\\textsubscript\\{$1\\}");
      }
      // Replace \infin with \infty
      abstr = abstr.replaceAll("\\\\infin", "\\\\infty");
      // Write back
      entry.setField("abstract", abstr);
    }

    // Clean up url
    String url = entry.getField("url");
    if (url != null) {
      entry.setField("url", "http://ieeexplore.ieee.org" + url.replace("tp=&", ""));
    }
    return entry;
  }
Beispiel #13
0
  /**
   * Parse the entries in the source, and return a List of BibtexEntry objects.
   *
   * <p>The input is read line by line. A blank line (or end of input) terminates the current
   * record. Inside a record, every line matched by {@code FIELD_PATTERN} starts a field (group 1
   * is the two-letter abbreviation, group 2 the long name); the field's contents are the
   * following lines that begin with four spaces, joined with single spaces. Each record must
   * start with the {@code DN} ("Database Name") field.
   *
   * <p>Known abbreviations are mapped to BibTeX field names; the {@code SO} (source) field is
   * additionally mined for volume, number, pages and date. Fields with an unknown abbreviation
   * are appended to the record via {@code addNote}, and {@code RE} fields are discarded.
   *
   * <p>The publication type is tracked per record: a record without a {@code PT} field is
   * annotated as such and imported as an article, regardless of the type of earlier records.
   *
   * @param stream the source to read from
   * @param status not used by this method
   * @return the entries parsed, in input order; empty if the input holds no record
   * @throws IOException if a field has no contents, if {@code DN} appears anywhere but at the
   *     start of a record, or if a record does not start with {@code DN}
   */
  public List<BibtexEntry> importEntries(InputStream stream, OutputPrinter status)
      throws IOException {
    List<BibtexEntry> bibitems = new ArrayList<BibtexEntry>();
    StringBuilder sb = new StringBuilder();
    HashMap<String, String> hm = new HashMap<String, String>();
    // Matches an optional trailing comma followed by trailing whitespace; loop-invariant, so it
    // is compiled once per call instead of once per SO field.
    Pattern trailingPattern = Pattern.compile(",?\\s*$");

    BufferedReader in = new BufferedReader(ImportFormatReader.getReaderDefaultEncoding(stream));

    String entryType = null;
    String str;
    boolean first = true;
    line = 1; // presumably advanced by readLine(); used for error messages only
    str = readLine(in);
    while (true) {
      if (str == null || str.length() == 0) { // end of record
        if (!hm.isEmpty()) { // have a record
          if (entryType == null) {
            addNote(hm, "Publication Type: [NOT SPECIFIED]");
            addNote(hm, "[PERHAPS NOT FULL FORMAT]");
            entryType = "article";
          }

          // post-process Journal article: the SO field is stored as booktitle, but for
          // articles it names the journal
          if (entryType.equals("article") && hm.get("booktitle") != null) {
            String booktitle = hm.get("booktitle");
            hm.remove("booktitle");
            hm.put("journal", booktitle);
          }

          BibtexEntry b =
              new BibtexEntry(
                  BibtexFields.DEFAULT_BIBTEXENTRY_ID, Globals.getEntryType(entryType));

          // create one here
          b.setField(hm);

          bibitems.add(b);
        }
        hm.clear(); // ready for next record
        // Forget the type as well; otherwise a following record that lacks a PT field would
        // silently inherit this record's type instead of being flagged as unspecified.
        entryType = null;
        first = true;
        if (str == null) break; // end of file
        str = readLine(in);
        continue;
      }

      int fline = line; // save this before reading field contents
      Matcher fm = FIELD_PATTERN.matcher(str);
      if (fm.find()) {

        // save the field name (long and short)
        String fabbr = fm.group(1);
        String fname = fm.group(2);

        // read the contents of the field; on exit str holds the first line that is not part
        // of this field (or null at end of input), which the outer loop examines next
        sb.setLength(0); // clear the buffer
        while ((str = readLine(in)) != null) {
          if (!str.startsWith("    ")) { // field contents?
            break; // nope
          }
          if (sb.length() > 0) {
            sb.append(" ");
          }
          sb.append(str.substring(4)); // skip spaces
        }
        String fstr = sb.toString();
        if (fstr.length() == 0) {
          int line1 = line - 1;
          throw new IOException("illegal empty field at line " + line1);
        }
        // replace [Lt] with <
        fm = LT_PATTERN.matcher(fstr);
        if (fm.find()) fstr = fm.replaceAll("<");

        // check for start of new record
        if (fabbr.equals("DN") && fname.equalsIgnoreCase("Database Name")) {
          if (!first) {
            throw new IOException("format error at line " + fline + ": DN out of order");
          }
          first = false;
        } else if (first) {
          throw new IOException("format error at line " + fline + ": missing DN");
        }

        if (fabbr.equals("PT")) {
          // The field holds a "; "-separated list; the first entry that maps to a known
          // BibTeX type decides, anything else falls back to misc.
          entryType = null;
          String flow = fstr.toLowerCase();
          String[] types = flow.split("; ");
          for (String type : types) {
            if ((type.contains("article")) || (type.contains("journal article"))) {
              entryType = "article";
              break;
            } else if (type.equals("dissertation")) {
              entryType = "phdthesis";
              break;
            } else if (type.equals("conference")) {
              entryType = "inproceedings";
              break;
            } else if (type.equals("book monograph") && entryType == null) {
              entryType = "book";
              break;
            } else if (type.equals("report") && entryType == null) {
              entryType = "techreport";
              break;
            }
          }
          if (entryType == null) {
            entryType = "misc";
          }
        }

        String ftype = null;
        if (fabbr.equals("AB")) ftype = "abstract";
        else if (fabbr.equals("AF")) ftype = "affiliation";
        else if (fabbr.equals("AU")) {
          ftype = "author";
          if (fstr.contains(";")) fstr = fstr.replaceAll("; ", " and ");
        } else if (fabbr.equals("CA")) ftype = "organization";
        else if (fabbr.equals("DE")) ftype = "keywords";
        else if (fabbr.equals("DO")) ftype = "doi";
        else if (fabbr.equals("ED")) ftype = "editor";
        else if (fabbr.equals("IB")) ftype = "ISBN";
        else if (fabbr.equals("IS")) ftype = "ISSN";
        else if (fabbr.equals("JN")) ftype = "journal";
        else if (fabbr.equals("LA")) ftype = "language";
        else if (fabbr.equals("PB")) ftype = "publisher";
        else if (fabbr.equals("PY")) {
          ftype = "year";
          // A year may already be present (e.g. taken from the SO field); keep the old value
          // as a note when the two disagree, since PY overwrites it below.
          if (hm.get("year") != null) {
            String oyear = hm.get("year");
            if (!fstr.equals(oyear)) {
              addNote(hm, "Source Year: " + oyear + ".");
            }
          }
        } else if (fabbr.equals("RL")) {
          ftype = "url";
          // Strip the "[URL:" prefix and "]" suffix from each bracketed URL and put the URLs
          // on separate lines.
          String[] lines = fstr.split(" ");
          StringBuilder urls = new StringBuilder();
          for (int ii = 0; ii < lines.length; ++ii) {
            if (lines[ii].startsWith("[URL:")) urls.append(lines[ii].substring(5));
            else if (lines[ii].endsWith("]")) {
              int len = lines[ii].length();
              urls.append(lines[ii].substring(0, len - 1));
              if (ii < lines.length - 1) urls.append("\n");
            } else urls.append(lines[ii]);
          }
          fstr = urls.toString();
        } else if (fabbr.equals("SO")) {
          ftype = "booktitle";

          // see if we can extract journal information; every piece that is recognised is
          // removed from fstr so that only the publication name remains

          // compact vol(no):page-page:
          Matcher pm = VOLNOPP_PATTERN.matcher(fstr);
          if (pm.find()) {
            hm.put("volume", pm.group(1));
            hm.put("number", pm.group(2));
            hm.put("pages", pm.group(3));
            fstr = pm.replaceFirst("");
          }

          // pages: collect every match, comma separated
          pm = PAGES_PATTERN.matcher(fstr);
          StringBuilder pages = new StringBuilder();
          while (pm.find()) {
            if (pages.length() > 0) pages.append(",");
            String pp = pm.group(1);
            if (pp == null) pp = pm.group(2);
            if (pp == null) pp = pm.group(3);
            pages.append(pp);
            fstr = pm.replaceFirst("");
            pm = PAGES_PATTERN.matcher(fstr);
          }
          if (pages.length() > 0) hm.put("pages", pages.toString());

          // volume:
          pm = VOLUME_PATTERN.matcher(fstr);
          if (pm.find()) {
            hm.put("volume", pm.group(1));
            fstr = pm.replaceFirst("");
          }

          // number:
          pm = NUMBER_PATTERN.matcher(fstr);
          if (pm.find()) {
            hm.put("number", pm.group(1));
            fstr = pm.replaceFirst("");
          }

          // journal date:
          fstr = parseDate(hm, fstr);

          // strip trailing comma and whitespace
          pm = trailingPattern.matcher(fstr);
          if (pm.find()) fstr = pm.replaceFirst("");

          // nothing left to store; str already holds the next line to examine
          if (fstr.equals("")) continue;
        } else if (fabbr.equals("TI")) ftype = "title";
        else if (fabbr.equals("RE")) continue; // throw away References

        if (ftype != null) {
          hm.put(ftype, fstr);
        } else {
          // unknown field: keep it as a note under its long name
          addNote(hm, fname + ": " + fstr + ".");
        }
      } else str = readLine(in); // not a field header: skip the line
    }

    return bibitems;
  }
Beispiel #14
0
 /**
  * Fills the requested fields of {@code entry} with values taken from {@code xmlDocument}.
  *
  * <p>A field that already has a value in the entry is left untouched. Field names are compared
  * case-insensitively; names that are not handled here are ignored. Both {@code booktitle} and
  * {@code journal} receive the document's venue.
  *
  * @param fields names of the fields to fill
  * @param entry the entry to update
  * @param xmlDocument the document the values are read from
  */
 private void insertFields(String[] fields, BibtexEntry entry, XmlDocument xmlDocument) {
   DocumentWrapper wrapper = new DocumentWrapper(xmlDocument);
   for (String name : fields) {
     // Only fields that are still empty are filled in.
     if (entry.getField(name) == null) {
       if (name.equalsIgnoreCase("author")) {
         entry.setField(name, wrapper.getAuthors("and"));
       } else if (name.equalsIgnoreCase("title")) {
         entry.setField(name, wrapper.getTitle());
       } else if (name.equalsIgnoreCase("abstract")) {
         entry.setField(name, wrapper.getAbstract());
       } else if (name.equalsIgnoreCase("keywords")) {
         entry.setField(name, wrapper.getKeyWords());
       } else if (name.equalsIgnoreCase("doi")) {
         entry.setField(name, wrapper.getDoi());
       } else if (name.equalsIgnoreCase("pages")) {
         entry.setField(name, wrapper.getPages());
       } else if (name.equalsIgnoreCase("volume")) {
         entry.setField(name, wrapper.getVolume());
       } else if (name.equalsIgnoreCase("number")) {
         entry.setField(name, wrapper.getNumber());
       } else if (name.equalsIgnoreCase("year")) {
         entry.setField(name, wrapper.getYear());
       } else if (name.equalsIgnoreCase("month")) {
         entry.setField(name, wrapper.getMonth());
       } else if (name.equalsIgnoreCase("day")) {
         entry.setField(name, wrapper.getDay());
       } else if (name.equalsIgnoreCase("booktitle") || name.equalsIgnoreCase("journal")) {
         // The venue serves as the book title and as the journal name alike.
         entry.setField(name, wrapper.getVenue());
       }
     }
   }
 }