コード例 #1
0
  /**
   * Extracts the next result entry from the text of a result page.
   *
   * <p>The search begins at the instance field {@code piv} and looks for the next
   * {@code <div class="detail} block, which is taken to end at the first following
   * {@code </div>}. When such a block is found, {@code piv} is advanced to just past it, an
   * entry is built from the block (type, fixed publisher/organization/note fields, every field
   * listed in {@code fieldPatterns}, authors and, if {@code includeAbstract} is set, the
   * abstract) and the entry is passed through {@code cleanup}.
   *
   * @param allText the complete page text to scan
   * @param startIndex not read by this method; the scan position is taken from {@code piv}
   * @return the value returned by {@code cleanup} for the parsed entry, or {@code null} when no
   *     further block is found
   */
  private BibtexEntry parseNextEntry(String allText, int startIndex) {
    int blockStart = allText.indexOf("<div class=\"detail", piv);
    int blockEnd = allText.indexOf("</div>", blockStart);
    if (blockStart < 0 || blockEnd <= 0) {
      return null;
    }

    // Keep the closing tag in the block ("</div>" is 6 characters) and remember where the
    // next call has to resume.
    blockEnd += 6;
    piv = blockEnd;
    String text = allText.substring(blockStart, blockEnd);

    // Translate the category printed on the page into a BibTeX type name plus the field that
    // will receive the second "title" match (journal name, book title, ...).
    String typeName = "";
    String bibtexTypeName = null;
    String sourceField = null;
    Matcher typeMatcher = typePattern.matcher(text);
    if (typeMatcher.find()) {
      typeName = typeMatcher.group(1);
      if (typeName.equalsIgnoreCase("IEEE Journals &amp; Magazines")
          || typeName.equalsIgnoreCase("IEEE Early Access Articles")
          || typeName.equalsIgnoreCase("IET Journals &amp; Magazines")
          || typeName.equalsIgnoreCase("AIP Journals &amp; Magazines")
          || typeName.equalsIgnoreCase("AVS Journals &amp; Magazines")
          || typeName.equalsIgnoreCase("IBM Journals &amp; Magazines")
          || typeName.equalsIgnoreCase("TUP Journals &amp; Magazines")
          || typeName.equalsIgnoreCase("BIAI Journals &amp; Magazines")) {
        bibtexTypeName = "article";
        sourceField = "journal";
      } else if (typeName.equalsIgnoreCase("IEEE Conference Publications")
          || typeName.equalsIgnoreCase("IET Conference Publications")
          || typeName.equalsIgnoreCase("VDE Conference Publications")) {
        bibtexTypeName = "inproceedings";
        sourceField = "booktitle";
      } else if (typeName.equalsIgnoreCase("IEEE Standards")
          || typeName.equalsIgnoreCase("Standards")) {
        bibtexTypeName = "standard";
        sourceField = "number";
      } else if (typeName.equalsIgnoreCase("IEEE eLearning Library Courses")) {
        bibtexTypeName = "Electronic";
        sourceField = "note";
      } else if (typeName.equalsIgnoreCase("Wiley-IEEE Press eBook Chapters")
          || typeName.equalsIgnoreCase("MIT Press eBook Chapters")
          || typeName.equalsIgnoreCase("IEEE USA Books &amp; eBooks")) {
        bibtexTypeName = "inCollection";
        sourceField = "booktitle";
      }
    }

    BibtexEntryType type = null;
    if (bibtexTypeName != null) {
      type = BibtexEntryType.getType(bibtexTypeName);
    }
    if (type == null) {
      // Unknown category, or the type lookup returned nothing: fall back to MISC and log the
      // offending block.
      type = BibtexEntryType.getType("misc");
      sourceField = "note";
      System.err.println("Type detection failed. Use MISC instead.");
      unparseable++;
      System.err.println(text);
    }

    BibtexEntry entry = new BibtexEntry(IdGenerator.next(), type);

    // Fields implied by the category itself.
    if (typeName.equalsIgnoreCase("IEEE Standards")) {
      entry.setField("organization", "IEEE");
    }

    String publisher = null;
    if (typeName.equalsIgnoreCase("Wiley-IEEE Press eBook Chapters")) {
      publisher = "Wiley-IEEE Press";
    } else if (typeName.equalsIgnoreCase("MIT Press eBook Chapters")) {
      publisher = "MIT Press";
    } else if (typeName.equalsIgnoreCase("IEEE USA Books &amp; eBooks")) {
      publisher = "IEEE USA";
    }
    if (publisher != null) {
      entry.setField("publisher", publisher);
    }

    if (typeName.equalsIgnoreCase("IEEE Early Access Articles")) {
      entry.setField("note", "Early Access");
    }

    // Fields scraped with the regular expressions registered in fieldPatterns.
    for (String field : fieldPatterns.keySet()) {
      Matcher fieldMatcher = Pattern.compile(fieldPatterns.get(field)).matcher(text);
      if (!fieldMatcher.find()) {
        continue;
      }
      entry.setField(field, htmlConverter.format(fieldMatcher.group(1)));

      // A second hit of the title pattern is stored in the type-specific source field.
      if (field.equals("title") && fieldMatcher.find()) {
        String secondTitle = htmlConverter.format(fieldMatcher.group(1));
        if (entry.getType() == BibtexEntryType.getStandardType("standard")) {
          secondTitle = secondTitle.replaceAll("IEEE Std ", "");
        }
        entry.setField(sourceField, secondTitle);
      }

      // A two-group pages pattern yields a "first-last" range.
      if (field.equals("pages") && fieldMatcher.groupCount() == 2) {
        entry.setField(field, fieldMatcher.group(1) + "-" + fieldMatcher.group(2));
      }
    }

    // Authors: every match of authorPattern, joined with " and ".
    StringBuilder joinedAuthors = new StringBuilder("");
    boolean firstAuthor = true;
    Matcher authorMatcher = authorPattern.matcher(text);
    while (authorMatcher.find()) {
      if (!firstAuthor) {
        joinedAuthors.append(" and ");
      }
      joinedAuthors.append(htmlConverter.format(authorMatcher.group(1)));
      firstAuthor = false;
    }
    entry.setField("author", joinedAuthors.toString());

    // Fix for some documents without authors: discard values that are really page markup.
    String storedAuthor = entry.getField("author");
    if (storedAuthor == null
        || storedAuthor.startsWith("a href")
        || storedAuthor.startsWith("Topic(s)")) {
      entry.setField("author", "");
    }

    // An author-less conference paper is re-typed as "proceedings".
    if (entry.getType() == BibtexEntryType.getStandardType("inproceedings")
        && entry.getField("author").equals("")) {
      entry.setType(BibtexEntryType.getStandardType("proceedings"));
    }

    if (includeAbstract) {
      int abstractStart = text.indexOf("id=\"abstract");
      if (abstractStart >= 0) {
        int abstractEnd = text.indexOf("</div>", abstractStart) + 6;
        String abstractBlock = text.substring(abstractStart, abstractEnd);
        Matcher absMatcher = absPattern.matcher(abstractBlock);
        if (absMatcher.find()) {
          // Clean-up abstract: unwrap the highlighted search-term snippets.
          String rawAbstract =
              absMatcher.group(1).replaceAll("<span class='snippet'>([\\w]+)</span>", "$1");
          entry.setField("abstract", htmlConverter.format(rawAbstract));
        }
      }
    }

    return cleanup(entry);
  }
コード例 #2
0
ファイル: MedlineHandler.java プロジェクト: KommuSoft/jabref
  /**
   * Handles the end of an XML element (SAX-style callback; presumably this overrides the
   * handler base class's {@code endElement} - confirm against the class declaration).
   *
   * <p>When a {@code PubmedArticle} element closes, a {@link BibtexEntry} of type "article" is
   * built from the values collected so far, added to {@code bibitems}, and all collected
   * values are reset for the next article. The end of {@code Author} assembles one author
   * name and appends it to {@code authors}; the end of {@code AuthorList} joins those names
   * with " and "; the end of {@code MeshHeading} adds the current topic(s) to
   * {@code descriptors}. For the remaining recognised elements only the matching "inside
   * element" flag is cleared.
   *
   * @param uri namespace URI of the element; not read by this method
   * @param localName local name of the element that just ended; selects the action
   * @param qName qualified name of the element; not read by this method
   */
  public void endElement(String uri, String localName, String qName) {
    if (localName.equals("PubmedArticle")) {
      // No explicit year: fall back to the first four characters of MedlineDate (multi-year
      // date format). Values shorter than four characters are skipped rather than sliced, as
      // substring(0, 4) would throw StringIndexOutOfBoundsException on them.
      if (year.equals("") && MedlineDate.length() >= 4) {
        year = MedlineDate.substring(0, 4);
      }

      // Build a string from the collected keywords:
      StringBuilder keywordBuilder = new StringBuilder();
      for (Iterator<String> iterator = descriptors.iterator(); iterator.hasNext(); ) {
        keywordBuilder.append(iterator.next());
        if (iterator.hasNext()) {
          keywordBuilder.append(KEYWORD_SEPARATOR);
        }
      }
      keywords = keywordBuilder.toString();

      // The id assumes an existing database, so a neutral id is used here.
      BibtexEntry b = new BibtexEntry(Util.createNeutralId(), Globals.getEntryType("article"));

      if (!author.equals("")) {
        b.setField(
            "author", htmlConverter.formatUnicode(ImportFormatReader.expandAuthorInitials(author)));
      }
      if (!title.equals("")) {
        b.setField("title", htmlConverter.formatUnicode(title));
      }
      if (!journal.equals("")) {
        b.setField("journal", journal);
      }
      if (!year.equals("")) {
        b.setField("year", year);
      }
      if (!page.equals("")) {
        b.setField("pages", fixPageRange(page));
      }
      if (!volume.equals("")) {
        b.setField("volume", volume);
      }
      if (!language.equals("")) {
        b.setField("language", language);
      }
      if (!pst.equals("")) {
        b.setField("medline-pst", pst);
      }
      if (!abstractText.equals("")) {
        // Prefix every '%' with a backslash.
        b.setField("abstract", abstractText.replaceAll("%", "\\\\%"));
      }
      if (!keywords.equals("")) {
        b.setField("keywords", keywords);
      }
      if (!month.equals("")) {
        b.setField("month", month);
      }
      if (!number.equals("")) {
        b.setField("number", number);
      }

      if (!doi.equals("")) {
        b.setField("doi", doi);
        b.setField("url", "http://dx.doi.org/" + doi);
      }
      if (!pii.equals("")) {
        b.setField("pii", pii);
      }
      if (!affiliation.equals("")) {
        // Prefix every '#' with a backslash.
        b.setField("institution", affiliation.replaceAll("#", "\\\\#"));
      }

      // Older references do not have doi entries, but every
      // medline entry has a unique pubmed ID (aka primary ID).
      // Add a bibtex field for the pubmed ID for future use.
      if (!pubmedid.equals("")) {
        b.setField("pmid", pubmedid);
      }

      bibitems.add(b);

      // Reset all per-article state so nothing leaks into the next PubmedArticle.
      abstractText = "";
      author = "";
      title = "";
      journal = "";
      keywords = "";
      doi = "";
      pii = "";
      year = "";
      forename = "";
      lastName = "";
      suffix = "";
      affiliation = "";
      pubmedid = "";
      majorTopic = "";
      minorTopics = "";
      month = "";
      volume = "";
      language = "";
      pst = "";
      lastname = "";
      initials = "";
      number = "";
      page = "";
      medlineID = "";
      url = "";
      MedlineDate = "";
      descriptors.clear();
    } else if (localName.equals("ArticleTitle")) {
      inTitle = false;
    } else if (localName.equals("PubDate")) {
      inPubDate = false;
    } else if (localName.equals("Year")) {
      inYear = false;
    } else if (localName.equals("PMID")) {
      inPubMedID = false;
    } else if (localName.equals("MedlineDate")) {
      inMedlineDate = false;
    } else if (localName.equals("MedlineTA")) {
      // journal name
      inJournal = false;
    } else if (localName.equals("Month")) {
      inMonth = false;
    } else if (localName.equals("Volume")) {
      inVolume = false;
    } else if (localName.equals("Language")) {
      inLanguage = false;
    } else if (localName.equals("PublicationStatus")) {
      inPst = false;
    } else if (localName.equals("AuthorList")) {
      author = join(authors.toArray(), " and ");
      inAuthorList = false;
    } else if (localName.equals("Author")) {
      // forename sometimes has initials with " " in middle: is pattern [A-Z] [A-Z]
      // when above is the case replace it with initials
      if (forename.length() == 3 && forename.charAt(1) == ' ') {
        forename = initials;
      }

      // Put together name with last name first, and enter suffix in between if present:
      if (lastname.indexOf(" ") > 0) {
        author = "{" + lastname + "}";
      } else {
        author = lastname;
      }

      if (suffix.length() > 0) {
        author = author + ", " + suffix;
      }
      if (forename.length() > 0) {
        author = author + ", " + forename;
      }

      authors.add(author);
      inAuthor = false;
      forename = "";
      initials = "";
      lastname = "";
      suffix = "";
    } else if (localName.equals("DescriptorName")) {
      inDescriptorName = false;
    } else if (localName.equals("QualifierName")) {
      inQualifierName = false;
    } else if (localName.equals("MeshHeading")) {
      inMeshHeader = false;
      if (minorTopics.equals("")) {
        descriptors.add(majorTopic);
      } else {
        descriptors.add(majorTopic + ", " + minorTopics);
      }
    } else if (localName.equals("LastName")) {
      inLastName = false;
    } else if (localName.equals("Suffix")) {
      inSuffix = false;
    } else if (localName.equals("ForeName") || localName.equals("FirstName")) {
      inForename = false;
    } else if (localName.equals("Issue")) {
      inIssue = false;
    } else if (localName.equals("MedlinePgn")) {
      // page number
      inMedlinePgn = false;
    } else if (localName.equals("URL")) {
      inUrl = false;
    } else if (localName.equals("Initials")) {
      inInitials = false;
    } else if (localName.equals("AbstractText")) {
      inAbstractText = false;
    } else if (localName.equals("Affiliation")) {
      inAffiliation = false;
    } else if (localName.equals("ArticleId")) {
      // Only one of the two id flags is cleared per ArticleId element, DOI first.
      if (inDoi) {
        inDoi = false;
      } else if (inPii) {
        inPii = false;
      }
    }
  }
コード例 #3
0
ファイル: ACMPortalFetcher.java プロジェクト: steimlfk/jabref
  /**
   * Hands the given text to {@code htmlConverter.format}, which is meant to turn HTML-style
   * character sequences into normal characters.
   *
   * @param text the text to convert
   * @return the result of {@code htmlConverter.format} for {@code text}
   */
  private String convertHTMLChars(String text) {
    final String converted = htmlConverter.format(text);
    return converted;
  }