/**
   * Parses the entries in the source and returns them as a list of BibtexEntry objects.
   *
   * <p>The input is read line by line:
   *
   * <ul>
   *   <li>empty lines are ignored;
   *   <li>a line of the form {@code --XX-- value} starts the field with the two-character tag
   *       {@code XX} (a repeated tag within one record replaces the earlier value);
   *   <li>a line equal to {@code ------} ends the current record, which is converted into one
   *       entry;
   *   <li>any other line is trimmed and appended, without a separator, to the most recently
   *       started field.
   * </ul>
   *
   * <p>Tags that are not handled below are silently dropped. Fields collected after the last
   * {@code ------} delimiter are not turned into an entry.
   *
   * @param stream the stream to read the records from
   * @param status not used by this method
   * @return the parsed entries, or {@code null} if a continuation line appears before any field
   *     has been started for the current record
   * @throws IOException if reading from the stream fails
   */
  @Override
  public List<BibtexEntry> importEntries(InputStream stream, OutputPrinter status)
      throws IOException {

    ArrayList<BibtexEntry> bibItems = new ArrayList<BibtexEntry>();
    BufferedReader in = new BufferedReader(ImportFormatReader.getReaderDefaultEncoding(stream));
    // BibTeX fields of the record being assembled; cleared and reused after every record
    HashMap<String, String> hm = new HashMap<String, String>();
    // raw text of the current record, keyed by its two-character tag
    HashMap<String, StringBuilder> lines = new HashMap<String, StringBuilder>();
    // buffer of the most recently started field; continuation lines are appended to it
    StringBuilder previousLine = null;
    String line;
    while ((line = in.readLine()) != null) {
      if (line.isEmpty()) {
        continue; // ignore empty lines, e.g. at the end of the file
      }

      // entry delimiter -> item complete
      if (line.equals("------")) {
        String typeRT = null;
        String typeTW = null;
        String pageStart = null;
        String pageEnd = null;
        String country = null;
        String address = null;
        String titleST = null;
        String titleTI = null;
        List<String> comments = new ArrayList<String>();

        // translate every collected tag into a BibTeX field, a comment part, or a local that is
        // combined with other values further down
        for (Map.Entry<String, StringBuilder> entry : lines.entrySet()) {
          String key = entry.getKey();
          String value = entry.getValue().toString();
          if ("AU".equals(key)) {
            hm.put("author", value);
          } else if ("TI".equals(key)) {
            titleTI = value;
          } else if ("ST".equals(key)) {
            titleST = value;
          } else if ("YP".equals(key)) {
            hm.put("year", value);
          } else if ("VL".equals(key)) {
            hm.put("volume", value);
          } else if ("NB".equals(key)) {
            hm.put("number", value);
          } else if ("PS".equals(key)) {
            pageStart = value;
          } else if ("PE".equals(key)) {
            pageEnd = value;
          } else if ("KW".equals(key)) {
            hm.put("keywords", value);
          } else if ("RT".equals(key)) {
            typeRT = value;
          } else if ("SB".equals(key)) {
            comments.add("Subject: " + value);
          } else if ("SA".equals(key)) {
            comments.add("Secondary Authors: " + value);
          } else if ("NT".equals(key)) {
            hm.put("note", value);
          } else if ("PB".equals(key)) {
            hm.put("publisher", value);
          } else if ("TA".equals(key)) {
            comments.add("Tertiary Authors: " + value);
          } else if ("TT".equals(key)) {
            comments.add("Tertiary Title: " + value);
          } else if ("ED".equals(key)) {
            hm.put("edition", value);
          } else if ("TW".equals(key)) {
            typeTW = value;
          } else if ("QA".equals(key)) {
            comments.add("Quaternary Authors: " + value);
          } else if ("QT".equals(key)) {
            comments.add("Quaternary Title: " + value);
          } else if ("IS".equals(key)) {
            hm.put("isbn", value);
          } else if ("AB".equals(key)) {
            hm.put("abstract", value);
          } else if ("AD".equals(key)) {
            address = value;
          } else if ("LG".equals(key)) {
            hm.put("language", value);
          } else if ("CO".equals(key)) {
            country = value;
          } else if ("UR".equals(key) || "AT".equals(key)) {
            // values that look like a web/ftp link go to "url", everything else to "pdf";
            // https links were previously misfiled under "pdf"
            String trimmed = value.trim();
            boolean looksLikeUrl =
                trimmed.startsWith("http://")
                    || trimmed.startsWith("https://")
                    || trimmed.startsWith("ftp://");
            hm.put(looksLikeUrl ? "url" : "pdf", value);
          } else if ("C1".equals(key)) {
            comments.add("Custom1: " + value);
          } else if ("C2".equals(key)) {
            comments.add("Custom2: " + value);
          } else if ("C3".equals(key)) {
            comments.add("Custom3: " + value);
          } else if ("C4".equals(key)) {
            comments.add("Custom4: " + value);
          } else if ("C5".equals(key)) {
            comments.add("Custom5: " + value);
          } else if ("C6".equals(key)) {
            comments.add("Custom6: " + value);
          } else if ("DE".equals(key)) {
            hm.put("annote", value);
          } else if ("CA".equals(key)) {
            comments.add("Categories: " + value);
          } else if ("TH".equals(key)) {
            comments.add("Short Title: " + value);
          } else if ("SE".equals(key)) {
            hm.put("chapter", value);
          }
          // the tags "AC" and "LP" have no mapping and are dropped like any other unknown tag
        }

        // to find the type, first check TW, then RT; RT is only consulted while TW gave no match
        String bibtexType = "misc";
        String[] typeCandidates = {typeTW, typeRT};
        for (String candidate : typeCandidates) {
          if (!bibtexType.equals("misc")) {
            break;
          }
          if (candidate == null) {
            continue;
          }
          String lowered = candidate.toLowerCase();
          // order matters: "book section" must be tested before "book", and the master's thesis
          // check before the generic thesis check
          if (lowered.contains("article") || lowered.contains("journal")) {
            bibtexType = "article";
          } else if (lowered.contains("book section")) {
            bibtexType = "inbook";
          } else if (lowered.contains("book")) {
            bibtexType = "book";
          } else if (lowered.contains("conference") || lowered.contains("proceedings")) {
            bibtexType = "inproceedings";
          } else if (lowered.contains("report")) {
            bibtexType = "techreport";
          } else if (lowered.contains("thesis")) {
            bibtexType = lowered.contains("master") ? "mastersthesis" : "phdthesis";
          }
        }

        // depending on bibtexType, decide where to place titleST: it is the journal name for
        // articles and the book title for everything else; titleTI is always the title
        if (titleST != null) {
          hm.put(bibtexType.equals("article") ? "journal" : "booktitle", titleST);
        }
        if (titleTI != null) {
          hm.put("title", titleTI);
        }

        // concatenate pages: "start--end", "start" alone, or "--end" alone
        if (pageStart != null || pageEnd != null) {
          String from = pageStart != null ? pageStart : "";
          String to = pageEnd != null ? "--" + pageEnd : "";
          hm.put("pages", from + to);
        }

        // concatenate address and country; a country without an address is not stored
        if (address != null) {
          hm.put("address", country != null ? address + ", " + country : address);
        }

        // set comment if present, joining the parts with "; "
        if (!comments.isEmpty()) {
          StringBuilder joined = new StringBuilder();
          for (String comment : comments) {
            if (joined.length() > 0) {
              joined.append("; ");
            }
            joined.append(comment);
          }
          hm.put("comment", joined.toString());
        }

        BibtexEntry b =
            new BibtexEntry(DEFAULT_BIBTEXENTRY_ID, BibtexEntryTypes.getEntryType(bibtexType));
        b.setField(hm);
        bibItems.add(b);

        // reset the per-record state for the next record
        hm.clear();
        lines.clear();
        previousLine = null;

        continue;
      }

      // new key: "--XX-- value"
      if (line.startsWith("--") && line.length() >= 7 && line.substring(4, 7).equals("-- ")) {
        previousLine = new StringBuilder(line.substring(7));
        lines.put(line.substring(2, 4), previousLine);
        continue;
      }

      // continuation (folding) of previous line; without a started field the input is not in the
      // expected format and the import is abandoned
      if (previousLine == null) {
        return null;
      }
      previousLine.append(line.trim());
    }

    return bibItems;
  }
Exemple #2
0
  /**
   * Parses the entries in the source and returns them as a list of BibtexEntry objects.
   *
   * <p>The input is processed in two passes. First, all lines are folded into a single string:
   * lines shorter than three characters are dropped, a line starting with {@code "PT "} begins a
   * new record (prefixed with {@code "::"}), a line whose first three characters trim to exactly
   * two characters begins a new field (prefixed with {@code " ## "}), and every other line is
   * treated as a continuation of the previous field (prefixed with {@code "EOLEOL"} and trimmed).
   * Second, the string is split back into records and fields, and every field is mapped to a
   * BibTeX field according to its two-character tag. Tags without a dedicated mapping are stored
   * under the tag itself, except {@code ER}, {@code EF}, {@code VR} and {@code FN}, which are
   * discarded.
   *
   * @param stream the stream to read the records from
   * @param status not used by this method
   * @return the parsed entries; records that produced no fields at all are omitted
   * @throws IOException if {@code stream} is {@code null} or reading from it fails
   */
  @Override
  public List<BibtexEntry> importEntries(InputStream stream, OutputPrinter status)
      throws IOException {
    if (stream == null) {
      throw new IOException("No stream given.");
    }

    ArrayList<BibtexEntry> bibitems = new ArrayList<BibtexEntry>();
    StringBuilder sb = new StringBuilder();

    BufferedReader in = new BufferedReader(ImportFormatReader.getReaderDefaultEncoding(stream));

    String str;

    // pass 1: fold the input into one string with record, field and line markers
    while ((str = in.readLine()) != null) {
      if (str.length() < 3) {
        continue;
      }

      String prefix = str.substring(0, 3);
      if (prefix.equals("PT ")) {
        // beginning of a new item
        sb.append("::").append(str);
      } else if (prefix.trim().length() == 2) {
        // a two-character tag followed by (or preceded by) one blank: beginning of a field
        sb.append(" ## ");
        sb.append(str);
      } else {
        // continuation line: mark the line break and remove the initial spaces
        sb.append("EOLEOL");
        sb.append(str.trim());
      }
    }

    String[] entries = sb.toString().split("::");

    HashMap<String, String> hm = new HashMap<String, String>();

    // pass 2: no chunk is skipped explicitly; a leading chunk that is empty or holds only
    // discarded header tags ends up with no fields and is dropped by the emptiness check below
    for (String entry : entries) {
      String[] fields = entry.split(" ## ");

      if (fields.length == 0) {
        fields = entry.split("\n");
      }

      String entryType = "";
      String publicationType = "";
      String pages = "";
      hm.clear();

      for (String field : fields) {
        // empty field: don't do anything
        if (field.length() <= 2) {
          continue;
        }

        String beg = field.substring(0, 2);
        String value = field.substring(3);
        // some formats separate tag and value with " - " (after the blank already cut off above)
        if (value.startsWith(" - ")) {
          value = value.substring(3);
        }
        value = value.trim();

        if (beg.equals("PT")) {
          publicationType = value.startsWith("J") ? "article" : value;
          entryType = "article"; // make all of them PT?
        } else if (beg.equals("TY")) {
          if ("JOUR".equals(value)) {
            entryType = "article";
          } else if ("CONF".equals(value)) {
            entryType = "inproceedings";
          }
        } else if (beg.equals("JO")) {
          hm.put("booktitle", value);
        } else if (beg.equals("AU")) {
          // one author per physical line: the line markers become " and " separators
          String author = IsiImporter.isiAuthorsConvert(value.replaceAll("EOLEOL", " and "));

          // if there is already someone there then append with "and"
          String existingAuthors = hm.get("author");
          if (existingAuthors != null) {
            author = existingAuthors + " and " + author;
          }

          hm.put("author", author);
        } else if (beg.equals("TI")) {
          hm.put("title", value.replaceAll("EOLEOL", " "));
        } else if (beg.equals("SO") || beg.equals("JA")) {
          hm.put("journal", value.replaceAll("EOLEOL", " "));
        } else if (beg.equals("ID") || beg.equals("KW")) {
          String keywords = value.replaceAll("EOLEOL", " ");
          String existingKeywords = hm.get("keywords");
          if (existingKeywords == null) {
            hm.put("keywords", keywords);
          } else if (!existingKeywords.contains(keywords)) {
            hm.put("keywords", existingKeywords + ", " + keywords);
          }
          // otherwise the value is already part of the collected keywords; keep them as they
          // are instead of replacing the whole list with the new value
        } else if (beg.equals("AB")) {
          hm.put("abstract", value.replaceAll("EOLEOL", " "));
        } else if (beg.equals("BP") || beg.equals("BR") || beg.equals("SP")) {
          pages = value;
        } else if (beg.equals("EP")) {
          int detpos = value.indexOf(' ');

          // tweak for IEEE Explore: keep only the part before the first blank
          if (detpos != -1 && !value.substring(0, detpos).trim().isEmpty()) {
            value = value.substring(0, detpos);
          }

          pages = pages + "--" + value;
        } else if (beg.equals("PS")) {
          pages = IsiImporter.parsePages(value);
        } else if (beg.equals("AR")) {
          pages = value;
        } else if (beg.equals("IS")) {
          hm.put("number", value);
        } else if (beg.equals("PY")) {
          hm.put("year", value);
        } else if (beg.equals("VL")) {
          hm.put("volume", value);
        } else if (beg.equals("PU")) {
          hm.put("publisher", value);
        } else if (beg.equals("DI")) {
          hm.put("doi", value);
        } else if (beg.equals("PD")) {
          String month = IsiImporter.parseMonth(value);
          if (month != null) {
            hm.put("month", month);
          }
        } else if (beg.equals("DT")) {
          // set "Review" in Note/Comment?
          boolean isArticle =
              value.equals("Review")
                  || value.startsWith("Article")
                  || value.startsWith("Journal")
                  || publicationType.equals("article");
          entryType = isArticle ? "article" : "misc";
        } else if (beg.equals("CR")) {
          hm.put("CitedReferences", value.replaceAll("EOLEOL", " ; ").trim());
        } else {
          // preserve all other entries except the record/file bookkeeping tags
          if (beg.equals("ER") || beg.equals("EF") || beg.equals("VR") || beg.equals("FN")) {
            continue;
          }
          hm.put(beg, value);
        }
      }

      if (!"".equals(pages)) {
        hm.put("pages", pages);
      }

      // skip empty entries
      if (hm.isEmpty()) {
        continue;
      }

      // NOTE(review): entryType is still "" when the record has no PT or DT tag and no TY tag
      // with a recognised value; confirm that getEntryType copes with an empty name.
      BibtexEntry b =
          new BibtexEntry(DEFAULT_BIBTEXENTRY_ID, BibtexEntryTypes.getEntryType(entryType));

      // remove empty fields; collected first because the map must not be modified while its key
      // set is being iterated
      ArrayList<String> toRemove = new ArrayList<String>();
      for (String key : hm.keySet()) {
        String content = hm.get(key);
        if (content == null || content.trim().isEmpty()) {
          toRemove.add(key);
        }
      }
      for (String key : toRemove) {
        hm.remove(key);
      }

      // polish entries
      IsiImporter.processSubSup(hm);
      IsiImporter.processCapitalization(hm);

      b.setField(hm);

      bibitems.add(b);
    }

    return bibitems;
  }