/**
   * Reports whether the given source looks like input this importer understands.
   *
   * <p>The source is read line by line until its end. It is recognized as soon as one line
   * contains the text {@code Record} followed, later on the same line, by {@code INSPEC}.
   *
   * @param stream the source to probe
   * @return {@code true} if such a line is found, {@code false} if the source ends without one
   * @throws IOException if reading from the source fails
   */
  @Override
  public boolean isRecognizedFormat(InputStream stream) throws IOException {
    Pattern recordHeader = Pattern.compile("Record.*INSPEC.*");
    BufferedReader reader =
        new BufferedReader(ImportFormatReader.getReaderDefaultEncoding(stream));

    for (String line = reader.readLine(); line != null; line = reader.readLine()) {
      if (recordHeader.matcher(line).find()) {
        return true;
      }
    }
    return false;
  }
Example #2
0
  /**
   * Reports whether the given source looks like input this importer understands.
   *
   * <p>Only the first 50 lines are examined. The source is recognized as soon as one of them
   * matches {@code IsiImporter.ISI_PATTERN}.
   *
   * @param stream the source to probe
   * @return {@code true} if one of the examined lines matches, {@code false} otherwise
   * @throws IOException if reading from the source fails
   */
  @Override
  public boolean isRecognizedFormat(InputStream stream) throws IOException {
    BufferedReader reader =
        new BufferedReader(ImportFormatReader.getReaderDefaultEncoding(stream));

    // Do NOT strip " - " from a line before matching it (i.e. no line.replace(" - ", "")):
    // the hyphen is characteristic of the RIS format, and removing it gives false positives
    // for RIS files.
    int examined = 0;
    String line = reader.readLine();
    while (line != null && examined < 50) {
      if (IsiImporter.ISI_PATTERN.matcher(line).find()) {
        return true;
      }
      examined++;
      line = reader.readLine();
    }
    return false;
  }
Example #3
0
  /**
   * Parses the entries in the source and returns them as a list of {@code BibtexEntry} objects.
   *
   * <p>The layout handled here is:
   *
   * <ul>
   *   <li>a line of the form {@code --XX-- text} starts the field with the two-character tag
   *       {@code XX};
   *   <li>any other non-empty line is a continuation: it is trimmed and appended, without a
   *       separator, to the field started most recently;
   *   <li>a line consisting of {@code ------} ends the current entry, which is then converted
   *       and added to the result;
   *   <li>empty lines are ignored.
   * </ul>
   *
   * <p>Fields read after the last {@code ------} delimiter are not turned into an entry.
   *
   * @param stream the source to read
   * @param status not used by this method
   * @return the parsed entries, or {@code null} if a continuation line is encountered before any
   *     field of the current entry has been started
   * @throws IOException if reading from the source fails
   */
  @Override
  public List<BibtexEntry> importEntries(InputStream stream, OutputPrinter status)
      throws IOException {

    List<BibtexEntry> bibItems = new ArrayList<>();
    BufferedReader in = new BufferedReader(ImportFormatReader.getReaderDefaultEncoding(stream));
    HashMap<String, String> hm = new HashMap<>();
    // raw text of each field of the entry currently being read, keyed by its two-character tag
    Map<String, StringBuilder> lines = new HashMap<>();
    // buffer of the most recently started field; continuation lines are folded into it
    StringBuilder previousLine = null;

    String line;
    while ((line = in.readLine()) != null) {
      if (line.isEmpty()) {
        continue; // ignore empty lines, e.g. at the end of the file
      }

      // entry delimiter -> item complete
      if (line.equals("------")) {
        String typeRT = null;
        String typeTW = null;
        String pageStart = null;
        String pageEnd = null;
        String country = null;
        String address = null;
        String titleST = null;
        String titleTI = null;
        List<String> comments = new ArrayList<>();

        // translate the collected tags into BibTeX fields
        for (Map.Entry<String, StringBuilder> entry : lines.entrySet()) {
          String value = entry.getValue().toString();
          switch (entry.getKey()) {
            case "AU":
              hm.put("author", value);
              break;
            case "TI":
              titleTI = value;
              break;
            case "ST":
              titleST = value;
              break;
            case "YP":
              hm.put("year", value);
              break;
            case "VL":
              hm.put("volume", value);
              break;
            case "NB":
              hm.put("number", value);
              break;
            case "PS":
              pageStart = value;
              break;
            case "PE":
              pageEnd = value;
              break;
            case "KW":
              hm.put("keywords", value);
              break;
            case "RT":
              typeRT = value;
              break;
            case "SB":
              comments.add("Subject: " + value);
              break;
            case "SA":
              comments.add("Secondary Authors: " + value);
              break;
            case "NT":
              hm.put("note", value);
              break;
            case "PB":
              hm.put("publisher", value);
              break;
            case "TA":
              comments.add("Tertiary Authors: " + value);
              break;
            case "TT":
              comments.add("Tertiary Title: " + value);
              break;
            case "ED":
              hm.put("edition", value);
              break;
            case "TW":
              typeTW = value;
              break;
            case "QA":
              comments.add("Quaternary Authors: " + value);
              break;
            case "QT":
              comments.add("Quaternary Title: " + value);
              break;
            case "IS":
              hm.put("isbn", value);
              break;
            case "AB":
              hm.put("abstract", value);
              break;
            case "AD":
              address = value;
              break;
            case "LG":
              hm.put("language", value);
              break;
            case "CO":
              country = value;
              break;
            case "UR":
            case "AT":
              {
                // Remote locations go to "url", everything else is treated as a file ("pdf").
                // "https://" is accepted as well, so that secure links are not filed as "pdf".
                String location = value.trim();
                boolean isUrl =
                    location.startsWith("http://")
                        || location.startsWith("https://")
                        || location.startsWith("ftp://");
                hm.put(isUrl ? "url" : "pdf", value);
                break;
              }
            case "C1":
              comments.add("Custom1: " + value);
              break;
            case "C2":
              comments.add("Custom2: " + value);
              break;
            case "C3":
              comments.add("Custom3: " + value);
              break;
            case "C4":
              comments.add("Custom4: " + value);
              break;
            case "C5":
              comments.add("Custom5: " + value);
              break;
            case "C6":
              comments.add("Custom6: " + value);
              break;
            case "DE":
              hm.put("annote", value);
              break;
            case "CA":
              comments.add("Categories: " + value);
              break;
            case "TH":
              comments.add("Short Title: " + value);
              break;
            case "SE":
              hm.put("chapter", value);
              break;
            default:
              // all other tags (e.g. AC, LP) are not imported
              break;
          }
        }

        // to find the type, first check TW, then RT
        String bibtexType = guessBibtexType(typeTW);
        if (bibtexType.equals("misc")) {
          bibtexType = guessBibtexType(typeRT);
        }

        // ST names the journal of an article; for every other type it is stored as booktitle
        if (titleST != null) {
          hm.put(bibtexType.equals("article") ? "journal" : "booktitle", titleST);
        }
        if (titleTI != null) {
          hm.put("title", titleTI);
        }

        // concatenate pages
        if (pageStart != null || pageEnd != null) {
          hm.put(
              "pages",
              (pageStart != null ? pageStart : "") + (pageEnd != null ? "--" + pageEnd : ""));
        }

        // concatenate address and country; a country without an address is dropped
        if (address != null) {
          hm.put("address", address + (country != null ? ", " + country : ""));
        }

        if (!comments.isEmpty()) { // set comment if present
          StringBuilder joined = new StringBuilder();
          for (String comment : comments) {
            if (joined.length() > 0) {
              joined.append("; ");
            }
            joined.append(comment);
          }
          hm.put("comment", joined.toString());
        }

        BibtexEntry b =
            new BibtexEntry(DEFAULT_BIBTEXENTRY_ID, BibtexEntryTypes.getEntryType(bibtexType));
        b.setField(hm);
        bibItems.add(b);

        // start the next entry from a clean state
        hm.clear();
        lines.clear();
        previousLine = null;

        continue;
      }

      // new key: "--XX-- value" (startsWith with an offset is false for lines that are too short)
      if (line.startsWith("--") && line.startsWith("-- ", 4)) {
        previousLine = new StringBuilder(line.substring(7));
        lines.put(line.substring(2, 4), previousLine);
        continue;
      }

      // continuation (folding) of previous line
      if (previousLine == null) {
        // a continuation without a field to attach to: the import is given up
        return null;
      }
      previousLine.append(line.trim());
    }

    return bibItems;
  }

  /**
   * Maps a free-text type description to a BibTeX entry type name.
   *
   * <p>The comparison is case-insensitive and uses {@code Locale.ROOT}, so the result does not
   * depend on the default locale of the JVM.
   *
   * @param description the content of a type field, may be {@code null}
   * @return the matching BibTeX type name, or {@code "misc"} if the description is {@code null}
   *     or contains none of the known keywords
   */
  private static String guessBibtexType(String description) {
    if (description == null) {
      return "misc";
    }
    String lower = description.toLowerCase(java.util.Locale.ROOT);
    if (lower.contains("article") || lower.contains("journal")) {
      return "article";
    }
    // "book section" must be tested before the more general "book"
    if (lower.contains("book section")) {
      return "inbook";
    }
    if (lower.contains("book")) {
      return "book";
    }
    if (lower.contains("conference") || lower.contains("proceedings")) {
      return "inproceedings";
    }
    if (lower.contains("report")) {
      return "techreport";
    }
    if (lower.contains("thesis")) {
      return lower.contains("master") ? "mastersthesis" : "phdthesis";
    }
    return "misc";
  }
  /**
   * Parses the entries in the source and returns them as a list of {@code BibEntry} objects.
   *
   * <p>The source is first flattened into one string: a line starting with {@code Record} opens
   * a new record, every other line of at least two characters is kept as one field of the
   * current record. Each field is then split into a two-character tag (characters 0-1) and its
   * content (everything from character 5 on). Fields too short to carry any content are
   * skipped. Text before the first {@code Record} line is ignored.
   *
   * @param stream the source to read; it is closed when reading has finished
   * @param status not used by this method
   * @return the parsed entries, possibly empty
   * @throws IOException if reading from the source fails
   */
  @Override
  public List<BibEntry> importEntries(InputStream stream, OutputPrinter status) throws IOException {
    ArrayList<BibEntry> bibitems = new ArrayList<>();
    StringBuilder sb = new StringBuilder();
    String str;
    try (BufferedReader in =
        new BufferedReader(ImportFormatReader.getReaderDefaultEncoding(stream))) {
      while ((str = in.readLine()) != null) {
        if (str.length() < 2) {
          continue;
        }
        // "__::__" separates records, "__NEWFIELD__" separates the lines within a record
        sb.append(str.startsWith("Record") ? "__::__" : "__NEWFIELD__").append(str);
      }
    }

    HashMap<String, String> h = new HashMap<>();
    for (String entry : sb.toString().split("__::__")) {
      if (!entry.startsWith("Record")) {
        continue;
      }
      h.clear();
      // Reset per record, so that a record without an RT field does not inherit the type of the
      // record before it.
      String type = "";

      for (String s : entry.split("__NEWFIELD__")) {
        // Tag (2 characters) plus separator (3 characters): anything shorter has no content,
        // and substring(5) would throw for it.
        if (s.length() < 5) {
          continue;
        }
        String f3 = s.substring(0, 2);
        String frest = s.substring(5);
        switch (f3) {
          case "TI":
            h.put("title", frest);
            break;
          case "PY":
            h.put("year", frest);
            break;
          case "AU":
            h.put(
                "author",
                AuthorList.fixAuthor_lastNameFirst(
                    frest.replace(",-", ", ").replace(";", " and ")));
            break;
          case "AB":
            h.put("abstract", frest);
            break;
          case "ID":
            h.put("keywords", frest);
            break;
          case "SO":
            {
              // journal: everything before the first '.', with hyphens turned into spaces
              int m = frest.indexOf('.');
              if (m >= 0) {
                h.put("journal", frest.substring(0, m).replace("-", " "));
                frest = frest.substring(m);
                // year: the five characters in front of the first ';'
                m = frest.indexOf(';');
                if (m >= 5) {
                  h.put("year", frest.substring(m - 5, m));
                  frest = frest.substring(m);
                  // after the ';': volume up to the ':', pages behind it
                  m = frest.indexOf(':');
                  if (m >= 0) {
                    h.put("pages", frest.substring(m + 1).trim());
                    h.put("volume", frest.substring(1, m));
                  }
                }
              }
              break;
            }
          case "RT":
            frest = frest.trim();
            if ("Journal-Paper".equals(frest)) {
              type = "article";
            } else if ("Conference-Paper".equals(frest)
                || "Conference-Paper; Journal-Paper".equals(frest)) {
              type = "inproceedings";
            } else {
              type = frest.replace(" ", "");
            }
            break;
          default:
            // other tags, including the "Record ..." header line itself, are not imported
            break;
        }
      }

      // id assumes an existing database so don't create one here
      BibEntry b = new BibEntry(DEFAULT_BIBTEXENTRY_ID, EntryTypes.getTypeOrDefault(type));
      b.setField(h);

      bibitems.add(b);
    }

    return bibitems;
  }
Example #5
0
  /**
   * Parses the entries in the source and returns them as a list of {@code BibEntry} objects.
   *
   * <p>Parsing is done in two passes. The first pass flattens the source into one string:
   * a line starting with {@code "PT "} opens a new entry (marker {@code "::"}), a line whose
   * first three characters trim to a two-character tag opens a new field (marker
   * {@code " ## "}), and every other line is a continuation of the current field (marker
   * {@code "EOLEOL"}). Lines shorter than three characters are dropped. The second pass splits
   * that string again and maps each tag to a BibTeX field.
   *
   * <p>Entries that end up without any field are skipped, and fields with blank content are
   * removed before the entry is created.
   *
   * @param stream the source to read; must not be {@code null}
   * @param status not used by this method
   * @return the parsed entries, possibly empty
   * @throws IOException if {@code stream} is {@code null} or reading from it fails
   */
  @Override
  public List<BibEntry> importEntries(InputStream stream, OutputPrinter status) throws IOException {
    if (stream == null) {
      throw new IOException("No stream given.");
    }

    ArrayList<BibEntry> bibitems = new ArrayList<>();
    StringBuilder sb = new StringBuilder();

    // NOTE(review): the reader is not closed here; presumably the caller owns the stream.
    BufferedReader in = new BufferedReader(ImportFormatReader.getReaderDefaultEncoding(stream));

    String str;
    while ((str = in.readLine()) != null) {
      if (str.length() < 3) {
        continue;
      }

      if (str.startsWith("PT ")) {
        // beginning of a new item
        sb.append("::").append(str);
      } else if (str.substring(0, 3).trim().length() == 2) {
        // a two-character tag: mark the beginning of a field
        sb.append(" ## ").append(str);
      } else {
        // continuation line: mark the line break and remove the surrounding spaces
        sb.append("EOLEOL").append(str.trim());
      }
    }

    String[] entries = sb.toString().split("::");

    HashMap<String, String> hm = new HashMap<>();

    // Nothing is skipped explicitly: a leading chunk that yields no fields (it is empty, or only
    // holds ignored header tags such as FN/VR) is dropped by the hm.isEmpty() check below.
    for (String entry : entries) {
      String[] fields = entry.split(" ## ");

      if (fields.length == 0) {
        fields = entry.split("\n");
      }

      String type = "";
      String typeFromPT = "";
      String pages = "";
      hm.clear();

      for (String field : fields) {
        // empty field: nothing to do
        if (field.length() <= 2) {
          continue;
        }

        String beg = field.substring(0, 2);
        String value = field.substring(3);
        if (value.startsWith(" - ")) {
          value = value.substring(3);
        }
        value = value.trim();

        switch (beg) {
          case "PT":
            typeFromPT = value.startsWith("J") ? "article" : value;
            type = "article"; // make all of them PT?
            break;
          case "TY":
            if ("JOUR".equals(value)) {
              type = "article";
            } else if ("CONF".equals(value)) {
              type = "inproceedings";
            }
            break;
          case "JO":
            hm.put("booktitle", value);
            break;
          case "AU":
            {
              // one author per line: line breaks become " and "
              String author = IsiImporter.isiAuthorsConvert(value.replace("EOLEOL", " and "));

              // if there is already someone there then append with "and"
              if (hm.get("author") != null) {
                author = hm.get("author") + " and " + author;
              }

              hm.put("author", author);
              break;
            }
          case "TI":
            hm.put("title", value.replace("EOLEOL", " "));
            break;
          case "SO":
          case "JA":
            hm.put("journal", value.replace("EOLEOL", " "));
            break;
          case "ID":
          case "KW":
            {
              value = value.replace("EOLEOL", " ");
              String existingKeywords = hm.get("keywords");
              String merged;
              if (existingKeywords == null) {
                merged = value;
              } else if (existingKeywords.contains(value)) {
                // already covered: keep what is there instead of replacing it with the
                // (possibly shorter) new value, which would lose keywords
                merged = existingKeywords;
              } else {
                merged = existingKeywords + ", " + value;
              }
              hm.put("keywords", merged);
              break;
            }
          case "AB":
            hm.put("abstract", value.replace("EOLEOL", " "));
            break;
          case "BP":
          case "BR":
          case "SP":
            pages = value;
            break;
          case "EP":
            {
              int detpos = value.indexOf(' ');

              // tweak for IEEE Explore: keep only the part before the first space
              if ((detpos != -1) && !value.substring(0, detpos).trim().isEmpty()) {
                value = value.substring(0, detpos);
              }

              pages = pages + "--" + value;
              break;
            }
          case "PS":
            pages = IsiImporter.parsePages(value);
            break;
          case "AR":
            pages = value;
            break;
          case "IS":
            hm.put("number", value);
            break;
          case "PY":
            hm.put("year", value);
            break;
          case "VL":
            hm.put("volume", value);
            break;
          case "PU":
            hm.put("publisher", value);
            break;
          case "DI":
            hm.put("doi", value);
            break;
          case "PD":
            {
              String month = IsiImporter.parseMonth(value);
              if (month != null) {
                hm.put("month", month);
              }
              break;
            }
          case "DT":
            if ("Review".equals(value)) {
              type = "article"; // set "Review" in Note/Comment?
            } else if (value.startsWith("Article")
                || value.startsWith("Journal")
                || "article".equals(typeFromPT)) {
              type = "article";
            } else {
              type = "misc";
            }
            break;
          case "CR":
            hm.put("CitedReferences", value.replace("EOLEOL", " ; ").trim());
            break;
          case "ER":
          case "EF":
          case "VR":
          case "FN":
            // these tags are not imported
            break;
          default:
            // Preserve all other entries under their lower-cased tag; Locale.ROOT keeps the
            // field name independent of the default locale.
            hm.put(beg.toLowerCase(java.util.Locale.ROOT), value);
            break;
        }
      }

      if (!"".equals(pages)) {
        hm.put("pages", pages);
      }

      // Skip empty entries
      if (hm.isEmpty()) {
        continue;
      }

      // id assumes an existing database so don't create one here
      BibEntry b = new BibEntry(DEFAULT_BIBTEXENTRY_ID, EntryTypes.getTypeOrDefault(type));

      // Remove empty fields (collected first, because the map cannot be modified while iterating)
      List<String> toRemove = new ArrayList<>();
      for (Map.Entry<String, String> field : hm.entrySet()) {
        String content = field.getValue();
        if ((content == null) || content.trim().isEmpty()) {
          toRemove.add(field.getKey());
        }
      }
      for (String key : toRemove) {
        hm.remove(key);
      }

      // Polish entries
      IsiImporter.processSubSup(hm);
      IsiImporter.processCapitalization(hm);

      b.setField(hm);

      bibitems.add(b);
    }

    return bibitems;
  }