/** Check whether the source is in the correct format for this importer. */ @Override public boolean isRecognizedFormat(InputStream stream) throws IOException { // Our strategy is to look for the "PY <year>" line. BufferedReader in = new BufferedReader(ImportFormatReader.getReaderDefaultEncoding(stream)); // Pattern pat1 = Pattern.compile("PY: \\d{4}"); Pattern pat1 = Pattern.compile("Record.*INSPEC.*"); // was PY \\\\d{4}? before String str; while ((str = in.readLine()) != null) { // Inspec and IEEE seem to have these strange " - " between key and value // str = str.replace(" - ", ""); // System.out.println(str); if (pat1.matcher(str).find()) { return true; } } return false; }
/**
 * Check whether the source is in the correct format for this importer.
 *
 * <p>Only the first 50 lines are inspected for the ISI marker pattern.
 *
 * <p>Do NOT strip {@code " - "} from the lines before matching: the hyphen is a
 * characteristic of the RIS format, and removing it would give false positives
 * for RIS files.
 */
@Override
public boolean isRecognizedFormat(InputStream stream) throws IOException {
    BufferedReader reader =
            new BufferedReader(ImportFormatReader.getReaderDefaultEncoding(stream));
    String line;
    int linesChecked = 0;
    while (((line = reader.readLine()) != null) && (linesChecked < 50)) {
        if (IsiImporter.ISI_PATTERN.matcher(line).find()) {
            return true;
        }
        linesChecked++;
    }
    return false;
}
@Override public void endElement(String uri, String localName, String qName) { if ("PubmedArticle".equals(localName)) { // bibitems.add( new Bibitem(null, makeBibtexString(), Globals.nextKey(),"-1" ) ); // check if year ="" then give medline date instead if ("".equals(year)) { if (!"".equals(MedlineDate)) { // multi-year date format // System.out.println(MedlineDate); year = MedlineDate.substring(0, 4); // Matcher m = Pattern.compile("\\b[0-9]{4}\\b").matcher(MedlineDate); // if(m.matches()) // year = m.group(); } } // Build a string from the collected keywords: StringBuilder sb = new StringBuilder(); for (Iterator<String> iterator = descriptors.iterator(); iterator.hasNext(); ) { String s = iterator.next(); sb.append(s); if (iterator.hasNext()) { sb.append(MedlineHandler.KEYWORD_SEPARATOR); } } String keywords = sb.toString(); BibEntry b = new BibEntry( IdGenerator.next(), // Globals.DEFAULT_BIBTEXENTRY_ID, EntryTypes.getTypeOrDefault( "article")); // id assumes an existing database so don't create one here if (!"".equals(author)) { b.setField( "author", MedlineHandler.htmlConverter.formatUnicode( ImportFormatReader.expandAuthorInitials(author))); // b.setField("author",Util.replaceSpecialCharacters(ImportFormatReader.expandAuthorInitials(author))); author = ""; } if (!"".equals(title)) { b.setField("title", MedlineHandler.htmlConverter.formatUnicode(title)); } // if (!title.equals("")) b.setField("title",Util.replaceSpecialCharacters(title)); if (!"".equals(journal)) { b.setField("journal", journal); } if (!"".equals(year)) { b.setField("year", year); } // PENDING [email protected] 2005-05-27 : added call to fixPageRange if (!"".equals(page)) { b.setField("pages", fixPageRange(page)); } if (!"".equals(volume)) { b.setField("volume", volume); } if (!"".equals(language)) { b.setField("language", language); } if (!"".equals(pst)) { b.setField("medline-pst", pst); } if (!"".equals(abstractText)) { b.setField("abstract", abstractText.replaceAll("%", "\\\\%")); } if 
(!"".equals(keywords)) { b.setField("keywords", keywords); } if (!"".equals(month)) { b.setField("month", month); } // if (!url.equals("")) b.setField("url",url); if (!"".equals(number)) { b.setField("number", number); } if (!"".equals(doi)) { b.setField("doi", doi); b.setField("url", "http://dx.doi.org/" + doi); } if (!"".equals(pii)) { b.setField("pii", pii); } if (!"".equals(pmc)) { b.setField("pmc", pmc); } if (!"".equals(affiliation)) { b.setField("institution", affiliation.replaceAll("#", "\\\\#")); } // PENDING [email protected] 2005-05-27 : added "pmid" bibtex field // Older references do not have doi entries, but every // medline entry has a unique pubmed ID (aka primary ID). // Add a bibtex field for the pubmed ID for future use. if (!"".equals(pubmedid)) { b.setField("pmid", pubmedid); } bibitems.add(b); abstractText = ""; author = ""; title = ""; journal = ""; keywords = ""; doi = ""; pii = ""; pmc = ""; year = ""; forename = ""; lastName = ""; suffix = ""; abstractText = ""; affiliation = ""; pubmedid = ""; majorTopic = ""; minorTopics = ""; month = ""; volume = ""; language = ""; pst = ""; lastname = ""; suffix = ""; initials = ""; number = ""; page = ""; String medlineID = ""; String url = ""; MedlineDate = ""; descriptors.clear(); } else if ("ArticleTitle".equals(localName)) { inTitle = false; } else if ("PubDate".equals(localName)) { inPubDate = false; } else if ("Year".equals(localName)) { inYear = false; } else if ("PMID".equals(localName)) { inPubMedID = false; } else if ("MedlineDate".equals(localName)) { inMedlineDate = false; } else if ("MedlineTA".equals(localName)) { inJournal = false; } // journal name else if ("Month".equals(localName)) { inMonth = false; } else if ("Volume".equals(localName)) { inVolume = false; } else if ("Language".equals(localName)) { inLanguage = false; } else if ("PublicationStatus".equals(localName)) { inPst = false; } else if ("AuthorList".equals(localName)) { author = join(authors.toArray(), " and "); 
inAuthorList = false; } else if ("Author".equals(localName)) { // forename sometimes has initials with " " in middle: is pattern [A-Z] [A-Z] // when above is the case replace it with initials if ((forename.length() == 3) && (forename.charAt(1) == ' ')) { forename = initials; } // Put together name with last name first, and enter suffix in between if present: if (lastname.indexOf(" ") > 0) { author = "{" + lastname + "}"; } else { author = lastname; } if (!suffix.isEmpty()) { author = author + ", " + suffix; } if (!forename.isEmpty()) { author = author + ", " + forename; } // author = initials + " " + lastname; authors.add(author); inAuthor = false; forename = ""; initials = ""; lastname = ""; suffix = ""; } else if ("DescriptorName".equals(localName)) { inDescriptorName = false; } else if ("QualifierName".equals(localName)) { inQualifierName = false; } else if ("MeshHeading".equals(localName)) { inMeshHeader = false; if ("".equals(minorTopics)) { descriptors.add(majorTopic); } else { descriptors.add(majorTopic + ", " + minorTopics); } } else if ("LastName".equals(localName)) { inLastName = false; } else if ("Suffix".equals(localName)) { inSuffix = false; } else if ("ForeName".equals(localName) || "FirstName".equals(localName)) { inForename = false; } else if ("Issue".equals(localName)) { inIssue = false; } else if ("MedlinePgn".equals(localName)) { inMedlinePgn = false; } // pagenumber else if ("URL".equals(localName)) { inUrl = false; } else if ("Initials".equals(localName)) { // initials= '.' + initials + '.'; inInitials = false; } else if ("AbstractText".equals(localName)) { inAbstractText = false; } else if ("Affiliation".equals(localName)) { inAffiliation = false; } else if ("ArticleId".equals(localName)) { if (inDoi) { inDoi = false; } else if (inPii) { inPii = false; } else if (inPmc) { inPmc = false; } } }
/** Parse the entries in the source, and return a List of BibtexEntry objects. */ @Override public List<BibtexEntry> importEntries(InputStream stream, OutputPrinter status) throws IOException { ArrayList<BibtexEntry> bibItems = new ArrayList<BibtexEntry>(); BufferedReader in = new BufferedReader(ImportFormatReader.getReaderDefaultEncoding(stream)); String line; HashMap<String, String> hm = new HashMap<String, String>(); HashMap<String, StringBuffer> lines = new HashMap<String, StringBuffer>(); StringBuffer previousLine = null; while ((line = in.readLine()) != null) { if (line.isEmpty()) { continue; // ignore empty lines, e.g. at file } // end // entry delimiter -> item complete if (line.equals("------")) { String[] type = new String[2]; String[] pages = new String[2]; String country = null; String address = null; String titleST = null; String titleTI = null; Vector<String> comments = new Vector<String>(); // add item for (Map.Entry<String, StringBuffer> entry : lines.entrySet()) { if (entry.getKey().equals("AU")) { hm.put("author", entry.getValue().toString()); } else if (entry.getKey().equals("TI")) { titleTI = entry.getValue().toString(); } else if (entry.getKey().equals("ST")) { titleST = entry.getValue().toString(); } else if (entry.getKey().equals("YP")) { hm.put("year", entry.getValue().toString()); } else if (entry.getKey().equals("VL")) { hm.put("volume", entry.getValue().toString()); } else if (entry.getKey().equals("NB")) { hm.put("number", entry.getValue().toString()); } else if (entry.getKey().equals("PS")) { pages[0] = entry.getValue().toString(); } else if (entry.getKey().equals("PE")) { pages[1] = entry.getValue().toString(); } else if (entry.getKey().equals("KW")) { hm.put("keywords", entry.getValue().toString()); } else if (entry.getKey().equals("RT")) { type[0] = entry.getValue().toString(); } else if (entry.getKey().equals("SB")) { comments.add("Subject: " + entry.getValue()); } else if (entry.getKey().equals("SA")) { comments.add("Secondary 
Authors: " + entry.getValue()); } else if (entry.getKey().equals("NT")) { hm.put("note", entry.getValue().toString()); } else if (entry.getKey().equals("PB")) { hm.put("publisher", entry.getValue().toString()); } else if (entry.getKey().equals("TA")) { comments.add("Tertiary Authors: " + entry.getValue()); } else if (entry.getKey().equals("TT")) { comments.add("Tertiary Title: " + entry.getValue()); } else if (entry.getKey().equals("ED")) { hm.put("edition", entry.getValue().toString()); } else if (entry.getKey().equals("TW")) { type[1] = entry.getValue().toString(); } else if (entry.getKey().equals("QA")) { comments.add("Quaternary Authors: " + entry.getValue()); } else if (entry.getKey().equals("QT")) { comments.add("Quaternary Title: " + entry.getValue()); } else if (entry.getKey().equals("IS")) { hm.put("isbn", entry.getValue().toString()); } else if (entry.getKey().equals("AB")) { hm.put("abstract", entry.getValue().toString()); } else if (entry.getKey().equals("AD")) { address = entry.getValue().toString(); } else if (entry.getKey().equals("LG")) { hm.put("language", entry.getValue().toString()); } else if (entry.getKey().equals("CO")) { country = entry.getValue().toString(); } else if (entry.getKey().equals("UR") || entry.getKey().equals("AT")) { String s = entry.getValue().toString().trim(); hm.put( s.startsWith("http://") || s.startsWith("ftp://") ? 
"url" : "pdf", entry.getValue().toString()); } else if (entry.getKey().equals("C1")) { comments.add("Custom1: " + entry.getValue()); } else if (entry.getKey().equals("C2")) { comments.add("Custom2: " + entry.getValue()); } else if (entry.getKey().equals("C3")) { comments.add("Custom3: " + entry.getValue()); } else if (entry.getKey().equals("C4")) { comments.add("Custom4: " + entry.getValue()); } else if (entry.getKey().equals("C5")) { comments.add("Custom5: " + entry.getValue()); } else if (entry.getKey().equals("C6")) { comments.add("Custom6: " + entry.getValue()); } else if (entry.getKey().equals("DE")) { hm.put("annote", entry.getValue().toString()); } else if (entry.getKey().equals("CA")) { comments.add("Categories: " + entry.getValue()); } else if (entry.getKey().equals("TH")) { comments.add("Short Title: " + entry.getValue()); } else if (entry.getKey().equals("SE")) { hm.put("chapter", entry.getValue().toString()); // else if (entry.getKey().equals("AC")) // hm.put("",entry.getValue().toString()); // else if (entry.getKey().equals("LP")) // hm.put("",entry.getValue().toString()); } } String bibtexType = "misc"; // to find type, first check TW, then RT for (int i = 1; i >= 0 && bibtexType.equals("misc"); --i) { if (type[i] == null) { continue; } type[i] = type[i].toLowerCase(); if (type[i].contains("article")) { bibtexType = "article"; } else if (type[i].contains("journal")) { bibtexType = "article"; } else if (type[i].contains("book section")) { bibtexType = "inbook"; } else if (type[i].contains("book")) { bibtexType = "book"; } else if (type[i].contains("conference")) { bibtexType = "inproceedings"; } else if (type[i].contains("proceedings")) { bibtexType = "inproceedings"; } else if (type[i].contains("report")) { bibtexType = "techreport"; } else if (type[i].contains("thesis") && type[i].contains("master")) { bibtexType = "mastersthesis"; } else if (type[i].contains("thesis")) { bibtexType = "phdthesis"; } } // depending on bibtexType, decide where to place 
the titleRT and // titleTI if (bibtexType.equals("article")) { if (titleST != null) { hm.put("journal", titleST); } if (titleTI != null) { hm.put("title", titleTI); } } else if (bibtexType.equals("inbook")) { if (titleST != null) { hm.put("booktitle", titleST); } if (titleTI != null) { hm.put("title", titleTI); } } else { if (titleST != null) { hm.put("booktitle", titleST); // should not } // happen, I // think if (titleTI != null) { hm.put("title", titleTI); } } // concatenate pages if (pages[0] != null || pages[1] != null) { hm.put( "pages", (pages[0] != null ? pages[0] : "") + (pages[1] != null ? "--" + pages[1] : "")); } // concatenate address and country if (address != null) { hm.put("address", address + (country != null ? ", " + country : "")); } if (!comments.isEmpty()) { // set comment if present StringBuilder s = new StringBuilder(); for (int i = 0; i < comments.size(); ++i) { s.append(i > 0 ? "; " : "").append(comments.elementAt(i)); } hm.put("comment", s.toString()); } BibtexEntry b = new BibtexEntry(DEFAULT_BIBTEXENTRY_ID, BibtexEntryTypes.getEntryType(bibtexType)); b.setField(hm); bibItems.add(b); hm.clear(); lines.clear(); previousLine = null; continue; } // new key if (line.startsWith("--") && line.length() >= 7 && line.substring(4, 7).equals("-- ")) { lines.put(line.substring(2, 4), previousLine = new StringBuffer(line.substring(7))); continue; } // continuation (folding) of previous line if (previousLine == null) { return null; } previousLine.append(line.trim()); } return bibItems; }
/**
 * Parse the entries in the source, and return a List of BibEntry objects.
 *
 * <p>The stream is first flattened into a single string in which each record is
 * prefixed with {@code __::__} (lines starting with "Record") and each field line
 * with {@code __NEWFIELD__}; the string is then split back into records and fields.
 *
 * <p>Bug fix: the entry type is now reset for every record. Previously it was
 * declared outside the record loop, so a record without an RT field silently
 * inherited the type of the preceding record.
 *
 * @param stream source to read from (default encoding is detected)
 * @param status printer for user feedback (unused here)
 * @return the parsed entries (possibly empty, never null)
 * @throws IOException if reading from the stream fails
 */
@Override
public List<BibEntry> importEntries(InputStream stream, OutputPrinter status)
        throws IOException {
    ArrayList<BibEntry> bibitems = new ArrayList<>();
    StringBuilder sb = new StringBuilder();
    String str;
    try (BufferedReader in =
            new BufferedReader(ImportFormatReader.getReaderDefaultEncoding(stream))) {
        while ((str = in.readLine()) != null) {
            if (str.length() < 2) {
                continue;
            }
            if (str.indexOf("Record") == 0) {
                sb.append("__::__").append(str);
            } else {
                sb.append("__NEWFIELD__").append(str);
            }
        }
    }
    String[] entries = sb.toString().split("__::__");
    HashMap<String, String> h = new HashMap<>();
    for (String entry : entries) {
        if (entry.indexOf("Record") != 0) {
            continue;
        }
        h.clear();
        // Reset per record (see Javadoc: previously leaked across records).
        String type = "";
        String[] fields = entry.split("__NEWFIELD__");
        for (String s : fields) {
            // Field lines look like "XX:  value": two-letter code, then content.
            String code = s.substring(0, 2);
            String content = s.substring(5);
            if ("TI".equals(code)) {
                h.put("title", content);
            } else if ("PY".equals(code)) {
                h.put("year", content);
            } else if ("AU".equals(code)) {
                h.put(
                        "author",
                        AuthorList.fixAuthor_lastNameFirst(
                                content.replace(",-", ", ").replace(";", " and ")));
            } else if ("AB".equals(code)) {
                h.put("abstract", content);
            } else if ("ID".equals(code)) {
                h.put("keywords", content);
            } else if ("SO".equals(code)) {
                // Source string: "Journal-Name. ...<year>;<volume>:<pages>".
                int m = content.indexOf('.');
                if (m >= 0) {
                    String jr = content.substring(0, m);
                    h.put("journal", jr.replace("-", " "));
                    content = content.substring(m);
                    m = content.indexOf(';');
                    if (m >= 5) {
                        // The four-digit year immediately precedes ';'.
                        String yr = content.substring(m - 5, m);
                        h.put("year", yr);
                        content = content.substring(m);
                        m = content.indexOf(':');
                        if (m >= 0) {
                            String pg = content.substring(m + 1).trim();
                            h.put("pages", pg);
                            h.put("volume", content.substring(1, m));
                        }
                    }
                }
            } else if ("RT".equals(code)) {
                content = content.trim();
                if ("Journal-Paper".equals(content)) {
                    type = "article";
                } else if ("Conference-Paper".equals(content)
                        || "Conference-Paper; Journal-Paper".equals(content)) {
                    type = "inproceedings";
                } else {
                    type = content.replace(" ", "");
                }
            }
        }
        // id assumes an existing database so don't create one here
        BibEntry b = new BibEntry(DEFAULT_BIBTEXENTRY_ID, EntryTypes.getTypeOrDefault(type));
        b.setField(h);
        bibitems.add(b);
    }
    return bibitems;
}
/** Parse the entries in the source, and return a List of BibEntry objects. */ @Override public List<BibEntry> importEntries(InputStream stream, OutputPrinter status) throws IOException { if (stream == null) { throw new IOException("No stream given."); } ArrayList<BibEntry> bibitems = new ArrayList<>(); StringBuilder sb = new StringBuilder(); BufferedReader in = new BufferedReader(ImportFormatReader.getReaderDefaultEncoding(stream)); // Pattern fieldPattern = Pattern.compile("^AU |^TI |^SO |^DT |^C1 |^AB // |^ID |^BP |^PY |^SE |^PY |^VL |^IS "); String str; while ((str = in.readLine()) != null) { if (str.length() < 3) { continue; } // begining of a new item if ("PT ".equals(str.substring(0, 3))) { sb.append("::").append(str); } else { String beg = str.substring(0, 3).trim(); // I could have used the fieldPattern regular expression instead // however this seems to be // quick and dirty and it works! if (beg.length() == 2) { sb.append(" ## "); // mark the begining of each field sb.append(str); } else { sb.append("EOLEOL"); // mark the end of each line sb.append(str.trim()); // remove the initial spaces } } } String[] entries = sb.toString().split("::"); HashMap<String, String> hm = new HashMap<>(); // skip the first entry as it is either empty or has document header for (String entry : entries) { String[] fields = entry.split(" ## "); if (fields.length == 0) { fields = entry.split("\n"); } String Type = ""; String PT = ""; String pages = ""; hm.clear(); for (String field : fields) { // empty field don't do anything if (field.length() <= 2) { continue; } String beg = field.substring(0, 2); String value = field.substring(3); if (value.startsWith(" - ")) { value = value.substring(3); } value = value.trim(); if ("PT".equals(beg)) { if (value.startsWith("J")) { PT = "article"; } else { PT = value; } Type = "article"; // make all of them PT? 
} else if ("TY".equals(beg)) { if ("JOUR".equals(value)) { Type = "article"; } else if ("CONF".equals(value)) { Type = "inproceedings"; } } else if ("JO".equals(beg)) { hm.put("booktitle", value); } else if ("AU".equals(beg)) { String author = IsiImporter.isiAuthorsConvert(value.replaceAll("EOLEOL", " and ")); // if there is already someone there then append with "and" if (hm.get("author") != null) { author = hm.get("author") + " and " + author; } hm.put("author", author); } else if ("TI".equals(beg)) { hm.put("title", value.replaceAll("EOLEOL", " ")); } else if ("SO".equals(beg) || "JA".equals(beg)) { hm.put("journal", value.replaceAll("EOLEOL", " ")); } else if ("ID".equals(beg) || "KW".equals(beg)) { value = value.replaceAll("EOLEOL", " "); String existingKeywords = hm.get("keywords"); if ((existingKeywords == null) || existingKeywords.contains(value)) { existingKeywords = value; } else { existingKeywords += ", " + value; } hm.put("keywords", existingKeywords); } else if ("AB".equals(beg)) { hm.put("abstract", value.replaceAll("EOLEOL", " ")); } else if ("BP".equals(beg) || "BR".equals(beg) || "SP".equals(beg)) { pages = value; } else if ("EP".equals(beg)) { int detpos = value.indexOf(' '); // tweak for IEEE Explore if ((detpos != -1) && !value.substring(0, detpos).trim().isEmpty()) { value = value.substring(0, detpos); } pages = pages + "--" + value; } else if ("PS".equals(beg)) { pages = IsiImporter.parsePages(value); } else if ("AR".equals(beg)) { pages = value; } else if ("IS".equals(beg)) { hm.put("number", value); } else if ("PY".equals(beg)) { hm.put("year", value); } else if ("VL".equals(beg)) { hm.put("volume", value); } else if ("PU".equals(beg)) { hm.put("publisher", value); } else if ("DI".equals(beg)) { hm.put("doi", value); } else if ("PD".equals(beg)) { String month = IsiImporter.parseMonth(value); if (month != null) { hm.put("month", month); } } else if ("DT".equals(beg)) { Type = value; if ("Review".equals(Type)) { Type = "article"; // set 
"Review" in Note/Comment? } else if (Type.startsWith("Article") || Type.startsWith("Journal") || "article".equals(PT)) { Type = "article"; } else { Type = "misc"; } } else if ("CR".equals(beg)) { hm.put("CitedReferences", value.replaceAll("EOLEOL", " ; ").trim()); } else { // Preserve all other entries except if ("ER".equals(beg) || "EF".equals(beg) || "VR".equals(beg) || "FN".equals(beg)) { continue; } hm.put(beg.toLowerCase(), value); } } if (!"".equals(pages)) { hm.put("pages", pages); } // Skip empty entries if (hm.isEmpty()) { continue; } BibEntry b = new BibEntry(DEFAULT_BIBTEXENTRY_ID, EntryTypes.getTypeOrDefault(Type)); // id assumes an existing database so don't // Remove empty fields: List<Object> toRemove = new ArrayList<>(); for (Map.Entry<String, String> field : hm.entrySet()) { String content = field.getValue(); if ((content == null) || content.trim().isEmpty()) { toRemove.add(field.getKey()); } } for (Object aToRemove : toRemove) { hm.remove(aToRemove); } // Polish entries IsiImporter.processSubSup(hm); IsiImporter.processCapitalization(hm); b.setField(hm); bibitems.add(b); } return bibitems; }