private void parseField(BibtexEntry entry) throws IOException {
    String key = parseTextToken().toLowerCase();
    // Util.pr("Field: _"+key+"_");
    skipWhitespace();
    consume('=');
    String content = parseFieldContent(key);
    // Now, if the field in question is set up to be fitted automatically with
    // braces around capitals, we should remove those now when reading the field:
    if (Globals.prefs.putBracesAroundCapitals(key)) {
        content = Util.removeBracesAroundCapitals(content);
    }
    if (content.length() > 0) {
        if (entry.getField(key) == null)
            entry.setField(key, content);
        else {
            // The following hack enables the parser to deal with multiple author or
            // editor lines, stringing them together instead of getting just one of them.
            // Multiple author or editor lines are not allowed by the bibtex format, but
            // at least one online database exports bibtex like that, making it
            // inconvenient for users if JabRef didn't accept it.
            if (key.equals("author") || key.equals("editor"))
                entry.setField(key, entry.getField(key) + " and " + content);
        }
    }
}
public BibtexEntry makeBibtexEntry() {
    BibtexEntry e = new BibtexEntry(IdGenerator.next(), BibtexEntryTypes.INCOLLECTION);
    e.setField("title", "Marine finfish larviculture in Europe");
    e.setField("bibtexkey", "shields01");
    e.setField("year", "2001");
    e.setField("author", "Kevin Shields");
    return e;
}
public BibtexEntry parseEntry(BibtexEntryType tp) throws IOException {
    String id = Util.createNeutralId(); // createId(tp, _db);
    BibtexEntry result = new BibtexEntry(id, tp);
    skipWhitespace();
    consume('{', '(');
    int c = peek();
    if ((c != '\n') && (c != '\r'))
        skipWhitespace();
    String key = null;
    boolean doAgain = true;
    while (doAgain) {
        doAgain = false;
        try {
            if (key != null)
                key = key + parseKey(); // parseTextToken(),
            else
                key = parseKey();
        } catch (NoLabelException ex) {
            // This exception will be thrown if the entry lacks a key
            // altogether, like in "@article{ author = { ...".
            // It will also be thrown if a key contains =.
            c = (char) peek();
            if (Character.isWhitespace(c) || (c == '{') || (c == '\"')) {
                String fieldName = ex.getMessage().trim().toLowerCase();
                String cont = parseFieldContent(fieldName);
                result.setField(fieldName, cont);
            } else {
                if (key != null)
                    key = key + ex.getMessage() + "=";
                else
                    key = ex.getMessage() + "=";
                doAgain = true;
            }
        }
    }

    if ((key != null) && key.equals(""))
        key = null;

    result.setField(BibtexFields.KEY_FIELD, key);
    skipWhitespace();

    while (true) {
        c = peek();
        if ((c == '}') || (c == ')')) {
            break;
        }
        if (c == ',')
            consume(',');
        skipWhitespace();
        c = peek();
        if ((c == '}') || (c == ')')) {
            break;
        }
        parseField(result);
    }

    consume('}', ')');
    return result;
}
/**
 * Removes matches of the search expression in the entry's field. This is only
 * possible if the search expression is not a regExp.
 */
private void removeMatches(BibtexEntry entry) {
    String content = entry.getField(searchField);
    if (content == null) {
        return; // nothing to modify
    }
    StringBuffer sbOrig = new StringBuffer(content);
    StringBuffer sbLower = new StringBuffer(content.toLowerCase());
    StringBuffer haystack = caseSensitive ? sbOrig : sbLower;
    String needle = caseSensitive ? searchExpression : searchExpression.toLowerCase();
    int i;
    int j;
    int k;
    final String separator = Globals.prefs.get(JabRefPreferences.GROUP_KEYWORD_SEPARATOR);
    while ((i = haystack.indexOf(needle)) >= 0) {
        sbOrig.replace(i, i + needle.length(), "");
        sbLower.replace(i, i + needle.length(), "");
        // reduce the separator characters around i to a single separator
        j = i;
        k = i;
        while (j - 1 >= 0 && separator.indexOf(haystack.charAt(j - 1)) >= 0) {
            --j;
        }
        while (k < haystack.length() && separator.indexOf(haystack.charAt(k)) >= 0) {
            ++k;
        }
        sbOrig.replace(j, k, j >= 0 && k < sbOrig.length() ? separator : "");
        sbLower.replace(j, k, j >= 0 && k < sbOrig.length() ? separator : "");
    }
    String result = sbOrig.toString().trim();
    entry.setField(searchField, !result.isEmpty() ? result : null);
}
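// A rough, standalone illustration of the remove-and-collapse idea used in
// removeMatches above. Not JabRef code: the class name, the hard-coded ", "
// separator and the sample keyword list are made up for the example; JabRef
// reads the separator from its preferences instead.
class KeywordRemovalSketch {

    private static final String SEPARATOR = ", "; // assumed separator

    static String remove(String content, String needle) {
        StringBuilder sb = new StringBuilder(content);
        int i;
        while ((i = sb.indexOf(needle)) >= 0) {
            sb.replace(i, i + needle.length(), "");
            int j = i;
            int k = i;
            // widen [j, k) over any separator characters left around the removed match
            while (j - 1 >= 0 && SEPARATOR.indexOf(sb.charAt(j - 1)) >= 0) {
                --j;
            }
            while (k < sb.length() && SEPARATOR.indexOf(sb.charAt(k)) >= 0) {
                ++k;
            }
            // keep a single separator only when the match sat between two remaining items
            sb.replace(j, k, j > 0 && k < sb.length() ? SEPARATOR : "");
        }
        return sb.toString().trim();
    }

    public static void main(String[] args) {
        System.out.println(remove("fish, larviculture, europe", "larviculture")); // fish, europe
    }
}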
@Override
public AbstractUndoableEdit add(BibtexEntry[] entries) {
    if (!supportsAdd()) {
        return null;
    }
    if (entries != null && entries.length > 0) {
        NamedCompound ce = new NamedCompound(Globals.lang("add entries to group"));
        boolean modified = false;
        for (BibtexEntry entry : entries) {
            if (!getSearchRule().applyRule(SearchRule.NULL_QUERY, entry)) {
                String oldContent = entry.getField(searchField);
                String pre = Globals.prefs.get(JabRefPreferences.GROUP_KEYWORD_SEPARATOR);
                String newContent = (oldContent == null ? "" : oldContent + pre) + searchExpression;
                entry.setField(searchField, newContent);
                // Store undo information.
                ce.addEdit(new UndoableFieldChange(entry, searchField, oldContent, newContent));
                modified = true;
            }
        }
        if (modified) {
            ce.end();
        }
        return modified ? ce : null;
    }
    return null;
}
public void testEntryEditorForFieldAnotherAutoCompleter() {
    // construct an EntryEditor ...
    JabRef jabref = TestUtils.getInitializedJabRef();
    BibtexEntry bibtexEntry = new BibtexEntry();
    bibtexEntry.setField("journal", "Testtext");
    FieldEditor authorTextField = new FieldTextArea("journal", "New Testtext");
    EntryEditor editor = new EntryEditor(jabref.jrf, jabref.jrf.basePanel(), bibtexEntry);

    // perform action ...
    editor.storeFieldAction.actionPerformed(new ActionEvent(authorTextField, 0, ""));

    // test content of stored words in autocompleter ...
    AbstractAutoCompleter autoCompleter = jabref.jrf.basePanel().getAutoCompleter("journal");
    assertTrue(autoCompleter.indexContainsWord("New Testtext"));

    TestUtils.closeJabRef();
}
public static void removeDOIfromBibtexEntryField(BibtexEntry bes, String fieldName, NamedCompound ce) {
    String origValue = bes.getField(fieldName);
    String value = origValue;
    value = value.replaceAll(REGEXP_DOI_WITH_HTTP_PREFIX, "");
    value = value.replaceAll(REGEXP_PLAINDOI, "");
    value = value.trim();
    if (value.isEmpty()) {
        value = null;
    }
    if (!origValue.equals(value)) {
        ce.addEdit(new UndoableFieldChange(bes, fieldName, origValue, value));
        bes.setField(fieldName, value);
    }
}
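// The two regular-expression constants used above (REGEXP_DOI_WITH_HTTP_PREFIX,
// REGEXP_PLAINDOI) are not shown in this excerpt. As a rough, standalone
// illustration of the same strip-the-DOI-and-null-an-emptied-field idea, with an
// assumed, simplified DOI pattern and a made-up class name:
import java.util.regex.Pattern;

class DoiStripSketch {

    // Assumed patterns for illustration only; not JabRef's actual constants.
    private static final Pattern PLAIN_DOI = Pattern.compile("\\b10\\.\\d{4,9}/\\S+\\b");
    private static final Pattern DOI_WITH_HTTP_PREFIX =
            Pattern.compile("https?://(dx\\.)?doi\\.org/" + PLAIN_DOI.pattern());

    static String stripDoi(String value) {
        if (value == null) {
            return null;
        }
        String result = DOI_WITH_HTTP_PREFIX.matcher(value).replaceAll("");
        result = PLAIN_DOI.matcher(result).replaceAll("").trim();
        // a value consisting only of the DOI collapses to null, as in the method above
        return result.isEmpty() ? null : result;
    }

    public static void main(String[] args) {
        System.out.println(stripDoi("http://dx.doi.org/10.1000/182")); // null
        System.out.println(stripDoi("see also 10.1000/182"));          // see also
    }
}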
@Test
@Ignore
public void testInsertTestData() throws Exception {
    entry1 = new BibtexEntry();
    JabRefPreferences jabRefPreferences = JabRefPreferences.getInstance();
    ExternalFileType fileType = jabRefPreferences.getExternalFileTypeByExt("PDF");
    FileListEntry fileListEntry =
            new FileListEntry("", ImportDataTest.FILE_IN_DATABASE.getAbsolutePath(), fileType);

    FileListTableModel model = new FileListTableModel();
    model.addEntry(0, fileListEntry);

    entry1.setField("file", model.getStringRepresentation());

    database.insertEntry(entry1);

    // #################### SETUP END #####################

    UnlinkedFilesCrawler crawler = new UnlinkedFilesCrawler(database);
    CheckableTreeNode treeNode =
            crawler.searchDirectory(ImportDataTest.EXISTING_FOLDER, new EntryFromPDFCreator());

    Assert.assertNotNull(treeNode);

    /** Select all nodes manually. */
    @SuppressWarnings("unchecked")
    Enumeration<CheckableTreeNode> enumeration = treeNode.breadthFirstEnumeration();
    while (enumeration.hasMoreElements()) {
        CheckableTreeNode nextElement = enumeration.nextElement();
        nextElement.setSelected(true);
    }

    List<File> resultList = getFileListFromNode(treeNode);

    Assert.assertFalse(resultList.isEmpty());
    Assert.assertTrue(resultList.contains(ImportDataTest.FILE_NOT_IN_DATABASE));
    Assert.assertFalse(resultList.contains(ImportDataTest.FILE_IN_DATABASE));
}
/** Parse the entries in the source, and return a List of BibtexEntry objects. */
@Override
public List<BibtexEntry> importEntries(InputStream stream, OutputPrinter status) throws IOException {
    ArrayList<BibtexEntry> bibitems = new ArrayList<BibtexEntry>();
    StringBuilder sb = new StringBuilder();
    BufferedReader in = new BufferedReader(ImportFormatReader.getReaderDefaultEncoding(stream));
    String str;
    while ((str = in.readLine()) != null) {
        if (str.length() < 3) {
            continue;
        }
        // beginning of a new item
        if (str.substring(0, 6).equals("PMID- ")) {
            sb.append("::").append(str);
        } else {
            String beg = str.substring(0, 6);
            if (beg.indexOf(" ") > 0) {
                sb.append(" ## "); // mark the beginning of each field
                sb.append(str);
            } else {
                sb.append("EOLEOL"); // mark the end of each line
                sb.append(str.trim());
            }
        }
    }
    String[] entries = sb.toString().split("::");
    // skip the first entry as it is either empty or has document header
    HashMap<String, String> hm = new HashMap<String, String>();
    for (String entry : entries) {
        String[] fields = entry.split(" ## ");
        if (fields.length == 0) {
            fields = entry.split("\n");
        }
        String Type = "";
        String pages = "";
        String shortauthor = "";
        String fullauthor = "";
        hm.clear();

        for (String field : fields) {
            System.out.println(">>>" + field + "<<<");
            // empty field, don't do anything
            if (field.length() <= 2) {
                continue;
            }
            String beg = field.substring(0, 6);
            String value = field.substring(6);
            value = value.trim();
            if (beg.equals("PT  - ")) {
                // PT = value.replaceAll("JOURNAL ARTICLE", "article").replaceAll("Journal Article", "article");
                Type = "article"; // make all of them PT?
            } else if (beg.equals("TY  - ")) {
                if ("CONF".equals(value)) {
                    Type = "inproceedings";
                }
            } else if (beg.equals("JO  - ")) {
                hm.put("booktitle", value);
            } else if (beg.equals("FAU - ")) {
                String tmpauthor = value.replaceAll("EOLEOL", " and ");
                // if there is already someone there then append with "and"
                if (!"".equals(fullauthor)) {
                    fullauthor = fullauthor + " and " + tmpauthor;
                } else {
                    fullauthor = tmpauthor;
                }
            } else if (beg.equals("AU  - ")) {
                String tmpauthor = value.replaceAll("EOLEOL", " and ").replaceAll(" ", ", ");
                // if there is already someone there then append with "and"
                if (!"".equals(shortauthor)) {
                    shortauthor = shortauthor + " and " + tmpauthor;
                } else {
                    shortauthor = tmpauthor;
                }
            } else if (beg.equals("TI  - ")) {
                hm.put("title", value.replaceAll("EOLEOL", " "));
            } else if (beg.equals("TA  - ")) {
                hm.put("journal", value.replaceAll("EOLEOL", " "));
            } else if (beg.equals("AB  - ")) {
                hm.put("abstract", value.replaceAll("EOLEOL", " "));
            } else if (beg.equals("PG  - ")) {
                pages = value.replaceAll("-", "--");
            } else if (beg.equals("IP  - ")) {
                hm.put("number", value);
            } else if (beg.equals("DP  - ")) {
                String[] parts = value.split(" "); // sometimes this is just year, sometimes full date
                hm.put("year", parts[0]);
            } else if (beg.equals("VI  - ")) {
                hm.put("volume", value);
            } else if (beg.equals("AID - ")) {
                String[] parts = value.split(" ");
                if ("[doi]".equals(parts[1])) {
                    hm.put("doi", parts[0]);
                    hm.put("url", "http://dx.doi.org/" + parts[0]);
                }
            }
        }
        if (!"".equals(pages)) {
            hm.put("pages", pages);
        }
        if (!"".equals(fullauthor)) {
            hm.put("author", fullauthor);
        } else if (!"".equals(shortauthor)) {
            hm.put("author", shortauthor);
        }

        BibtexEntry b = new BibtexEntry(BibtexFields.DEFAULT_BIBTEXENTRY_ID,
                Globals.getEntryType(Type)); // id assumes an existing database so don't create one here
        b.setField(hm);

        // The first bibitem is always empty, presumably as a result of trying to
        // parse header information. So add only if we have at least author or
        // title fields.
        if ((hm.get("author") != null) || (hm.get("title") != null)) {
            bibitems.add(b);
        }
    }
    return bibitems;
}
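// A standalone sketch of the marker scheme used by the importer above: records
// are split on "::" (inserted before each "PMID- " line), field starts are
// marked with " ## ", and wrapped lines are glued back together with "EOLEOL".
// The class name and the record below are made up for illustration.
class MedlineMarkerSketch {

    public static void main(String[] args) {
        String[] lines = {
                "PMID- 12345678",
                "TI  - Marine finfish",
                "      larviculture in Europe",
                "FAU - Shields, Kevin"
        };

        StringBuilder sb = new StringBuilder();
        for (String str : lines) {
            if (str.startsWith("PMID- ")) {
                sb.append("::").append(str);            // new record
            } else if (str.substring(0, 6).indexOf(" ") > 0) {
                sb.append(" ## ").append(str);          // new field
            } else {
                sb.append("EOLEOL").append(str.trim()); // continuation of the previous field
            }
        }

        for (String record : sb.toString().split("::")) {
            for (String field : record.split(" ## ")) {
                if (field.length() > 6) {
                    String tag = field.substring(0, 4).trim();
                    String value = field.substring(6).replaceAll("EOLEOL", " ");
                    System.out.println(tag + " = " + value);
                }
            }
        }
        // prints:
        // PMID = 12345678
        // TI = Marine finfish larviculture in Europe
        // FAU = Shields, Kevin
    }
}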
public void endElement(String uri, String localName, String qName) {
    if (localName.equals("PubmedArticle")) {
        // bibitems.add( new Bibitem(null, makeBibtexString(), Globals.nextKey(), "-1" ) );
        // check if year ="" then give medline date instead
        if (year.equals("")) {
            if (!MedlineDate.equals("")) {
                // multi-year date format
                // System.out.println(MedlineDate);
                year = MedlineDate.substring(0, 4);
                // Matcher m = Pattern.compile("\\b[0-9]{4}\\b").matcher(MedlineDate);
                // if(m.matches())
                //     year = m.group();
            }
        }

        // Build a string from the collected keywords:
        StringBuffer sb = new StringBuffer();
        for (Iterator<String> iterator = descriptors.iterator(); iterator.hasNext(); ) {
            String s = iterator.next();
            sb.append(s);
            if (iterator.hasNext())
                sb.append(KEYWORD_SEPARATOR);
        }
        keywords = sb.toString();

        BibtexEntry b = new BibtexEntry(
                Util.createNeutralId(), // Globals.DEFAULT_BIBTEXENTRY_ID,
                Globals.getEntryType("article")); // id assumes an existing database so don't create one here

        if (!author.equals("")) {
            b.setField("author",
                    htmlConverter.formatUnicode(ImportFormatReader.expandAuthorInitials(author)));
            // b.setField("author",Util.replaceSpecialCharacters(ImportFormatReader.expandAuthorInitials(author)));
            author = "";
        }
        if (!title.equals(""))
            b.setField("title", htmlConverter.formatUnicode(title));
        // if (!title.equals("")) b.setField("title",Util.replaceSpecialCharacters(title));
        if (!journal.equals(""))
            b.setField("journal", journal);
        if (!year.equals(""))
            b.setField("year", year);
        // PENDING [email protected] 2005-05-27 : added call to fixPageRange
        if (!page.equals(""))
            b.setField("pages", fixPageRange(page));
        if (!volume.equals(""))
            b.setField("volume", volume);
        if (!language.equals(""))
            b.setField("language", language);
        if (!pst.equals(""))
            b.setField("medline-pst", pst);
        if (!abstractText.equals(""))
            b.setField("abstract", abstractText.replaceAll("%", "\\\\%"));
        if (!keywords.equals(""))
            b.setField("keywords", keywords);
        if (!month.equals(""))
            b.setField("month", month);
        // if (!url.equals("")) b.setField("url",url);
        if (!number.equals(""))
            b.setField("number", number);
        if (!doi.equals("")) {
            b.setField("doi", doi);
            b.setField("url", "http://dx.doi.org/" + doi);
        }
        if (!pii.equals(""))
            b.setField("pii", pii);
        if (!affiliation.equals("")) {
            b.setField("institution", affiliation.replaceAll("#", "\\\\#"));
        }

        // PENDING [email protected] 2005-05-27 : added "pmid" bibtex field
        // Older references do not have doi entries, but every medline entry
        // has a unique pubmed ID (aka primary ID).
        // Add a bibtex field for the pubmed ID for future use.
        if (!pubmedid.equals(""))
            b.setField("pmid", pubmedid);

        bibitems.add(b);

        abstractText = "";
        author = "";
        title = "";
        journal = "";
        keywords = "";
        doi = "";
        pii = "";
        year = "";
        forename = "";
        lastName = "";
        suffix = "";
        abstractText = "";
        affiliation = "";
        pubmedid = "";
        majorTopic = "";
        minorTopics = "";
        month = "";
        volume = "";
        language = "";
        pst = "";
        lastname = "";
        suffix = "";
        initials = "";
        number = "";
        page = "";
        medlineID = "";
        url = "";
        MedlineDate = "";
        descriptors.clear();
    } else if (localName.equals("ArticleTitle")) {
        inTitle = false;
    } else if (localName.equals("PubDate")) {
        inPubDate = false;
    } else if (localName.equals("Year")) {
        inYear = false;
    } else if (localName.equals("PMID")) {
        inPubMedID = false;
    } else if (localName.equals("MedlineDate")) {
        inMedlineDate = false;
    } else if (localName.equals("MedlineTA")) {
        inJournal = false;
    } // journal name
    else if (localName.equals("Month")) {
        inMonth = false;
    } else if (localName.equals("Volume")) {
        inVolume = false;
    } else if (localName.equals("Language")) {
        inLanguage = false;
    } else if (localName.equals("PublicationStatus")) {
        inPst = false;
    } else if (localName.equals("AuthorList")) {
        author = join(authors.toArray(), " and ");
        inAuthorList = false;
    } else if (localName.equals("Author")) {
        // forename sometimes has initials with " " in middle: is pattern [A-Z] [A-Z]
        // when above is the case replace it with initials
        if (forename.length() == 3 && forename.charAt(1) == ' ') {
            forename = initials;
        }
        // Put together name with last name first, and enter suffix in between if present:
        if (lastname.indexOf(" ") > 0)
            author = "{" + lastname + "}";
        else
            author = lastname;
        if (suffix.length() > 0)
            author = author + ", " + suffix;
        if (forename.length() > 0)
            author = author + ", " + forename;
        // author = initials + " " + lastname;
        authors.add(author);
        inAuthor = false;
        forename = "";
        initials = "";
        lastname = "";
        suffix = "";
    } else if (localName.equals("DescriptorName"))
        inDescriptorName = false;
    else if (localName.equals("QualifierName"))
        inQualifierName = false;
    else if (localName.equals("MeshHeading")) {
        inMeshHeader = false;
        if (minorTopics.equals(""))
            descriptors.add(majorTopic);
        else
            descriptors.add(majorTopic + ", " + minorTopics);
    } else if (localName.equals("LastName")) {
        inLastName = false;
    } else if (localName.equals("Suffix")) {
        inSuffix = false;
    } else if (localName.equals("ForeName") || localName.equals("FirstName")) {
        inForename = false;
    } else if (localName.equals("Issue")) {
        inIssue = false;
    } else if (localName.equals("MedlinePgn")) {
        inMedlinePgn = false;
    } // pagenumber
    else if (localName.equals("URL")) {
        inUrl = false;
    } else if (localName.equals("Initials")) {
        // initials = '.' + initials + '.';
        inInitials = false;
    } else if (localName.equals("AbstractText")) {
        inAbstractText = false;
    } else if (localName.equals("Affiliation")) {
        inAffiliation = false;
    } else if (localName.equals("ArticleId")) {
        if (inDoi)
            inDoi = false;
        else if (inPii)
            inPii = false;
    }
}
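// A small standalone sketch of the name assembly in the Author branch above:
// last name first, suffix and forename appended, and multi-word last names
// protected with braces. The class name and sample values are made up.
class MedlineAuthorSketch {

    static String buildAuthor(String lastname, String suffix, String forename, String initials) {
        // A forename like "K S" is really just spaced initials; prefer the Initials element then.
        if (forename.length() == 3 && forename.charAt(1) == ' ') {
            forename = initials;
        }
        String author = lastname.indexOf(" ") > 0 ? "{" + lastname + "}" : lastname;
        if (suffix.length() > 0) {
            author = author + ", " + suffix;
        }
        if (forename.length() > 0) {
            author = author + ", " + forename;
        }
        return author;
    }

    public static void main(String[] args) {
        System.out.println(buildAuthor("Shields", "Jr", "Kevin", "K"));   // Shields, Jr, Kevin
        System.out.println(buildAuthor("van der Berg", "", "K S", "KS")); // {van der Berg}, KS
    }
}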
private BibtexEntry parseNextEntry(String allText, int startIndex) {
    BibtexEntry entry = null;

    int index = allText.indexOf("<div class=\"detail", piv);
    int endIndex = allText.indexOf("</div>", index);

    if (index >= 0 && endIndex > 0) {
        endIndex += 6;
        piv = endIndex;
        String text = allText.substring(index, endIndex);

        BibtexEntryType type = null;
        String sourceField = null;

        String typeName = "";
        Matcher typeMatcher = typePattern.matcher(text);
        if (typeMatcher.find()) {
            typeName = typeMatcher.group(1);
            if (typeName.equalsIgnoreCase("IEEE Journals & Magazines")
                    || typeName.equalsIgnoreCase("IEEE Early Access Articles")
                    || typeName.equalsIgnoreCase("IET Journals & Magazines")
                    || typeName.equalsIgnoreCase("AIP Journals & Magazines")
                    || typeName.equalsIgnoreCase("AVS Journals & Magazines")
                    || typeName.equalsIgnoreCase("IBM Journals & Magazines")
                    || typeName.equalsIgnoreCase("TUP Journals & Magazines")
                    || typeName.equalsIgnoreCase("BIAI Journals & Magazines")) {
                type = BibtexEntryType.getType("article");
                sourceField = "journal";
            } else if (typeName.equalsIgnoreCase("IEEE Conference Publications")
                    || typeName.equalsIgnoreCase("IET Conference Publications")
                    || typeName.equalsIgnoreCase("VDE Conference Publications")) {
                type = BibtexEntryType.getType("inproceedings");
                sourceField = "booktitle";
            } else if (typeName.equalsIgnoreCase("IEEE Standards")
                    || typeName.equalsIgnoreCase("Standards")) {
                type = BibtexEntryType.getType("standard");
                sourceField = "number";
            } else if (typeName.equalsIgnoreCase("IEEE eLearning Library Courses")) {
                type = BibtexEntryType.getType("Electronic");
                sourceField = "note";
            } else if (typeName.equalsIgnoreCase("Wiley-IEEE Press eBook Chapters")
                    || typeName.equalsIgnoreCase("MIT Press eBook Chapters")
                    || typeName.equalsIgnoreCase("IEEE USA Books & eBooks")) {
                type = BibtexEntryType.getType("inCollection");
                sourceField = "booktitle";
            }
        }

        if (type == null) {
            type = BibtexEntryType.getType("misc");
            sourceField = "note";
            System.err.println("Type detection failed. Use MISC instead.");
            unparseable++;
            System.err.println(text);
        }

        entry = new BibtexEntry(IdGenerator.next(), type);

        if (typeName.equalsIgnoreCase("IEEE Standards")) {
            entry.setField("organization", "IEEE");
        }

        if (typeName.equalsIgnoreCase("Wiley-IEEE Press eBook Chapters")) {
            entry.setField("publisher", "Wiley-IEEE Press");
        } else if (typeName.equalsIgnoreCase("MIT Press eBook Chapters")) {
            entry.setField("publisher", "MIT Press");
        } else if (typeName.equalsIgnoreCase("IEEE USA Books & eBooks")) {
            entry.setField("publisher", "IEEE USA");
        }

        if (typeName.equalsIgnoreCase("IEEE Early Access Articles")) {
            entry.setField("note", "Early Access");
        }

        Set<String> fields = fieldPatterns.keySet();
        for (String field : fields) {
            Matcher fieldMatcher = Pattern.compile(fieldPatterns.get(field)).matcher(text);
            if (fieldMatcher.find()) {
                entry.setField(field, htmlConverter.format(fieldMatcher.group(1)));
                if (field.equals("title") && fieldMatcher.find()) {
                    String sec_title = htmlConverter.format(fieldMatcher.group(1));
                    if (entry.getType() == BibtexEntryType.getStandardType("standard")) {
                        sec_title = sec_title.replaceAll("IEEE Std ", "");
                    }
                    entry.setField(sourceField, sec_title);
                }
                if (field.equals("pages") && fieldMatcher.groupCount() == 2) {
                    entry.setField(field, fieldMatcher.group(1) + "-" + fieldMatcher.group(2));
                }
            }
        }

        Matcher authorMatcher = authorPattern.matcher(text);
        // System.out.println(text);
        StringBuilder authorNames = new StringBuilder("");
        int authorCount = 0;
        while (authorMatcher.find()) {
            if (authorCount >= 1) {
                authorNames.append(" and ");
            }
            authorNames.append(htmlConverter.format(authorMatcher.group(1)));
            // System.out.println(authorCount + ": " + authorMatcher.group(1));
            authorCount++;
        }
        entry.setField("author", authorNames.toString());
        if (entry.getField("author") == null
                || entry.getField("author").startsWith("a href")
                || entry.getField("author").startsWith("Topic(s)")) {
            // Fix for some documents without authors
            entry.setField("author", "");
        }
        if (entry.getType() == BibtexEntryType.getStandardType("inproceedings")
                && entry.getField("author").equals("")) {
            entry.setType(BibtexEntryType.getStandardType("proceedings"));
        }

        if (includeAbstract) {
            index = text.indexOf("id=\"abstract");
            if (index >= 0) {
                endIndex = text.indexOf("</div>", index) + 6;
                text = text.substring(index, endIndex);
                Matcher absMatcher = absPattern.matcher(text);
                if (absMatcher.find()) {
                    // Clean-up abstract
                    String abstr = absMatcher.group(1);
                    abstr = abstr.replaceAll("<span class='snippet'>([\\w]+)</span>", "$1");
                    entry.setField("abstract", htmlConverter.format(abstr));
                }
            }
        }
    }

    if (entry == null) {
        return null;
    } else {
        return cleanup(entry);
    }
}
private BibtexEntry cleanup(BibtexEntry entry) {
    if (entry == null) {
        return null;
    }

    // clean up title
    String title = entry.getField("title");
    if (title != null) {
        // Use the alt-text and replace image links
        title = title.replaceAll("[ ]?img src=[^ ]+ alt=\"([^\"]+)\">[ ]?", "\\$$1\\$");
        // Try to sort out most of the /spl / conversions
        // Deal with this specific nested type first
        title = title.replaceAll("/sub /spl infin//", "\\$_\\\\infty\\$");
        title = title.replaceAll("/sup /spl infin//", "\\$\\^\\\\infty\\$");
        // Replace general expressions
        title = title.replaceAll("/[sS]pl ([^/]+)/", "\\$\\\\$1\\$");
        // Deal with subscripts and superscripts
        if (Globals.prefs.getBoolean(JabRefPreferences.USE_CONVERT_TO_EQUATION)) {
            title = title.replaceAll("/sup ([^/]+)/", "\\$\\^\\{$1\\}\\$");
            title = title.replaceAll("/sub ([^/]+)/", "\\$_\\{$1\\}\\$");
            title = title.replaceAll("\\(sup\\)([^(]+)\\(/sup\\)", "\\$\\^\\{$1\\}\\$");
            title = title.replaceAll("\\(sub\\)([^(]+)\\(/sub\\)", "\\_\\{$1\\}\\$");
        } else {
            title = title.replaceAll("/sup ([^/]+)/", "\\\\textsuperscript\\{$1\\}");
            title = title.replaceAll("/sub ([^/]+)/", "\\\\textsubscript\\{$1\\}");
            title = title.replaceAll("\\(sup\\)([^(]+)\\(/sup\\)", "\\\\textsuperscript\\{$1\\}");
            title = title.replaceAll("\\(sub\\)([^(]+)\\(/sub\\)", "\\\\textsubscript\\{$1\\}");
        }

        // Replace \infin with \infty
        title = title.replaceAll("\\\\infin", "\\\\infty");

        // Unit formatting
        if (Globals.prefs.getBoolean(JabRefPreferences.USE_UNIT_FORMATTER_ON_SEARCH)) {
            title = unitFormatter.format(title);
        }

        // Automatic case keeping
        if (Globals.prefs.getBoolean(JabRefPreferences.USE_CASE_KEEPER_ON_SEARCH)) {
            title = caseKeeper.format(title);
        }

        // Write back
        entry.setField("title", title);
    }

    // clean up author
    /*
    String author = (String) entry.getField("author");
    if (author != null) {
        if (author.indexOf("a href=") >= 0) {
            // Author parsing failed because it was empty
            entry.setField("author", ""); // Maybe not needed anymore due to another change
        } else {
            author = author.replaceAll("\\s+", " ");
            author = author.replaceAll("\\.", ". ");
            author = author.replaceAll("([^;]+),([^;]+),([^;]+)", "$1,$3,$2"); // Change order in case of Jr. etc
            author = author.replaceAll("  ", " ");
            author = author.replaceAll("\\. -", ".-");
            author = author.replaceAll("; ", " and ");
            author = author.replaceAll(" ,", ",");
            author = author.replaceAll("  ", " ");
            author = author.replaceAll("[ ,;]+$", "");
            entry.setField("author", author);
        }
    }
    */

    // clean up month
    String month = entry.getField("month");
    if (month != null && !month.isEmpty()) {
        month = month.replaceAll("\\.", "");
        month = month.toLowerCase();
        Pattern monthPattern = Pattern.compile("(\\d*+)\\s*([a-z]*+)-*(\\d*+)\\s*([a-z]*+)");
        Matcher mm = monthPattern.matcher(month);
        String date = month;
        if (mm.find()) {
            if (mm.group(3).isEmpty()) {
                if (!mm.group(2).isEmpty()) {
                    date = "#" + mm.group(2).substring(0, 3) + "#";
                    if (!mm.group(1).isEmpty()) {
                        date += " " + mm.group(1) + ",";
                    }
                } else {
                    date = mm.group(1) + ",";
                }
            } else if (mm.group(2).isEmpty()) {
                if (!mm.group(4).isEmpty()) {
                    date = "#" + mm.group(4).substring(0, 3) + "# " + mm.group(1) + "--" + mm.group(3) + ",";
                } else {
                    date += ",";
                }
            } else {
                date = "#" + mm.group(2).substring(0, 3) + "# " + mm.group(1) + "--#"
                        + mm.group(4).substring(0, 3) + "# " + mm.group(3) + ",";
            }
        }
        // date = date.trim();
        // if (!date.isEmpty()) {
        entry.setField("month", date);
        // }
    }

    // clean up pages
    String field = "pages";
    String pages = entry.getField(field);
    if (pages != null) {
        String[] pageNumbers = pages.split("-");
        if (pageNumbers.length == 2) {
            if (pageNumbers[0].equals(pageNumbers[1])) {
                // single page
                entry.setField(field, pageNumbers[0]);
            } else {
                entry.setField(field, pages.replaceAll("-", "--"));
            }
        }
    }

    // clean up publication field
    BibtexEntryType type = entry.getType();
    String sourceField = "";
    if (type.getName().equals("Article")) {
        sourceField = "journal";
        entry.clearField("booktitle");
    } else if (type.getName().equals("Inproceedings")) {
        sourceField = "booktitle";
    }
    String fullName = entry.getField(sourceField);
    if (fullName != null) {
        if (type.getName().equals("Article")) {
            int ind = fullName.indexOf(": Accepted for future publication");
            if (ind > 0) {
                fullName = fullName.substring(0, ind);
                entry.setField("year", "to be published");
                entry.clearField("month");
                entry.clearField("pages");
                entry.clearField("number");
            }
            String[] parts = fullName.split("[\\[\\]]"); // [see also...], [legacy...]
            fullName = parts[0];
            if (parts.length == 3) {
                fullName += parts[2];
            }
            if (entry.getField("note").equals("Early Access")) {
                entry.setField("year", "to be published");
                entry.clearField("month");
                entry.clearField("pages");
                entry.clearField("number");
            }
        } else {
            fullName = fullName
                    .replace("Conference Proceedings", "Proceedings")
                    .replace("Proceedings of", "Proceedings")
                    .replace("Proceedings.", "Proceedings");
            fullName = fullName.replaceAll("International", "Int.");
            fullName = fullName.replaceAll("Symposium", "Symp.");
            fullName = fullName.replaceAll("Conference", "Conf.");
            fullName = fullName.replaceAll(" on", " ").replace("  ", " ");
        }
        Matcher m1 = publicationPattern.matcher(fullName);
        String abrvPattern = ".*[^,] '?\\d+\\)?";
        if (m1.find()) {
            String prefix = m1.group(2).trim();
            String postfix = m1.group(1).trim();
            String abrv = "";
            String[] parts = prefix.split("\\. ", 2);
            if (parts.length == 2) {
                if (parts[0].matches(abrvPattern)) {
                    prefix = parts[1];
                    abrv = parts[0];
                } else {
                    prefix = parts[0];
                    abrv = parts[1];
                }
            }
            if (!prefix.matches(abrvPattern)) {
                fullName = prefix + " " + postfix + " " + abrv;
                fullName = fullName.trim();
            } else {
                fullName = postfix + " " + prefix;
            }
        }
        if (type.getName().equals("Article")) {
            fullName = fullName.replace(" - ", "-"); // IEE Proceedings-
            fullName = fullName.trim();
            if (Globals.prefs.getBoolean(JabRefPreferences.USE_IEEE_ABRV)) {
                fullName = Globals.journalAbbrev.getMedlineAbbreviation(fullName).orElse(fullName);
            }
        }
        if (type.getName().equals("Inproceedings")) {
            Matcher m2 = proceedingPattern.matcher(fullName);
            if (m2.find()) {
                String prefix = m2.group(2);
                String postfix = m2.group(1).replaceAll("\\.$", "");
                if (!prefix.matches(abrvPattern)) {
                    String abrv = "";
                    String[] parts = postfix.split("\\. ", 2);
                    if (parts.length == 2) {
                        if (parts[0].matches(abrvPattern)) {
                            postfix = parts[1];
                            abrv = parts[0];
                        } else {
                            postfix = parts[0];
                            abrv = parts[1];
                        }
                    }
                    fullName = prefix.trim() + " " + postfix.trim() + " " + abrv;
                } else {
                    fullName = postfix.trim() + " " + prefix.trim();
                }
            }
            fullName = fullName.trim();
            fullName = fullName.replaceAll("^[tT]he ", "").replaceAll("^\\d{4} ", "").replaceAll("[,.]$", "");
            String year = entry.getField("year");
            fullName = fullName.replaceAll(", " + year + "\\.?", "");
            if (!fullName.contains("Abstract")
                    && !fullName.contains("Summaries")
                    && !fullName.contains("Conference Record")) {
                fullName = "Proc. " + fullName;
            }
        }
        entry.setField(sourceField, fullName);
    }

    // clean up abstract
    String abstr = entry.getField("abstract");
    if (abstr != null) {
        // Try to sort out most of the /spl / conversions
        // Deal with this specific nested type first
        abstr = abstr.replaceAll("/sub /spl infin//", "\\$_\\\\infty\\$");
        abstr = abstr.replaceAll("/sup /spl infin//", "\\$\\^\\\\infty\\$");
        // Replace general expressions
        abstr = abstr.replaceAll("/[sS]pl ([^/]+)/", "\\$\\\\$1\\$");
        // Deal with subscripts and superscripts
        if (Globals.prefs.getBoolean(JabRefPreferences.USE_CONVERT_TO_EQUATION)) {
            abstr = abstr.replaceAll("/sup ([^/]+)/", "\\$\\^\\{$1\\}\\$");
            abstr = abstr.replaceAll("/sub ([^/]+)/", "\\$_\\{$1\\}\\$");
            abstr = abstr.replaceAll("\\(sup\\)([^(]+)\\(/sup\\)", "\\$\\^\\{$1\\}\\$");
            abstr = abstr.replaceAll("\\(sub\\)([^(]+)\\(/sub\\)", "\\_\\{$1\\}\\$");
        } else {
            abstr = abstr.replaceAll("/sup ([^/]+)/", "\\\\textsuperscript\\{$1\\}");
            abstr = abstr.replaceAll("/sub ([^/]+)/", "\\\\textsubscript\\{$1\\}");
            abstr = abstr.replaceAll("\\(sup\\)([^(]+)\\(/sup\\)", "\\\\textsuperscript\\{$1\\}");
            abstr = abstr.replaceAll("\\(sub\\)([^(]+)\\(/sub\\)", "\\\\textsubscript\\{$1\\}");
        }
        // Replace \infin with \infty
        abstr = abstr.replaceAll("\\\\infin", "\\\\infty");

        // Write back
        entry.setField("abstract", abstr);
    }

    // Clean up url
    String url = entry.getField("url");
    if (url != null) {
        entry.setField("url", "http://ieeexplore.ieee.org" + url.replace("tp=&", ""));
    }

    return entry;
}
/** Parse the entries in the source, and return a List of BibtexEntry objects. */
public List<BibtexEntry> importEntries(InputStream stream, OutputPrinter status) throws IOException {
    ArrayList<BibtexEntry> bibitems = new ArrayList<BibtexEntry>();
    StringBuffer sb = new StringBuffer();
    HashMap<String, String> hm = new HashMap<String, String>();
    BufferedReader in = new BufferedReader(ImportFormatReader.getReaderDefaultEncoding(stream));

    String Type = null;
    String str;
    boolean first = true;
    line = 1;
    str = readLine(in);
    while (true) {
        if (str == null || str.length() == 0) { // end of record
            if (!hm.isEmpty()) { // have a record
                if (Type == null) {
                    addNote(hm, "Publication Type: [NOT SPECIFIED]");
                    addNote(hm, "[PERHAPS NOT FULL FORMAT]");
                    Type = "article";
                }
                // post-process Journal article
                if (Type.equals("article") && hm.get("booktitle") != null) {
                    String booktitle = hm.get("booktitle");
                    hm.remove("booktitle");
                    hm.put("journal", booktitle);
                }
                BibtexEntry b =
                        new BibtexEntry(BibtexFields.DEFAULT_BIBTEXENTRY_ID, Globals.getEntryType(Type)); // create one here
                b.setField(hm);
                bibitems.add(b);
            }
            hm.clear(); // ready for next record
            first = true;
            if (str == null)
                break; // end of file
            str = readLine(in);
            continue;
        }

        int fline = line; // save this before reading field contents
        Matcher fm = FIELD_PATTERN.matcher(str);
        if (fm.find()) {
            // save the field name (long and short)
            String fabbr = fm.group(1);
            String fname = fm.group(2);

            // read the contents of the field
            sb.setLength(0); // clear the buffer
            while ((str = readLine(in)) != null) {
                if (!str.startsWith(" ")) // field contents?
                    break; // nope
                if (sb.length() > 0) {
                    sb.append(" ");
                }
                sb.append(str.substring(4)); // skip spaces
            }
            String fstr = sb.toString();
            if (fstr.length() == 0) {
                int line1 = line - 1;
                throw new IOException("illegal empty field at line " + line1);
            }
            // replace [Lt] with <
            fm = LT_PATTERN.matcher(fstr);
            if (fm.find())
                fstr = fm.replaceAll("<");

            // check for start of new record
            if (fabbr.equals("DN") && fname.equalsIgnoreCase("Database Name")) {
                if (!first) {
                    throw new IOException("format error at line " + fline + ": DN out of order");
                }
                first = false;
            } else if (first) {
                throw new IOException("format error at line " + fline + ": missing DN");
            }

            if (fabbr.equals("PT")) {
                Type = null;
                String flow = fstr.toLowerCase();
                String[] types = flow.split("; ");
                for (String type : types) {
                    if ((type.contains("article")) || (type.contains("journal article"))) {
                        Type = "article";
                        break;
                    } else if (type.equals("dissertation")) {
                        Type = "phdthesis";
                        break;
                    } else if (type.equals("conference")) {
                        Type = "inproceedings";
                        break;
                    } else if (type.equals("book monograph") && Type == null) {
                        Type = "book";
                        break;
                    } else if (type.equals("report") && Type == null) {
                        Type = "techreport";
                        break;
                    }
                }
                if (Type == null) {
                    Type = "misc";
                }
            }

            String ftype = null;
            if (fabbr.equals("AB"))
                ftype = "abstract";
            else if (fabbr.equals("AF"))
                ftype = "affiliation";
            else if (fabbr.equals("AU")) {
                ftype = "author";
                if (fstr.contains(";"))
                    fstr = fstr.replaceAll("; ", " and ");
            } else if (fabbr.equals("CA"))
                ftype = "organization";
            else if (fabbr.equals("DE"))
                ftype = "keywords";
            else if (fabbr.equals("DO"))
                ftype = "doi";
            else if (fabbr.equals("ED"))
                ftype = "editor";
            else if (fabbr.equals("IB"))
                ftype = "ISBN";
            else if (fabbr.equals("IS"))
                ftype = "ISSN";
            else if (fabbr.equals("JN"))
                ftype = "journal";
            else if (fabbr.equals("LA"))
                ftype = "language";
            else if (fabbr.equals("PB"))
                ftype = "publisher";
            else if (fabbr.equals("PY")) {
                ftype = "year";
                if (hm.get("year") != null) {
                    String oyear = hm.get("year");
                    if (!fstr.equals(oyear)) {
                        addNote(hm, "Source Year: " + oyear + ".");
                        // System.out.println(fstr + " != " + oyear);
                    }
                }
            } else if (fabbr.equals("RL")) {
                ftype = "url";
                String[] lines = fstr.split(" ");
                StringBuilder urls = new StringBuilder();
                for (int ii = 0; ii < lines.length; ++ii) {
                    if (lines[ii].startsWith("[URL:"))
                        urls.append(lines[ii].substring(5));
                    else if (lines[ii].endsWith("]")) {
                        int len = lines[ii].length();
                        urls.append(lines[ii].substring(0, len - 1));
                        if (ii < lines.length - 1)
                            urls.append("\n");
                    } else
                        urls.append(lines[ii]);
                }
                fstr = urls.toString();
            } else if (fabbr.equals("SO")) {
                ftype = "booktitle";
                // see if we can extract journal information
                // compact vol(no):page-page:
                Matcher pm = VOLNOPP_PATTERN.matcher(fstr);
                if (pm.find()) {
                    hm.put("volume", pm.group(1));
                    hm.put("number", pm.group(2));
                    hm.put("pages", pm.group(3));
                    fstr = pm.replaceFirst("");
                }
                // pages
                pm = PAGES_PATTERN.matcher(fstr);
                StringBuilder pages = new StringBuilder();
                while (pm.find()) {
                    if (pages.length() > 0)
                        pages.append(",");
                    String pp = pm.group(1);
                    if (pp == null)
                        pp = pm.group(2);
                    if (pp == null)
                        pp = pm.group(3);
                    pages.append(pp);
                    fstr = pm.replaceFirst("");
                    pm = PAGES_PATTERN.matcher(fstr);
                }
                if (pages.length() > 0)
                    hm.put("pages", pages.toString());
                // volume:
                pm = VOLUME_PATTERN.matcher(fstr);
                if (pm.find()) {
                    hm.put("volume", pm.group(1));
                    fstr = pm.replaceFirst("");
                }
                // number:
                pm = NUMBER_PATTERN.matcher(fstr);
                if (pm.find()) {
                    hm.put("number", pm.group(1));
                    fstr = pm.replaceFirst("");
                }
                // journal date:
                fstr = parseDate(hm, fstr);
                // strip trailing whitespace
                Pattern pp = Pattern.compile(",?\\s*$");
                pm = pp.matcher(fstr);
                if (pm.find())
                    fstr = pm.replaceFirst("");
                if (fstr.equals(""))
                    continue;
                // System.out.println("SOURCE: \"" + fstr + "\"");
            } else if (fabbr.equals("TI"))
                ftype = "title";
            else if (fabbr.equals("RE"))
                continue; // throw away References

            if (ftype != null) {
                hm.put(ftype, fstr);
            } else {
                addNote(hm, fname + ": " + fstr + ".");
            }
        } else
            str = readLine(in);
    }
    return bibitems;
}
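// A standalone sketch of pulling "volume(number):pages" out of an SO field, in
// the spirit of the VOLNOPP_PATTERN handling above. The pattern, class name and
// sample string are assumptions for illustration; the importer's real constants
// are not shown in this excerpt.
import java.util.regex.Matcher;
import java.util.regex.Pattern;

class VolNoPagesSketch {

    private static final Pattern VOLNOPP = Pattern.compile("(\\d+)\\((\\d+)\\):(\\d+-\\d+)");

    public static void main(String[] args) {
        String source = "Aquaculture Research 32(5):123-135, May 2001";
        Matcher m = VOLNOPP.matcher(source);
        if (m.find()) {
            System.out.println("volume = " + m.group(1)); // 32
            System.out.println("number = " + m.group(2)); // 5
            System.out.println("pages  = " + m.group(3)); // 123-135
            // the importer then removes the matched part with replaceFirst("")
            System.out.println("rest   = " + m.replaceFirst("")); // Aquaculture Research , May 2001
        }
    }
}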
private void insertFields(String[] fields, BibtexEntry entry, XmlDocument xmlDocument) {
    DocumentWrapper document = new DocumentWrapper(xmlDocument);
    for (String field : fields) {
        if (entry.getField(field) != null) {
            continue;
        }
        if (field.equalsIgnoreCase("author")) {
            entry.setField(field, document.getAuthors("and"));
        }
        if (field.equalsIgnoreCase("title")) {
            entry.setField(field, document.getTitle());
        }
        if (field.equalsIgnoreCase("abstract")) {
            entry.setField(field, document.getAbstract());
        }
        if (field.equalsIgnoreCase("keywords")) {
            entry.setField(field, document.getKeyWords());
        }
        if (field.equalsIgnoreCase("doi")) {
            entry.setField(field, document.getDoi());
        }
        if (field.equalsIgnoreCase("pages")) {
            entry.setField(field, document.getPages());
        }
        if (field.equalsIgnoreCase("volume")) {
            entry.setField(field, document.getVolume());
        }
        if (field.equalsIgnoreCase("number")) {
            entry.setField(field, document.getNumber());
        }
        if (field.equalsIgnoreCase("year")) {
            entry.setField(field, document.getYear());
        }
        if (field.equalsIgnoreCase("month")) {
            entry.setField(field, document.getMonth());
        }
        if (field.equalsIgnoreCase("day")) {
            entry.setField(field, document.getDay());
        }
        if (field.equalsIgnoreCase("booktitle")) {
            entry.setField(field, document.getVenue());
        }
        if (field.equalsIgnoreCase("journal")) {
            entry.setField(field, document.getVenue());
        }
    }
}