private BibtexEntry parseNextEntry(String allText, int startIndex) { BibtexEntry entry = null; int index = allText.indexOf("<div class=\"detail", piv); int endIndex = allText.indexOf("</div>", index); if (index >= 0 && endIndex > 0) { endIndex += 6; piv = endIndex; String text = allText.substring(index, endIndex); BibtexEntryType type = null; String sourceField = null; String typeName = ""; Matcher typeMatcher = typePattern.matcher(text); if (typeMatcher.find()) { typeName = typeMatcher.group(1); if (typeName.equalsIgnoreCase("IEEE Journals & Magazines") || typeName.equalsIgnoreCase("IEEE Early Access Articles") || typeName.equalsIgnoreCase("IET Journals & Magazines") || typeName.equalsIgnoreCase("AIP Journals & Magazines") || typeName.equalsIgnoreCase("AVS Journals & Magazines") || typeName.equalsIgnoreCase("IBM Journals & Magazines") || typeName.equalsIgnoreCase("TUP Journals & Magazines") || typeName.equalsIgnoreCase("BIAI Journals & Magazines")) { type = BibtexEntryType.getType("article"); sourceField = "journal"; } else if (typeName.equalsIgnoreCase("IEEE Conference Publications") || typeName.equalsIgnoreCase("IET Conference Publications") || typeName.equalsIgnoreCase("VDE Conference Publications")) { type = BibtexEntryType.getType("inproceedings"); sourceField = "booktitle"; } else if (typeName.equalsIgnoreCase("IEEE Standards") || typeName.equalsIgnoreCase("Standards")) { type = BibtexEntryType.getType("standard"); sourceField = "number"; } else if (typeName.equalsIgnoreCase("IEEE eLearning Library Courses")) { type = BibtexEntryType.getType("Electronic"); sourceField = "note"; } else if (typeName.equalsIgnoreCase("Wiley-IEEE Press eBook Chapters") || typeName.equalsIgnoreCase("MIT Press eBook Chapters") || typeName.equalsIgnoreCase("IEEE USA Books & eBooks")) { type = BibtexEntryType.getType("inCollection"); sourceField = "booktitle"; } } if (type == null) { type = BibtexEntryType.getType("misc"); sourceField = "note"; System.err.println("Type detection failed. Use MISC instead."); unparseable++; System.err.println(text); } entry = new BibtexEntry(IdGenerator.next(), type); if (typeName.equalsIgnoreCase("IEEE Standards")) { entry.setField("organization", "IEEE"); } if (typeName.equalsIgnoreCase("Wiley-IEEE Press eBook Chapters")) { entry.setField("publisher", "Wiley-IEEE Press"); } else if (typeName.equalsIgnoreCase("MIT Press eBook Chapters")) { entry.setField("publisher", "MIT Press"); } else if (typeName.equalsIgnoreCase("IEEE USA Books & eBooks")) { entry.setField("publisher", "IEEE USA"); } if (typeName.equalsIgnoreCase("IEEE Early Access Articles")) { entry.setField("note", "Early Access"); } Set<String> fields = fieldPatterns.keySet(); for (String field : fields) { Matcher fieldMatcher = Pattern.compile(fieldPatterns.get(field)).matcher(text); if (fieldMatcher.find()) { entry.setField(field, htmlConverter.format(fieldMatcher.group(1))); if (field.equals("title") && fieldMatcher.find()) { String sec_title = htmlConverter.format(fieldMatcher.group(1)); if (entry.getType() == BibtexEntryType.getStandardType("standard")) { sec_title = sec_title.replaceAll("IEEE Std ", ""); } entry.setField(sourceField, sec_title); } if (field.equals("pages") && fieldMatcher.groupCount() == 2) { entry.setField(field, fieldMatcher.group(1) + "-" + fieldMatcher.group(2)); } } } Matcher authorMatcher = authorPattern.matcher(text); // System.out.println(text); StringBuilder authorNames = new StringBuilder(""); int authorCount = 0; while (authorMatcher.find()) { if (authorCount >= 1) { authorNames.append(" and "); } authorNames.append(htmlConverter.format(authorMatcher.group(1))); // System.out.println(authorCount + ": " + authorMatcher.group(1)); authorCount++; } entry.setField("author", authorNames.toString()); if (entry.getField("author") == null || entry.getField("author").startsWith("a href") || entry .getField("author") .startsWith("Topic(s)")) { // Fix for some documents without authors entry.setField("author", ""); } if (entry.getType() == BibtexEntryType.getStandardType("inproceedings") && entry.getField("author").equals("")) { entry.setType(BibtexEntryType.getStandardType("proceedings")); } if (includeAbstract) { index = text.indexOf("id=\"abstract"); if (index >= 0) { endIndex = text.indexOf("</div>", index) + 6; text = text.substring(index, endIndex); Matcher absMatcher = absPattern.matcher(text); if (absMatcher.find()) { // Clean-up abstract String abstr = absMatcher.group(1); abstr = abstr.replaceAll("<span class='snippet'>([\\w]+)</span>", "$1"); entry.setField("abstract", htmlConverter.format(abstr)); } } } } if (entry == null) { return null; } else { return cleanup(entry); } }
private boolean importPdfFiles(List<String> fileNames) { if (panel == null) return false; for (String fileName : fileNames) { List<BibtexEntry> xmpEntriesInFile = readXmpEntries(fileName); ImportDialog importDialog = new ImportDialog(dropRow, fileName); if (!hasXmpEntries(xmpEntriesInFile)) { importDialog.getRadioButtonXmp().setEnabled(false); } Tools.centerRelativeToWindow(importDialog, frame); importDialog.showDialog(); if (importDialog.getResult() == JOptionPane.OK_OPTION) { if (importDialog.getRadioButtonXmp().isSelected()) { // SplDatabaseChangeListener dataListener = new SplDatabaseChangeListener(frame, panel, // entryTable, fileName); // panel.database().addDatabaseChangeListener(dataListener); ImportMenuItem importer = new ImportMenuItem(frame, (entryTable == null)); importer.automatedImport(new String[] {fileName}); } else if (importDialog.getRadioButtonMrDlib().isSelected()) { MetaDataListDialog metaDataListDialog = new MetaDataListDialog(fileName, true); Tools.centerRelativeToWindow(metaDataListDialog, frame); metaDataListDialog.showDialog(); XmlDocuments documents = metaDataListDialog.getXmlDocuments(); if (documents != null && documents.getDocuments() != null && documents.getDocuments().size() > 0 && metaDataListDialog.getResult() == JOptionPane.OK_OPTION) { int selected = metaDataListDialog.getTableMetadata().getSelectedRow(); if (selected > -1 && selected < documents.getDocuments().size()) { XmlDocument document = documents.getDocuments().get(selected); String id = Util.createNeutralId(); BibtexEntry entry = new BibtexEntry(id); if (fieldExists(document.getType())) { BibtexEntryType type = BibtexEntryType.getStandardType(document.getType()); if (type == null) { type = BibtexEntryType.ARTICLE; } entry.setType(type); } else { entry.setType(BibtexEntryType.ARTICLE); } ArrayList<BibtexEntry> list = new ArrayList<BibtexEntry>(); list.add(entry); Util.setAutomaticFields(list, true, true, false); insertFields(entry.getRequiredFields(), entry, document); insertFields(entry.getGeneralFields(), entry, document); insertFields(entry.getOptionalFields(), entry, document); panel.database().insertEntry(entry); DroppedFileHandler dfh = new DroppedFileHandler(frame, panel); dfh.linkPdfToEntry(fileName, entryTable, entry); LabelPatternUtil.makeLabel(Globals.prefs.getKeyPattern(), panel.database(), entry); } else { createNewBlankEntry(fileName); } } else if (metaDataListDialog.getResult() == JOptionPane.CANCEL_OPTION) { continue; } else if (metaDataListDialog.getResult() == JOptionPane.NO_OPTION) { createNewBlankEntry(fileName); } else if (documents == null || documents.getDocuments() == null || documents.getDocuments().size() <= 0 && metaDataListDialog.getResult() == JOptionPane.OK_OPTION) { createNewBlankEntry(fileName); } } else if (importDialog.getRadioButtonNoMeta().isSelected()) { createNewBlankEntry(fileName); } else if (importDialog.getRadioButtonUpdateEmptyFields().isSelected()) { MetaDataListDialog metaDataListDialog = new MetaDataListDialog(fileName, false); Tools.centerRelativeToWindow(metaDataListDialog, frame); metaDataListDialog.showDialog(); XmlDocuments documents = metaDataListDialog.getXmlDocuments(); if (documents != null && documents.getDocuments() != null && documents.getDocuments().size() > 0 && metaDataListDialog.getResult() == JOptionPane.OK_OPTION) { int selected = metaDataListDialog.getTableMetadata().getSelectedRow(); if (selected > -1 && selected < documents.getDocuments().size()) { XmlDocument document = documents.getDocuments().get(selected); BibtexEntry entry = entryTable.getEntryAt(dropRow); if (fieldExists(document.getType())) { BibtexEntryType type = BibtexEntryType.getStandardType(document.getType()); if (type != null) { entry.setType(type); } } insertFields(entry.getRequiredFields(), entry, document); insertFields(entry.getGeneralFields(), entry, document); insertFields(entry.getOptionalFields(), entry, document); DroppedFileHandler dfh = new DroppedFileHandler(frame, panel); dfh.linkPdfToEntry(fileName, entryTable, dropRow); } } } else if (importDialog.getRadioButtononlyAttachPDF().isSelected()) { DroppedFileHandler dfh = new DroppedFileHandler(frame, panel); dfh.linkPdfToEntry(fileName, entryTable, dropRow); } } } return true; }