public BibtexEntry parseEntry(BibtexEntryType tp) throws IOException { String id = Util.createNeutralId(); // createId(tp, _db); BibtexEntry result = new BibtexEntry(id, tp); skipWhitespace(); consume('{', '('); int c = peek(); if ((c != '\n') && (c != '\r')) skipWhitespace(); String key = null; boolean doAgain = true; while (doAgain) { doAgain = false; try { if (key != null) key = key + parseKey(); // parseTextToken(), else key = parseKey(); } catch (NoLabelException ex) { // This exception will be thrown if the entry lacks a key // altogether, like in "@article{ author = { ...". // It will also be thrown if a key contains =. c = (char) peek(); if (Character.isWhitespace(c) || (c == '{') || (c == '\"')) { String fieldName = ex.getMessage().trim().toLowerCase(); String cont = parseFieldContent(fieldName); result.setField(fieldName, cont); } else { if (key != null) key = key + ex.getMessage() + "="; else key = ex.getMessage() + "="; doAgain = true; } } } if ((key != null) && key.equals("")) key = null; result.setField(BibtexFields.KEY_FIELD, key); skipWhitespace(); while (true) { c = peek(); if ((c == '}') || (c == ')')) { break; } if (c == ',') consume(','); skipWhitespace(); c = peek(); if ((c == '}') || (c == ')')) { break; } parseField(result); } consume('}', ')'); return result; }
private void parseField(BibtexEntry entry) throws IOException { String key = parseTextToken().toLowerCase(); // Util.pr("Field: _"+key+"_"); skipWhitespace(); consume('='); String content = parseFieldContent(key); // Now, if the field in question is set up to be fitted automatically // with braces around // capitals, we should remove those now when reading the field: if (Globals.prefs.putBracesAroundCapitals(key)) { content = Util.removeBracesAroundCapitals(content); } if (content.length() > 0) { if (entry.getField(key) == null) entry.setField(key, content); else { // The following hack enables the parser to deal with multiple // author or // editor lines, stringing them together instead of getting just // one of them. // Multiple author or editor lines are not allowed by the bibtex // format, but // at least one online database exports bibtex like that, making // it inconvenient // for users if JabRef didn't accept it. if (key.equals("author") || key.equals("editor")) entry.setField(key, entry.getField(key) + " and " + content); } } }
public BibtexString parseString() throws IOException { // Util.pr("Parsing string"); skipWhitespace(); consume('{', '('); // while (read() != '}'); skipWhitespace(); // Util.pr("Parsing string name"); String name = parseTextToken(); // Util.pr("Parsed string name"); skipWhitespace(); // Util.pr("Now the contents"); consume('='); String content = parseFieldContent(name); // Util.pr("Now I'm going to consume a }"); consume('}', ')'); // Util.pr("Finished string parsing."); String id = Util.createNeutralId(); return new BibtexString(id, name, content); }
private String parseFieldContent(String key) throws IOException { skipWhitespace(); StringBuffer value = new StringBuffer(); int c = '.'; while (((c = peek()) != ',') && (c != '}') && (c != ')')) { if (_eof) { throw new RuntimeException("Error in line " + line + ": EOF in mid-string"); } if (c == '"') { StringBuffer text = parseQuotedFieldExactly(); value.append(fieldContentParser.format(text)); /* * * The following code doesn't handle {"} correctly: // value is * a string consume('"'); * * while (!((peek() == '"') && (j != '\\'))) { j = read(); if * (_eof || (j == -1) || (j == 65535)) { throw new * RuntimeException("Error in line "+line+ ": EOF in * mid-string"); } * * value.append((char) j); } * * consume('"'); */ } else if (c == '{') { // Value is a string enclosed in brackets. There can be pairs // of brackets inside of a field, so we need to count the // brackets to know when the string is finished. StringBuffer text = parseBracketedTextExactly(); value.append(fieldContentParser.format(text, key)); } else if (Character.isDigit((char) c)) { // value is a number String numString = parseTextToken(); // Morten Alver 2007-07-04: I don't see the point of parsing the integer // and converting it back to a string, so I'm removing the construct below // the following line: value.append(numString); /* try { // Fixme: What is this for? value.append(String.valueOf(Integer.parseInt(numString))); } catch (NumberFormatException e) { // If Integer could not be parsed then just add the text // Used to fix [ 1594123 ] Failure to import big numbers value.append(numString); } */ } else if (c == '#') { consume('#'); } else { String textToken = parseTextToken(); if (textToken.length() == 0) throw new IOException( "Error in line " + line + " or above: " + "Empty text token.\nThis could be caused " + "by a missing comma between two fields."); value.append("#").append(textToken).append("#"); // Util.pr(parseTextToken()); // throw new RuntimeException("Unknown field type"); } skipWhitespace(); } // Util.pr("Returning field content: "+value.toString()); // Check if we are to strip extra pairs of braces before returning: if (Globals.prefs.getBoolean("autoDoubleBraces")) { // Do it: while ((value.length() > 1) && (value.charAt(0) == '{') && (value.charAt(value.length() - 1) == '}')) { value.deleteCharAt(value.length() - 1); value.deleteCharAt(0); } // Problem: if the field content is "{DNA} blahblah {EPA}", one pair // too much will be removed. // Check if this is the case, and re-add as many pairs as needed. while (hasNegativeBraceCount(value.toString())) { value.insert(0, '{'); value.append('}'); } } return value.toString(); }
/** * Will parse the BibTex-Data found when reading from reader. * * <p>The reader will be consumed. * * <p>Multiple calls to parse() return the same results * * @return ParserResult * @throws IOException */ public ParserResult parse() throws IOException { // If we already parsed this, just return it. if (_pr != null) return _pr; _db = new BibtexDatabase(); // Bibtex related contents. _meta = new HashMap<String, String>(); // Metadata in comments for Bibkeeper. entryTypes = new HashMap<String, BibtexEntryType>(); // To store custem entry types parsed. _pr = new ParserResult(_db, _meta, entryTypes); // First see if we can find the version number of the JabRef version that // wrote the file: String versionNum = readJabRefVersionNumber(); if (versionNum != null) { _pr.setJabrefVersion(versionNum); setMajorMinorVersions(); } else { // No version number found. However, we have only } skipWhitespace(); try { while (!_eof) { boolean found = consumeUncritically('@'); if (!found) break; skipWhitespace(); String entryType = parseTextToken(); BibtexEntryType tp = BibtexEntryType.getType(entryType); boolean isEntry = (tp != null); // Util.pr(tp.getName()); if (!isEntry) { // The entry type name was not recognized. This can mean // that it is a string, preamble, or comment. If so, // parse and set accordingly. If not, assume it is an entry // with an unknown type. if (entryType.toLowerCase().equals("preamble")) { _db.setPreamble(parsePreamble()); } else if (entryType.toLowerCase().equals("string")) { BibtexString bs = parseString(); try { _db.addString(bs); } catch (KeyCollisionException ex) { _pr.addWarning(Globals.lang("Duplicate string name") + ": " + bs.getName()); // ex.printStackTrace(); } } else if (entryType.toLowerCase().equals("comment")) { StringBuffer commentBuf = parseBracketedTextExactly(); /** * Metadata are used to store Bibkeeper-specific information in .bib files. * * <p>Metadata are stored in bibtex files in the format * * @comment{jabref-meta: type:data0;data1;data2;...} * <p>Each comment that starts with the META_FLAG is stored in the meta HashMap, * with type as key. Unluckily, the old META_FLAG bibkeeper-meta: was used in JabRef * 1.0 and 1.1, so we need to support it as well. At least for a while. We'll always * save with the new one. */ String comment = commentBuf.toString().replaceAll("[\\x0d\\x0a]", ""); if (comment .substring(0, Math.min(comment.length(), GUIGlobals.META_FLAG.length())) .equals(GUIGlobals.META_FLAG) || comment .substring(0, Math.min(comment.length(), GUIGlobals.META_FLAG_OLD.length())) .equals(GUIGlobals.META_FLAG_OLD)) { String rest; if (comment.substring(0, GUIGlobals.META_FLAG.length()).equals(GUIGlobals.META_FLAG)) rest = comment.substring(GUIGlobals.META_FLAG.length()); else rest = comment.substring(GUIGlobals.META_FLAG_OLD.length()); int pos = rest.indexOf(':'); if (pos > 0) _meta.put(rest.substring(0, pos), rest.substring(pos + 1)); // We remove all line breaks in the metadata - these // will have been inserted // to prevent too long lines when the file was // saved, and are not part of the data. } /** * A custom entry type can also be stored in a * * @comment: */ if (comment .substring(0, Math.min(comment.length(), GUIGlobals.ENTRYTYPE_FLAG.length())) .equals(GUIGlobals.ENTRYTYPE_FLAG)) { CustomEntryType typ = CustomEntryType.parseEntryType(comment); entryTypes.put(typ.getName().toLowerCase(), typ); } } else { // The entry type was not recognized. This may mean that // it is a custom entry type whose definition will // appear // at the bottom of the file. So we use an // UnknownEntryType // to remember the type name by. tp = new UnknownEntryType(entryType.toLowerCase()); // System.out.println("unknown type: "+entryType); isEntry = true; } } if (isEntry) // True if not comment, preamble or string. { /** * Morten Alver 13 Aug 2006: Trying to make the parser more robust. If an exception is * thrown when parsing an entry, drop the entry and try to resume parsing. Add a warning * for the user. * * <p>An alternative solution is to try rescuing the entry for which parsing failed, by * returning the entry with the exception and adding it before parsing is continued. */ try { BibtexEntry be = parseEntry(tp); boolean duplicateKey = _db.insertEntry(be); if (duplicateKey) // JZTODO lyrics _pr.addWarning( Globals.lang("duplicate BibTeX key") + ": " + be.getCiteKey() + " (" + Globals.lang("grouping may not work for this entry") + ")"); else if (be.getCiteKey() == null || be.getCiteKey().equals("")) { _pr.addWarning( Globals.lang("empty BibTeX key") + ": " + be.getAuthorTitleYear(40) + " (" + Globals.lang("grouping may not work for this entry") + ")"); } } catch (IOException ex) { ex.printStackTrace(); _pr.addWarning( Globals.lang("Error occured when parsing entry") + ": '" + ex.getMessage() + "'. " + Globals.lang("Skipped entry.")); } } skipWhitespace(); } // Before returning the database, update entries with unknown type // based on parsed type definitions, if possible. checkEntryTypes(_pr); return _pr; } catch (KeyCollisionException kce) { // kce.printStackTrace(); throw new IOException("Duplicate ID in bibtex file: " + kce.toString()); } }