public BibtexEntry parseEntry(BibtexEntryType tp) throws IOException { String id = Util.createNeutralId(); // createId(tp, _db); BibtexEntry result = new BibtexEntry(id, tp); skipWhitespace(); consume('{', '('); int c = peek(); if ((c != '\n') && (c != '\r')) skipWhitespace(); String key = null; boolean doAgain = true; while (doAgain) { doAgain = false; try { if (key != null) key = key + parseKey(); // parseTextToken(), else key = parseKey(); } catch (NoLabelException ex) { // This exception will be thrown if the entry lacks a key // altogether, like in "@article{ author = { ...". // It will also be thrown if a key contains =. c = (char) peek(); if (Character.isWhitespace(c) || (c == '{') || (c == '\"')) { String fieldName = ex.getMessage().trim().toLowerCase(); String cont = parseFieldContent(fieldName); result.setField(fieldName, cont); } else { if (key != null) key = key + ex.getMessage() + "="; else key = ex.getMessage() + "="; doAgain = true; } } } if ((key != null) && key.equals("")) key = null; result.setField(BibtexFields.KEY_FIELD, key); skipWhitespace(); while (true) { c = peek(); if ((c == '}') || (c == ')')) { break; } if (c == ',') consume(','); skipWhitespace(); c = peek(); if ((c == '}') || (c == ')')) { break; } parseField(result); } consume('}', ')'); return result; }
private void parseField(BibtexEntry entry) throws IOException { String key = parseTextToken().toLowerCase(); // Util.pr("Field: _"+key+"_"); skipWhitespace(); consume('='); String content = parseFieldContent(key); // Now, if the field in question is set up to be fitted automatically // with braces around // capitals, we should remove those now when reading the field: if (Globals.prefs.putBracesAroundCapitals(key)) { content = Util.removeBracesAroundCapitals(content); } if (content.length() > 0) { if (entry.getField(key) == null) entry.setField(key, content); else { // The following hack enables the parser to deal with multiple // author or // editor lines, stringing them together instead of getting just // one of them. // Multiple author or editor lines are not allowed by the bibtex // format, but // at least one online database exports bibtex like that, making // it inconvenient // for users if JabRef didn't accept it. if (key.equals("author") || key.equals("editor")) entry.setField(key, entry.getField(key) + " and " + content); } } }
private StringBuffer parseBracketedText() throws IOException { // Util.pr("Parse bracketed text"); StringBuffer value = new StringBuffer(); consume('{'); int brackets = 0; while (!((peek() == '}') && (brackets == 0))) { int j = read(); if ((j == -1) || (j == 65535)) { throw new RuntimeException("Error in line " + line + ": EOF in mid-string"); } else if (j == '{') brackets++; else if (j == '}') brackets--; // If we encounter whitespace of any kind, read it as a // simple space, and ignore any others that follow immediately. /* * if (j == '\n') { if (peek() == '\n') value.append('\n'); } else */ if (Character.isWhitespace((char) j)) { String whs = skipAndRecordWhitespace(j); // System.out.println(":"+whs+":"); if (!whs.equals("") && !whs.equals("\n\t")) { // && // !whs.equals("\n")) whs = whs.replaceAll("\t", ""); // Remove tabulators. // while (whs.endsWith("\t")) // whs = whs.substring(0, whs.length()-1); value.append(whs); } else { value.append(' '); } } else value.append((char) j); } consume('}'); return value; }
public BibtexString parseString() throws IOException { // Util.pr("Parsing string"); skipWhitespace(); consume('{', '('); // while (read() != '}'); skipWhitespace(); // Util.pr("Parsing string name"); String name = parseTextToken(); // Util.pr("Parsed string name"); skipWhitespace(); // Util.pr("Now the contents"); consume('='); String content = parseFieldContent(name); // Util.pr("Now I'm going to consume a }"); consume('}', ')'); // Util.pr("Finished string parsing."); String id = Util.createNeutralId(); return new BibtexString(id, name, content); }
/**
 * Reads a double-quoted text verbatim.
 * <p>
 * The opening quote is consumed, every character is copied unchanged until
 * a quote is peeked while the brace counter is zero, and that quote is
 * consumed as well. A quote seen while the counter is non-zero is copied
 * like any other character.
 *
 * @return the characters between the quotes
 * @throws IOException if one of the reading helpers throws it
 * @throws RuntimeException if the input ends before the closing quote
 */
private StringBuffer parseQuotedFieldExactly() throws IOException {
    StringBuffer raw = new StringBuffer();
    consume('"');

    int depth = 0;
    while (true) {
        if ((peek() == '"') && (depth == 0)) {
            break;
        }

        int ch = read();
        switch (ch) {
        case -1:
        case 65535:
            throw new RuntimeException("Error in line " + line + ": EOF in mid-string");
        case '{':
            depth++;
            break;
        case '}':
            depth--;
            break;
        default:
            break;
        }
        raw.append((char) ch);
    }

    consume('"');
    return raw;
}
private String parseFieldContent(String key) throws IOException { skipWhitespace(); StringBuffer value = new StringBuffer(); int c = '.'; while (((c = peek()) != ',') && (c != '}') && (c != ')')) { if (_eof) { throw new RuntimeException("Error in line " + line + ": EOF in mid-string"); } if (c == '"') { StringBuffer text = parseQuotedFieldExactly(); value.append(fieldContentParser.format(text)); /* * * The following code doesn't handle {"} correctly: // value is * a string consume('"'); * * while (!((peek() == '"') && (j != '\\'))) { j = read(); if * (_eof || (j == -1) || (j == 65535)) { throw new * RuntimeException("Error in line "+line+ ": EOF in * mid-string"); } * * value.append((char) j); } * * consume('"'); */ } else if (c == '{') { // Value is a string enclosed in brackets. There can be pairs // of brackets inside of a field, so we need to count the // brackets to know when the string is finished. StringBuffer text = parseBracketedTextExactly(); value.append(fieldContentParser.format(text, key)); } else if (Character.isDigit((char) c)) { // value is a number String numString = parseTextToken(); // Morten Alver 2007-07-04: I don't see the point of parsing the integer // and converting it back to a string, so I'm removing the construct below // the following line: value.append(numString); /* try { // Fixme: What is this for? 
value.append(String.valueOf(Integer.parseInt(numString))); } catch (NumberFormatException e) { // If Integer could not be parsed then just add the text // Used to fix [ 1594123 ] Failure to import big numbers value.append(numString); } */ } else if (c == '#') { consume('#'); } else { String textToken = parseTextToken(); if (textToken.length() == 0) throw new IOException( "Error in line " + line + " or above: " + "Empty text token.\nThis could be caused " + "by a missing comma between two fields."); value.append("#").append(textToken).append("#"); // Util.pr(parseTextToken()); // throw new RuntimeException("Unknown field type"); } skipWhitespace(); } // Util.pr("Returning field content: "+value.toString()); // Check if we are to strip extra pairs of braces before returning: if (Globals.prefs.getBoolean("autoDoubleBraces")) { // Do it: while ((value.length() > 1) && (value.charAt(0) == '{') && (value.charAt(value.length() - 1) == '}')) { value.deleteCharAt(value.length() - 1); value.deleteCharAt(0); } // Problem: if the field content is "{DNA} blahblah {EPA}", one pair // too much will be removed. // Check if this is the case, and re-add as many pairs as needed. while (hasNegativeBraceCount(value.toString())) { value.insert(0, '{'); value.append('}'); } } return value.toString(); }