Пример #1
0
  public BibtexEntry parseEntry(BibtexEntryType tp) throws IOException {
    String id = Util.createNeutralId(); // createId(tp, _db);
    BibtexEntry result = new BibtexEntry(id, tp);
    skipWhitespace();
    consume('{', '(');
    int c = peek();
    if ((c != '\n') && (c != '\r')) skipWhitespace();
    String key = null;
    boolean doAgain = true;
    while (doAgain) {
      doAgain = false;
      try {
        if (key != null) key = key + parseKey(); // parseTextToken(),
        else key = parseKey();
      } catch (NoLabelException ex) {
        // This exception will be thrown if the entry lacks a key
        // altogether, like in "@article{ author = { ...".
        // It will also be thrown if a key contains =.
        c = (char) peek();
        if (Character.isWhitespace(c) || (c == '{') || (c == '\"')) {
          String fieldName = ex.getMessage().trim().toLowerCase();
          String cont = parseFieldContent(fieldName);
          result.setField(fieldName, cont);
        } else {
          if (key != null) key = key + ex.getMessage() + "=";
          else key = ex.getMessage() + "=";
          doAgain = true;
        }
      }
    }

    if ((key != null) && key.equals("")) key = null;

    result.setField(BibtexFields.KEY_FIELD, key);
    skipWhitespace();

    while (true) {
      c = peek();
      if ((c == '}') || (c == ')')) {
        break;
      }

      if (c == ',') consume(',');

      skipWhitespace();

      c = peek();
      if ((c == '}') || (c == ')')) {
        break;
      }
      parseField(result);
    }

    consume('}', ')');
    return result;
  }
Пример #2
0
 private void parseField(BibtexEntry entry) throws IOException {
   String key = parseTextToken().toLowerCase();
   // Util.pr("Field: _"+key+"_");
   skipWhitespace();
   consume('=');
   String content = parseFieldContent(key);
   // Now, if the field in question is set up to be fitted automatically
   // with braces around
   // capitals, we should remove those now when reading the field:
   if (Globals.prefs.putBracesAroundCapitals(key)) {
     content = Util.removeBracesAroundCapitals(content);
   }
   if (content.length() > 0) {
     if (entry.getField(key) == null) entry.setField(key, content);
     else {
       // The following hack enables the parser to deal with multiple
       // author or
       // editor lines, stringing them together instead of getting just
       // one of them.
       // Multiple author or editor lines are not allowed by the bibtex
       // format, but
       // at least one online database exports bibtex like that, making
       // it inconvenient
       // for users if JabRef didn't accept it.
       if (key.equals("author") || key.equals("editor"))
         entry.setField(key, entry.getField(key) + " and " + content);
     }
   }
 }
Пример #3
0
 public BibtexString parseString() throws IOException {
   // Util.pr("Parsing string");
   skipWhitespace();
   consume('{', '(');
   // while (read() != '}');
   skipWhitespace();
   // Util.pr("Parsing string name");
   String name = parseTextToken();
   // Util.pr("Parsed string name");
   skipWhitespace();
   // Util.pr("Now the contents");
   consume('=');
   String content = parseFieldContent(name);
   // Util.pr("Now I'm going to consume a }");
   consume('}', ')');
   // Util.pr("Finished string parsing.");
   String id = Util.createNeutralId();
   return new BibtexString(id, name, content);
 }
Пример #4
0
  private String parseFieldContent(String key) throws IOException {
    skipWhitespace();
    StringBuffer value = new StringBuffer();
    int c = '.';

    while (((c = peek()) != ',') && (c != '}') && (c != ')')) {

      if (_eof) {
        throw new RuntimeException("Error in line " + line + ": EOF in mid-string");
      }
      if (c == '"') {
        StringBuffer text = parseQuotedFieldExactly();
        value.append(fieldContentParser.format(text));
        /*
         *
         * The following code doesn't handle {"} correctly: // value is
         * a string consume('"');
         *
         * while (!((peek() == '"') && (j != '\\'))) { j = read(); if
         * (_eof || (j == -1) || (j == 65535)) { throw new
         * RuntimeException("Error in line "+line+ ": EOF in
         * mid-string"); }
         *
         * value.append((char) j); }
         *
         * consume('"');
         */
      } else if (c == '{') {
        // Value is a string enclosed in brackets. There can be pairs
        // of brackets inside of a field, so we need to count the
        // brackets to know when the string is finished.
        StringBuffer text = parseBracketedTextExactly();
        value.append(fieldContentParser.format(text, key));

      } else if (Character.isDigit((char) c)) { // value is a number

        String numString = parseTextToken();
        // Morten Alver 2007-07-04: I don't see the point of parsing the integer
        // and converting it back to a string, so I'm removing the construct below
        // the following line:
        value.append(numString);
        /*
                    try {
        	// Fixme: What is this for?
        	value.append(String.valueOf(Integer.parseInt(numString)));
        } catch (NumberFormatException e) {
        	// If Integer could not be parsed then just add the text
        	// Used to fix [ 1594123 ] Failure to import big numbers
        	value.append(numString);
        }
        */
      } else if (c == '#') {
        consume('#');
      } else {
        String textToken = parseTextToken();
        if (textToken.length() == 0)
          throw new IOException(
              "Error in line "
                  + line
                  + " or above: "
                  + "Empty text token.\nThis could be caused "
                  + "by a missing comma between two fields.");
        value.append("#").append(textToken).append("#");
        // Util.pr(parseTextToken());
        // throw new RuntimeException("Unknown field type");
      }
      skipWhitespace();
    }
    // Util.pr("Returning field content: "+value.toString());

    // Check if we are to strip extra pairs of braces before returning:
    if (Globals.prefs.getBoolean("autoDoubleBraces")) {
      // Do it:
      while ((value.length() > 1)
          && (value.charAt(0) == '{')
          && (value.charAt(value.length() - 1) == '}')) {
        value.deleteCharAt(value.length() - 1);
        value.deleteCharAt(0);
      }
      // Problem: if the field content is "{DNA} blahblah {EPA}", one pair
      // too much will be removed.
      // Check if this is the case, and re-add as many pairs as needed.
      while (hasNegativeBraceCount(value.toString())) {
        value.insert(0, '{');
        value.append('}');
      }
    }
    return value.toString();
  }
Пример #5
0
  /**
   * Will parse the BibTex-Data found when reading from reader.
   *
   * <p>The reader will be consumed.
   *
   * <p>Multiple calls to parse() return the same results
   *
   * @return ParserResult
   * @throws IOException
   */
  public ParserResult parse() throws IOException {

    // If we already parsed this, just return it.
    if (_pr != null) return _pr;

    _db = new BibtexDatabase(); // Bibtex related contents.
    _meta = new HashMap<String, String>(); // Metadata in comments for Bibkeeper.
    entryTypes = new HashMap<String, BibtexEntryType>(); // To store custem entry types parsed.
    _pr = new ParserResult(_db, _meta, entryTypes);

    // First see if we can find the version number of the JabRef version that
    // wrote the file:
    String versionNum = readJabRefVersionNumber();
    if (versionNum != null) {
      _pr.setJabrefVersion(versionNum);
      setMajorMinorVersions();
    } else {
      // No version number found. However, we have only
    }

    skipWhitespace();

    try {
      while (!_eof) {
        boolean found = consumeUncritically('@');
        if (!found) break;
        skipWhitespace();
        String entryType = parseTextToken();
        BibtexEntryType tp = BibtexEntryType.getType(entryType);
        boolean isEntry = (tp != null);
        // Util.pr(tp.getName());
        if (!isEntry) {
          // The entry type name was not recognized. This can mean
          // that it is a string, preamble, or comment. If so,
          // parse and set accordingly. If not, assume it is an entry
          // with an unknown type.
          if (entryType.toLowerCase().equals("preamble")) {
            _db.setPreamble(parsePreamble());
          } else if (entryType.toLowerCase().equals("string")) {
            BibtexString bs = parseString();
            try {
              _db.addString(bs);
            } catch (KeyCollisionException ex) {
              _pr.addWarning(Globals.lang("Duplicate string name") + ": " + bs.getName());
              // ex.printStackTrace();
            }
          } else if (entryType.toLowerCase().equals("comment")) {
            StringBuffer commentBuf = parseBracketedTextExactly();
            /**
             * Metadata are used to store Bibkeeper-specific information in .bib files.
             *
             * <p>Metadata are stored in bibtex files in the format
             *
             * @comment{jabref-meta: type:data0;data1;data2;...}
             *     <p>Each comment that starts with the META_FLAG is stored in the meta HashMap,
             *     with type as key. Unluckily, the old META_FLAG bibkeeper-meta: was used in JabRef
             *     1.0 and 1.1, so we need to support it as well. At least for a while. We'll always
             *     save with the new one.
             */
            String comment = commentBuf.toString().replaceAll("[\\x0d\\x0a]", "");
            if (comment
                    .substring(0, Math.min(comment.length(), GUIGlobals.META_FLAG.length()))
                    .equals(GUIGlobals.META_FLAG)
                || comment
                    .substring(0, Math.min(comment.length(), GUIGlobals.META_FLAG_OLD.length()))
                    .equals(GUIGlobals.META_FLAG_OLD)) {

              String rest;
              if (comment.substring(0, GUIGlobals.META_FLAG.length()).equals(GUIGlobals.META_FLAG))
                rest = comment.substring(GUIGlobals.META_FLAG.length());
              else rest = comment.substring(GUIGlobals.META_FLAG_OLD.length());

              int pos = rest.indexOf(':');

              if (pos > 0) _meta.put(rest.substring(0, pos), rest.substring(pos + 1));
              // We remove all line breaks in the metadata - these
              // will have been inserted
              // to prevent too long lines when the file was
              // saved, and are not part of the data.
            }

            /**
             * A custom entry type can also be stored in a
             *
             * @comment:
             */
            if (comment
                .substring(0, Math.min(comment.length(), GUIGlobals.ENTRYTYPE_FLAG.length()))
                .equals(GUIGlobals.ENTRYTYPE_FLAG)) {

              CustomEntryType typ = CustomEntryType.parseEntryType(comment);
              entryTypes.put(typ.getName().toLowerCase(), typ);
            }
          } else {
            // The entry type was not recognized. This may mean that
            // it is a custom entry type whose definition will
            // appear
            // at the bottom of the file. So we use an
            // UnknownEntryType
            // to remember the type name by.
            tp = new UnknownEntryType(entryType.toLowerCase());
            // System.out.println("unknown type: "+entryType);
            isEntry = true;
          }
        }

        if (isEntry) // True if not comment, preamble or string.
        {
          /**
           * Morten Alver 13 Aug 2006: Trying to make the parser more robust. If an exception is
           * thrown when parsing an entry, drop the entry and try to resume parsing. Add a warning
           * for the user.
           *
           * <p>An alternative solution is to try rescuing the entry for which parsing failed, by
           * returning the entry with the exception and adding it before parsing is continued.
           */
          try {
            BibtexEntry be = parseEntry(tp);

            boolean duplicateKey = _db.insertEntry(be);
            if (duplicateKey) // JZTODO lyrics
            _pr.addWarning(
                  Globals.lang("duplicate BibTeX key")
                      + ": "
                      + be.getCiteKey()
                      + " ("
                      + Globals.lang("grouping may not work for this entry")
                      + ")");
            else if (be.getCiteKey() == null || be.getCiteKey().equals("")) {
              _pr.addWarning(
                  Globals.lang("empty BibTeX key")
                      + ": "
                      + be.getAuthorTitleYear(40)
                      + " ("
                      + Globals.lang("grouping may not work for this entry")
                      + ")");
            }
          } catch (IOException ex) {
            ex.printStackTrace();
            _pr.addWarning(
                Globals.lang("Error occured when parsing entry")
                    + ": '"
                    + ex.getMessage()
                    + "'. "
                    + Globals.lang("Skipped entry."));
          }
        }

        skipWhitespace();
      }

      // Before returning the database, update entries with unknown type
      // based on parsed type definitions, if possible.
      checkEntryTypes(_pr);

      return _pr;
    } catch (KeyCollisionException kce) {
      // kce.printStackTrace();
      throw new IOException("Duplicate ID in bibtex file: " + kce.toString());
    }
  }