Beispiel #1
0
  /**
   * Scans an entity value. [9]
   *
   * <p>The literal is processed in two passes. The first pass reads the quoted literal from the
   * current input and appends the results of {@code ref(false)} and {@code peRef()} for
   * {@code '&'} and {@code '%'}. The second pass re-reads the collected bytes through a temporary
   * {@link XMLInput} and applies {@code ref(false)} once more to every {@code '&'}.
   *
   * @param p pe reference flag; if {@code false}, a {@code '%'} inside the literal is reported
   *     via {@code error(INVPE)}
   * @return scanned value, or {@code null} if the next character is not a quote (the character
   *     is pushed back in that case)
   * @throws IOException I/O exception
   */
  private byte[] entityValue(final boolean p) throws IOException {
    final int qu = consume();
    if (qu != '\'' && qu != '"') {
      // no literal at this position: unread the character
      prev(1);
      return null;
    }

    // first pass: collect the literal up to the matching closing quote
    TokenBuilder tok = new TokenBuilder();
    int ch;
    while ((ch = nextChar()) != qu) {
      if (ch == '&') tok.add(ref(false));
      else if (ch == '%') {
        if (!p) error(INVPE);
        tok.add(peRef());
      } else {
        tok.add(ch);
      }
    }

    // second pass: rescan the collected bytes from a temporary input
    final XMLInput tmp = input;
    input = new XMLInput(new IOContent(tok.finish()));
    try {
      tok = new TokenBuilder();
      while ((ch = consume()) != 0) {
        if (ch == '&') tok.add(ref(false));
        else tok.add(ch);
      }
    } finally {
      // restore the original input even if scanning the temporary input fails
      input = tmp;
    }
    return tok.finish();
  }
Beispiel #2
0
 /**
  * Builds the string representation of this path summary node, followed by the
  * representations of all of its children (one level deeper each).
  *
  * @param data data reference
  * @param level level
  * @return string representation
  */
 byte[] info(final Data data, final int level) {
   final TokenBuilder out = new TokenBuilder();

   // every level except 0 starts on a new line
   if (level != 0) {
     out.add(Text.NL);
   }
   // two blanks per level
   final int indent = level << 1;
   for (int i = 0; i < indent; ++i) {
     out.add(' ');
   }

   // node label, depending on the node kind
   if (kind == Data.DOC) {
     out.add(DOC);
   } else if (kind == Data.ELEM) {
     out.add(data.elemNames.key(name));
   } else if (kind == Data.TEXT) {
     out.add(TEXT);
   } else if (kind == Data.ATTR) {
     out.add(ATT);
     out.add(data.attrNames.key(name));
   } else if (kind == Data.COMM) {
     out.add(COMMENT);
   } else if (kind == Data.PI) {
     out.add(PI);
   }

   out.add(": " + stats);

   // append the children recursively
   for (final PathNode child : children) {
     out.add(child.info(data, level + 1));
   }
   return out.finish();
 }
Beispiel #3
0
  /**
   * Scans an external ID, i.e. an optional {@code PUBLIC} or {@code SYSTEM} keyword followed by
   * the corresponding quoted literal(s), and optionally reads and scans the referenced resource.
   *
   * <p>If the resource is read, the scanner's {@code input} is temporarily replaced by an input
   * on the loaded bytes and switched back before returning normally.
   *
   * @param f full flag; if {@code false}, the quoted literal is only skipped and {@code null} is
   *     returned, and a missing quote is not reported as an error
   * @param r root flag; if {@code true}, the loaded content is scanned with
   *     {@code extSubsetDecl()} and must be followed by the end of the input
   * @return bytes read from the referenced resource (without a leading declaration, if one was
   *     scanned), a single {@code '?'} byte if the resource could not be read, or {@code null}
   *     if no keyword was found, if {@code f} is {@code false}, if {@code !dtd && r} holds, or
   *     if no quoted literal follows
   * @throws IOException I/O exception
   */
  private byte[] externalID(final boolean f, final boolean r) throws IOException {
    byte[] cont = null;
    final boolean pub = consume(PUBLIC);
    if (pub || consume(SYSTEM)) {
      checkS();
      if (pub) {
        // the result of pubidLit() is not used here
        pubidLit();
        if (f) checkS();
      }
      final int qu = consume(); // [11]
      if (qu == '\'' || qu == '"') {
        // collect everything up to the matching closing quote
        int ch;
        final TokenBuilder tok = new TokenBuilder();
        while ((ch = nextChar()) != qu) tok.add(ch);
        // literal has been skipped; nothing is loaded without the full flag
        if (!f) return null;
        final String name = string(tok.finish());
        // cont is still null at this point
        if (!dtd && r) return cont;

        // remember the current input; it is replaced by the loaded content below
        // NOTE(review): input is only switched back on the normal path (see the assignment
        // at the end of this branch); an exception raised in between leaves the replacement
        // input in place - confirm whether callers rely on the scanner after a failure
        final XMLInput tin = input;
        try {
          // resolve the literal against the current input's IO reference and read it
          final IO file = input.io().merge(name);
          cont = file.read();
        } catch (final IOException ex) {
          Util.debug(ex);
          // skip unknown DTDs/entities
          cont = new byte[] {'?'};
        }
        input = new XMLInput(new IOContent(cont, name));

        // optional leading declaration: XDECL, XML, optional version, mandatory encoding, "?>"
        if (consume(XDECL)) {
          check(XML);
          s();
          if (version()) checkS();
          s();
          if (encoding() == null) error(TEXTENC);
          ch = nextChar();
          if (s(ch)) ch = nextChar();
          if (ch != '?') error(WRONGCHAR, '?', ch);
          ch = nextChar();
          if (ch != '>') error(WRONGCHAR, '>', ch);
          // drop the scanned declaration from the returned content
          cont = Arrays.copyOfRange(cont, input.pos(), cont.length);
        }

        s();
        if (r) {
          // scan the loaded content; it must be consumed completely
          extSubsetDecl();
          if (!consume((char) 0)) error(INVEND);
        }
        // switch back to the original input
        input = tin;
      } else {
        // no quote found: an error with the full flag, otherwise the character is unread
        if (f) error(SCANQUOTE, (char) qu);
        prev(1);
      }
    }
    return cont;
  }
Beispiel #4
0
  @Override
  public synchronized byte[] info(final MainOptions options) {
    // summed length of the three inputs inX, inY and inZ
    final long size = inX.length() + inY.length() + inZ.length();
    final String sizeLine = LI_SIZE + Performance.format(size, true) + NL;

    final TokenBuilder out = new TokenBuilder();
    out.add(LI_NAMES).add(data.meta.ftinclude).add(NL);
    out.add(sizeLine);

    // collect the occurrences and append the resulting statistics
    final IndexStats indexStats = new IndexStats(options.get(MainOptions.MAXSTAT));
    addOccs(indexStats);
    indexStats.print(out);
    return out.finish();
  }
Beispiel #5
0
 /**
  * Consumes an XML name. [5]
  *
  * <p>If the next character is no valid start character, it is pushed back and {@code null} is
  * returned; with the force flag set, {@code error(INVNAME)} is called first.
  *
  * @param f force parsing
  * @return name, or {@code null} if no name starts at the current position
  * @throws IOException I/O exception
  */
 private byte[] name(final boolean f) throws IOException {
   final TokenBuilder tb = new TokenBuilder();
   int ch = consume();
   if (isStartChar(ch)) {
     // start character followed by any number of name characters
     tb.add(ch);
     while (isChar(ch = nextChar())) {
       tb.add(ch);
     }
     // unread the character that terminated the name
     prev(1);
     return tb.finish();
   }

   // no name found
   if (f) {
     error(INVNAME);
   }
   prev(1);
   return null;
 }
Beispiel #6
0
  /**
   * Scans a reference. [67]
   *
   * <p>Character references ({@code #} followed by decimal digits, or by {@code x} and hex
   * digits) are converted to the referenced character. Named references are either returned
   * unchanged or looked up in the known entities, depending on {@code f}. {@code QUESTION} is
   * returned whenever the reference is malformed, invalid or unknown.
   *
   * @param f dissolve entities; if {@code false}, a named reference is returned as the
   *     concatenation of {@code AMPER}, the name and {@code SEMI}
   * @return entity
   * @throws IOException I/O exception
   */
  private byte[] ref(final boolean f) throws IOException {
    // scans numeric entities
    if (consume('#')) { // [66]
      final TokenBuilder ent = new TokenBuilder();
      int b = 10;
      int ch = nextChar();
      ent.add(ch);
      if (ch == 'x') {
        b = 16;
        ent.add(ch = nextChar());
      }
      int n = 0;
      do {
        final boolean m = ch >= '0' && ch <= '9';
        final boolean h = b == 16 && (ch >= 'a' && ch <= 'f' || ch >= 'A' && ch <= 'F');
        if (!m && !h) {
          completeRef(ent);
          return QUESTION;
        }
        // Stop accumulating once the value has left the Unicode range: this keeps n from
        // overflowing (and wrapping around to a seemingly valid code point) on overlong
        // digit sequences. The value stays out of range and is handed to valid() below,
        // just like any other value above the maximum code point.
        if (n <= Character.MAX_CODE_POINT) {
          n *= b;
          n += ch & 15;
          // 'a'-'f' / 'A'-'F': the low nibble is 1-6, so add 9 to obtain 10-15
          if (!m) n += 9;
        }
        ent.add(ch = nextChar());
      } while (ch != ';');

      if (!valid(n)) return QUESTION;
      ent.reset();
      ent.add(n);
      return ent.finish();
    }

    // scans predefined entities [68]
    final byte[] name = name(false);
    // name(false) yields null if no name follows (e.g. "&;"); the semicolon is consumed first
    // so that such a malformed reference is skipped as a whole instead of passing null on
    if (!consume(';') || name == null) return QUESTION;

    if (!f) return concat(AMPER, name, SEMI);

    byte[] en = ents.get(name);
    if (en == null) {
      // unknown entity: try HTML entities (lazy initialization)
      if (HTMLENTS.size() == 0) {
        for (int s = 0; s < HTMLENTITIES.length; s += 2) {
          HTMLENTS.add(token(HTMLENTITIES[s]), token(HTMLENTITIES[s + 1]));
        }
      }
      en = HTMLENTS.get(name);
    }
    return en == null ? QUESTION : en;
  }
Beispiel #7
0
  /**
   * Runs the mark/reset test on the specified data: the input is read completely, reset, and
   * read again. A successful reset is only expected for data smaller than {@code IO.BLOCKSIZE};
   * a failing one only for data of at least that size.
   *
   * @param data data to be tested
   * @throws IOException I/O exception
   */
  private static void run(final byte[] data) throws IOException {
    final TokenBuilder buffer = new TokenBuilder();
    final TextInput in = new TextInput(new IOContent(data));
    final int size = data.length;

    // read a single value and rewind
    in.read();
    in.reset();

    // first complete pass
    int b = in.read();
    while (b != -1) {
      buffer.add(b);
      b = in.read();
    }

    try {
      in.reset();
      // reaching this point means the reset succeeded
      assertTrue("Mark should not be supported for data size of " + size, size < IO.BLOCKSIZE);

      // second complete pass must yield the original data
      buffer.reset();
      b = in.read();
      while (b != -1) {
        buffer.add(b);
        b = in.read();
      }
      assertSame(data, buffer.finish());
    } catch (final IOException ex) {
      assertTrue("Mark could not be reset for data size of " + size, size >= IO.BLOCKSIZE);
    }
  }
Beispiel #8
0
 /**
  * Scans XML text, starting with the specified character. Scanning stops at the end of the
  * input or at a {@code '<'} that is neither the very first character nor accepted by
  * {@code isCDATA()}.
  *
  * @param ch current character
  * @throws IOException I/O exception
  */
 private void content(final int ch) throws IOException {
   type = Type.TEXT;
   boolean first = true;
   for (int cur = ch; cur != 0; cur = consume(), first = false) {
     if (cur == '<') {
       if (first || isCDATA()) {
         cDATA();
       } else {
         // other markup follows: unread the '<' and stop scanning text
         text = false;
         prev(1);
         if (chop) {
           token.trim();
         }
         return;
       }
     } else if (cur == '&') {
       // scan entity
       final byte[] entity = ref(true);
       if (entity.length == 1) {
         token.add(entity);
       } else if (!input.add(entity, false)) {
         error(RECENT);
       }
     } else {
       if (cur == ']') {
         // ']]>' not allowed in content; the look-ahead characters are unread again
         if (consume() == ']') {
           if (consume() == '>') {
             error(CONTCDATA);
           }
           prev(1);
         }
         prev(1);
       }
       // add character to cached content
       token.add(cur);
     }
   }

   // end of file
   if (!fragment) {
     if (!ws(token.finish())) {
       error(AFTERROOT);
     }
     type = Type.EOF;
   }
 }
Beispiel #9
0
 /**
  * Scans a document encoding: the {@code ENCOD} keyword, {@code '='} and a quoted encoding
  * name. The scanned name is also assigned to the current input via {@code input.encoding()}.
  *
  * @return encoding, or {@code null} if the {@code ENCOD} keyword is not found (in fragment
  *     mode, {@code error(TEXTENC)} is called in that case)
  * @throws IOException I/O exception
  */
 private String encoding() throws IOException {
   if (!consume(ENCOD)) {
     if (fragment) error(TEXTENC);
     return null;
   }
   s();
   check('=');
   s();
   final TokenBuilder enc = new TokenBuilder();
   final int d = qu();
   int ch = nextChar();
   // the name must start with a letter other than '_'
   if (letter(ch) && ch != '_') {
     while (letterOrDigit(ch) || ch == '.' || ch == '-') {
       enc.add(ch);
       ch = nextChar();
     }
   }
   // Always unread the first character that does not belong to the name. Doing this only
   // after a non-empty name dropped that character for inputs such as encoding="" (the
   // closing quote was lost), so the empty-name check below could not be reached.
   prev(1);
   check((char) d);
   if (enc.isEmpty()) error(DECLENCODE, enc);
   final String e = string(enc.finish());
   input.encoding(e);
   return e;
 }