Exemple #1
0
  /**
   * Scans an entity value. [9]
   *
   * @param p pe reference flag
   * @return value
   * @throws IOException I/O exception
   */
  private byte[] entityValue(final boolean p) throws IOException {
    // the value must be enclosed in single or double quotes
    final int delim = consume();
    final boolean quoted = delim == '\'' || delim == '"';
    if (!quoted) {
      prev(1);
      return null;
    }

    // first pass: collect everything up to the closing quote; '&' is handled by
    // ref(false), '%' by peRef() (only accepted if the pe reference flag is set)
    final TokenBuilder raw = new TokenBuilder();
    for (int c = nextChar(); c != delim; c = nextChar()) {
      if (c == '&') {
        raw.add(ref(false));
      } else if (c != '%') {
        raw.add(c);
      } else {
        if (!p) error(INVPE);
        raw.add(peRef());
      }
    }

    // second pass: scan the collected bytes again via a temporary input
    final XMLInput saved = input;
    input = new XMLInput(new IOContent(raw.finish()));
    final TokenBuilder value = new TokenBuilder();
    for (int c = consume(); c != 0; c = consume()) {
      if (c == '&') value.add(ref(false));
      else value.add(c);
    }
    // switch back to the original input
    input = saved;
    return value.finish();
  }
 /**
  * Joins the path.
  *
  * @param s segment to start with
  * @return joined path
  */
 private String join(final int s) {
   final TokenBuilder joined = new TokenBuilder();
   final int count = segments.length;
   int index = s;
   while (index < count) {
     // separate entries with a slash once something has been written
     if (!joined.isEmpty()) joined.add('/');
     joined.add(segments[index]);
     index++;
   }
   return joined.toString();
 }
Exemple #3
0
 /**
  * Returns the database path (i.e., all path entries except for the first).
  *
  * @return database path, with segments separated by slashes
  */
 public String dbpath() {
   final TokenBuilder path = new TokenBuilder();
   // start at index 1: the first segment is not part of the result
   for (int index = 1; index < segments.length; index++) {
     if (!path.isEmpty()) {
       path.add('/');
     }
     path.add(segments[index]);
   }
   return path.toString();
 }
Exemple #4
0
  /**
   * Creates an XQuery representation for the specified table query.
   *
   * @param filter filter terms
   * @param cols filter columns
   * @param elem element flag
   * @param name name of root element
   * @param root root flag
   * @return query
   */
  public static String findTable(
      final StringList filter,
      final TokenList cols,
      final BoolList elem,
      final byte[] name,
      final boolean root) {

    final TokenBuilder preds = new TokenBuilder();
    final int count = filter.size();
    for (int col = 0; col < count; ++col) {
      for (final String part : split(filter.get(col))) {
        // double quotes are replaced by spaces before the term is embedded in the query
        final byte[] term = trim(replace(token(part), '"', ' '));
        if (term.length != 0) {
          // every non-empty term contributes one predicate on the column's name test
          preds.add('[');
          preds.add(elem.get(col) ? ".//" : "@");
          preds.add("*:");
          preds.add(cols.get(col));

          final byte first = term[0];
          if (first != '<' && first != '>') {
            // textual term: full-text comparison
            preds.add(" contains text \"");
            preds.add(term);
            preds.add('"');
          } else {
            // leading '<' or '>': numeric comparison with the remaining characters
            preds.add(first);
            preds.addLong(calcNum(substring(term, 1)));
          }
          preds.add(']');
        }
      }
    }
    // without any predicate, the root path is returned
    if (preds.isEmpty()) return "/";
    final String start = root ? "/" : "";
    return start + Axis.DESCORSELF + "::*:" + string(name) + preds;
  }
Exemple #5
0
 /**
  * Scans CDATA.
  *
  * @throws IOException I/O exception
  */
 private void cDATA() throws IOException {
   for (;;) {
     // copy everything up to the next closing bracket
     int c = nextChar();
     while (c != ']') {
       token.add(c);
       c = nextChar();
     }
     // the section ends as soon as the bracket is followed by "]>"
     if (consume(']')) {
       if (consume('>')) return;
       // no terminator: step back so that the second bracket is scanned again
       prev(1);
     }
     // the bracket itself is part of the content
     token.add(c);
   }
 }
Exemple #6
0
  @Override
  public synchronized byte[] info(final MainOptions options) {
    // sum of the lengths reported by inX, inY and inZ
    final long size = inX.length() + inY.length() + inZ.length();

    final TokenBuilder info = new TokenBuilder();
    info.add(LI_NAMES).add(data.meta.ftinclude).add(NL);
    final String sizeLine = LI_SIZE + Performance.format(size, true) + NL;
    info.add(sizeLine);

    // collect occurrence statistics (sized by the MAXSTAT option) and append them
    final IndexStats stats = new IndexStats(options.get(MainOptions.MAXSTAT));
    addOccs(stats);
    stats.print(info);
    return info.finish();
  }
Exemple #7
0
  /**
   * Scans a reference. [67]
   *
   * @param f dissolve entities
   * @return entity
   * @throws IOException I/O exception
   */
  private byte[] ref(final boolean f) throws IOException {
    // scans numeric entities
    if (consume('#')) { // [66]
      // 'ent' caches the characters read so far
      final TokenBuilder ent = new TokenBuilder();
      // radix: 10 by default, 16 after a leading 'x'
      int b = 10;
      int ch = nextChar();
      ent.add(ch);
      if (ch == 'x') {
        b = 16;
        ent.add(ch = nextChar());
      }
      int n = 0;
      // set once the value exceeds the largest Unicode code point; accumulation stops
      // then, so that an overlong digit sequence cannot wrap around the int range
      // and be mistaken for a different, valid character
      boolean overflow = false;
      do {
        final boolean m = ch >= '0' && ch <= '9';
        final boolean h = b == 16 && (ch >= 'a' && ch <= 'f' || ch >= 'A' && ch <= 'F');
        if (!m && !h) {
          // invalid digit: consume some more characters of the broken reference
          completeRef(ent);
          return QUESTION;
        }
        if (!overflow) {
          // the low nibble is 0-9 for digits and 1-6 for hex letters (hence +9)
          n = n * b + (ch & 15) + (m ? 0 : 9);
          overflow = n > Character.MAX_CODE_POINT;
        }
        ent.add(ch = nextChar());
      } while (ch != ';');

      if (overflow || !valid(n)) return QUESTION;
      // replace the cached characters with the referenced code point
      ent.reset();
      ent.add(n);
      return ent.finish();
    }

    // scans predefined entities [68]
    final byte[] name = name(false);
    // name(false) yields null if no name start character follows the ampersand;
    // such a reference is rejected instead of passing null on
    if (!consume(';') || name == null) return QUESTION;

    // entities are not dissolved: return the reference as it was written
    if (!f) return concat(AMPER, name, SEMI);

    byte[] en = ents.get(name);
    if (en == null) {
      // unknown entity: try HTML entities (lazy initialization)
      if (HTMLENTS.size() == 0) {
        // HTMLENTITIES holds alternating name/value pairs
        for (int s = 0; s < HTMLENTITIES.length; s += 2) {
          HTMLENTS.add(token(HTMLENTITIES[s]), token(HTMLENTITIES[s + 1]));
        }
      }
      en = HTMLENTS.get(name);
    }
    return en == null ? QUESTION : en;
  }
Exemple #8
0
  @Override
  public byte[] info(final MainOptions options) {
    final TokenBuilder info = new TokenBuilder();
    info.add(LI_STRUCTURE).add(HASH).add(NL);
    info.add(LI_NAMES).add(data.meta.names(type)).add(NL);

    // gather occurrence counts; ids from 1 up to (and including) the number of values
    final IndexStats stats = new IndexStats(options.get(MainOptions.MAXSTAT));
    final int last = values.size();
    for (int id = 1; id <= last; id++) {
      final int count = lenList.get(id);
      // ids without occurrences are skipped
      if (count <= 0) continue;
      if (stats.adding(count)) stats.add(values.key(id), count);
    }
    stats.print(info);
    return info.finish();
  }
Exemple #9
0
 /**
  * Consumes an Nmtoken. [7]
  *
  * @throws IOException I/O exception
  */
 private void nmtoken() throws IOException {
   final TokenBuilder chars = new TokenBuilder();
   // collect characters as long as they are accepted by isChar()
   for (int c = nextChar(); isChar(c); c = nextChar()) {
     chars.add(c);
   }
   // step back over the character that ended the loop
   prev(1);
   // at least one character is required
   if (chars.isEmpty()) error(INVNAME);
 }
Exemple #10
0
  /**
   * Scans a processing instruction.
   *
   * @throws IOException I/O exception
   */
  private void pi() throws IOException {
    // scan the target name; a name that equals XML after lc() is rejected
    final byte[] target = name(true);
    if (eq(lc(target), XML)) error(PIRES);
    token.add(target);

    // the name must be followed by whitespace or by the closing question mark
    int c = nextChar();
    final boolean separated = c == '?' || ws(c);
    if (!separated) error(PITEXT);

    while (true) {
      // copy everything up to the next question mark
      for (; c != '?'; c = nextChar()) token.add(c);
      // "?>" ends the instruction; otherwise, the question mark is part of the content
      c = consume();
      if (c == '>') return;
      token.add('?');
    }
  }
Exemple #11
0
 /**
  * Adds some characters to the entity.
  *
  * @param ent token builder
  * @throws IOException I/O exception
  */
 private void completeRef(final TokenBuilder ent) throws IOException {
   // consume characters until the builder holds 10 entries, or until a semicolon
   // or a character below the space character has been consumed
   for (int c = consume(); ent.size() < 10 && c >= ' ' && c != ';'; c = consume()) {
     ent.add(c);
   }
 }
Exemple #12
0
 /**
  * Scans an attribute value. [10]
  *
  * @param ch current character
  * @throws IOException I/O exception
  */
 private void attValue(final int ch) throws IOException {
   // remembers if a quote character other than the closing one was encountered
   boolean quoteSeen = false;
   int cur = ch;
   do {
     // end of input before the closing quote
     if (cur == 0) error(ATTCLOSE, (char) cur);
     if (cur == '\'' || cur == '"') quoteSeen = true;
     // '<' is not allowed; after a stray quote, a missing closing quote is reported
     if (cur == '<') error(quoteSeen ? ATTCLOSE : ATTCHAR, (char) cur);
     if (cur == '&') {
       // verify...
       final byte[] ent = ref(true);
       if (ent.length != 1) {
         // longer replacement: hand it to the input
         if (!input.add(ent, false)) error(RECENT);
       } else {
         token.add(ent);
       }
     } else {
       // line feeds are added as spaces
       token.add(cur == 0x0A ? ' ' : cur);
     }
     cur = consume();
   } while (cur != quote);
 }
Exemple #13
0
  /**
   * Scans an external ID.
   *
   * @param f full flag
   * @param r root flag
   * @return id
   * @throws IOException I/O exception
   */
  private byte[] externalID(final boolean f, final boolean r) throws IOException {
    // Result: content of the referenced resource; stays null if no external id is
    // found, if the full flag is not set, or if the resource is not loaded at all.
    // NOTE(review): the system literal is taken from the scanned document and read via
    // input.io().merge(name) — confirm that callers restrict this for untrusted input.
    byte[] cont = null;
    final boolean pub = consume(PUBLIC);
    if (pub || consume(SYSTEM)) {
      checkS();
      if (pub) {
        // PUBLIC: a public identifier literal precedes the system literal
        pubidLit();
        if (f) checkS();
      }
      final int qu = consume(); // [11]
      if (qu == '\'' || qu == '"') {
        // collect the system literal up to the matching quote
        int ch;
        final TokenBuilder tok = new TokenBuilder();
        while ((ch = nextChar()) != qu) tok.add(ch);
        // without the full flag, the literal is consumed but not evaluated
        if (!f) return null;
        final String name = string(tok.finish());
        // root flag set, but dtd flag not set: nothing is loaded (cont is still null)
        if (!dtd && r) return cont;

        // remember the current input; it is replaced while the resource is scanned
        final XMLInput tin = input;
        try {
          // the name is merged with the location of the current input
          final IO file = input.io().merge(name);
          cont = file.read();
        } catch (final IOException ex) {
          Util.debug(ex);
          // skip unknown DTDs/entities
          cont = new byte[] {'?'};
        }
        input = new XMLInput(new IOContent(cont, name));

        // optional declaration at the start of the loaded content
        if (consume(XDECL)) {
          check(XML);
          s();
          if (version()) checkS();
          s();
          // an encoding is required here
          if (encoding() == null) error(TEXTENC);
          ch = nextChar();
          if (s(ch)) ch = nextChar();
          if (ch != '?') error(WRONGCHAR, '?', ch);
          ch = nextChar();
          if (ch != '>') error(WRONGCHAR, '>', ch);
          // drop everything up to the current input position from the returned content
          cont = Arrays.copyOfRange(cont, input.pos(), cont.length);
        }

        s();
        if (r) {
          // root flag: scan the external subset; it must be followed by the end of input
          extSubsetDecl();
          if (!consume((char) 0)) error(INVEND);
        }
        // switch back to the original input
        input = tin;
      } else {
        // no quote found: an error if the full flag is set; otherwise, step back
        if (f) error(SCANQUOTE, (char) qu);
        prev(1);
      }
    }
    return cont;
  }
Exemple #14
0
  /** Tests the specified instance. */
  @Test
  public void test() {
    final StringBuilder report = new StringBuilder();
    int failures = 0;

    for (final Object[] entry : queries) {
      // entries with three elements carry an expected value at index 1 and the query
      // at index 2; all other entries carry the query at index 1 and are expected to fail
      final boolean correct = entry.length == 3;
      final String query = entry[correct ? 2 : 1].toString();
      final Value expected = correct ? (Value) entry[1] : null;

      final QueryProcessor qp = new QueryProcessor(query, context);
      try {
        final Value found = qp.value();
        final boolean failed = !correct || !new DeepCompare().equal(found, expected);
        if (failed) {
          // header: test name and query
          report.append("[" + entry[0] + "] " + query);

          // expected result
          final String expSize = correct && expected.size() != 1 ? "#" + expected.size() : "";
          report.append("\n[E" + expSize + "] ");
          if (!correct) {
            report.append("error");
          } else {
            // long expected values are truncated
            final String exp = expected.toString();
            report.append('\'');
            report.append(exp.length() > 1000 ? exp.substring(0, 1000) + "..." : exp);
            report.append('\'');
          }

          // found result, followed by the types of all items
          final TokenBuilder types = new TokenBuilder();
          for (final Item item : found) types.add(item.type.toString()).add(" ");
          final String foundSize = found.size() == 1 ? "" : "#" + found.size();
          report.append(
              "\n[F" + foundSize + "] '" + found + "', " + types + details() + '\n');
          ++failures;
        }
      } catch (final Exception ex) {
        // an exception is a failure if a value was expected, if no message exists,
        // or if the message contains "mailman"
        final String msg = ex.getMessage();
        final boolean failed = correct || msg == null || msg.contains("mailman");
        if (failed) {
          final String exp = correct && expected.data() != null ? expected.toString() : "()";
          report.append(
              "["
                  + entry[0]
                  + "] "
                  + query
                  + "\n[E] "
                  + exp
                  + "\n[F] "
                  + (msg == null ? Util.className(ex) : msg.replaceAll("\r\n?|\n", " "))
                  + ' '
                  + details()
                  + '\n');
          ex.printStackTrace();
          ++failures;
        }
      } finally {
        qp.close();
      }
    }

    if (failures != 0) {
      fail(failures + " Errors. [E] = expected, [F] = found:\n" + report.toString().trim());
    }
  }
Exemple #15
0
 /**
  * Scans a comment.
  *
  * @throws IOException I/O exception
  */
 private void comment() throws IOException {
   while (true) {
     final int c = nextChar();
     // a double dash ends the comment
     final boolean dashes = c == '-' && consume('-');
     if (dashes) {
       // the dashes must be followed by '>' (verified by check)
       check('>');
       return;
     }
     token.add(c);
   }
 }
Exemple #16
0
 /**
  * Scans XML text.
  *
  * @param ch current character
  * @throws IOException I/O exception
  */
 private void content(final int ch) throws IOException {
   // Collects text in 'token' until a '<' that does not start a CDATA section,
   // or until the end of the input (character 0) is reached.
   type = Type.TEXT;
   // true only while the first character is processed
   boolean f = true;
   int c = ch;
   while (c != 0) {
     if (c != '<') {
       if (c == '&') {
         // scan entity
         final byte[] r = ref(true);
         // single-byte results are added directly; longer ones are handed to the input
         if (r.length == 1) token.add(r);
         else if (!input.add(r, false)) error(RECENT);
       } else {
         if (c == ']') {
           // ']]>' not allowed in content
           if (consume() == ']') {
             if (consume() == '>') error(CONTCDATA);
             // step back over the second look-ahead character
             prev(1);
           }
           // step back over the first look-ahead character
           prev(1);
         }
         // add character to cached content
         token.add(c);
       }
     } else {
       // '<' at any position but the first ends the text, unless isCDATA() succeeds
       if (!f && !isCDATA()) {
         text = false;
         // step back so that the '<' is scanned again
         prev(1);
         if (chop) token.trim();
         return;
       }
       // NOTE(review): for the very first character, isCDATA() is not called
       // (short-circuit) — presumably the caller has already checked it; confirm
       cDATA();
     }
     c = consume();
     f = false;
   }
   // end of file
   if (!fragment) {
     // trailing content must consist of whitespace only
     if (!ws(token.finish())) error(AFTERROOT);
     type = Type.EOF;
   }
 }
Exemple #17
0
 /**
  * Consumes an XML name. [5]
  *
  * @param f force parsing
  * @return name
  * @throws IOException I/O exception
  */
 private byte[] name(final boolean f) throws IOException {
   final TokenBuilder chars = new TokenBuilder();
   final int first = consume();
   if (isStartChar(first)) {
     // valid start character: collect it and all following name characters
     chars.add(first);
     for (int c = nextChar(); isChar(c); c = nextChar()) chars.add(c);
     // step back over the character that ended the name
     prev(1);
     return chars.finish();
   }
   // no name found: raise an error if parsing is forced; otherwise, step back
   if (f) error(INVNAME);
   prev(1);
   return null;
 }
Exemple #18
0
 /**
  * Scans an XML tag.
  *
  * @param ch current character
  * @throws IOException I/O exception
  */
 private void scanTAG(final int ch) throws IOException {
   // tag end: continue with content
   if (ch == '>') {
     type = Type.R_BR;
     state = State.CONTENT;
     return;
   }
   // equal sign
   if (ch == '=') {
     type = Type.EQ;
     return;
   }
   // opening quote: remember the quote character
   if (ch == '\'' || ch == '"') {
     type = Type.QUOTE;
     state = State.QUOTE;
     quote = ch;
     return;
   }
   // empty tag end: the slash must be followed by '>'
   if (ch == '/') {
     type = Type.CLOSE_R_BR;
     final int next = nextChar();
     if (next != '>') {
       token.add(next);
       error(CLOSING);
     } else {
       state = State.CONTENT;
     }
     return;
   }
   // whitespace
   if (s(ch)) {
     type = Type.WS;
     return;
   }
   // name: attribute name in attribute state, element name otherwise
   if (isStartChar(ch)) {
     type = state == State.ATT ? Type.ATTNAME : Type.ELEMNAME;
     token.add(ch);
     for (int c = nextChar(); isChar(c); c = nextChar()) token.add(c);
     // step back over the character that ended the name
     prev(1);
     state = State.ATT;
     return;
   }
   // undefined character
   error(CHARACTER, (char) ch);
 }
Exemple #19
0
 /**
  * Returns an information string for an unexpected exception.
  *
  * @param ex exception
  * @return information string
  */
 public static String bug(final Throwable ex) {
   // runtime environment details
   final String javaVendor = System.getProperty("java.vendor");
   final String javaVersion = System.getProperty("java.version");
   final String osName = System.getProperty("os.name");
   final String osArch = System.getProperty("os.arch");

   final TokenBuilder report = new TokenBuilder(BUGINFO);
   report.add(NL).add("Contact: ").add(MAIL);
   report.add(NL).add("Version: ").add(TITLE);
   report.add(NL).add("Java: ").add(javaVendor).add(", ").add(javaVersion);
   report.add(NL).add("OS: ").add(osName).add(", ").add(osArch);
   report.add(NL).add("Stack Trace: ");
   // one line per entry returned by toArray()
   for (final String line : toArray(ex)) {
     report.add(NL);
     report.add(line);
   }
   return report.toString();
 }
 /**
  * Converts the path to a string array, containing the single segments.
  *
  * @param path path, or {@code null}
  * @return array of path segments (empty if the path is {@code null})
  */
 public static String[] toSegments(final String path) {
   final StringList list = new StringList();
   // no path: no segments
   if (path == null) return list.toArray();

   final TokenBuilder current = new TokenBuilder();
   final int length = path.length();
   for (int i = 0; i < length; i++) {
     final char c = path.charAt(i);
     if (c != '/') {
       current.add(c);
     } else if (!current.isEmpty()) {
       // a slash closes the current segment; empty segments are dropped
       list.add(current.toString());
       current.reset();
     }
   }
   // last segment (not terminated by a slash)
   if (!current.isEmpty()) list.add(current.toString());
   return list.toArray();
 }
Exemple #21
0
  /**
   * Creates an XQuery representation for the specified query.
   *
   * @param query query
   * @param ctx database context
   * @param root start from root node
   * @return query
   */
  public static String find(final String query, final Context ctx, final boolean root) {
    // treat input as XQuery
    if (query.startsWith("/")) return query;

    // start from the root if requested, or if the context reports the root
    final boolean r = root || ctx.root();
    if (query.isEmpty()) return r ? "/" : ".";

    // parse user input
    final String qu = query.replaceAll(" \\+", " ");
    final String[] terms = split(qu);

    // 'pre' collects alternative name tests (each followed by " | "),
    // 'preds' collects the predicates for the final step
    final StringBuilder pre = new StringBuilder();
    final StringBuilder preds = new StringBuilder();
    for (String term : terms) {
      if (term.startsWith("@=")) {
        // exact attribute value
        preds.append("[@* = \"").append(term.substring(2)).append("\"]");
      } else if (term.startsWith("=")) {
        // exact text value
        preds.append("[text() = \"").append(term.substring(1)).append("\"]");
      } else if (term.startsWith("~")) {
        // fuzzy full-text search
        preds.append("[text() contains text \"").append(term.substring(1))
            .append("\" using fuzzy]");
      } else if (term.startsWith("@")) {
        // full-text search in attributes; a single '@' is ignored
        if (term.length() == 1) continue;
        term = term.substring(1);
        preds.append("[@* contains text \"").append(term).append("\"]");
        // add valid name tests
        if (XMLToken.isName(token(term))) {
          pre.append(r ? "" : ".").append("//@").append(term).append(" | ");
        }
      } else {
        // full-text search in text nodes
        preds.append("[text() contains text \"").append(term).append("\"]");
        // add valid name tests
        if (XMLToken.isName(token(term))) {
          pre.append(r ? "/" : "").append(Axis.DESC).append("::*:").append(term).append(" | ");
        }
      }
    }
    // NOTE(review): this check uses 'root', whereas the empty-query check above uses
    // 'r' (root || ctx.root()) — kept as is; confirm whether the difference is intended
    if (pre.length() == 0 && preds.length() == 0) return root ? "/" : ".";

    // create final string
    return pre.append(r ? "/" : "").append(Axis.DESCORSELF).append("::").append("*")
        .append(preds).toString();
  }
Exemple #22
0
 /**
  * Returns a string representation of the index structure.
  *
  * @param all include database contents in the representation. During updates, database lookups
  *     must be avoided, as the data structures will be inconsistent.
  * @return string
  */
 public String toString(final boolean all) {
   final TokenBuilder out = new TokenBuilder();
   out.addExt(type).add(" INDEX, '").add(data.meta.name).add("':\n");

   final boolean text = type == IndexType.TEXT;
   final int size = lenList.size();
   for (int id = 1; id < size; id++) {
     final int count = lenList.get(id);
     // entries without ids are skipped
     if (count != 0) {
       final int[] ids = idsList.get(id);
       out.add("  ").addInt(id);
       // the key is looked up in the database via the first id
       if (all) {
         out.add(", key: \"").add(data.text(data.pre(ids[0]), text)).add('"');
       }
       out.add(", ids");
       if (all) out.add("/pres");
       out.add(": ");
       // comma-separated ids, each followed by its pre value if requested
       for (int n = 0; n < count; n++) {
         if (n > 0) out.add(",");
         final int value = ids[n];
         out.addInt(value);
         if (all) out.add('/').addInt(data.pre(value));
       }
       out.add("\n");
     }
   }
   return out.toString();
 }
Exemple #23
0
 /**
  * Caches and returns all unique tokens specified in a query.
  *
  * @param list token list
  * @return token set
  */
 private TokenSet unique(final TokenList list) {
   // the set removes duplicates
   final TokenSet set = new TokenSet();
   switch (mode) {
     case ALL:
     case ANY: {
       // every list entry is added as is
       for (final byte[] entry : list) set.add(entry);
       break;
     }
     case ALL_WORDS:
     case ANY_WORD: {
       // every list entry is run through the lexer; each resulting token is added
       final FTLexer lexer = new FTLexer(ftt.opt);
       for (final byte[] entry : list) {
         lexer.init(entry);
         while (lexer.hasNext()) set.add(lexer.nextToken());
       }
       break;
     }
     case PHRASE: {
       // all list entries are joined with spaces and added as a single token
       final TokenBuilder phrase = new TokenBuilder();
       for (final byte[] entry : list) phrase.add(entry).add(' ');
       set.add(phrase.trim().finish());
       break;
     }
   }
   return set;
 }
Exemple #24
0
 /**
  * Scans a document encoding.
  *
  * @return encoding
  * @throws IOException I/O exception
  */
 private String encoding() throws IOException {
   // no encoding declaration: an error for fragments, otherwise null is returned
   final boolean declared = consume(ENCOD);
   if (!declared) {
     if (fragment) error(TEXTENC);
     return null;
   }

   // '=' with optional whitespace on both sides
   s();
   check('=');
   s();

   final TokenBuilder name = new TokenBuilder();
   // opening quote, as returned by qu()
   final int delim = qu();
   int c = nextChar();
   // the name must start with a letter other than '_'
   if (letter(c) && c != '_') {
     for (; letterOrDigit(c) || c == '.' || c == '-'; c = nextChar()) name.add(c);
     // step back over the character that ended the name
     prev(1);
   }
   // closing quote must match the opening one
   check((char) delim);
   if (name.isEmpty()) error(DECLENCODE, name);

   // pass the encoding on to the input
   final String e = string(name.finish());
   input.encoding(e);
   return e;
 }
Exemple #25
0
 /**
  * Throws a runtime exception for an unimplemented method.
  *
  * @param ext optional extension
  * @return runtime exception (indicates that an error is raised)
  */
 public static RuntimeException notimplemented(final Object... ext) {
   final TokenBuilder msg = new TokenBuilder("Not Implemented");
   // append the optional extension in parentheses
   final boolean detailed = ext.length != 0;
   if (detailed) msg.addExt(" (%)", ext);
   msg.add('.');
   final String text = msg.toString();
   // always throws; the declared return type allows callers to write 'throw notimplemented()'
   throw new UnsupportedOperationException(text);
 }
Exemple #26
0
 /**
  * Returns a string representation of a path summary node.
  *
  * @param data data reference
  * @param level level
  * @return string representation
  */
 byte[] info(final Data data, final int level) {
   final TokenBuilder out = new TokenBuilder();
   // all nodes but the top one start on a new line
   if (level != 0) out.add(Text.NL);
   // indent by two spaces per level
   for (int i = level << 1; i > 0; i--) out.add(' ');

   // label, depending on the node kind
   if (kind == Data.DOC) {
     out.add(DOC);
   } else if (kind == Data.ELEM) {
     out.add(data.elemNames.key(name));
   } else if (kind == Data.TEXT) {
     out.add(TEXT);
   } else if (kind == Data.ATTR) {
     out.add(ATT);
     out.add(data.attrNames.key(name));
   } else if (kind == Data.COMM) {
     out.add(COMMENT);
   } else if (kind == Data.PI) {
     out.add(PI);
   }
   out.add(": " + stats);

   // append the representation of all children, one level deeper
   for (final PathNode child : children) out.add(child.info(data, level + 1));
   return out.finish();
 }