/**
 * Scans an entity value. [9]
 *
 * <p>The quoted literal is processed in two passes. The first pass copies the literal into a
 * buffer, appending the result of {@code ref(false)} for every {@code '&'} and the result of
 * {@code peRef()} for every {@code '%'}. The second pass temporarily installs that buffer as
 * the current input and scans it once more, again passing every {@code '&'} to
 * {@code ref(false)}.
 *
 * @param p pe reference flag; if {@code false}, {@code INVPE} is reported for a {@code '%'}
 * @return scanned value, or {@code null} if the next character is no quote (the character
 *   is pushed back in that case)
 * @throws IOException I/O exception
 */
private byte[] entityValue(final boolean p) throws IOException {
  final int delim = consume();
  if (delim != '\'' && delim != '"') {
    prev(1);
    return null;
  }

  // first pass: collect the literal up to the closing delimiter
  final TokenBuilder literal = new TokenBuilder();
  for (int ch = nextChar(); ch != delim; ch = nextChar()) {
    switch (ch) {
      case '&':
        literal.add(ref(false));
        break;
      case '%':
        if (!p) error(INVPE);
        literal.add(peRef());
        break;
      default:
        literal.add(ch);
        break;
    }
  }

  // second pass: re-scan the collected literal as a separate input
  final XMLInput outer = input;
  input = new XMLInput(new IOContent(literal.finish()));
  final TokenBuilder value = new TokenBuilder();
  for (int ch = consume(); ch != 0; ch = consume()) {
    if (ch == '&') {
      value.add(ref(false));
    } else {
      value.add(ch);
    }
  }
  input = outer;
  return value.finish();
}
/**
 * Joins the path segments, beginning with the given index, using {@code '/'} as separator.
 * A separator is only written once the buffer already contains data.
 *
 * @param s segment to start with
 * @return joined path
 */
private String join(final int s) {
  final TokenBuilder path = new TokenBuilder();
  int index = s;
  while (index < segments.length) {
    if (!path.isEmpty()) {
      path.add('/');
    }
    path.add(segments[index]);
    index++;
  }
  return path.toString();
}
/**
 * Returns the database path (i.e., all path entries except for the first), with the
 * entries separated by {@code '/'}.
 *
 * @return database path; empty if there is at most one segment
 */
public String dbpath() {
  final TokenBuilder path = new TokenBuilder();
  final int count = segments.length;
  int index = 1;
  while (index < count) {
    // the separator is only written between entries, never in front of the first one
    if (!path.isEmpty()) {
      path.add('/');
    }
    path.add(segments[index]);
    index++;
  }
  return path.toString();
}
/**
 * Creates an XQuery representation for the specified table query.
 *
 * <p>Each filter entry is split into terms; double quotes inside a term are replaced by
 * spaces and the term is trimmed. Every non-empty term yields one predicate on the column
 * with the same index: a numeric comparison if the term starts with {@code '<'} or
 * {@code '>'}, a {@code contains text} test otherwise.
 *
 * @param filter filter terms
 * @param cols filter columns
 * @param elem element flag (per column: element if {@code true}, attribute otherwise)
 * @param name name of root element
 * @param root root flag
 * @return query; {@code "/"} if no predicate was generated
 */
public static String findTable(
    final StringList filter,
    final TokenList cols,
    final BoolList elem,
    final byte[] name,
    final boolean root) {

  final TokenBuilder preds = new TokenBuilder();
  final int count = filter.size();
  for (int col = 0; col < count; ++col) {
    for (final String part : split(filter.get(col))) {
      final byte[] term = trim(replace(token(part), '"', ' '));
      if (term.length != 0) {
        preds.add('[');
        final boolean element = elem.get(col);
        if (element) {
          preds.add(".//");
        } else {
          preds.add("@");
        }
        preds.add("*:");
        preds.add(cols.get(col));
        final boolean comparison = term[0] == '<' || term[0] == '>';
        if (comparison) {
          preds.add(term[0]);
          preds.addLong(calcNum(substring(term, 1)));
        } else {
          preds.add(" contains text \"");
          preds.add(term);
          preds.add('"');
        }
        preds.add(']');
      }
    }
  }

  if (preds.isEmpty()) return "/";
  return (root ? "/" : "") + Axis.DESCORSELF + "::*:" + string(name) + preds;
}
/**
 * Scans CDATA. All characters up to the terminating {@code "]]>"} are added to the
 * current token; the terminator itself is consumed but not added.
 *
 * @throws IOException I/O exception
 */
private void cDATA() throws IOException {
  for (;;) {
    // copy everything up to the next closing bracket
    int ch = nextChar();
    while (ch != ']') {
      token.add(ch);
      ch = nextChar();
    }
    if (consume(']')) {
      if (consume('>')) return;
      // no terminator: push back the second bracket so it is examined again
      prev(1);
    }
    token.add(ch);
  }
}
/**
 * Returns index information: the {@code ftinclude} meta value, the summed length of
 * {@code inX}, {@code inY} and {@code inZ}, and the index statistics.
 *
 * @param options main options; {@code MAXSTAT} is passed to the statistics
 * @return information as token
 */
@Override
public synchronized byte[] info(final MainOptions options) {
  final TokenBuilder out = new TokenBuilder();
  final long size = inX.length() + inY.length() + inZ.length();

  out.add(LI_NAMES);
  out.add(data.meta.ftinclude);
  out.add(NL);
  out.add(LI_SIZE + Performance.format(size, true) + NL);

  final IndexStats stats = new IndexStats(options.get(MainOptions.MAXSTAT));
  addOccs(stats);
  stats.print(out);
  return out.finish();
}
/** * Scans a reference. [67] * * @param f dissolve entities * @return entity * @throws IOException I/O exception */ private byte[] ref(final boolean f) throws IOException { // scans numeric entities if (consume('#')) { // [66] final TokenBuilder ent = new TokenBuilder(); int b = 10; int ch = nextChar(); ent.add(ch); if (ch == 'x') { b = 16; ent.add(ch = nextChar()); } int n = 0; do { final boolean m = ch >= '0' && ch <= '9'; final boolean h = b == 16 && (ch >= 'a' && ch <= 'f' || ch >= 'A' && ch <= 'F'); if (!m && !h) { completeRef(ent); return QUESTION; } n *= b; n += ch & 15; if (!m) n += 9; ent.add(ch = nextChar()); } while (ch != ';'); if (!valid(n)) return QUESTION; ent.reset(); ent.add(n); return ent.finish(); } // scans predefined entities [68] final byte[] name = name(false); if (!consume(';')) return QUESTION; if (!f) return concat(AMPER, name, SEMI); byte[] en = ents.get(name); if (en == null) { // unknown entity: try HTML entities (lazy initialization) if (HTMLENTS.size() == 0) { for (int s = 0; s < HTMLENTITIES.length; s += 2) { HTMLENTS.add(token(HTMLENTITIES[s]), token(HTMLENTITIES[s + 1])); } } en = HTMLENTS.get(name); } return en == null ? QUESTION : en; }
/**
 * Returns index information: the structure and name entries, followed by statistics on
 * all keys with a positive occurrence count.
 *
 * @param options main options; {@code MAXSTAT} is passed to the statistics
 * @return information as token
 */
@Override
public byte[] info(final MainOptions options) {
  final TokenBuilder out = new TokenBuilder();
  out.add(LI_STRUCTURE).add(HASH).add(NL);
  out.add(LI_NAMES).add(data.meta.names(type)).add(NL);

  final IndexStats stats = new IndexStats(options.get(MainOptions.MAXSTAT));
  final int size = values.size();
  // positions are 1-based
  int pos = 1;
  while (pos <= size) {
    final int count = lenList.get(pos);
    if (count > 0) {
      if (stats.adding(count)) stats.add(values.key(pos), count);
    }
    pos++;
  }
  stats.print(out);
  return out.finish();
}
/**
 * Consumes an Nmtoken. [7]
 * Reads name characters until the first character that is none; that character is
 * pushed back. {@code INVNAME} is reported if no name character was found.
 *
 * @throws IOException I/O exception
 */
private void nmtoken() throws IOException {
  final TokenBuilder chars = new TokenBuilder();
  for (int c = nextChar(); isChar(c); c = nextChar()) {
    chars.add(c);
  }
  prev(1);
  if (chars.isEmpty()) error(INVNAME);
}
/**
 * Scans a processing instruction. The target name and all following characters up to the
 * closing {@code "?>"} are added to the current token.
 * {@code PIRES} is reported if the lower-cased target equals {@code XML}, {@code PITEXT} if
 * the target is followed by neither {@code '?'} nor whitespace.
 *
 * @throws IOException I/O exception
 */
private void pi() throws IOException {
  final byte[] target = name(true);
  if (eq(lc(target), XML)) error(PIRES);
  token.add(target);

  int ch = nextChar();
  if (!(ch == '?' || ws(ch))) error(PITEXT);

  while (true) {
    // copy everything up to the next question mark
    for (; ch != '?'; ch = nextChar()) {
      token.add(ch);
    }
    ch = consume();
    if (ch == '>') return;
    // the question mark did not close the instruction: keep it as content
    token.add('?');
  }
}
/**
 * Adds some characters to the entity. Characters are consumed and appended until the
 * builder holds 10 entries, or a semicolon or a character smaller than a space is found.
 *
 * @param ent token builder
 * @throws IOException I/O exception
 */
private void completeRef(final TokenBuilder ent) throws IOException {
  for (int ch = consume(); ent.size() < 10 && ch >= ' ' && ch != ';'; ch = consume()) {
    ent.add(ch);
  }
}
/** * Scans an attribute value. [10] * * @param ch current character * @throws IOException I/O exception */ private void attValue(final int ch) throws IOException { boolean wrong = false; int c = ch; do { if (c == 0) error(ATTCLOSE, (char) c); wrong |= c == '\'' || c == '"'; if (c == '<') error(wrong ? ATTCLOSE : ATTCHAR, (char) c); if (c == 0x0A) c = ' '; if (c == '&') { // verify... final byte[] r = ref(true); if (r.length == 1) token.add(r); else if (!input.add(r, false)) error(RECENT); } else { token.add(c); } } while ((c = consume()) != quote); }
/** * Scans an external ID. * * @param f full flag * @param r root flag * @return id * @throws IOException I/O exception */ private byte[] externalID(final boolean f, final boolean r) throws IOException { byte[] cont = null; final boolean pub = consume(PUBLIC); if (pub || consume(SYSTEM)) { checkS(); if (pub) { pubidLit(); if (f) checkS(); } final int qu = consume(); // [11] if (qu == '\'' || qu == '"') { int ch; final TokenBuilder tok = new TokenBuilder(); while ((ch = nextChar()) != qu) tok.add(ch); if (!f) return null; final String name = string(tok.finish()); if (!dtd && r) return cont; final XMLInput tin = input; try { final IO file = input.io().merge(name); cont = file.read(); } catch (final IOException ex) { Util.debug(ex); // skip unknown DTDs/entities cont = new byte[] {'?'}; } input = new XMLInput(new IOContent(cont, name)); if (consume(XDECL)) { check(XML); s(); if (version()) checkS(); s(); if (encoding() == null) error(TEXTENC); ch = nextChar(); if (s(ch)) ch = nextChar(); if (ch != '?') error(WRONGCHAR, '?', ch); ch = nextChar(); if (ch != '>') error(WRONGCHAR, '>', ch); cont = Arrays.copyOfRange(cont, input.pos(), cont.length); } s(); if (r) { extSubsetDecl(); if (!consume((char) 0)) error(INVEND); } input = tin; } else { if (f) error(SCANQUOTE, (char) qu); prev(1); } } return cont; }
/**
 * Tests the specified instance.
 *
 * <p>Every entry of {@code queries} is evaluated. Entries with three elements carry the
 * expected value at index 1 and the query at index 2; all other entries carry the query at
 * index 1 and are expected to fail. All mismatches are collected and reported in a single
 * failure at the end.
 */
@Test
public void test() {
  final StringBuilder report = new StringBuilder();
  int failures = 0;

  for (final Object[] qu : queries) {
    final boolean correct = qu.length == 3;
    final String query = qu[correct ? 2 : 1].toString();
    final Value cmp = correct ? (Value) qu[1] : null;

    final QueryProcessor qp = new QueryProcessor(query, context);
    try {
      final Value val = qp.value();
      final boolean matches = correct && new DeepCompare().equal(val, cmp);
      if (!matches) {
        final String header = "[" + qu[0] + "] " + query;
        report.append(header);

        final String expectedSize = correct && cmp.size() != 1 ? "#" + cmp.size() : "";
        report.append("\n[E" + expectedSize + "] ");
        if (correct) {
          final String cp = cmp.toString();
          report.append('\'');
          report.append(cp.length() > 1000 ? cp.substring(0, 1000) + "..." : cp);
          report.append('\'');
        } else {
          report.append("error");
        }

        final TokenBuilder types = new TokenBuilder();
        for (final Item it : val) {
          types.add(it.type.toString()).add(" ");
        }
        final String foundSize = val.size() == 1 ? "" : "#" + val.size();
        report.append("\n[F" + foundSize + "] '" + val + "', " + types + details() + '\n');
        ++failures;
      }
    } catch (final Exception ex) {
      final String msg = ex.getMessage();
      final boolean unexpected = correct || msg == null || msg.contains("mailman");
      if (unexpected) {
        final String cp = correct && cmp.data() != null ? cmp.toString() : "()";
        final String found = msg == null ? Util.className(ex) : msg.replaceAll("\r\n?|\n", " ");
        report.append(
            "[" + qu[0] + "] " + query + "\n[E] " + cp + "\n[F] " + found + ' ' + details() + '\n');
        ex.printStackTrace();
        ++failures;
      }
    } finally {
      qp.close();
    }
  }

  if (failures != 0) {
    fail(failures + " Errors. [E] = expected, [F] = found:\n" + report.toString().trim());
  }
}
/**
 * Scans a comment. All characters up to the first {@code "--"} are added to the current
 * token; the two dashes must be followed by {@code '>'}.
 *
 * @throws IOException I/O exception
 */
private void comment() throws IOException {
  for (;;) {
    final int ch = nextChar();
    if (ch != '-' || !consume('-')) {
      token.add(ch);
      continue;
    }
    check('>');
    return;
  }
}
/** * Scans XML text. * * @param ch current character * @throws IOException I/O exception */ private void content(final int ch) throws IOException { type = Type.TEXT; boolean f = true; int c = ch; while (c != 0) { if (c != '<') { if (c == '&') { // scan entity final byte[] r = ref(true); if (r.length == 1) token.add(r); else if (!input.add(r, false)) error(RECENT); } else { if (c == ']') { // ']]>' not allowed in content if (consume() == ']') { if (consume() == '>') error(CONTCDATA); prev(1); } prev(1); } // add character to cached content token.add(c); } } else { if (!f && !isCDATA()) { text = false; prev(1); if (chop) token.trim(); return; } cDATA(); } c = consume(); f = false; } // end of file if (!fragment) { if (!ws(token.finish())) error(AFTERROOT); type = Type.EOF; } }
/**
 * Consumes an XML name. [5]
 *
 * @param f force parsing; if {@code true}, {@code INVNAME} is reported if the next
 *   character is no valid start character
 * @return name, or {@code null} if the next character is no valid start character (the
 *   character is pushed back in that case)
 * @throws IOException I/O exception
 */
private byte[] name(final boolean f) throws IOException {
  final int start = consume();
  if (!isStartChar(start)) {
    if (f) error(INVNAME);
    prev(1);
    return null;
  }

  final TokenBuilder chars = new TokenBuilder();
  chars.add(start);
  int c;
  while (isChar(c = nextChar())) {
    chars.add(c);
  }
  // push back the character that terminated the name
  prev(1);
  return chars.finish();
}
/** * Scans an XML tag. * * @param ch current character * @throws IOException I/O exception */ private void scanTAG(final int ch) throws IOException { int c = ch; // scan tag end... if (c == '>') { type = Type.R_BR; state = State.CONTENT; } else if (c == '=') { // scan equal sign... type = Type.EQ; } else if (c == '\'' || c == '"') { // scan quote... type = Type.QUOTE; state = State.QUOTE; quote = c; } else if (c == '/') { // scan empty tag end... type = Type.CLOSE_R_BR; if ((c = nextChar()) == '>') { state = State.CONTENT; } else { token.add(c); error(CLOSING); } } else if (s(c)) { // scan whitespace... type = Type.WS; } else if (isStartChar(c)) { // scan tag name... type = state == State.ATT ? Type.ATTNAME : Type.ELEMNAME; do token.add(c); while (isChar(c = nextChar())); prev(1); state = State.ATT; } else { // undefined character... error(CHARACTER, (char) c); } }
/**
 * Returns an information string for an unexpected exception. The string contains contact
 * and version details, the Java vendor and version, the operating system name and
 * architecture, and the entries returned by {@code toArray(ex)}.
 *
 * @param ex exception
 * @return information string
 */
public static String bug(final Throwable ex) {
  final TokenBuilder info = new TokenBuilder(BUGINFO);

  info.add(NL);
  info.add("Contact: ");
  info.add(MAIL);

  info.add(NL);
  info.add("Version: ");
  info.add(TITLE);

  info.add(NL);
  info.add("Java: ");
  info.add(System.getProperty("java.vendor"));
  info.add(", ");
  info.add(System.getProperty("java.version"));

  info.add(NL);
  info.add("OS: ");
  info.add(System.getProperty("os.name"));
  info.add(", ");
  info.add(System.getProperty("os.arch"));

  info.add(NL);
  info.add("Stack Trace: ");
  for (final String e : toArray(ex)) {
    info.add(NL);
    info.add(e);
  }
  return info.toString();
}
/**
 * Converts the path to a string array, containing the single segments. The path is split
 * at {@code '/'}; empty segments (leading, trailing or repeated slashes) are skipped.
 *
 * @param path path, or {@code null}
 * @return path segments; empty for {@code null}
 */
public static String[] toSegments(final String path) {
  final StringList list = new StringList();
  if (path != null) {
    final TokenBuilder current = new TokenBuilder();
    for (final char ch : path.toCharArray()) {
      if (ch != '/') {
        current.add(ch);
      } else if (!current.isEmpty()) {
        // a slash closes the segment collected so far
        list.add(current.toString());
        current.reset();
      }
    }
    // the last segment is not terminated by a slash
    if (!current.isEmpty()) list.add(current.toString());
  }
  return list.toArray();
}
/** * Creates an XQuery representation for the specified query. * * @param query query * @param ctx database context * @param root start from root node * @return query */ public static String find(final String query, final Context ctx, final boolean root) { // treat input as XQuery if (query.startsWith("/")) return query; final boolean r = root || ctx.root(); if (query.isEmpty()) return r ? "/" : "."; // parse user input final String qu = query.replaceAll(" \\+", " "); final String[] terms = split(qu); String pre = ""; String preds = ""; final String tag = "*"; for (String term : terms) { if (term.startsWith("@=")) { preds += "[@* = \"" + term.substring(2) + "\"]"; } else if (term.startsWith("=")) { preds += "[text() = \"" + term.substring(1) + "\"]"; } else if (term.startsWith("~")) { preds += "[text() contains text \"" + term.substring(1) + "\" using fuzzy]"; } else if (term.startsWith("@")) { if (term.length() == 1) continue; preds += "[@* contains text \"" + term.substring(1) + "\"]"; term = term.substring(1); // add valid name tests if (XMLToken.isName(token(term))) { pre += (r ? "" : ".") + "//@" + term + " | "; } } else { preds += "[text() contains text \"" + term + "\"]"; // add valid name tests if (XMLToken.isName(token(term))) { pre += (r ? "/" : "") + Axis.DESC + "::*:" + term + " | "; } } } if (pre.isEmpty() && preds.isEmpty()) return root ? "/" : "."; // create final string final TokenBuilder tb = new TokenBuilder(); tb.add(pre + (r ? "/" : "") + Axis.DESCORSELF + "::" + tag + preds); return tb.toString(); }
/**
 * Returns a string representation of the index structure. One line is written for every
 * entry with a non-zero length, listing the entry position and its ids.
 *
 * @param all include database contents in the representation (keys and pre values).
 *   During updates, database lookups must be avoided, as the data structures will be
 *   inconsistent.
 * @return string
 */
public String toString(final boolean all) {
  final TokenBuilder out = new TokenBuilder();
  out.addExt(type).add(" INDEX, '").add(data.meta.name).add("':\n");

  final int size = lenList.size();
  for (int pos = 1; pos < size; pos++) {
    final int len = lenList.get(pos);
    if (len != 0) {
      final int[] ids = idsList.get(pos);
      out.add(" ").addInt(pos);
      if (all) {
        // the key is looked up via the first id of the entry
        out.add(", key: \"");
        out.add(data.text(data.pre(ids[0]), type == IndexType.TEXT));
        out.add('"');
      }
      out.add(", ids");
      if (all) out.add("/pres");
      out.add(": ");

      int i = 0;
      while (i < len) {
        if (i != 0) out.add(",");
        out.addInt(ids[i]);
        if (all) out.add('/').addInt(data.pre(ids[i]));
        i++;
      }
      out.add("\n");
    }
  }
  return out.toString();
}
/** * Caches and returns all unique tokens specified in a query. * * @param list token list * @return token set */ private TokenSet unique(final TokenList list) { // cache all query tokens in a set (duplicates are removed) final TokenSet ts = new TokenSet(); switch (mode) { case ALL: case ANY: for (final byte[] t : list) ts.add(t); break; case ALL_WORDS: case ANY_WORD: final FTLexer l = new FTLexer(ftt.opt); for (final byte[] t : list) { l.init(t); while (l.hasNext()) ts.add(l.nextToken()); } break; case PHRASE: final TokenBuilder tb = new TokenBuilder(); for (final byte[] t : list) tb.add(t).add(' '); ts.add(tb.trim().finish()); } return ts; }
/**
 * Scans a document encoding. The scanned encoding is assigned to the current input.
 *
 * @return encoding, or {@code null} if no encoding declaration is found
 * @throws IOException I/O exception
 */
private String encoding() throws IOException {
  if (!consume(ENCOD)) {
    if (fragment) error(TEXTENC);
    return null;
  }
  s();
  check('=');
  s();

  final TokenBuilder chars = new TokenBuilder();
  final int delim = qu();
  int ch = nextChar();
  // NOTE(review): if the first character is no letter, it is not pushed back before
  // the closing delimiter is checked -- confirm that this is intended
  if (letter(ch) && ch != '_') {
    for (; letterOrDigit(ch) || ch == '.' || ch == '-'; ch = nextChar()) {
      chars.add(ch);
    }
    prev(1);
  }
  check((char) delim);
  if (chars.isEmpty()) error(DECLENCODE, chars);

  final String enc = string(chars.finish());
  input.encoding(enc);
  return enc;
}
/**
 * Throws a runtime exception for an unimplemented method. This method never returns
 * normally: an {@link UnsupportedOperationException} is always thrown. The return type
 * allows callers to write {@code throw notimplemented(...)}.
 *
 * @param ext optional extension
 * @return runtime exception (indicates that an error is raised)
 */
public static RuntimeException notimplemented(final Object... ext) {
  final TokenBuilder message = new TokenBuilder("Not Implemented");
  if (ext.length > 0) {
    message.addExt(" (%)", ext);
  }
  message.add('.');
  throw new UnsupportedOperationException(message.toString());
}
/**
 * Returns a string representation of a path summary node. The node is written on a line
 * of its own, indented by two spaces per level, and followed by the representations of
 * all its children.
 *
 * @param data data reference
 * @param level level
 * @return string representation
 */
byte[] info(final Data data, final int level) {
  final TokenBuilder out = new TokenBuilder();
  if (level != 0) out.add(Text.NL);

  final int indent = level << 1;
  for (int i = 0; i < indent; ++i) {
    out.add(' ');
  }

  switch (kind) {
    case Data.DOC: {
      out.add(DOC);
      break;
    }
    case Data.ELEM: {
      out.add(data.elemNames.key(name));
      break;
    }
    case Data.TEXT: {
      out.add(TEXT);
      break;
    }
    case Data.ATTR: {
      out.add(ATT);
      out.add(data.attrNames.key(name));
      break;
    }
    case Data.COMM: {
      out.add(COMMENT);
      break;
    }
    case Data.PI: {
      out.add(PI);
      break;
    }
  }
  out.add(": " + stats);

  for (final PathNode child : children) {
    out.add(child.info(data, level + 1));
  }
  return out.finish();
}