/*
 * Registers every supported thesaurus relationship together with its inverse
 * relationship. Each pair is entered in both directions, so a lookup with
 * either abbreviation yields the opposite one; "RT" is its own inverse.
 */
static {
  RSHIPS.put("NT", "BT");
  // the inverse of "BT" is "NT" (was wrongly mapped to itself)
  RSHIPS.put("BT", "NT");
  RSHIPS.put("BTG", "NTG");
  RSHIPS.put("NTG", "BTG");
  RSHIPS.put("BTP", "NTP");
  RSHIPS.put("NTP", "BTP");
  RSHIPS.put("USE", "UF");
  RSHIPS.put("UF", "USE");
  RSHIPS.put("RT", "RT");
}
/** * Scans a PEReference. [69] * * @return entity * @throws IOException I/O exception */ private byte[] peRef() throws IOException { // scans predefined entities final byte[] name = name(true); consume(';'); final byte[] en = pents.get(name); if (en != null) return en; return name; }
/** * Scans a reference. [67] * * @param f dissolve entities * @return entity * @throws IOException I/O exception */ private byte[] ref(final boolean f) throws IOException { // scans numeric entities if (consume('#')) { // [66] final TokenBuilder ent = new TokenBuilder(); int b = 10; int ch = nextChar(); ent.add(ch); if (ch == 'x') { b = 16; ent.add(ch = nextChar()); } int n = 0; do { final boolean m = ch >= '0' && ch <= '9'; final boolean h = b == 16 && (ch >= 'a' && ch <= 'f' || ch >= 'A' && ch <= 'F'); if (!m && !h) { completeRef(ent); return QUESTION; } n *= b; n += ch & 15; if (!m) n += 9; ent.add(ch = nextChar()); } while (ch != ';'); if (!valid(n)) return QUESTION; ent.reset(); ent.add(n); return ent.finish(); } // scans predefined entities [68] final byte[] name = name(false); if (!consume(';')) return QUESTION; if (!f) return concat(AMPER, name, SEMI); byte[] en = ents.get(name); if (en == null) { // unknown entity: try HTML entities (lazy initialization) if (HTMLENTS.size() == 0) { for (int s = 0; s < HTMLENTITIES.length; s += 2) { HTMLENTS.add(token(HTMLENTITIES[s]), token(HTMLENTITIES[s + 1])); } } en = HTMLENTS.get(name); } return en == null ? QUESTION : en; }
/** * Reads next character or throws an exception if all bytes have been read. * * @return next character * @throws IOException I/O exception */ private int consume() throws IOException { while (true) { final int ch = input.read(); if (ch == -1) return 0; if (ch == '%' && pe) { // [69] final byte[] key = name(true); final byte[] val = pents.get(key); if (val == null) error(UNKNOWNPE, key); check(';'); input.add(val, true); } else { return ch; } } }
/**
 * Builds the thesaurus.
 *
 * <p>For the term of the given input, every {@code *:synonym} is linked to
 * the term with its {@code *:relationship}; if an inverse relationship is
 * registered, the back link is added as well. The method then descends into
 * each synonym to process its own nested synonyms.</p>
 *
 * @param value input nodes
 * @throws QueryException query exception
 */
private void build(final Value value) throws QueryException {
  final Value synonyms = nodes("*:synonym", value);
  if (synonyms.isEmpty()) return;

  final ThesNode term = node(text("*:term", value));
  for (final Item synonym : synonyms) {
    final ThesNode sterm = node(text("*:term", synonym));
    final byte[] rs = text("*:relationship", synonym);
    term.add(sterm, rs);

    // add the inverse link if the relationship has a registered counterpart
    final byte[] srs = RSHIPS.get(rs);
    if (srs != null) sterm.add(term, srs);

    // recurse into the current synonym only; passing the complete sequence
    // re-processed all synonyms once per loop iteration
    build(synonym);
  }
}
/** * Initializes the scanner. * * @param f input file * @param pr database properties * @param frag allow parsing of document fragment * @throws IOException I/O exception */ XMLScanner(final IO f, final Prop pr, final boolean frag) throws IOException { input = new XMLInput(f); fragment = frag; try { for (int e = 0; e < ENTITIES.length; e += 2) { ents.add(token(ENTITIES[e]), token(ENTITIES[e + 1])); } dtd = pr.is(Prop.DTD); chop = pr.is(Prop.CHOP); String enc = null; // process document declaration... if (consume(DOCDECL)) { if (s()) { if (!version()) error(DECLSTART); boolean s = s(); enc = encoding(); if (enc != null) { if (!s) error(WSERROR); s = s(); } if (sddecl() != null && !s) error(WSERROR); s(); final int ch = nextChar(); if (ch != '?' || nextChar() != '>') error(DECLWRONG); } else { prev(5); } } encoding = enc == null ? UTF8 : enc; if (!fragment) { final int n = consume(); if (!s(n)) { if (n != '<') error(BEFOREROOT); prev(1); } } } catch (final IOException ex) { input.close(); throw ex; } }
/**
 * Inserts a data instance at the specified pre value. Note that the specified data instance must
 * differ from this instance.
 *
 * <p>The entries {@code clip.start} (inclusive) to {@code clip.end} (exclusive) of
 * {@code clip.data} are added one by one; the n-th entry (counting from 0) receives the pre
 * value {@code ipre + n}. Afterwards, the size of {@code ipar} and of all its ancestors is
 * increased by the clip size, and the namespace structure is updated.</p>
 *
 * @param ipre value at which to insert new data
 * @param ipar parent pre value of node (negative if document nodes are added on top level)
 * @param clip data clip
 */
public final void insert(final int ipre, final int ipar, final DataClip clip) {
  meta.update();

  // update value and document indexes
  if (meta.updindex) indexBegin();
  resources.insert(ipre, clip);

  final int dsize = clip.size();
  // number of entries after which the buffered entries are passed on to insert(int)
  final int buf = Math.min(dsize, IO.BLOCKSIZE >> IO.NODEPOWER);
  // resize buffer to cache more entries
  buffer(buf);

  // find all namespaces in scope to avoid duplicate declarations
  final TokenMap nsScope = nspaces.scope(ipar, this);

  // loop through all entries
  // stack of pre values of the inserted document/element nodes that are still open
  final IntList preStack = new IntList();
  // current namespace node; restored after the loop
  final NSNode nsRoot = nspaces.current();
  // namespace nodes created by this insertion (handed to nspaces.insert() at the end)
  final HashSet<NSNode> newNodes = new HashSet<NSNode>();
  // pre values of nodes whose namespace flag is set after the buffer has been reset
  final IntList flagPres = new IntList();

  // source of the inserted entries
  final Data data = clip.data;
  // number of entries processed so far
  int c = 0;
  for (int dpre = clip.start; dpre < clip.end; ++dpre, ++c) {
    // buffer is full: write the last 'buf' entries
    // NOTE(review): presumably insert(int) flushes the buffered entries at the given pre value
    if (c != 0 && c % buf == 0) insert(ipre + c - buf);

    final int pre = ipre + c;
    final int dkind = data.kind(dpre);
    final int dpar = data.parent(dpre, dkind);
    // distance to the parent: taken from the source if the entry has a parent there,
    // otherwise computed from the target parent
    // ipar < 0 if document nodes on top level are added
    final int dis = dpar >= 0 ? dpre - dpar : ipar >= 0 ? pre - ipar : 0;
    // parent pre value in this instance (-1: no parent)
    final int par = dis == 0 ? -1 : pre - dis;

    if (c == 0) nspaces.root(par, this);

    // close all stacked nodes with a pre value larger than the parent of the current entry
    while (!preStack.isEmpty() && preStack.peek() > par) nspaces.close(preStack.pop());

    switch (dkind) {
      case DOC:
        // add document
        nspaces.prepare();
        final int s = data.size(dpre, dkind);
        doc(pre, s, data.text(dpre, true));
        meta.ndocs++;
        preStack.push(pre);
        break;
      case ELEM:
        // add element
        nspaces.prepare();
        // indicates if at least one new namespace was added for this element
        boolean ne = false;
        if (data.nsFlag(dpre)) {
          final Atts at = data.ns(dpre);
          for (int a = 0; a < at.size(); ++a) {
            // see if prefix has been declared/ is part of current ns scope
            final byte[] old = nsScope.get(at.name(a));
            if (old == null || !eq(old, at.value(a))) {
              // we have to keep track of all new NSNodes that are added
              // to the Namespace structure, as their pre values must not
              // be updated. I.e. if an NSNode N with pre value 3 existed
              // prior to inserting and two new nodes are inserted at
              // location pre == 3 we have to make sure N and only N gets
              // updated.
              newNodes.add(nspaces.add(at.name(a), at.value(a), pre));
              ne = true;
            }
          }
        }
        byte[] nm = data.name(dpre, dkind);
        elem(
            dis,
            tagindex.index(nm, null, false),
            data.attSize(dpre, dkind),
            data.size(dpre, dkind),
            nspaces.uri(nm, true),
            ne);
        preStack.push(pre);
        break;
      case TEXT:
      case COMM:
      case PI:
        // add text, comment or processing instruction (all handled by text())
        text(pre, dis, data.text(dpre, true), dkind);
        break;
      case ATTR:
        // add attribute
        nm = data.name(dpre, dkind);
        // check if prefix already in nsScope or not
        final byte[] attPref = prefix(nm);
        // check if prefix of attribute has already been declared, otherwise
        // add declaration to parent node
        if (data.nsFlag(dpre) && nsScope.get(attPref) == null) {
          nspaces.add(
              par,
              preStack.isEmpty() ? -1 : preStack.peek(),
              attPref,
              data.nspaces.uri(data.uri(dpre, dkind)),
              this);
          // save pre value to set ns flag later for this node. can't be done
          // here as direct table access would interfere with the buffer
          flagPres.add(par);
        }
        attr(
            pre,
            dis,
            atnindex.index(nm, null, false),
            data.text(dpre, false),
            nspaces.uri(nm, false),
            false);
        break;
    }
  }

  // finalize and update namespace structure
  while (!preStack.isEmpty()) nspaces.close(preStack.pop());
  nspaces.root(nsRoot);

  // write the remaining buffered entries; the argument is the pre value of the first entry
  // of the last (possibly partial) chunk
  if (bp != 0) insert(ipre + c - 1 - (c - 1) % buf);
  // reset buffer to old size
  buffer(1);

  // set ns flags: sets bit 15 of the value returned by name() for each remembered node
  for (int f = 0; f < flagPres.size(); f++) {
    final int fl = flagPres.get(f);
    table.write2(fl, 1, name(fl) | 1 << 15);
  }

  // increase size of ancestors
  int p = ipar;
  while (p >= 0) {
    final int k = kind(p);
    size(p, k, size(p, k) + dsize);
    p = parent(p, k);
  }

  if (meta.updindex) {
    // add the entries to the ID -> PRE mapping:
    idmap.insert(ipre, id(ipre), dsize);
    indexEnd();
  }

  if (!cache) updateDist(ipre + dsize, dsize);

  // propagate PRE value shifts to namespaces
  if (ipar != -1) nspaces.insert(ipre, dsize, newNodes);
}
/**
 * Scans a markup declaration. [29]
 *
 * <p>Tries, in this order, an entity declaration, an element declaration, an attribute list
 * declaration, a notation declaration, a comment and a processing instruction. Parameter
 * entity declarations are stored in {@code pents}, general entity declarations in
 * {@code ents}; the contents of all other declarations are scanned, but not stored by this
 * method. Bracketed numbers in the comments refer to grammar productions.</p>
 *
 * @return true if a declaration was found
 * @throws IOException I/O exception
 */
private boolean markupDecl() throws IOException {
  if (consume(ENT)) { // [70]
    checkS();
    if (consume('%')) { // [72] PEDecl
      checkS();
      final byte[] key = name(true);
      checkS();
      byte[] val = entityValue(true); // [74]
      if (val == null) {
        // no literal entity value: an external ID is required
        val = externalID(true, false);
        if (val == null) error(INVEND);
      }
      s();
      pents.add(key, val);
    } else { // [71] GEDecl
      final byte[] key = name(true);
      checkS();
      byte[] val = entityValue(false); // [73] EntityDef
      if (val == null) {
        // no literal entity value: an external ID is required
        val = externalID(true, false);
        if (val == null) error(INVEND);
        // optional part after the external ID: ND keyword, whitespace and a name
        if (s()) {
          check(ND);
          checkS();
          name(true);
        }
      }
      s();
      ents.add(key, val);
    }
    check('>');
    pe = true;
  } else if (consume(ELEM)) { // [45]
    checkS();
    name(true);
    checkS();
    // enable parameter entity processing in consume() for the content specification
    pe = true;
    if (!consume(EMP) && !consume(ANY)) { // [46]
      if (consume('(')) {
        s();
        if (consume(PC)) { // [51]
          s();
          // remembers if at least one '|' alternative was scanned
          boolean alt = false;
          while (consume('|')) {
            s();
            name(true);
            s();
            alt = true;
          }
          check(')');
          // alternatives must be followed by '*'
          if (!consume('*') && alt) error(INVEND);
        } else {
          cp();
          s();
          // to be fixed (original note): instead of a strict check(')'), all characters
          // up to the next ')' are skipped
          // NOTE(review): termination of this loop at the end of the input depends on
          // consume(char) and consume() -- confirm that it cannot spin on truncated input
          while (!consume(')')) consume();
          // (original note: a disabled input.prev(1) call preceded the next statement)
          occ();
        }
      } else {
        error(INVEND);
      }
    }
    s();
    check('>');
  } else if (consume(ATTL)) { // [52]
    pe = true;
    checkS();
    name(true);
    s();
    // one iteration per attribute definition, as long as another name is found
    while (name(false) != null) { // [53]
      checkS();
      // attribute type: one of the type keywords, or an enumerated type
      if (!consume(CD) && !consume(IDRS) && !consume(IDR) && !consume(ID)
          && !consume(ENTS) && !consume(ENT1) && !consume(NMTS) && !consume(NMT)) { // [56]
        if (consume(NOT)) { // [57,58]
          checkS();
          check('(');
          s();
          name(true);
          s();
          while (consume('|')) {
            s();
            name(true);
            s();
          }
          check(')');
        } else { // [59]
          check('(');
          s();
          nmtoken();
          s();
          while (consume('|')) {
            s();
            nmtoken();
            s();
          }
          check(')');
        }
      }

      // [54]
      pe = true;
      checkS();
      // default declaration: REQ or IMP keyword, or an attribute value optionally
      // preceded by the FIX keyword
      if (!consume(REQ) && !consume(IMP)) { // [60]
        if (consume(FIX)) checkS();
        quote = qu();
        attValue(consume());
      }
      s();
    }
    check('>');
  } else if (consume(NOTA)) { // [82]
    checkS();
    name(true);
    s();
    externalID(false, false);
    s();
    check('>');
  } else if (consume(COMS)) {
    comment();
  } else if (consume(XML)) {
    pi();
  } else {
    // none of the known declaration prefixes was found
    return false;
  }
  s();
  // disable parameter entity processing again
  pe = false;
  return true;
}