/** * Returns a pre value. * * @param id unique node id * @return pre value or -1 if id was not found */ final int preold(final int id) { // find pre value in table for (int p = Math.max(0, id); p < meta.size; ++p) if (id == id(p)) return p; final int ps = Math.min(meta.size, id); for (int p = 0; p < ps; ++p) if (id == id(p)) return p; // id not found return -1; }
@Override public DiskData build() throws IOException { meta.assign(parser); meta.dirty = true; // calculate optimized output buffer sizes to reduce disk fragmentation final Runtime rt = Runtime.getRuntime(); final long max = Math.min(1 << 22, rt.maxMemory() - rt.freeMemory() >> 2); int bs = (int) Math.min(meta.filesize, max); bs = Math.max(IO.BLOCKSIZE, bs - bs % IO.BLOCKSIZE); // drop old database (if available) and create new one DropDB.drop(dbname, sopts); sopts.dbpath(dbname).md(); elemNames = new Names(meta); attrNames = new Names(meta); try { tout = new DataOutput(new TableOutput(meta, DATATBL)); xout = new DataOutput(meta.dbfile(DATATXT), bs); vout = new DataOutput(meta.dbfile(DATAATV), bs); sout = new DataOutput(meta.dbfile(DATATMP), bs); final Performance perf = Prop.debug ? new Performance() : null; Util.debug(tit() + DOTS); parse(); if (Prop.debug) Util.errln(" " + perf + " (" + Performance.getMemory() + ')'); } catch (final IOException ex) { try { close(); } catch (final IOException ignored) { } throw ex; } close(); // copy temporary values into database table try (final DataInput in = new DataInput(meta.dbfile(DATATMP))) { final TableAccess ta = new TableDiskAccess(meta, true); for (; spos < ssize; ++spos) ta.write4(in.readNum(), 8, in.readNum()); ta.close(); } meta.dbfile(DATATMP).delete(); // return database instance return new DiskData(meta, elemNames, attrNames, path, ns); }
/** * Adds an attribute entry to the internal update buffer. * * @param pre pre value * @param dist parent distance * @param name attribute name * @param value attribute value * @param uri namespace uri reference * @param ne namespace flag */ public final void attr( final int pre, final int dist, final int name, final byte[] value, final int uri, final boolean ne) { // add attribute to text storage final int i = newID(); final long v = index(pre, i, value, ATTR); final int n = ne ? 1 << 7 : 0; s(Math.min(IO.MAXATTS, dist) << 3 | ATTR); s(n | (byte) (name >> 8)); s(name); s(v >> 32); s(v >> 24); s(v >> 16); s(v >> 8); s(v); s(0); s(0); s(0); s(uri); s(i >> 24); s(i >> 16); s(i >> 8); s(i); }
/** * Adds an element entry to the internal update buffer. * * @param dist parent distance * @param name tag name index * @param asize number of attributes * @param size node size * @param uri namespace uri reference * @param ne namespace flag */ public final void elem( final int dist, final int name, final int asize, final int size, final int uri, final boolean ne) { // build and insert new entry final int i = newID(); final int n = ne ? 1 << 7 : 0; s(Math.min(IO.MAXATTS, asize) << 3 | ELEM); s(n | (byte) (name >> 8)); s(name); s(uri); s(dist >> 24); s(dist >> 16); s(dist >> 8); s(dist); s(size >> 24); s(size >> 16); s(size >> 8); s(size); s(i >> 24); s(i >> 16); s(i >> 8); s(i); }
/**
 * Performs a wildcard search for the specified token. All index entries whose
 * length lies between the length of the wildcard prefix and the maximum length
 * reported by the wildcard are visited; the numbers stored for every matching
 * entry are collected and returned as a cached iterator.
 *
 * @param token token to look for
 * @return iterator; the empty iterator if the wildcard expression cannot be parsed
 */
private synchronized IndexIterator wc(final byte[] token) {
  final FTIndexIterator empty = FTIndexIterator.FTEMPTY;
  final FTWildcard wildcard = new FTWildcard(token);
  if (!wildcard.parse()) return empty;

  // the two number lists handed to the cache (read pairwise from inZ)
  final IntList firstNums = new IntList();
  final IntList secondNums = new IntList();

  final byte[] pfx = wildcard.prefix();
  final int pfxLen = pfx.length, slots = tp.length;
  final int maxLen = Math.min(slots - 1, wildcard.max());

  for (int len = pfxLen; len <= maxLen; len++) {
    int cur = tp[len];
    // -1 marks a token length without entries
    if (cur == -1) continue;

    // the next assigned slot marks the end of the entries with this length
    int end = -1;
    for (int next = len + 1; next < slots && end == -1; next++) end = tp[next];

    cur = find(pfx, cur, end, len);
    for (; cur < end; cur += len + ENTRY) {
      final byte[] entry = inY.readBytes(cur, len);
      // stop as soon as an entry no longer starts with the prefix
      if (!startsWith(entry, pfx)) break;
      if (!wildcard.match(entry)) continue;

      inZ.cursor(pointer(cur, len));
      final int count = size(cur, len);
      for (int d = 0; d < count; d++) {
        firstNums.add(inZ.readNum());
        secondNums.add(inZ.readNum());
      }
    }
  }
  return iter(new FTCache(firstNums, secondNums), token);
}
/**
 * Performs a fuzzy search for the specified token with a maximum number of errors.
 * Only index entries whose length differs from the token length by at most
 * {@code k} are compared; the iterators of all similar entries are united.
 *
 * @param token token to look for
 * @param k number of errors allowed
 * @return iterator; the empty iterator if no similar entry exists
 */
private synchronized IndexIterator fuzzy(final byte[] token, final int k) {
  FTIndexIterator result = FTIndexIterator.FTEMPTY;

  final int tokenLen = token.length, slots = tp.length;
  final int maxLen = Math.min(slots - 1, tokenLen + k);

  for (int len = Math.max(1, tokenLen - k); len <= maxLen; len++) {
    final int start = tp[len];
    // -1 marks a token length without entries
    if (start == -1) continue;

    // the next assigned slot marks the end of the entries with this length
    int end = -1;
    for (int next = len + 1; next < slots && end == -1; next++) end = tp[next];

    for (int cur = start; cur < end; cur += len + ENTRY) {
      if (!ls.similar(inY.readBytes(cur, len), token, k)) continue;
      result =
          FTIndexIterator.union(iter(pointer(cur, len), size(cur, len), inZ, token), result);
    }
  }
  return result;
}
@Override public synchronized int costs(final IndexToken it) { final byte[] tok = it.get(); if (tok.length > data.meta.maxlen) return Integer.MAX_VALUE; // estimate costs for queries which stretch over multiple index entries final FTOpt opt = ((FTLexer) it).ftOpt(); if (opt.is(FZ) || opt.is(WC)) return Math.max(1, data.meta.size >> 4); return entry(tok).size; }
/**
 * Inserts a data instance at the specified pre value. Note that the specified data instance must
 * differ from this instance.
 *
 * <p>The nodes of the clip ({@code clip.start} inclusive to {@code clip.end} exclusive) are
 * processed one by one and added via {@code doc}, {@code elem}, {@code text} and {@code attr};
 * afterwards the namespace structure, the sizes of all ancestors of {@code ipar} and, if
 * enabled, the index structures are updated.
 *
 * @param ipre value at which to insert new data
 * @param ipar parent pre value of node
 * @param clip data clip
 */
public final void insert(final int ipre, final int ipar, final DataClip clip) {
  meta.update();

  // update value and document indexes
  if (meta.updindex) indexBegin();
  resources.insert(ipre, clip);

  final int dsize = clip.size();
  // number of entries per buffer: the clip size, limited by BLOCKSIZE >> NODEPOWER
  final int buf = Math.min(dsize, IO.BLOCKSIZE >> IO.NODEPOWER);
  // resize buffer to cache more entries
  buffer(buf);

  // find all namespaces in scope to avoid duplicate declarations
  final TokenMap nsScope = nspaces.scope(ipar, this);

  // loop through all entries
  // stack of pre values of the document and element nodes pushed so far
  final IntList preStack = new IntList();
  // namespace node that is current before the insertion; restored after the loop
  final NSNode nsRoot = nspaces.current();
  // namespace nodes created by this insertion (handed to nspaces.insert at the end)
  final HashSet<NSNode> newNodes = new HashSet<NSNode>();
  // pre values of nodes whose ns flag is set after the buffer has been reset
  final IntList flagPres = new IntList();

  // NOTE(review): an earlier comment here ("indicates if database only contains a
  // dummy node") did not describe the following statement and looked stale
  // source data instance the clip refers to
  final Data data = clip.data;
  // c counts the nodes processed so far
  int c = 0;
  for (int dpre = clip.start; dpre < clip.end; ++dpre, ++c) {
    // every buf nodes, call insert(int) with the pre value of the first node of the
    // completed run (presumably writes the buffered entries to the table - confirm)
    if (c != 0 && c % buf == 0) insert(ipre + c - buf);

    // pre value of the node in this instance
    final int pre = ipre + c;
    final int dkind = data.kind(dpre);
    final int dpar = data.parent(dpre, dkind);
    // distance to the parent: taken from the source data if the node has a parent
    // there, otherwise relative to ipar, otherwise 0
    // ipar < 0 if document nodes on top level are added
    final int dis = dpar >= 0 ? dpre - dpar : ipar >= 0 ? pre - ipar : 0;
    // parent pre value in this instance; -1 if the distance is 0
    final int par = dis == 0 ? -1 : pre - dis;

    // the first node determines the namespace root
    if (c == 0) nspaces.root(par, this);

    // close all stacked nodes whose pre value is larger than the current parent
    while (!preStack.isEmpty() && preStack.peek() > par) nspaces.close(preStack.pop());

    switch (dkind) {
      case DOC:
        // add document
        nspaces.prepare();
        final int s = data.size(dpre, dkind);
        doc(pre, s, data.text(dpre, true));
        meta.ndocs++;
        preStack.push(pre);
        break;
      case ELEM:
        // add element
        nspaces.prepare();
        // becomes true as soon as a namespace is added for this element
        boolean ne = false;
        if (data.nsFlag(dpre)) {
          final Atts at = data.ns(dpre);
          for (int a = 0; a < at.size(); ++a) {
            // see if prefix has been declared/ is part of current ns scope
            final byte[] old = nsScope.get(at.name(a));
            if (old == null || !eq(old, at.value(a))) {
              // we have to keep track of all new NSNodes that are added
              // to the Namespace structure, as their pre values must not
              // be updated. I.e. if an NSNode N with pre value 3 existed
              // prior to inserting and two new nodes are inserted at
              // location pre == 3 we have to make sure N and only N gets
              // updated.
              newNodes.add(nspaces.add(at.name(a), at.value(a), pre));
              ne = true;
            }
          }
        }
        byte[] nm = data.name(dpre, dkind);
        elem(
            dis,
            tagindex.index(nm, null, false),
            data.attSize(dpre, dkind),
            data.size(dpre, dkind),
            nspaces.uri(nm, true),
            ne);
        preStack.push(pre);
        break;
      case TEXT:
      case COMM:
      case PI:
        // add text (texts, comments and processing instructions share this branch)
        text(pre, dis, data.text(dpre, true), dkind);
        break;
      case ATTR:
        // add attribute
        nm = data.name(dpre, dkind);
        // check if prefix already in nsScope or not
        final byte[] attPref = prefix(nm);
        // check if prefix of attribute has already been declared, otherwise
        // add declaration to parent node
        if (data.nsFlag(dpre) && nsScope.get(attPref) == null) {
          nspaces.add(
              par,
              preStack.isEmpty() ? -1 : preStack.peek(),
              attPref,
              data.nspaces.uri(data.uri(dpre, dkind)),
              this);
          // save pre value to set ns flag later for this node. can't be done
          // here as direct table access would interfere with the buffer
          flagPres.add(par);
        }
        attr(
            pre,
            dis,
            atnindex.index(nm, null, false),
            data.text(dpre, false),
            nspaces.uri(nm, false),
            false);
        break;
    }
  }

  // finalize and update namespace structure
  while (!preStack.isEmpty()) nspaces.close(preStack.pop());
  nspaces.root(nsRoot);

  // if bp is not 0, call insert(int) with the pre value at which the last,
  // possibly incomplete run of buffered nodes started
  // NOTE(review): bp looks like the fill position of the buffer - confirm
  if (bp != 0) insert(ipre + c - 1 - (c - 1) % buf);
  // reset buffer to old size
  buffer(1);

  // set ns flags: bit 15 is added to the value returned by name(fl)
  for (int f = 0; f < flagPres.size(); f++) {
    final int fl = flagPres.get(f);
    table.write2(fl, 1, name(fl) | 1 << 15);
  }

  // increase size of ancestors
  int p = ipar;
  while (p >= 0) {
    final int k = kind(p);
    size(p, k, size(p, k) + dsize);
    p = parent(p, k);
  }

  if (meta.updindex) {
    // add the entries to the ID -> PRE mapping:
    idmap.insert(ipre, id(ipre), dsize);
    indexEnd();
  }

  // update the distances starting behind the inserted nodes, unless cache is set
  if (!cache) updateDist(ipre + dsize, dsize);

  // propagate PRE value shifts to namespaces
  if (ipar != -1) nspaces.insert(ipre, dsize, newNodes);
}