/**
 * Calculates the new distance value for the given node.
 *
 * @param preAfter the current PRE value of the node (after structural updates have been applied)
 * @return new distance for the given node
 */
private int calculateNewDistance(final int preAfter) {
  final int kind = data.kind(preAfter);
  final int distanceBefore = data.dist(preAfter, kind);
  final int preBefore = calculatePreValue(preAfter, true);
  final int parentBefore = preBefore - distanceBefore;
  final int parentAfter = calculatePreValue(parentBefore, false);
  return preAfter - parentAfter;
}
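/* Worked example with hypothetical values (not from the original sources): the table stores,
 * for every node, its distance to the parent, i.e. dist(pre) == pre - parent(pre). Assume a
 * node sat at preBefore = 7 with distanceBefore = 3, so its parent was at
 * parentBefore = 7 - 3 = 4. If the accumulated shifts map PRE 7 to preAfter = 9 and PRE 4 to
 * parentAfter = 5, calculateNewDistance(9) returns 9 - 5 = 4. */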
/**
 * Executes the updates. Resolving text node adjacency can be skipped if adjacent text nodes are
 * not to be expected.
 *
 * @param mergeTexts adjacent text nodes are to be expected and must be merged
 */
public void execute(final boolean mergeTexts) {
  check();
  optimize();
  applyValueUpdates();
  if (cacheDistanceUpdates) data.cache = true;
  applyStructuralUpdates();
  updateDistances();
  if (mergeTexts) resolveTextAdjacency();
  data.cache = false;
}
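/* Illustrative sketch, not part of the original sources: a minimal driver for the pipeline
 * shown in execute() above, using only methods that appear in this section. The method name
 * and the Data references 'data' (target database) and 'replacement' (insertion sequence)
 * are hypothetical. */
private static void applyReplaceExample(final Data data, final Data replacement) {
  final AtomicUpdateList updates = new AtomicUpdateList(data);
  // replace the subtree rooted at PRE 9 (hypothetical target node)
  updates.addReplace(9, replacement);
  // check, optimize, apply value and structural updates, fix distances, merge adjacent texts
  updates.execute(true);
}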
@Override
public boolean indexAccessible(final IndexInfo ii) {
  /* If the following conditions yield true, the index is accessed:
   * - all query terms are statically available
   * - no FTTimes option is specified
   * - explicitly set case, diacritics and stemming match options do not
   *   conflict with index options. */
  data = ii.ic.data;
  final MetaData md = data.meta;
  final FTOpt fto = ftt.opt;

  /* Index will be applied if no explicit match options have been set
   * that conflict with the index options. As a consequence, though, index-
   * based querying might yield other results than sequential scanning. */
  if (occ != null ||
      fto.cs != null && md.casesens == (fto.cs == FTCase.INSENSITIVE) ||
      fto.isSet(DC) && md.diacritics != fto.is(DC) ||
      fto.isSet(ST) && md.stemming != fto.is(ST) ||
      fto.ln != null && !fto.ln.equals(md.language)) return false;

  // adopt database options to tokenizer
  fto.copy(md);

  // estimate costs if text is not known at compile time
  if (tokens == null) {
    ii.costs = Math.max(2, data.meta.size / 30);
    return true;
  }

  // summarize number of hits; break loop if no hits are expected
  final FTLexer ft = new FTLexer(fto);
  ii.costs = 0;
  for (byte[] t : tokens) {
    ft.init(t);
    while (ft.hasNext()) {
      final byte[] tok = ft.nextToken();
      if (fto.sw != null && fto.sw.contains(tok)) continue;

      if (fto.is(WC)) {
        // don't use index if one of the terms starts with a wildcard
        t = ft.get();
        if (t[0] == '.') return false;
        // don't use index if certain characters or more than 1 dot are found
        int d = 0;
        for (final byte w : t) {
          if (w == '{' || w == '\\' || w == '.' && ++d > 1) return false;
        }
      }
      // favor full-text index requests over exact queries
      final int costs = data.costs(ft);
      if (costs != 0) ii.costs += Math.max(2, costs / 100);
    }
  }
  return true;
}
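/* Cost sketch with hypothetical numbers: if the query tokens are not statically known, a
 * database of meta.size = 3000 nodes is estimated at max(2, 3000 / 30) = 100. If a token is
 * known and data.costs(ft) reports 250 index hits, it contributes max(2, 250 / 100) = 2 to
 * ii.costs; a token with 0 reported hits contributes nothing. */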
@Override
public BasicNodeIter iter(final QueryContext qc) {
  final boolean text = index.type() == IndexType.TEXT;
  final byte kind = text ? Data.TEXT : Data.ATTR;
  final Data data = ictx.data;
  final int ml = data.meta.maxlen;
  final IndexIterator ii =
      index.min.length <= ml && index.max.length <= ml &&
      (text ? data.meta.textindex : data.meta.attrindex) ? data.iter(index) : scan();

  return new BasicNodeIter() {
    @Override
    public ANode next() {
      return ii.more() ? new DBNode(data, ii.pre(), kind) : null;
    }
  };
}
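/* Decision sketch with hypothetical metadata: with maxlen = 96, a range whose min and max
 * strings are at most 96 bytes long and whose matching index (text or attribute) exists is
 * answered via data.iter(index); otherwise the iterator falls back to scan(). */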
/**
 * Updates distances to restore parent-child relationships that have been invalidated by
 * structural updates.
 *
 * <p>Each structural update (insert/delete) leads to a shift of higher PRE values. This
 * invalidates parent-child relationships. Distances are only updated after all structural updates
 * have been carried out to make sure each node (that has to be updated) is only touched once.
 */
public void updateDistances() {
  accumulatePreValueShifts();
  final IntSet alreadyUpdatedNodes = new IntSet();

  for (final BasicUpdate update : updStructural) {
    int newPreOfAffectedNode = update.preOfAffectedNode + update.accumulatedShifts;

    /* Update distance for the affected node and all following siblings of nodes
     * on the ancestor-or-self axis. */
    while (newPreOfAffectedNode < data.meta.size) {
      if (alreadyUpdatedNodes.contains(newPreOfAffectedNode)) break;
      data.dist(newPreOfAffectedNode, data.kind(newPreOfAffectedNode),
          calculateNewDistance(newPreOfAffectedNode));
      alreadyUpdatedNodes.add(newPreOfAffectedNode);
      newPreOfAffectedNode +=
          data.size(newPreOfAffectedNode, data.kind(newPreOfAffectedNode));
    }
  }
}
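/* Illustrative walk with hypothetical PRE values: assume an insert shifted all PRE values from
 * position 6 onwards and the affected node now sits at PRE 8. The loop fixes the distance of
 * PRE 8, jumps by size(8) to the following node, fixes that one as well, and so on until the
 * end of the table is reached or a node is met that an earlier update has already fixed
 * (alreadyUpdatedNodes). */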
/**
 * Removes superfluous update operations. If a node T is deleted or replaced, all updates on the
 * descendant axis of T can be left out as they won't affect the database after all.
 *
 * <p>Superfluous updates can have a minimum PRE value of pre(T)+1 and a maximum PRE value of
 * pre(T)+size(T).
 *
 * <p>An update with location pre(T)+size(T) can only be removed if the update is an atomic insert
 * and the inserted node is then part of the subtree of T.
 */
public void optimize() {
  if (opt) return;
  check();

  // traverse from lowest to highest PRE value
  int i = updStructural.size() - 1;
  while (i >= 0) {
    final BasicUpdate u = updStructural.get(i);
    // if this update can lead to superfluous updates ...
    if (u.destructive()) {
      // ... we determine the lowest and highest PRE values of a superfluous update
      final int pre = u.location;
      final int fol = pre + data.size(pre, data.kind(pre));
      i--;
      // ... and have a look at the next candidate
      while (i >= 0) {
        final BasicUpdate desc = updStructural.get(i);
        final int descpre = desc.location;
        // if the candidate operates on the subtree of T and inserts a node ...
        if (descpre <= fol && (desc instanceof Insert || desc instanceof InsertAttr)
            && desc.parent() >= pre && desc.parent() < fol) {
          // ... it is removed
          updStructural.remove(i--);

        // other updates (not inserting a node) that operate on the subtree of T can
        // only have a PRE value that is smaller than the following PRE of T ...
        } else if (descpre < fol) {
          // ... these we delete
          updStructural.remove(i--);

        // else there's nothing to delete
        } else break;
      }
    } else i--;
  }
  opt = true;
}
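/* Worked example with hypothetical updates: let T be a node with pre(T) = 4 and size(T) = 3,
 * so its subtree covers PRE values 4..6. If T is deleted or replaced, any update located at
 * PRE 5 or 6 operates on the removed subtree and is dropped. An atomic insert located at
 * PRE 7 (= pre(T) + size(T)) is dropped only if its parent lies within 4..6, i.e. the inserted
 * node would have ended up inside T. */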
/**
 * Returns atomic text node merging operations if necessary for the given node PRE and its right
 * neighbor PRE+1.
 *
 * @param a node PRE value
 * @param d target data reference
 * @return list of text merging operations
 */
private AtomicUpdateList necessaryMerges(final int a, final Data d) {
  final AtomicUpdateList mergeTwoNodes = new AtomicUpdateList(d);
  final int s = d.meta.size;
  final int b = a + 1;
  // don't leave table
  if (a >= s || b >= s || a < 0 || b < 0) return mergeTwoNodes;
  // only merge texts
  if (d.kind(a) != Data.TEXT || d.kind(b) != Data.TEXT) return mergeTwoNodes;
  // only merge neighboring texts
  if (d.parent(a, Data.TEXT) != d.parent(b, Data.TEXT)) return mergeTwoNodes;

  mergeTwoNodes.addDelete(b);
  mergeTwoNodes.addUpdateValue(a, Data.TEXT, Token.concat(d.text(a, true), d.text(b, true)));
  return mergeTwoNodes;
}
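/* Illustrative case with hypothetical PRE values: if PRE 3 and PRE 4 are both text nodes with
 * the same parent, the returned list deletes PRE 4 and rewrites the value of PRE 3 to the
 * concatenation of both texts. In every other constellation (different kinds, different
 * parents, or PRE values outside the table) the returned list stays empty. */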
/**
 * Inserts a data instance at the specified pre value. Note that the specified data instance must
 * differ from this instance.
 *
 * @param ipre value at which to insert new data
 * @param ipar parent pre value of node
 * @param clip data clip
 */
public final void insert(final int ipre, final int ipar, final DataClip clip) {
  meta.update();

  // update value and document indexes
  if (meta.updindex) indexBegin();
  resources.insert(ipre, clip);

  final int dsize = clip.size();
  final int buf = Math.min(dsize, IO.BLOCKSIZE >> IO.NODEPOWER);
  // resize buffer to cache more entries
  buffer(buf);
  // find all namespaces in scope to avoid duplicate declarations
  final TokenMap nsScope = nspaces.scope(ipar, this);

  // loop through all entries
  final IntList preStack = new IntList();
  final NSNode nsRoot = nspaces.current();
  final HashSet<NSNode> newNodes = new HashSet<NSNode>();
  final IntList flagPres = new IntList();

  final Data data = clip.data;
  int c = 0;
  for (int dpre = clip.start; dpre < clip.end; ++dpre, ++c) {
    if (c != 0 && c % buf == 0) insert(ipre + c - buf);

    final int pre = ipre + c;
    final int dkind = data.kind(dpre);
    final int dpar = data.parent(dpre, dkind);
    // ipar < 0 if document nodes on top level are added
    final int dis = dpar >= 0 ? dpre - dpar : ipar >= 0 ? pre - ipar : 0;
    final int par = dis == 0 ? -1 : pre - dis;
    if (c == 0) nspaces.root(par, this);
    while (!preStack.isEmpty() && preStack.peek() > par) nspaces.close(preStack.pop());

    switch (dkind) {
      case DOC:
        // add document
        nspaces.prepare();
        final int s = data.size(dpre, dkind);
        doc(pre, s, data.text(dpre, true));
        meta.ndocs++;
        preStack.push(pre);
        break;
      case ELEM:
        // add element
        nspaces.prepare();
        boolean ne = false;
        if (data.nsFlag(dpre)) {
          final Atts at = data.ns(dpre);
          for (int a = 0; a < at.size(); ++a) {
            // see if prefix has been declared/is part of current ns scope
            final byte[] old = nsScope.get(at.name(a));
            if (old == null || !eq(old, at.value(a))) {
              // we have to keep track of all new NSNodes that are added
              // to the Namespace structure, as their pre values must not
              // be updated. I.e. if an NSNode N with pre value 3 existed
              // prior to inserting and two new nodes are inserted at
              // location pre == 3 we have to make sure N and only N gets
              // updated.
              newNodes.add(nspaces.add(at.name(a), at.value(a), pre));
              ne = true;
            }
          }
        }
        byte[] nm = data.name(dpre, dkind);
        elem(dis, tagindex.index(nm, null, false), data.attSize(dpre, dkind),
            data.size(dpre, dkind), nspaces.uri(nm, true), ne);
        preStack.push(pre);
        break;
      case TEXT:
      case COMM:
      case PI:
        // add text
        text(pre, dis, data.text(dpre, true), dkind);
        break;
      case ATTR:
        // add attribute
        nm = data.name(dpre, dkind);
        // check if prefix of attribute has already been declared, otherwise
        // add declaration to parent node
        final byte[] attPref = prefix(nm);
        if (data.nsFlag(dpre) && nsScope.get(attPref) == null) {
          nspaces.add(par, preStack.isEmpty() ? -1 : preStack.peek(), attPref,
              data.nspaces.uri(data.uri(dpre, dkind)), this);
          // save pre value to set ns flag later for this node. can't be done
          // here as direct table access would interfere with the buffer
          flagPres.add(par);
        }
        attr(pre, dis, atnindex.index(nm, null, false), data.text(dpre, false),
            nspaces.uri(nm, false), false);
        break;
    }
  }
  // finalize and update namespace structure
  while (!preStack.isEmpty()) nspaces.close(preStack.pop());
  nspaces.root(nsRoot);

  if (bp != 0) insert(ipre + c - 1 - (c - 1) % buf);
  // reset buffer to old size
  buffer(1);

  // set ns flags
  for (int f = 0; f < flagPres.size(); f++) {
    final int fl = flagPres.get(f);
    table.write2(fl, 1, name(fl) | 1 << 15);
  }

  // increase size of ancestors
  int p = ipar;
  while (p >= 0) {
    final int k = kind(p);
    size(p, k, size(p, k) + dsize);
    p = parent(p, k);
  }

  if (meta.updindex) {
    // add the entries to the ID -> PRE mapping:
    idmap.insert(ipre, id(ipre), dsize);
    indexEnd();
  }

  if (!cache) updateDist(ipre + dsize, dsize);

  // propagate PRE value shifts to namespaces
  if (ipar != -1) nspaces.insert(ipre, dsize, newNodes);
}
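/* Worked example for the bookkeeping above (hypothetical values): inserting a clip of
 * dsize = 4 nodes below ipar = 2 increases size(2) and the size of every ancestor of PRE 2
 * by 4. For nodes copied from the clip, the parent distance is taken over from the source data
 * (dis = dpre - dpar); only the clip's topmost nodes fall back to the insertion parent
 * (dis = pre - ipar), or to 0 if document nodes are added on top level. */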
/**
 * Replaces parts of the database with the specified data instance.
 *
 * @param rpre pre value to be replaced
 * @param clip data clip
 */
public final void replace(final int rpre, final DataClip clip) {
  meta.update();

  final int dsize = clip.size();
  final Data data = clip.data;
  final int rkind = kind(rpre);
  final int rsize = size(rpre, rkind);
  final int rpar = parent(rpre, rkind);
  final int diff = dsize - rsize;
  buffer(dsize);
  resources.replace(rpre, rsize, clip);

  if (meta.updindex) {
    // update index
    indexDelete(rpre, rsize);
    indexBegin();
  }

  for (int dpre = clip.start; dpre < clip.end; ++dpre) {
    final int dkind = data.kind(dpre);
    final int dpar = data.parent(dpre, dkind);
    final int pre = rpre + dpre - clip.start;
    final int dis = dpar >= 0 ? dpre - dpar : pre - rpar;

    switch (dkind) {
      case DOC:
        // add document
        doc(pre, data.size(dpre, dkind), data.text(dpre, true));
        meta.ndocs++;
        break;
      case ELEM:
        // add element
        byte[] nm = data.name(dpre, dkind);
        elem(dis, tagindex.index(nm, null, false), data.attSize(dpre, dkind),
            data.size(dpre, dkind), nspaces.uri(nm, true), false);
        break;
      case TEXT:
      case COMM:
      case PI:
        // add text
        text(pre, dis, data.text(dpre, true), dkind);
        break;
      case ATTR:
        // add attribute
        nm = data.name(dpre, dkind);
        attr(pre, dis, atnindex.index(nm, null, false), data.text(dpre, false),
            nspaces.uri(nm, false), false);
        break;
    }
  }

  if (meta.updindex) {
    indexEnd();
    // update ID -> PRE map:
    idmap.delete(rpre, id(rpre), -rsize);
    idmap.insert(rpre, meta.lastid - dsize + 1, dsize);
  }

  // update table:
  table.replace(rpre, buffer(), rsize);
  buffer(1);

  // no distance/size update if the two subtrees are of equal size
  if (diff == 0) return;

  // increase/decrease size of ancestors, adjust distances of siblings
  int p = rpar;
  while (p >= 0) {
    final int k = kind(p);
    size(p, k, size(p, k) + diff);
    p = parent(p, k);
  }
  if (!cache) updateDist(rpre + dsize, diff);

  // adjust attribute size of parent if attributes inserted. attribute size
  // of parent cannot be reduced via a replace expression.
  int dpre = clip.start;
  if (data.kind(dpre) == ATTR) {
    int d = 0;
    while (dpre < clip.end && data.kind(dpre++) == ATTR) d++;
    if (d > 1) attSize(rpar, kind(rpar), d + 1);
  }
}
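/* Worked example with hypothetical sizes: replacing a subtree of rsize = 5 nodes with a clip
 * of dsize = 3 nodes yields diff = -2, so the size of every ancestor shrinks by 2 and, unless
 * distance updates are cached, updateDist(rpre + 3, -2) adjusts the distances of the following
 * nodes. If the clip consists of, say, 3 attributes, the parent's attribute size is set to
 * d + 1 = 4 (presumably the element entry itself plus its attributes). */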
/**
 * Adds a replace atomic to the list.
 *
 * @param pre PRE value of the target node/update location
 * @param d insertion sequence data reference
 */
public void addReplace(final int pre, final Data d) {
  final int oldsize = data.size(pre, data.kind(pre));
  final int newsize = d.meta.size;
  add(new Replace(pre, newsize - oldsize, pre + oldsize, d), true);
}
/**
 * Adds a delete atomic to the list.
 *
 * @param pre PRE value of the target node/update location
 */
public void addDelete(final int pre) {
  final int k = data.kind(pre);
  final int s = data.size(pre, k);
  add(new Delete(pre, -s, pre + s), true);
}
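/* Hypothetical walk-through of the shift arithmetic above: deleting a node at PRE 6 with
 * size 4 adds Delete(6, -4, 10), i.e. a shift of -4 with pre + size = 10 as the first PRE
 * value following the removed subtree. Replacing the same node with an insertion sequence of
 * 7 nodes adds Replace(6, 3, 10, d): the following nodes are shifted by +3. */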