Esempio n. 1
0
 /**
  * Determines the distance value a node has to carry after the structural updates.
  *
  * <p>The parent is looked up via the node's old PRE value and its currently stored distance;
  * the parent's PRE value is then mapped to its new position.
  *
  * @param preAfter the current PRE value of the node (after structural updates have been applied)
  * @return new distance for the given node
  */
 private int calculateNewDistance(final int preAfter) {
   // distance as currently stored for the node
   final int oldDistance = data.dist(preAfter, data.kind(preAfter));
   // PRE value of the parent before the updates: old PRE of the node minus the stored distance
   final int oldParent = calculatePreValue(preAfter, true) - oldDistance;
   // offset between the node and the new position of its parent
   return preAfter - calculatePreValue(oldParent, false);
 }
Esempio n. 2
0
 /**
  * Executes the updates. Resolving text node adjacency can be skipped if adjacent text nodes are
  * not to be expected.
  *
  * <p>If distance caching is requested, {@code data.cache} is enabled for the structural phase.
  * The flag is always reset when this method is left, also if one of the structural steps
  * throws: a flag that stays enabled would suppress distance updates in later insert and replace
  * operations on the same data instance.
  *
  * @param mergeTexts adjacent text nodes are to be expected and must be merged
  */
 public void execute(final boolean mergeTexts) {
   check();
   optimize();
   applyValueUpdates();
   if (cacheDistanceUpdates) data.cache = true;
   try {
     applyStructuralUpdates();
     updateDistances();
     if (mergeTexts) resolveTextAdjacency();
   } finally {
     // reset unconditionally, so a failing step cannot leave the data instance in caching mode
     data.cache = false;
   }
 }
Esempio n. 3
0
  /**
   * Checks if the full-text index can be used for this expression and, if so, stores the
   * estimated costs in the specified index info.
   *
   * @param ii index info; its {@code costs} field is set if the index is applicable
   * @return {@code true} if the index can be accessed
   */
  @Override
  public boolean indexAccessible(final IndexInfo ii) {
    data = ii.ic.data;
    final MetaData meta = data.meta;
    final FTOpt opt = ftt.opt;

    /* The index is rejected if an FTTimes option is specified, or if an
     * explicitly set case, diacritics, stemming or language option conflicts
     * with the database options. As a consequence, index-based querying
     * might yield other results than sequential scanning. */
    if (occ != null) return false;
    if (opt.cs != null && meta.casesens == (opt.cs == FTCase.INSENSITIVE)) return false;
    if (opt.isSet(DC) && meta.diacritics != opt.is(DC)) return false;
    if (opt.isSet(ST) && meta.stemming != opt.is(ST)) return false;
    if (opt.ln != null && !opt.ln.equals(meta.language)) return false;

    // let the tokenizer adopt the database options
    opt.copy(meta);

    // query terms are not known at compile time: fall back to an estimate
    if (tokens == null) {
      ii.costs = Math.max(2, data.meta.size / 30);
      return true;
    }

    // sum up the costs of all query tokens
    final FTLexer lexer = new FTLexer(opt);
    ii.costs = 0;
    for (final byte[] term : tokens) {
      lexer.init(term);
      while (lexer.hasNext()) {
        final byte[] token = lexer.nextToken();
        // stop words are skipped
        if (opt.sw != null && opt.sw.contains(token)) continue;

        if (opt.is(WC)) {
          final byte[] pattern = lexer.get();
          // a leading wildcard rules out index access
          if (pattern[0] == '.') return false;
          // so do curly braces, backslashes and a second dot
          int dots = 0;
          for (final byte ch : pattern) {
            if (ch == '{' || ch == '\\') return false;
            if (ch == '.' && ++dots > 1) return false;
          }
        }
        // favor full-text index requests over exact queries
        final int hits = data.costs(lexer);
        if (hits != 0) ii.costs += Math.max(2, hits / 100);
      }
    }
    return true;
  }
Esempio n. 4
0
  /**
   * Returns an iterator over the text or attribute nodes that match the index request.
   *
   * @param qc query context (not used by this implementation)
   * @return node iterator; its {@code next()} method returns {@code null} once exhausted
   */
  @Override
  public BasicNodeIter iter(final QueryContext qc) {
    final boolean isText = index.type() == IndexType.TEXT;
    final Data data = ictx.data;
    final int maxLength = data.meta.maxlen;

    // the index is only used if both range bounds do not exceed the maximum length
    // and the index flag for the requested node kind is set; otherwise, scan() is used
    final IndexIterator hits;
    if (index.min.length <= maxLength
        && index.max.length <= maxLength
        && (isText ? data.meta.textindex : data.meta.attrindex)) {
      hits = data.iter(index);
    } else {
      hits = scan();
    }

    final byte kind = isText ? Data.TEXT : Data.ATTR;
    return new BasicNodeIter() {
      @Override
      public ANode next() {
        if (!hits.more()) return null;
        return new DBNode(data, hits.pre(), kind);
      }
    };
  }
Esempio n. 5
0
  /**
   * Rewrites distance values so that the parent-child relationships invalidated by structural
   * updates are valid again.
   *
   * <p>Every structural update (insert/delete) shifts the higher PRE values and thereby
   * invalidates parent-child relationships. The distances are only adjusted once all structural
   * updates have been carried out, so that each node that needs an update is touched only once.
   */
  public void updateDistances() {
    accumulatePreValueShifts();
    final IntSet visited = new IntSet();

    for (final BasicUpdate update : updStructural) {
      /* Start at the shifted PRE value of the affected node and hop forward by the size of the
       * current node. This visits the affected node and all following siblings of nodes on the
       * ancestor-or-self axis. The walk ends at the end of the table or at a node that has
       * already been handled. */
      for (int pre = update.preOfAffectedNode + update.accumulatedShifts;
          pre < data.meta.size && !visited.contains(pre);
          pre += data.size(pre, data.kind(pre))) {
        data.dist(pre, data.kind(pre), calculateNewDistance(pre));
        visited.add(pre);
      }
    }
  }
Esempio n. 6
0
  /**
   * Drops update operations that cannot have any effect. If a node T is deleted or replaced, all
   * updates on the descendant axis of T can be left out as they won't affect the database after
   * all.
   *
   * <p>Superfluous updates can have a minimum PRE value of pre(T)+1 and a maximum PRE value of
   * pre(T)+size(T).
   *
   * <p>An update with location pre(T)+size(T) can only be removed if the update is an atomic insert
   * and the inserted node is then part of the subtree of T.
   *
   * <p>The method is a no-op if the list has already been optimized.
   */
  public void optimize() {
    if (opt) return;

    check();
    // traverse from lowest to highest PRE value (the list is walked back to front)
    int i = updStructural.size() - 1;
    while (i >= 0) {
      final BasicUpdate u = updStructural.get(i--);
      // only destructive updates can render other updates superfluous
      if (!u.destructive()) continue;

      // lowest and highest PRE value a superfluous update can have
      final int pre = u.location;
      final int fol = pre + data.size(pre, data.kind(pre));
      // inspect the next candidates until one lies outside the subtree of T
      while (i >= 0) {
        final BasicUpdate cand = updStructural.get(i);
        final int candPre = cand.location;
        // an insert may be located at the following PRE of T, as long as its parent is
        // part of the subtree of T
        final boolean insertsIntoSubtree = candPre <= fol
            && (cand instanceof Insert || cand instanceof InsertAttr)
            && cand.parent() >= pre
            && cand.parent() < fol;
        // all other updates must be located before the following PRE of T
        if (!insertsIntoSubtree && candPre >= fol) break;
        updStructural.remove(i--);
      }
    }
    opt = true;
  }
Esempio n. 7
0
  /**
   * Creates the atomic operations that merge the node at the given PRE value with its right
   * neighbor PRE+1, provided that both are text nodes with the same parent.
   *
   * @param a node PRE value
   * @param d target data reference
   * @return list of text merging operations; empty if nothing has to be merged
   */
  private AtomicUpdateList necessaryMerges(final int a, final Data d) {
    final AtomicUpdateList merges = new AtomicUpdateList(d);
    final int size = d.meta.size;
    final int right = a + 1;

    // both PRE values must lie inside the table
    final boolean inTable = a >= 0 && right >= 0 && a < size && right < size;
    // both nodes must be texts, and they must share the same parent
    final boolean mergeable = inTable
        && d.kind(a) == Data.TEXT
        && d.kind(right) == Data.TEXT
        && d.parent(a, Data.TEXT) == d.parent(right, Data.TEXT);

    if (mergeable) {
      // remove the right text and store the concatenated value in the left one
      merges.addDelete(right);
      merges.addUpdateValue(a, Data.TEXT, Token.concat(d.text(a, true), d.text(right, true)));
    }
    return merges;
  }
Esempio n. 8
0
  /**
   * Inserts a data instance at the specified pre value. Note that the specified data instance must
   * differ from this instance.
   *
   * <p>The entries of the clip are copied one by one. For each entry, the distance to its parent
   * is recomputed for the target position, and namespace declarations that are not yet in scope
   * at the insertion parent are registered. Afterwards, the sizes of all ancestors of the
   * insertion parent are increased, and (unless {@code cache} is set) the distances of the nodes
   * behind the inserted range are updated.
   *
   * @param ipre value at which to insert new data
   * @param ipar parent pre value of node; negative if document nodes are added on top level
   * @param clip data clip
   */
  public final void insert(final int ipre, final int ipar, final DataClip clip) {
    meta.update();

    // update value and document indexes
    if (meta.updindex) indexBegin();
    resources.insert(ipre, clip);

    final int dsize = clip.size();
    // number of entries per batch: the clip size, capped at IO.BLOCKSIZE >> IO.NODEPOWER
    final int buf = Math.min(dsize, IO.BLOCKSIZE >> IO.NODEPOWER);
    // resize buffer to cache more entries
    buffer(buf);

    // find all namespaces in scope to avoid duplicate declarations
    final TokenMap nsScope = nspaces.scope(ipar, this);

    // loop through all entries
    // pre values of inserted document and element nodes whose namespace scope is still open
    final IntList preStack = new IntList();
    // remembered so that it can be restored after the loop
    final NSNode nsRoot = nspaces.current();
    final HashSet<NSNode> newNodes = new HashSet<NSNode>();
    // pre values of nodes whose namespace flag is set after the loop
    final IntList flagPres = new IntList();

    // source data instance the entries are read from
    final Data data = clip.data;
    // number of entries processed so far
    int c = 0;
    for (int dpre = clip.start; dpre < clip.end; ++dpre, ++c) {
      // a full batch has been collected: hand it over, addressed by the pre value of its first
      // entry (presumably insert(int) writes the buffered entries to the table - TODO confirm)
      if (c != 0 && c % buf == 0) insert(ipre + c - buf);

      final int pre = ipre + c;
      final int dkind = data.kind(dpre);
      final int dpar = data.parent(dpre, dkind);
      // ipar < 0 if document nodes on top level are added
      // distance: taken from the source if the entry has a parent there; otherwise, it is
      // the offset to the insertion parent, or 0 if there is none
      final int dis = dpar >= 0 ? dpre - dpar : ipar >= 0 ? pre - ipar : 0;
      // parent pre value in the target; -1 if the entry has no parent
      final int par = dis == 0 ? -1 : pre - dis;

      if (c == 0) nspaces.root(par, this);

      // close the namespace scopes of all stacked nodes located behind the parent
      while (!preStack.isEmpty() && preStack.peek() > par) nspaces.close(preStack.pop());

      switch (dkind) {
        case DOC:
          // add document
          nspaces.prepare();
          final int s = data.size(dpre, dkind);
          doc(pre, s, data.text(dpre, true));
          meta.ndocs++;
          preStack.push(pre);
          break;
        case ELEM:
          // add element
          nspaces.prepare();
          // indicates if new namespace declarations were added for this element
          boolean ne = false;
          if (data.nsFlag(dpre)) {
            final Atts at = data.ns(dpre);
            for (int a = 0; a < at.size(); ++a) {
              // see if prefix has been declared/ is part of current ns scope
              final byte[] old = nsScope.get(at.name(a));
              if (old == null || !eq(old, at.value(a))) {
                // we have to keep track of all new NSNodes that are added
                // to the Namespace structure, as their pre values must not
                // be updated. I.e. if an NSNode N with pre value 3 existed
                // prior to inserting and two new nodes are inserted at
                // location pre == 3 we have to make sure N and only N gets
                // updated.
                newNodes.add(nspaces.add(at.name(a), at.value(a), pre));
                ne = true;
              }
            }
          }
          byte[] nm = data.name(dpre, dkind);
          elem(
              dis,
              tagindex.index(nm, null, false),
              data.attSize(dpre, dkind),
              data.size(dpre, dkind),
              nspaces.uri(nm, true),
              ne);
          preStack.push(pre);
          break;
        case TEXT:
        case COMM:
        case PI:
          // add text
          text(pre, dis, data.text(dpre, true), dkind);
          break;
        case ATTR:
          // add attribute
          nm = data.name(dpre, dkind);
          // check if prefix already in nsScope or not
          final byte[] attPref = prefix(nm);
          // check if prefix of attribute has already been declared, otherwise
          // add declaration to parent node
          if (data.nsFlag(dpre) && nsScope.get(attPref) == null) {
            nspaces.add(
                par,
                preStack.isEmpty() ? -1 : preStack.peek(),
                attPref,
                data.nspaces.uri(data.uri(dpre, dkind)),
                this);
            // save pre value to set ns flag later for this node. can't be done
            // here as direct table access would interfere with the buffer
            flagPres.add(par);
          }
          attr(
              pre,
              dis,
              atnindex.index(nm, null, false),
              data.text(dpre, false),
              nspaces.uri(nm, false),
              false);
          break;
      }
    }
    // finalize and update namespace structure
    while (!preStack.isEmpty()) nspaces.close(preStack.pop());
    nspaces.root(nsRoot);

    // hand over the last batch, addressed by the pre value of its first entry
    // (bp is presumably the fill position of the buffer - TODO confirm)
    if (bp != 0) insert(ipre + c - 1 - (c - 1) % buf);
    // reset buffer to old size
    buffer(1);

    // set ns flags
    for (int f = 0; f < flagPres.size(); f++) {
      final int fl = flagPres.get(f);
      // sets bit 15 of the name value
      table.write2(fl, 1, name(fl) | 1 << 15);
    }

    // increase size of ancestors
    int p = ipar;
    while (p >= 0) {
      final int k = kind(p);
      size(p, k, size(p, k) + dsize);
      p = parent(p, k);
    }

    if (meta.updindex) {
      // add the entries to the ID -> PRE mapping:
      idmap.insert(ipre, id(ipre), dsize);
      indexEnd();
    }

    // distances of the nodes behind the inserted range are not updated if the cache flag is set
    if (!cache) updateDist(ipre + dsize, dsize);

    // propagate PRE value shifts to namespaces
    if (ipar != -1) nspaces.insert(ipre, dsize, newNodes);
  }
Esempio n. 9
0
  /**
   * Replaces parts of the database with the specified data instance.
   *
   * <p>The entries of the clip are collected in the buffer and then written over the replaced
   * range with a single table operation. If the new range differs in size from the replaced
   * one, the sizes of all ancestors are adjusted, and (unless {@code cache} is set) the
   * distances of the nodes behind the new range are updated.
   *
   * @param rpre pre value to be replaced
   * @param clip data clip
   */
  public final void replace(final int rpre, final DataClip clip) {
    meta.update();

    final int dsize = clip.size();
    final Data data = clip.data;

    // properties of the replaced node
    final int rkind = kind(rpre);
    final int rsize = size(rpre, rkind);
    final int rpar = parent(rpre, rkind);
    // size difference between the new and the replaced range
    final int diff = dsize - rsize;
    // the buffer is resized so that it can hold all new entries
    buffer(dsize);
    resources.replace(rpre, rsize, clip);

    if (meta.updindex) {
      // update index
      indexDelete(rpre, rsize);
      indexBegin();
    }

    for (int dpre = clip.start; dpre < clip.end; ++dpre) {
      final int dkind = data.kind(dpre);
      final int dpar = data.parent(dpre, dkind);
      // target pre value of the current entry
      final int pre = rpre + dpre - clip.start;
      // distance: taken from the source if the entry has a parent there; otherwise, it is
      // the offset to the parent of the replaced node
      final int dis = dpar >= 0 ? dpre - dpar : pre - rpar;

      switch (dkind) {
        case DOC:
          // add document
          doc(pre, data.size(dpre, dkind), data.text(dpre, true));
          meta.ndocs++;
          break;
        case ELEM:
          // add element
          byte[] nm = data.name(dpre, dkind);
          elem(
              dis,
              tagindex.index(nm, null, false),
              data.attSize(dpre, dkind),
              data.size(dpre, dkind),
              nspaces.uri(nm, true),
              false);
          break;
        case TEXT:
        case COMM:
        case PI:
          // add text
          text(pre, dis, data.text(dpre, true), dkind);
          break;
        case ATTR:
          // add attribute
          nm = data.name(dpre, dkind);
          attr(
              pre,
              dis,
              atnindex.index(nm, null, false),
              data.text(dpre, false),
              nspaces.uri(nm, false),
              false);
          break;
      }
    }

    if (meta.updindex) {
      indexEnd();
      // update ID -> PRE map:
      idmap.delete(rpre, id(rpre), -rsize);
      idmap.insert(rpre, meta.lastid - dsize + 1, dsize);
    }

    // update table:
    table.replace(rpre, buffer(), rsize);
    // reset buffer size
    buffer(1);

    // no distance/size update if the two subtrees are of equal size
    if (diff == 0) return;

    // increase/decrease size of ancestors, adjust distances of siblings
    int p = rpar;
    while (p >= 0) {
      final int k = kind(p);
      size(p, k, size(p, k) + diff);
      p = parent(p, k);
    }

    // distances of the nodes behind the new range are not updated if the cache flag is set
    if (!cache) updateDist(rpre + dsize, diff);

    // adjust attribute size of parent if attributes inserted. attribute size
    // of parent cannot be reduced via a replace expression.
    int dpre = clip.start;
    if (data.kind(dpre) == ATTR) {
      // number of leading attribute entries in the clip
      int d = 0;
      while (dpre < clip.end && data.kind(dpre++) == ATTR) d++;
      // NOTE(review): the attribute size of the parent is overwritten with d + 1, independent
      // of its previous value - confirm that this is intended if the parent has other attributes
      if (d > 1) attSize(rpar, kind(rpar), d + 1);
    }
  }
Esempio n. 10
0
 /**
  * Appends an atomic replace operation to the list.
  *
  * <p>The operation is created with the target PRE value, the size difference between the
  * insertion data and the replaced node, the PRE value that follows the replaced node, and the
  * insertion data.
  *
  * @param pre PRE value of the target node/update location
  * @param d insertion sequence data reference
  */
 public void addReplace(final int pre, final Data d) {
   final int kind = data.kind(pre);
   final int replacedSize = data.size(pre, kind);
   final int sizeDiff = d.meta.size - replacedSize;
   final int following = pre + replacedSize;
   add(new Replace(pre, sizeDiff, following, d), true);
 }
Esempio n. 11
0
 /**
  * Appends an atomic delete operation to the list.
  *
  * <p>The operation is created with the target PRE value, the negated size of the target node,
  * and the PRE value that follows the target node.
  *
  * @param pre PRE value of the target node/update location
  */
 public void addDelete(final int pre) {
   final int size = data.size(pre, data.kind(pre));
   add(new Delete(pre, -size, pre + size), true);
 }