Example #1
0
  /**
   * Returns a pre value.
   *
   * @param id unique node id
   * @return pre value or -1 if id was not found
   */
  final int preold(final int id) {
    // find pre value in table
    for (int p = Math.max(0, id); p < meta.size; ++p) if (id == id(p)) return p;
    final int ps = Math.min(meta.size, id);
    for (int p = 0; p < ps; ++p) if (id == id(p)) return p;

    // id not found
    return -1;
  }
Example #2
0
  @Override
  public DiskData build() throws IOException {
    meta.assign(parser);
    meta.dirty = true;

    // calculate optimized output buffer sizes to reduce disk fragmentation
    final Runtime rt = Runtime.getRuntime();
    final long max = Math.min(1 << 22, rt.maxMemory() - rt.freeMemory() >> 2);
    int bs = (int) Math.min(meta.filesize, max);
    bs = Math.max(IO.BLOCKSIZE, bs - bs % IO.BLOCKSIZE);

    // drop old database (if available) and create new one
    DropDB.drop(dbname, sopts);
    sopts.dbpath(dbname).md();

    elemNames = new Names(meta);
    attrNames = new Names(meta);
    try {
      tout = new DataOutput(new TableOutput(meta, DATATBL));
      xout = new DataOutput(meta.dbfile(DATATXT), bs);
      vout = new DataOutput(meta.dbfile(DATAATV), bs);
      sout = new DataOutput(meta.dbfile(DATATMP), bs);

      final Performance perf = Prop.debug ? new Performance() : null;
      Util.debug(tit() + DOTS);
      parse();
      if (Prop.debug) Util.errln(" " + perf + " (" + Performance.getMemory() + ')');

    } catch (final IOException ex) {
      try {
        close();
      } catch (final IOException ignored) {
      }
      throw ex;
    }
    close();

    // copy temporary values into database table
    try (final DataInput in = new DataInput(meta.dbfile(DATATMP))) {
      final TableAccess ta = new TableDiskAccess(meta, true);
      for (; spos < ssize; ++spos) ta.write4(in.readNum(), 8, in.readNum());
      ta.close();
    }
    meta.dbfile(DATATMP).delete();

    // return database instance
    return new DiskData(meta, elemNames, attrNames, path, ns);
  }
Example #3
0
  /**
   * Adds an attribute entry to the internal update buffer.
   *
   * @param pre pre value
   * @param dist parent distance
   * @param name attribute name
   * @param value attribute value
   * @param uri namespace uri reference
   * @param ne namespace flag
   */
  public final void attr(
      final int pre,
      final int dist,
      final int name,
      final byte[] value,
      final int uri,
      final boolean ne) {

    // add attribute to text storage
    final int i = newID();
    final long v = index(pre, i, value, ATTR);
    final int n = ne ? 1 << 7 : 0;
    s(Math.min(IO.MAXATTS, dist) << 3 | ATTR);
    s(n | (byte) (name >> 8));
    s(name);
    s(v >> 32);
    s(v >> 24);
    s(v >> 16);
    s(v >> 8);
    s(v);
    s(0);
    s(0);
    s(0);
    s(uri);
    s(i >> 24);
    s(i >> 16);
    s(i >> 8);
    s(i);
  }
Example #4
0
  /**
   * Adds an element entry to the internal update buffer.
   *
   * @param dist parent distance
   * @param name tag name index
   * @param asize number of attributes
   * @param size node size
   * @param uri namespace uri reference
   * @param ne namespace flag
   */
  public final void elem(
      final int dist,
      final int name,
      final int asize,
      final int size,
      final int uri,
      final boolean ne) {

    // build and insert new entry
    final int i = newID();
    final int n = ne ? 1 << 7 : 0;
    s(Math.min(IO.MAXATTS, asize) << 3 | ELEM);
    s(n | (byte) (name >> 8));
    s(name);
    s(uri);
    s(dist >> 24);
    s(dist >> 16);
    s(dist >> 8);
    s(dist);
    s(size >> 24);
    s(size >> 16);
    s(size >> 8);
    s(size);
    s(i >> 24);
    s(i >> 16);
    s(i >> 8);
    s(i);
  }
Example #5
0
  /**
   * Performs a wildcard search for the specified token.
   *
   * @param token token to look for
   * @return iterator
   */
  private synchronized IndexIterator wc(final byte[] token) {
    final FTIndexIterator it = FTIndexIterator.FTEMPTY;
    final FTWildcard wc = new FTWildcard(token);
    if (!wc.parse()) return it;

    final IntList pr = new IntList();
    final IntList ps = new IntList();
    final byte[] pref = wc.prefix();
    final int pl = pref.length, tl = tp.length;
    final int l = Math.min(tl - 1, wc.max());
    for (int ti = pl; ti <= l; ti++) {
      int i = tp[ti];
      if (i == -1) continue;
      int c = ti + 1;
      int e = -1;
      while (c < tl && e == -1) e = tp[c++];
      i = find(pref, i, e, ti);

      while (i < e) {
        final byte[] t = inY.readBytes(i, ti);
        if (!startsWith(t, pref)) break;
        if (wc.match(t)) {
          inZ.cursor(pointer(i, ti));
          final int s = size(i, ti);
          for (int d = 0; d < s; d++) {
            pr.add(inZ.readNum());
            ps.add(inZ.readNum());
          }
        }
        i += ti + ENTRY;
      }
    }
    return iter(new FTCache(pr, ps), token);
  }
Example #6
0
  /**
   * Performs a fuzzy search for the specified token with a maximum number of errors.
   *
   * @param token token to look for
   * @param k number of errors allowed
   * @return iterator
   */
  private synchronized IndexIterator fuzzy(final byte[] token, final int k) {
    FTIndexIterator it = FTIndexIterator.FTEMPTY;
    final int tokl = token.length, tl = tp.length;
    final int e = Math.min(tl - 1, tokl + k);
    int s = Math.max(1, tokl - k) - 1;

    while (++s <= e) {
      int p = tp[s];
      if (p == -1) continue;
      int t = s + 1, r = -1;
      while (t < tl && r == -1) r = tp[t++];
      while (p < r) {
        if (ls.similar(inY.readBytes(p, s), token, k)) {
          it = FTIndexIterator.union(iter(pointer(p, s), size(p, s), inZ, token), it);
        }
        p += s + ENTRY;
      }
    }
    return it;
  }
Example #7
0
  @Override
  public synchronized int costs(final IndexToken it) {
    final byte[] tok = it.get();
    if (tok.length > data.meta.maxlen) return Integer.MAX_VALUE;

    // estimate costs for queries which stretch over multiple index entries
    final FTOpt opt = ((FTLexer) it).ftOpt();
    if (opt.is(FZ) || opt.is(WC)) return Math.max(1, data.meta.size >> 4);

    return entry(tok).size;
  }
Example #8
0
  /**
   * Inserts a data instance at the specified pre value. Note that the specified data instance must
   * differ from this instance.
   *
   * @param ipre value at which to insert new data
   * @param ipar parent pre value of node
   * @param clip data clip
   */
  public final void insert(final int ipre, final int ipar, final DataClip clip) {
    meta.update();

    // update value and document indexes
    if (meta.updindex) indexBegin();
    resources.insert(ipre, clip);

    final int dsize = clip.size();
    final int buf = Math.min(dsize, IO.BLOCKSIZE >> IO.NODEPOWER);
    // resize buffer to cache more entries
    buffer(buf);

    // find all namespaces in scope to avoid duplicate declarations
    final TokenMap nsScope = nspaces.scope(ipar, this);

    // loop through all entries
    final IntList preStack = new IntList();
    final NSNode nsRoot = nspaces.current();
    final HashSet<NSNode> newNodes = new HashSet<NSNode>();
    final IntList flagPres = new IntList();

    // indicates if database only contains a dummy node
    final Data data = clip.data;
    int c = 0;
    for (int dpre = clip.start; dpre < clip.end; ++dpre, ++c) {
      if (c != 0 && c % buf == 0) insert(ipre + c - buf);

      final int pre = ipre + c;
      final int dkind = data.kind(dpre);
      final int dpar = data.parent(dpre, dkind);
      // ipar < 0 if document nodes on top level are added
      final int dis = dpar >= 0 ? dpre - dpar : ipar >= 0 ? pre - ipar : 0;
      final int par = dis == 0 ? -1 : pre - dis;

      if (c == 0) nspaces.root(par, this);

      while (!preStack.isEmpty() && preStack.peek() > par) nspaces.close(preStack.pop());

      switch (dkind) {
        case DOC:
          // add document
          nspaces.prepare();
          final int s = data.size(dpre, dkind);
          doc(pre, s, data.text(dpre, true));
          meta.ndocs++;
          preStack.push(pre);
          break;
        case ELEM:
          // add element
          nspaces.prepare();
          boolean ne = false;
          if (data.nsFlag(dpre)) {
            final Atts at = data.ns(dpre);
            for (int a = 0; a < at.size(); ++a) {
              // see if prefix has been declared/ is part of current ns scope
              final byte[] old = nsScope.get(at.name(a));
              if (old == null || !eq(old, at.value(a))) {
                // we have to keep track of all new NSNodes that are added
                // to the Namespace structure, as their pre values must not
                // be updated. I.e. if an NSNode N with pre value 3 existed
                // prior to inserting and two new nodes are inserted at
                // location pre == 3 we have to make sure N and only N gets
                // updated.
                newNodes.add(nspaces.add(at.name(a), at.value(a), pre));
                ne = true;
              }
            }
          }
          byte[] nm = data.name(dpre, dkind);
          elem(
              dis,
              tagindex.index(nm, null, false),
              data.attSize(dpre, dkind),
              data.size(dpre, dkind),
              nspaces.uri(nm, true),
              ne);
          preStack.push(pre);
          break;
        case TEXT:
        case COMM:
        case PI:
          // add text
          text(pre, dis, data.text(dpre, true), dkind);
          break;
        case ATTR:
          // add attribute
          nm = data.name(dpre, dkind);
          // check if prefix already in nsScope or not
          final byte[] attPref = prefix(nm);
          // check if prefix of attribute has already been declared, otherwise
          // add declaration to parent node
          if (data.nsFlag(dpre) && nsScope.get(attPref) == null) {
            nspaces.add(
                par,
                preStack.isEmpty() ? -1 : preStack.peek(),
                attPref,
                data.nspaces.uri(data.uri(dpre, dkind)),
                this);
            // save pre value to set ns flag later for this node. can't be done
            // here as direct table access would interfere with the buffer
            flagPres.add(par);
          }
          attr(
              pre,
              dis,
              atnindex.index(nm, null, false),
              data.text(dpre, false),
              nspaces.uri(nm, false),
              false);
          break;
      }
    }
    // finalize and update namespace structure
    while (!preStack.isEmpty()) nspaces.close(preStack.pop());
    nspaces.root(nsRoot);

    if (bp != 0) insert(ipre + c - 1 - (c - 1) % buf);
    // reset buffer to old size
    buffer(1);

    // set ns flags
    for (int f = 0; f < flagPres.size(); f++) {
      final int fl = flagPres.get(f);
      table.write2(fl, 1, name(fl) | 1 << 15);
    }

    // increase size of ancestors
    int p = ipar;
    while (p >= 0) {
      final int k = kind(p);
      size(p, k, size(p, k) + dsize);
      p = parent(p, k);
    }

    if (meta.updindex) {
      // add the entries to the ID -> PRE mapping:
      idmap.insert(ipre, id(ipre), dsize);
      indexEnd();
    }

    if (!cache) updateDist(ipre + dsize, dsize);

    // propagate PRE value shifts to namespaces
    if (ipar != -1) nspaces.insert(ipre, dsize, newNodes);
  }