Ejemplo n.º 1
0
  /**
   * Resolves node ids to pre values and returns them in ascending order.
   *
   * <p>If the database uses an updatable index ({@code meta.updindex}), the lookup is delegated
   * to the id map. Otherwise, each id is resolved via {@code preold} and the results are sorted.
   *
   * <p>NOTE(review): the fallback loop runs from {@code off} while the index is smaller than
   * {@code len}, i.e. {@code len} acts as an exclusive end index there. This only matches
   * "number of ids" when {@code off} is 0 - confirm against the callers and the id map.
   *
   * @param ids unique node ids
   * @param off start offset
   * @param len number of ids (see review note above)
   * @return sorted pre values
   */
  public final int[] pre(final int[] ids, final int off, final int len) {
    if (!meta.updindex) {
      // no id -> pre mapping available: resolve every id individually
      final IntList pres = new IntList(ids.length);
      int i = off;
      while (i < len) pres.add(preold(ids[i++]));
      return pres.sort().toArray();
    }
    return idmap.pre(ids, off, len);
  }
Ejemplo n.º 2
0
  /**
   * Stores a text or attribute value and returns its reference.
   *
   * <p>Values that can be represented as a simple integer are inlined into the returned
   * reference (flagged with {@code IO.OFFNUM}). All other values are appended to the attribute
   * value store or the text store; the returned offset is flagged with {@code IO.OFFCOMP} if
   * the packer returned a different array than the one passed in.
   *
   * <p>If the database is updatable and the matching index is enabled, the id is additionally
   * buffered under its value so that it can be indexed later.
   *
   * @param pre pre value (not used in this implementation)
   * @param id node id
   * @param value value to be stored
   * @param kind node kind
   * @return inlined value or offset of the stored value, including flag bits
   */
  @Override
  protected long index(final int pre, final int id, final byte[] value, final int kind) {
    final boolean attr = kind == ATTR;
    final DataAccess store = attr ? values : texts;

    // choose the index buffer; document names are never indexed
    final TokenObjMap<IntList> map;
    if (attr) {
      map = meta.attrindex ? atvBuffer : null;
    } else {
      map = meta.textindex && kind != DOC ? txtBuffer : null;
    }

    // remember the id for the value; the index itself is updated later
    if (map != null && meta.updindex && value.length <= meta.maxlen) {
      IntList ids = map.get(value);
      if (ids == null) {
        ids = new IntList(1);
        map.put(value, ids);
      }
      ids.add(id);
    }

    // values that are simple integers are inlined instead of being written to disk
    final long inlined = toSimpleInt(value);
    if (inlined == Integer.MIN_VALUE) {
      // append the (possibly packed) value to the store
      final long offset = store.length();
      final byte[] packed = COMP.get().pack(value);
      store.writeToken(offset, packed);
      if (packed == value) return offset;
      return offset | IO.OFFCOMP;
    }
    return inlined | IO.OFFNUM;
  }
Ejemplo n.º 3
0
  /**
   * Computes the Soundex value for the specified codepoints.
   *
   * <p>Only characters that are ASCII letters after upper-casing are considered. The result
   * always has four entries: the first letter followed by up to three codes, padded with
   * {@code '0'}. If the input contains no letters, all four entries are {@code '0'}.
   *
   * @param cps codepoint array
   * @param mapping mapping for the 26 ASCII letters
   * @return Soundex value
   * @throws QueryException if Soundex mapping is shorter or longer than 26 characters
   */
  public static int[] encode(final int[] cps, final int[] mapping) throws QueryException {
    // the mapping must cover exactly the letters A-Z
    if (mapping.length != 26) throw new QueryException("Soundex mapping must have 26 characters");

    // keep upper-cased ASCII letters only
    final IntList letters = new IntList(cps.length);
    for (int i = 0; i < cps.length; i++) {
      final int upper = uc(cps[i]);
      if (upper < 'A' || upper > 'Z') continue;
      letters.add(upper);
    }

    final int[] out = {'0', '0', '0', '0'};
    final int[] in = letters.finish();
    if (in.length == 0) return out;

    // first letter is copied verbatim; its code only suppresses an identical follower
    out[0] = in[0];
    int lastCode = map(in, 0, mapping);
    int op = 1;
    for (int ip = 1; ip < in.length && op < 4; ip++) {
      final int code = map(in, ip, mapping);
      // a code of 0 is skipped without resetting the last code
      if (code == 0) continue;
      if (code != '0' && code != lastCode) out[op++] = code;
      lastCode = code;
    }
    return out;
  }
Ejemplo n.º 4
0
  /**
   * Performs a wildcard search for the specified token.
   *
   * <p>All token lengths between the length of the wildcard prefix and the maximum length
   * reported by the wildcard (capped by the size of {@code tp}) are scanned. For each matching
   * token, the pre values and positions are collected and returned via a cached iterator.
   *
   * @param token token to look for
   * @return iterator
   */
  private synchronized IndexIterator wc(final byte[] token) {
    final FTIndexIterator empty = FTIndexIterator.FTEMPTY;
    final FTWildcard wildcard = new FTWildcard(token);
    // invalid wildcard expression: no results
    if (!wildcard.parse()) return empty;

    final IntList pres = new IntList();
    final IntList poss = new IntList();
    final byte[] prefix = wildcard.prefix();
    final int prefLen = prefix.length, sizes = tp.length;
    final int maxLen = Math.min(sizes - 1, wildcard.max());

    for (int len = prefLen; len <= maxLen; len++) {
      // -1: no entry for this token length
      final int start = tp[len];
      if (start == -1) continue;

      // the next assigned entry marks the end of the current range
      int end = -1;
      for (int next = len + 1; next < sizes && end == -1; next++) end = tp[next];

      int pos = find(prefix, start, end, len);
      while (pos < end) {
        final byte[] tok = inY.readBytes(pos, len);
        // stop as soon as the prefix does not match anymore
        if (!startsWith(tok, prefix)) break;
        if (wildcard.match(tok)) {
          inZ.cursor(pointer(pos, len));
          final int count = size(pos, len);
          for (int left = count; left > 0; left--) {
            pres.add(inZ.readNum());
            poss.add(inZ.readNum());
          }
        }
        pos += len + ENTRY;
      }
    }
    return iter(new FTCache(pres, poss), token);
  }
Ejemplo n.º 5
0
 /**
  * Collects the index keys and ids of all nodes in the specified pre range and removes them
  * from the value indexes. Nothing is done if neither the text nor the attribute index is
  * enabled.
  *
  * @param pre pre value of the first node
  * @param size number of nodes
  */
 @Override
 protected void indexDelete(final int pre, final int size) {
   final boolean textIndex = meta.textindex, attrIndex = meta.attrindex;
   if (!textIndex && !attrIndex) return;

   // collect all keys and ids
   indexBegin();
   for (int p = pre, end = pre + size; p < end; ++p) {
     final int k = kind(p);
     // only attribute, text, comment and processing instruction nodes are relevant
     final boolean text = k == TEXT || k == COMM || k == PI;
     if ((!textIndex || !text) && (!attrIndex || k != ATTR)) continue;

     // values exceeding the maximum length are skipped
     final byte[] key = text(p, text);
     if (key.length > meta.maxlen) continue;

     final TokenObjMap<IntList> buffer = text ? txtBuffer : atvBuffer;
     IntList ids = buffer.get(key);
     if (ids == null) {
       ids = new IntList(1);
       buffer.put(key, ids);
     }
     ids.add(id(p));
   }
   indexDelete();
 }
Ejemplo n.º 6
0
  /**
   * Returns an iterator for all entries of the specified token. An empty iterator is returned
   * if the token is unknown (id 0). For updatable databases, the stored ids are converted to
   * pre values and sorted; otherwise, the stored values are returned as they are.
   *
   * @param token index token
   * @return iterator over pre values
   */
  @Override
  public IndexIterator iter(final IndexToken token) {
    final int id = values.id(token.get());
    if (id == 0) return IndexIterator.EMPTY;

    final int count = lenList.get(id);
    final int[] ids = idsList.get(id);
    final int[] pres;
    if (!data.meta.updindex) {
      pres = ids;
    } else {
      // ids must be mapped to pre values, which may change their order
      final IntList list = new IntList();
      for (int i = 0; i < count; ++i) list.add(data.pre(ids[i]));
      pres = list.sort().finish();
    }

    return new IndexIterator() {
      /** Position of the next result. */
      int pos;

      @Override
      public boolean more() {
        return pos < count;
      }

      @Override
      public int pre() {
        return pres[pos++];
      }

      @Override
      public int size() {
        return count;
      }
    };
  }
Ejemplo n.º 7
0
 /**
  * Constructor. Builds one sort key per entry, with the pre value in the upper 32 bits and the
  * position combined into the lower bits, and creates the resulting order.
  *
  * <p>NOTE(review): the 64-bit key is stored in a {@code double}, which represents integers
  * exactly only up to 2^53 - confirm that the value ranges in use cannot collide.
  *
  * @param pr pre values
  * @param ps positions
  */
 private FTCache(final IntList pr, final IntList ps) {
   final int count = pr.size();
   final double[] keys = new double[count];
   for (int i = 0; i < count; i++) {
     final long high = (long) pr.get(i) << 32;
     keys[i] = high | ps.get(i);
   }
   order = Array.createOrder(keys, true);
   pre = pr;
   pos = ps;
 }
Ejemplo n.º 8
0
 /**
  * Adds all entries of the specified cache to the index. Keys without ids are skipped; the ids
  * of all other keys are sorted before they are added. The index is finished afterwards.
  *
  * @param cache value cache
  */
 @Override
 public void add(final ValueCache cache) {
   for (final byte[] key : cache) {
     final IntList ids = cache.ids(key);
     if (ids.isEmpty()) continue;
     final int[] sorted = ids.sort().finish();
     add(key, sorted);
   }
   finish();
 }
Ejemplo n.º 9
0
 /**
  * Removes values from the index. The stored ids of the key are compacted in place: both the
  * stored ids and the values to be removed are traversed in parallel, and all ids that do not
  * match the next value to be removed are kept. The id array is dropped if no ids remain.
  *
  * @param key key
  * @param vals sorted values
  */
 void delete(final byte[] key, final int... vals) {
   final int id = values.id(key);
   final int remove = vals.length;
   final int oldLen = lenList.get(id);
   final int[] ids = idsList.get(id);

   int kept = 0, next = 0;
   for (int i = 0; i < oldLen; i++) {
     final int current = ids[i];
     if (next != remove && current == vals[next]) {
       // value is to be removed: advance to the next candidate
       next++;
     } else {
       ids[kept++] = current;
     }
   }

   final int newLen = oldLen - remove;
   lenList.set(id, newLen);
   if (newLen == 0) idsList.set(id, null);
 }
Ejemplo n.º 10
0
 /**
  * Returns an iterator for an index entry. The cursor of the data source is moved to the given
  * offset, and {@code size} pairs of numbers (pre value, position) are read from there.
  *
  * @param off offset on entries
  * @param size number of id/pos entries
  * @param da data source
  * @param token index token
  * @return iterator
  */
 private static FTIndexIterator iter(
     final long off, final int size, final DataAccess da, final byte[] token) {
   da.cursor(off);
   final IntList pres = new IntList(size);
   final IntList positions = new IntList(size);
   for (int left = size; left > 0; left--) {
     pres.add(da.readNum());
     positions.add(da.readNum());
   }
   final FTCache cache = new FTCache(pres, positions);
   return iter(cache, token);
 }
Ejemplo n.º 11
0
 /**
  * Finishes the index creation. Sorts the used part of every id array that has been flagged
  * for reordering, and discards the flags afterwards. Does nothing if no flags exist.
  */
 void finish() {
   if (reorder != null) {
     for (int id = 1; id < reorder.size(); id++) {
       if (!reorder.get(id)) continue;
       final int[] ids = idsList.get(id);
       Arrays.sort(ids, 0, lenList.get(id));
     }
     reorder = null;
   }
 }
Ejemplo n.º 12
0
 /**
  * Returns the actual number of indexed entries, i.e. the number of slots with at least one
  * stored id. Slot 0 is skipped.
  *
  * @return number of non-empty entries
  */
 @Override
 public int size() {
   int count = 0;
   // the loop must be bounded by the number of slots, not by the running counter
   final int slots = lenList.size();
   for (int c = 1; c < slots; c++) {
     if (lenList.get(c) > 0) count++;
   }
   return count;
 }
Ejemplo n.º 13
0
  /**
   * Lists resources of the specified database. The database name is taken from the first and
   * the optional path from the second command argument. XML documents and binary resources
   * are added to a table, which is sorted and printed.
   *
   * @return success flag: {@code true} if the table was printed, the result of {@code error}
   *     if the database name is invalid or an I/O error occurs while accessing the database
   * @throws IOException I/O exception
   */
  private boolean listDB() throws IOException {
    final String db = args[0];
    final String path = args[1] != null ? args[1] : "";
    if (!Databases.validName(db)) return error(NAME_INVALID_X, db);

    final Table table = new Table();
    table.description = RESOURCES;
    table.header.add(INPUT_PATH);
    table.header.add(TYPE);
    table.header.add(MimeTypes.CONTENT_TYPE);
    table.header.add(SIZE);

    try {
      final Data data = Open.open(db, context);
      try {
        // add xml documents
        final Resources res = data.resources;
        final IntList il = res.docs(path);
        final int ds = il.size();
        for (int i = 0; i < ds; i++) {
          final int pre = il.get(i);
          final TokenList tl = new TokenList(3);
          final byte[] file = data.text(pre, true);
          tl.add(file);
          tl.add(DataText.M_XML);
          tl.add(MimeTypes.APP_XML);
          tl.add(data.size(pre, Data.DOC));
          table.contents.add(tl);
        }
        // add binary resources
        for (final byte[] file : res.binaries(path)) {
          final String f = string(file);
          final TokenList tl = new TokenList(3);
          tl.add(file);
          tl.add(DataText.M_RAW);
          tl.add(MimeTypes.get(f));
          tl.add(data.meta.binary(f).length());
          table.contents.add(tl);
        }
      } finally {
        // release the database even if collecting the resources fails
        Close.close(data, context);
      }
    } catch (final IOException ex) {
      return error(Util.message(ex));
    }
    out.println(table.sort().finish());
    return true;
  }
Ejemplo n.º 14
0
  /**
   * Registers the update operations for replacing a resource at the specified path.
   *
   * <p>A binary item is stored as a binary resource. Any other item is added as a new document
   * (after scheduling the deletion of an existing binary resource at that path); if documents
   * exist at the path and the first one is not located at pre value 0, the first document is
   * replaced instead. All remaining documents at the path are scheduled for deletion.
   *
   * @param qc query context
   * @param ii input info
   * @return always {@code null}
   * @throws QueryException if the path does not address a valid resource target, or if one of
   *     the arguments cannot be evaluated
   */
  @Override
  public Item item(final QueryContext qc, final InputInfo ii) throws QueryException {
    final Data data = checkData(qc);
    final String path = path(1, qc);
    final Item item = toItem(exprs[2], qc);
    final Options opts = toOptions(3, Q_OPTIONS, new Options(), qc);

    final Updates updates = qc.resources.updates();
    final IntList docs = data.resources.docs(path);

    // the path must be a valid target for a binary resource and must not be a directory
    final IOFile bin = data.meta.binary(path);
    if (bin == null || bin.isDir()) throw BXDB_REPLACE_X.get(info, path);

    // index of the first document that is to be deleted
    int first = 0;
    if (item instanceof Bin) {
      updates.add(new DBStore(data, path, item, info), qc);
    } else {
      // a document takes the place of an existing binary resource
      if (bin.exists()) updates.add(new DBDelete(data, path, info), qc);
      final NewInput input = checkInput(item, token(path));
      if (!docs.isEmpty() && docs.get(0) != 0) {
        updates.add(new ReplaceDoc(docs.get(0), data, input, opts, qc, info), qc);
        first = 1;
      } else {
        // no replacement of first document (because of TableDiskAccess#insert, used > 0, pre = 0)
        updates.add(new DBAdd(data, input, opts, qc, info), qc);
      }
    }

    // delete old documents
    final int count = docs.size();
    for (int d = first; d < count; d++) {
      updates.add(new DeleteNode(docs.get(d), data, info), qc);
    }
    return null;
  }
Ejemplo n.º 15
0
  /**
   * Adds values to the index.
   *
   * <p>If no ids are stored for the key yet, the passed on array is stored as is (it is not
   * copied). Otherwise, the values are appended to the existing ids; if the last existing id
   * is larger than the first new value, the entry is flagged for reordering.
   *
   * @param key key to be indexed
   * @param vals sorted values
   */
  void add(final byte[] key, final int... vals) {
    // token index: register the key. otherwise, look up the existing key
    final int id;
    if (type == IndexType.TOKEN) {
      id = values.put(key);
    } else {
      id = values.id(key);
    }
    final int added = vals.length;

    // updatable index: make sure that both lists provide a slot for the id
    final int slots = id + 1;
    while (idsList.size() < slots) idsList.add(null);
    if (lenList.size() < slots) lenList.set(id, 0);

    final int oldLen = lenList.get(id), newLen = oldLen + added;
    final int[] current = idsList.get(id);
    final int[] merged;
    if (current != null) {
      // enlarge the array if required, and append the new values
      merged = current.length < newLen ? Arrays.copyOf(current, Array.newSize(newLen)) : current;
      System.arraycopy(vals, 0, merged, oldLen, added);
      // ids are not in ascending order anymore: remember to sort them later
      if (merged[oldLen - 1] > vals[0]) {
        if (reorder == null) reorder = new BoolList(values.size());
        reorder.set(id, true);
      }
    } else {
      merged = vals;
    }
    idsList.set(id, merged);
    lenList.set(id, newLen);
  }
Ejemplo n.º 16
0
 /**
  * Returns a string representation of the index structure. One line is created for every
  * non-empty entry, listing its slot number and its ids.
  *
  * @param all include database contents (keys and pre values) in the representation. During
  *     updates, database lookups must be avoided, as the data structures will be inconsistent.
  * @return string
  */
 public String toString(final boolean all) {
   final TokenBuilder tb = new TokenBuilder();
   tb.addExt(type).add(" INDEX, '").add(data.meta.name).add("':\n");

   for (int slot = 1, slots = lenList.size(); slot < slots; slot++) {
     final int count = lenList.get(slot);
     if (count != 0) {
       final int[] ids = idsList.get(slot);
       tb.add("  ").addInt(slot);
       // the key is looked up in the database via the first id
       if (all) {
         tb.add(", key: \"").add(data.text(data.pre(ids[0]), type == IndexType.TEXT)).add('"');
       }
       tb.add(", ids");
       if (all) tb.add("/pres");
       tb.add(": ");

       int n = 0;
       while (n < count) {
         if (n > 0) tb.add(",");
         final int value = ids[n++];
         tb.addInt(value);
         if (all) tb.add('/').addInt(data.pre(value));
       }
       tb.add("\n");
     }
   }
   return tb.toString();
 }
Ejemplo n.º 17
0
  /**
   * Returns information on the index: the structure type, the indexed names, and statistics
   * on the entries with at least one occurrence.
   *
   * @param options main options (supplies the {@code MAXSTAT} value for the statistics)
   * @return index information
   */
  @Override
  public byte[] info(final MainOptions options) {
    final TokenBuilder tb = new TokenBuilder();
    tb.add(LI_STRUCTURE).add(HASH).add(NL);
    tb.add(LI_NAMES).add(data.meta.names(type)).add(NL);

    final IndexStats stats = new IndexStats(options.get(MainOptions.MAXSTAT));
    for (int id = 1, last = values.size(); id <= last; id++) {
      final int occ = lenList.get(id);
      // skip empty entries and entries rejected by the statistics
      if (occ <= 0 || !stats.adding(occ)) continue;
      stats.add(values.key(id), occ);
    }
    stats.print(tb);
    return tb.finish();
  }
Ejemplo n.º 18
0
 /**
  * Returns the costs for looking up the specified token, which is the number of ids stored
  * for it.
  *
  * @param it index token
  * @return number of stored ids
  */
 @Override
 public int costs(final IndexToken it) {
   final int id = values.id(it.get());
   return lenList.get(id);
 }
Ejemplo n.º 19
0
  /**
   * Inserts a data instance at the specified pre value. Note that the specified data instance must
   * differ from this instance.
   *
   * <p>The nodes {@code clip.start} (inclusive) to {@code clip.end} (exclusive) of the clip's
   * data are copied one by one. While copying, namespace declarations that are not yet part of
   * the scope of the target parent are added. Afterwards, the namespace flags of affected
   * parents are set, the sizes of all ancestors are increased by the number of inserted nodes,
   * and the id mapping (updatable databases only), the distances and the namespace structure
   * are updated.
   *
   * @param ipre value at which to insert new data
   * @param ipar parent pre value of node
   * @param clip data clip
   */
  public final void insert(final int ipre, final int ipar, final DataClip clip) {
    meta.update();

    // update value and document indexes
    if (meta.updindex) indexBegin();
    resources.insert(ipre, clip);

    final int dsize = clip.size();
    // number of nodes per chunk: limited by the clip size and by the number of nodes per block
    final int buf = Math.min(dsize, IO.BLOCKSIZE >> IO.NODEPOWER);
    // resize buffer to cache more entries
    buffer(buf);

    // find all namespaces in scope to avoid duplicate declarations
    final TokenMap nsScope = nspaces.scope(ipar, this);

    // loop through all entries
    // stack with pre values of inserted document/element nodes whose namespace scope is open
    final IntList preStack = new IntList();
    // current namespace root; restored after all nodes have been processed
    final NSNode nsRoot = nspaces.current();
    // namespace nodes created by this insertion (passed on to nspaces.insert at the end)
    final HashSet<NSNode> newNodes = new HashSet<NSNode>();
    // pre values of nodes for which the namespace flag is set after the loop
    final IntList flagPres = new IntList();

    // source data of the clip; c counts the nodes that have been processed so far
    final Data data = clip.data;
    int c = 0;
    for (int dpre = clip.start; dpre < clip.end; ++dpre, ++c) {
      // a full chunk has been processed: pass on the pre value at which the chunk started
      // (presumably writes the buffered entries to the table - confirm against insert(int))
      if (c != 0 && c % buf == 0) insert(ipre + c - buf);

      // pre value of the node in this database
      final int pre = ipre + c;
      final int dkind = data.kind(dpre);
      final int dpar = data.parent(dpre, dkind);
      // ipar < 0 if document nodes on top level are added
      // distance to the parent: taken from the source if the node has a parent there,
      // otherwise relative to ipar; 0 if there is no parent at all
      final int dis = dpar >= 0 ? dpre - dpar : ipar >= 0 ? pre - ipar : 0;
      final int par = dis == 0 ? -1 : pre - dis;

      // first node: set the namespace root to the parent of the inserted nodes
      if (c == 0) nspaces.root(par, this);

      // close the scopes of all stacked nodes with a pre value larger than the current parent
      while (!preStack.isEmpty() && preStack.peek() > par) nspaces.close(preStack.pop());

      switch (dkind) {
        case DOC:
          // add document
          nspaces.prepare();
          final int s = data.size(dpre, dkind);
          doc(pre, s, data.text(dpre, true));
          meta.ndocs++;
          preStack.push(pre);
          break;
        case ELEM:
          // add element
          nspaces.prepare();
          // becomes true as soon as a namespace has been added for this element
          boolean ne = false;
          if (data.nsFlag(dpre)) {
            final Atts at = data.ns(dpre);
            for (int a = 0; a < at.size(); ++a) {
              // see if prefix has been declared/ is part of current ns scope
              final byte[] old = nsScope.get(at.name(a));
              if (old == null || !eq(old, at.value(a))) {
                // we have to keep track of all new NSNodes that are added
                // to the Namespace structure, as their pre values must not
                // be updated. I.e. if an NSNode N with pre value 3 existed
                // prior to inserting and two new nodes are inserted at
                // location pre == 3 we have to make sure N and only N gets
                // updated.
                newNodes.add(nspaces.add(at.name(a), at.value(a), pre));
                ne = true;
              }
            }
          }
          // nm is declared here and reused by the ATTR branch below
          byte[] nm = data.name(dpre, dkind);
          elem(
              dis,
              tagindex.index(nm, null, false),
              data.attSize(dpre, dkind),
              data.size(dpre, dkind),
              nspaces.uri(nm, true),
              ne);
          preStack.push(pre);
          break;
        case TEXT:
        case COMM:
        case PI:
          // add text
          text(pre, dis, data.text(dpre, true), dkind);
          break;
        case ATTR:
          // add attribute
          nm = data.name(dpre, dkind);
          // check if prefix already in nsScope or not
          final byte[] attPref = prefix(nm);
          // check if prefix of attribute has already been declared, otherwise
          // add declaration to parent node
          if (data.nsFlag(dpre) && nsScope.get(attPref) == null) {
            nspaces.add(
                par,
                preStack.isEmpty() ? -1 : preStack.peek(),
                attPref,
                data.nspaces.uri(data.uri(dpre, dkind)),
                this);
            // save pre value to set ns flag later for this node. can't be done
            // here as direct table access would interfere with the buffer
            flagPres.add(par);
          }
          attr(
              pre,
              dis,
              atnindex.index(nm, null, false),
              data.text(dpre, false),
              nspaces.uri(nm, false),
              false);
          break;
      }
    }
    // finalize and update namespace structure
    while (!preStack.isEmpty()) nspaces.close(preStack.pop());
    nspaces.root(nsRoot);

    // pass on the start of the last chunk; bp is maintained outside this method
    // (presumably the number of buffered entries that are still pending - confirm)
    if (bp != 0) insert(ipre + c - 1 - (c - 1) % buf);
    // reset buffer to old size
    buffer(1);

    // set ns flags
    for (int f = 0; f < flagPres.size(); f++) {
      final int fl = flagPres.get(f);
      // bit 15 is set in the value returned by name(fl) and written back to the table
      table.write2(fl, 1, name(fl) | 1 << 15);
    }

    // increase size of ancestors
    int p = ipar;
    while (p >= 0) {
      final int k = kind(p);
      size(p, k, size(p, k) + dsize);
      p = parent(p, k);
    }

    if (meta.updindex) {
      // add the entries to the ID -> PRE mapping:
      idmap.insert(ipre, id(ipre), dsize);
      indexEnd();
    }

    // update the distances of the nodes following the inserted ones (skipped if cache is set)
    if (!cache) updateDist(ipre + dsize, dsize);

    // propagate PRE value shifts to namespaces
    if (ipar != -1) nspaces.insert(ipre, dsize, newNodes);
  }
Ejemplo n.º 20
0
  /**
   * Formats the specified number and returns a string representation.
   *
   * <p>The result is assembled from an optional minus sign, the prefix of the chosen picture,
   * the integer part, an optional fractional part, an optional exponent, and the suffix of the
   * picture. If two pictures are supplied, the second one is used for negative numbers, and no
   * minus sign is added.
   *
   * @param item item
   * @param pics pictures
   * @param ii input info
   * @return formatted number
   * @throws QueryException query exception
   */
  private byte[] format(final ANum item, final Picture[] pics, final InputInfo ii)
      throws QueryException {

    // Rule 1: return results for NaN
    final double d = item.dbl(ii);
    if (Double.isNaN(d)) return nan;

    // Rule 2: check if value is negative (smaller than zero or -0)
    // -0 equals 0 numerically; it can only be detected via its bit pattern (sign bit only)
    final boolean neg = d < 0 || d == 0 && Double.doubleToLongBits(d) == Long.MIN_VALUE;
    // negative numbers are formatted with the second picture, provided that two pictures exist
    final Picture pic = pics[neg && pics.length == 2 ? 1 : 0];
    // codepoints of the final result, the integer part and the fractional part
    final IntList res = new IntList(), intgr = new IntList(), fract = new IntList();
    int exp = 0;

    // Rule 3: percent/permille
    ANum num = item;
    if (pic.pc) num = (ANum) Calc.MULT.ev(num, Int.get(100), ii);
    if (pic.pm) num = (ANum) Calc.MULT.ev(num, Int.get(1000), ii);

    if (Double.isInfinite(num.dbl(ii))) {
      // Rule 4: infinity
      intgr.add(new TokenParser(inf).toArray());
    } else {
      // Rule 5: exponent
      if (pic.minExp != 0 && d != 0) {
        BigDecimal dec = num.dec(ii).abs().stripTrailingZeros();
        // number of integer digits (values >= 1), or a value <= 0 derived from the number of
        // multiplications by ten needed to reach a value >= 1 (values < 1)
        int scl = 0;
        if (dec.compareTo(BigDecimal.ONE) >= 0) {
          scl = dec.setScale(0, RoundingMode.HALF_DOWN).precision();
        } else {
          while (dec.compareTo(BigDecimal.ONE) < 0) {
            dec = dec.multiply(BigDecimal.TEN);
            scl--;
          }
          scl++;
        }
        // exponent: difference between the scale and the minimum size given by the picture
        exp = scl - pic.min[0];
        if (exp != 0) {
          // scale the number by 10^-exp
          final BigDecimal n = BigDecimal.TEN.pow(Math.abs(exp));
          num = (ANum) Calc.MULT.ev(num, Dec.get(exp > 0 ? BigDecimal.ONE.divide(n) : n), ii);
        }
      }
      // round to the maximum number of fractional digits, and drop the sign
      num = num.round(pic.maxFrac, true).abs();

      // convert positive number to string
      // doubles and floats are converted to decimals first
      final String s =
          (num instanceof Dbl || num instanceof Flt
                  ? Dec.get(BigDecimal.valueOf(num.dbl(ii)))
                  : num)
              .toString();

      // integer/fractional separator
      final int sep = s.indexOf('.');

      // create integer part
      final int sl = s.length();
      // number of integer digits in the string
      final int il = sep == -1 ? sl : sep;
      // pad with leading zero digits up to the minimum size
      for (int i = il; i < pic.min[0]; ++i) intgr.add(zero);
      // fractional number: skip leading 0
      if (!s.startsWith("0.") || pic.min[0] > 0) {
        // map ASCII digits to the digit family starting at the configured zero digit
        for (int i = 0; i < il; i++) intgr.add(zero + s.charAt(i) - '0');
      }

      // squeeze in grouping separators
      if (pic.group[0].length == 1 && pic.group[0][0] > 0) {
        // regular pattern with repeating separators
        for (int p = intgr.size() - (neg ? 2 : 1); p > 0; --p) {
          if (p % pic.group[0][0] == 0) intgr.insert(intgr.size() - p, grouping);
        }
      } else {
        // irregular pattern, or no separators at all
        final int gl = pic.group[0].length;
        for (int g = 0; g < gl; ++g) {
          final int pos = intgr.size() - pic.group[0][g];
          if (pos > 0) intgr.insert(pos, grouping);
        }
      }

      // create fractional part
      // number of fractional digits in the string
      final int fl = sep == -1 ? 0 : sl - il - 1;
      if (fl != 0) for (int i = sep + 1; i < sl; i++) fract.add(zero + s.charAt(i) - '0');
      // pad with trailing zero digits up to the minimum size
      for (int i = fl; i < pic.min[1]; ++i) fract.add(zero);

      // squeeze in grouping separators in a reverse manner
      final int ul = fract.size();
      for (int p = pic.group[1].length - 1; p >= 0; p--) {
        final int pos = pic.group[1][p];
        if (pos < ul) fract.insert(pos, grouping);
      }
    }

    // add minus sign
    // (not added if a separate picture for negative numbers was supplied)
    if (neg && pics.length != 2) res.add(minus);
    // add prefix and integer part
    res.add(pic.prefSuf[0].toArray()).add(intgr.finish());
    // add fractional part
    if (!fract.isEmpty()) res.add(decimal).add(fract.finish());
    // add exponent
    if (pic.minExp != 0) {
      res.add(exponent);
      if (exp < 0) res.add(minus);
      final String s = Integer.toString(Math.abs(exp));
      final int sl = s.length();
      // pad the exponent with leading zero digits up to its minimum size
      for (int i = sl; i < pic.minExp; i++) res.add(zero);
      for (int i = 0; i < sl; i++) res.add(zero + s.charAt(i) - '0');
    }
    // add suffix
    res.add(pic.prefSuf[1].toArray());
    return new TokenBuilder(res.finish()).finish();
  }