Beispiel #1
0
  /**
   * Looks up the pointer of the entry that stores the specified token.
   * Only the entries registered for the token's length are searched.
   *
   * @param token token to be found
   * @return pointer to the matching entry, or {@code -1} if the token was not found
   */
  private int token(final byte[] token) {
    final int len = token.length;
    // lower limit: pointer registered for this token length
    int lo = tp[len];
    if (lo == -1) return -1;

    // upper limit: first registered pointer of a larger token length
    int next = len + 1;
    int hi = tp[next];
    while (hi == -1) hi = tp[++next];
    final int limit = hi;

    // binary search; every probed position is a multiple of the entry width away from lo
    final int width = len + ENTRY;
    while (lo < hi) {
      final int mid = lo + (hi - lo >> 1) / width * width;
      final int cmp = diff(inY.readBytes(mid, len), token);
      if (cmp == 0) return mid;
      if (cmp > 0) hi = mid - width;
      else lo = mid + width;
    }

    // reject the result if the upper limit was never moved or the limits have crossed
    if (hi == limit || lo != hi) return -1;
    return eq(inY.readBytes(lo, len), token) ? lo : -1;
  }
Beispiel #2
0
  /**
   * Runs a wildcard search: all entries that start with the wildcard's prefix and
   * match the wildcard expression contribute their postings to the result.
   *
   * @param token wildcard token to look for
   * @return iterator over the collected postings, or the empty iterator if the
   *   wildcard expression could not be parsed
   */
  private synchronized IndexIterator wc(final byte[] token) {
    final FTWildcard wildcard = new FTWildcard(token);
    if (!wildcard.parse()) return FTIndexIterator.FTEMPTY;

    // each posting is stored as two consecutive numbers; they are collected separately
    final IntList firsts = new IntList();
    final IntList seconds = new IntList();
    final byte[] prefix = wildcard.prefix();
    final int slots = tp.length;
    final int maxLen = Math.min(slots - 1, wildcard.max());

    // a match is at least as long as the prefix
    for (int len = prefix.length; len <= maxLen; len++) {
      final int first = tp[len];
      if (first == -1) continue;

      // right limit: next registered pointer, or -1 if there is none
      int limit = -1;
      for (int n = len + 1; n < slots && limit == -1; n++) limit = tp[n];

      final int step = len + ENTRY;
      for (int pos = find(prefix, first, limit, len); pos < limit; pos += step) {
        final byte[] entry = inY.readBytes(pos, len);
        // entries are visited in order: stop at the first one without the prefix
        if (!startsWith(entry, prefix)) break;
        if (!wildcard.match(entry)) continue;

        inZ.cursor(pointer(pos, len));
        for (int left = size(pos, len); left > 0; left--) {
          firsts.add(inZ.readNum());
          seconds.add(inZ.readNum());
        }
      }
    }
    return iter(new FTCache(firsts, seconds), token);
  }
Beispiel #3
0
  /**
   * Walks over all entries of the index structure and offers each token,
   * together with its size, to the statistics.
   *
   * @param stats statistics to be filled
   */
  private void addOccs(final IndexStats stats) {
    final int slots = tp.length;

    // first token length with a registered pointer
    int len = 0;
    while (len < slots && tp[len] == -1) ++len;
    int pos = tp[len];

    // next token length with a registered pointer
    int next = len + 1;
    while (next < slots && tp[next] == -1) ++next;

    // the last slot marks the end of all entries
    final int end = tp[slots - 1];
    while (pos < end) {
      final int size = size(pos, len);
      if (stats.adding(size)) stats.add(inY.readBytes(pos, len), size);
      pos += len + ENTRY;

      // switch to the next token length once its first entry is reached
      if (pos != tp[next]) continue;
      len = next;
      while (next + 1 < slots) {
        if (tp[++next] != -1) break;
      }
    }
  }
Beispiel #4
0
 /**
  * Binary search over the fixed-width entries located in {@code [start, end)}.
  * Texts that are read during the search are cached by their position.
  *
  * @param token token to look for
  * @param start start position (position of the first entry)
  * @param end end position (exclusive; no entry is read at or after this position)
  * @param ti entry length (number of token bytes per entry)
  * @return position where the key was found, or would have been found; the
  *   result never exceeds {@code end} if {@code start <= end}
  */
 private int find(final byte[] token, final int start, final int end, final int ti) {
   final int tl = ti + ENTRY;
   // index of the last entry: the slot at 'end' is not part of the range. Probing it
   // would read bytes of a different entry and cache them under that position
   int l = 0, h = (end - start) / tl - 1;
   while (l <= h) {
     final int m = l + h >>> 1;
     final int p = start + m * tl;
     // look up cached text before accessing the file
     byte[] txt = ctext.get(p);
     if (txt == null) {
       txt = inY.readBytes(p, ti);
       ctext.put(p, txt);
     }
     final int d = diff(txt, token);
     if (d == 0) return p;
     if (d < 0) l = m + 1;
     else h = m - 1;
   }
   // no exact hit: position of the first entry that is larger than the token
   return start + l * tl;
 }
Beispiel #5
0
  /**
   * Runs a fuzzy search: every entry whose length differs from the token's length
   * by at most {@code k} is compared with the token, and the results of all
   * similar entries are united.
   *
   * @param token token to look for
   * @param k number of errors allowed
   * @return iterator over the united results
   */
  private synchronized IndexIterator fuzzy(final byte[] token, final int k) {
    final int slots = tp.length;
    final int minLen = Math.max(1, token.length - k);
    final int maxLen = Math.min(slots - 1, token.length + k);

    FTIndexIterator result = FTIndexIterator.FTEMPTY;
    for (int len = minLen; len <= maxLen; len++) {
      final int first = tp[len];
      if (first == -1) continue;

      // right limit: next registered pointer, or -1 if there is none
      int limit = -1;
      for (int n = len + 1; n < slots && limit == -1; n++) limit = tp[n];

      final int step = len + ENTRY;
      for (int pos = first; pos < limit; pos += step) {
        if (!ls.similar(inY.readBytes(pos, len), token, k)) continue;
        result = FTIndexIterator.union(iter(pointer(pos, len), size(pos, len), inZ, token), result);
      }
    }
    return result;
  }