Пример #1
0
  /**
   * Looks up the position of the index entry whose text equals the specified token.
   * The entry range starts at {@code tp[token.length]} and ends at the next {@code tp}
   * slot that is not {@code -1}; it is probed with a binary search that moves in steps
   * of {@code token.length + ENTRY}.
   *
   * @param token token to look for
   * @return position of the matching entry, or {@code -1} if no entry matches
   */
  private int token(final byte[] token) {
    final int len = token.length;
    // start of the entries with this token length (-1: no start position stored)
    int lo = tp[len];
    if (lo == -1) {
      return -1;
    }

    // end of the range: first following slot that holds a position
    // NOTE(review): relies on a later slot being set, otherwise the scan leaves the array
    int slot = len + 1;
    int hi = tp[slot];
    while (hi == -1) {
      hi = tp[++slot];
    }
    final int limit = hi;

    // binary search; probe positions stay aligned to entry boundaries
    final int step = len + ENTRY;
    while (lo < hi) {
      final int half = hi - lo >> 1;
      final int mid = lo + half / step * step;
      final int cmp = diff(inY.readBytes(mid, len), token);
      if (cmp == 0) {
        return mid;
      }
      if (cmp < 0) {
        lo = mid + step;
      } else {
        hi = mid - step;
      }
    }

    // a remaining candidate is only checked if both bounds met below the original end
    if (hi == limit || lo != hi) {
      return -1;
    }
    return eq(inY.readBytes(lo, len), token) ? lo : -1;
  }
Пример #2
0
  /**
   * Runs a wildcard search: collects the index data of all entries that start with the
   * prefix of the wildcard expression and are accepted by its matcher.
   *
   * @param token wildcard token to look for
   * @return iterator over the collected data, or {@link FTIndexIterator#FTEMPTY} if the
   *   token cannot be parsed as a wildcard expression
   */
  private synchronized IndexIterator wc(final byte[] token) {
    final FTWildcard wild = new FTWildcard(token);
    if (!wild.parse()) {
      return FTIndexIterator.FTEMPTY;
    }

    final IntList ids = new IntList();
    final IntList positions = new IntList();
    final byte[] prefix = wild.prefix();
    final int slots = tp.length;
    final int maxLen = Math.min(slots - 1, wild.max());

    // visit every token length from the prefix length up to the allowed maximum
    for (int len = prefix.length; len <= maxLen; len++) {
      final int first = tp[len];
      if (first == -1) {
        continue;
      }
      // end of this length group: next slot that holds a position
      int end = -1;
      for (int next = len + 1; next < slots && end == -1; next++) {
        end = tp[next];
      }

      // start where the prefix is (or would be) located and walk entry by entry
      final int step = len + ENTRY;
      for (int pos = find(prefix, first, end, len); pos < end; pos += step) {
        final byte[] text = inY.readBytes(pos, len);
        if (!startsWith(text, prefix)) {
          break;
        }
        if (wild.match(text)) {
          // position the data cursor first, then read the number of pairs
          inZ.cursor(pointer(pos, len));
          for (int left = size(pos, len); left > 0; left--) {
            ids.add(inZ.readNum());
            positions.add(inZ.readNum());
          }
        }
      }
    }
    return iter(new FTCache(ids, positions), token);
  }
Пример #3
0
 /**
  * Reads the number pairs of an index entry from the data source and returns an
  * iterator for them.
  *
  * @param off offset on entries
  * @param size number of id/pos entries
  * @param da data source
  * @param token index token
  * @return iterator
  */
 private static FTIndexIterator iter(
     final long off, final int size, final DataAccess da, final byte[] token) {
   da.cursor(off);
   final IntList ids = new IntList(size);
   final IntList positions = new IntList(size);
   // every entry consists of two consecutive numbers
   int left = size;
   while (left-- > 0) {
     ids.add(da.readNum());
     positions.add(da.readNum());
   }
   return iter(new FTCache(ids, positions), token);
 }
Пример #4
0
  /**
   * Builds the index information: the included names, the summed length of the three
   * data accessors and the token statistics.
   *
   * @param options main options; {@code MAXSTAT} is passed on to the statistics
   * @return information as token
   */
  @Override
  public synchronized byte[] info(final MainOptions options) {
    final TokenBuilder out = new TokenBuilder();
    out.add(LI_NAMES).add(data.meta.ftinclude).add(NL);

    // summed length of all three data accessors
    final long bytes = inX.length() + inY.length() + inZ.length();
    out.add(LI_SIZE + Performance.format(bytes, true) + NL);

    final IndexStats occs = new IndexStats(options.get(MainOptions.MAXSTAT));
    addOccs(occs);
    occs.print(out);
    return out.finish();
  }
Пример #5
0
  /**
   * Constructor, opening the three index files and caching the token length index.
   * The {@code x} file supplies a number of (slot, position) pairs that are stored in
   * {@code tp}; all other slots are {@code -1}, and the last slot receives the length
   * of the {@code y} file.
   *
   * @param data data reference
   * @throws IOException I/O Exception
   */
  public FTIndex(final Data data) throws IOException {
    super(data, true);

    inY = new DataAccess(data.meta.dbfile(DATAFTX + 'y'));
    inZ = new DataAccess(data.meta.dbfile(DATAFTX + 'z'));
    inX = new DataAccess(data.meta.dbfile(DATAFTX + 'x'));

    // cache token length index; -1 marks a slot without position
    final int slots = data.meta.maxlen + 3;
    tp = new int[slots];
    java.util.Arrays.fill(tp, -1);

    for (int left = inX.readNum() - 1; left >= 0; left--) {
      final int key = inX.readNum();
      final int slot, pos;
      if (key < slots) {
        slot = key;
        pos = inX.read4();
      } else {
        // legacy issue (7.0.2 -> 7.1): the three bytes must be read in this order
        final int b1 = inX.read1() & 0xFF;
        final int b2 = inX.read1() & 0xFF;
        final int b3 = inX.read1() & 0xFF;
        pos = key << 24 | b1 << 16 | b2 << 8 | b3;
        slot = key >> 8 | 0x40;
      }
      tp[slot] = pos;
    }
    // the last slot terminates the index with the length of the y file
    tp[slots - 1] = (int) inY.length();
  }
Пример #6
0
  /**
   * Walks through all index entries and passes each token, together with the size read
   * for it, to the statistics (provided the statistics accept that size).
   *
   * @param stats statistics
   */
  private void addOccs(final IndexStats stats) {
    final int slots = tp.length;

    // first token length that has a start position
    int len = 0;
    while (len < slots && tp[len] == -1) {
      len++;
    }
    int pos = tp[len];

    // next slot with a position: start of the following length group
    int next = len + 1;
    while (next < slots && tp[next] == -1) {
      next++;
    }

    final int end = tp[slots - 1];
    while (pos < end) {
      final int occ = size(pos, len);
      if (stats.adding(occ)) {
        stats.add(inY.readBytes(pos, len), occ);
      }
      pos += len + ENTRY;
      if (pos != tp[next]) {
        continue;
      }
      // the following group is reached: switch the length and look for the next group
      len = next;
      while (next + 1 < slots) {
        if (tp[++next] != -1) {
          break;
        }
      }
    }
  }
Пример #7
0
 /**
  * Binary search on entries of equal length. The texts that are read are cached in
  * {@code ctext}, using their position as key.
  *
  * @param token token to look for
  * @param start start position
  * @param end end position
  * @param ti entry length
  * @return position where the key was found, or would have been found
  */
 private int find(final byte[] token, final int start, final int end, final int ti) {
   final int step = ti + ENTRY;
   int lo = 0;
   // NOTE(review): the upper bound is the entry count, so the position 'end' itself
   // may be probed as well - confirm that this is intended
   int hi = (end - start) / step;
   while (lo <= hi) {
     final int mid = lo + hi >>> 1;
     final int pos = start + mid * step;
     byte[] text = ctext.get(pos);
     if (text == null) {
       text = inY.readBytes(pos, ti);
       ctext.put(pos, text);
     }
     final int cmp = diff(text, token);
     if (cmp < 0) {
       lo = mid + 1;
     } else if (cmp > 0) {
       hi = mid - 1;
     } else {
       return pos;
     }
   }
   return start + lo * step;
 }
Пример #8
0
  /**
   * Runs a fuzzy search: all entries whose length differs from the token length by at
   * most {@code k} are compared with the token, and the iterators of all similar
   * entries are united.
   *
   * @param token token to look for
   * @param k number of errors allowed
   * @return iterator
   */
  private synchronized IndexIterator fuzzy(final byte[] token, final int k) {
    FTIndexIterator result = FTIndexIterator.FTEMPTY;
    final int slots = tp.length;
    final int minLen = Math.max(1, token.length - k);
    final int maxLen = Math.min(slots - 1, token.length + k);

    for (int len = minLen; len <= maxLen; len++) {
      final int first = tp[len];
      if (first == -1) {
        continue;
      }
      // end of this length group: next slot that holds a position
      int end = -1;
      for (int next = len + 1; next < slots && end == -1; next++) {
        end = tp[next];
      }

      final int step = len + ENTRY;
      for (int pos = first; pos < end; pos += step) {
        if (!ls.similar(inY.readBytes(pos, len), token, k)) {
          continue;
        }
        final long ptr = pointer(pos, len);
        final int count = size(pos, len);
        result = FTIndexIterator.union(iter(ptr, count, inZ, token), result);
      }
    }
    return result;
  }
Пример #9
0
 /**
  * Reads the size of ftdata from disk. The value is a 4-byte number located 5 bytes
  * behind the token text (presumably right after the 5-byte ftdata pointer).
  *
  * @param pt pointer on token
  * @param lt length of the token
  * @return size of the ftdata
  */
 private int size(final long pt, final int lt) {
   final long off = pt + lt + 5;
   return inY.read4(off);
 }
Пример #10
0
 /**
  * Gets the pointer on ftdata for a token. The pointer is a 5-byte number that is
  * stored directly behind the token text.
  *
  * @param pt pointer on token
  * @param lt length of the token
  * @return pointer on ftdata
  */
 private long pointer(final long pt, final int lt) {
   final long off = pt + lt;
   return inY.read5(off);
 }
Пример #11
0
 /**
  * Closes the three data accessors of the index in the order {@code inX}, {@code inY},
  * {@code inZ}.
  */
 @Override
 public synchronized void close() {
   inX.close();
   inY.close();
   inZ.close();
 }