Пример #1
0
  /**
   * Runs a wildcard query against the index and collects the entries of all
   * matching tokens. Token lengths from the length of the pattern prefix up to
   * the maximum length reported by the pattern are scanned.
   *
   * @param token wildcard pattern to look for
   * @return iterator over the collected entries, or the empty iterator if the
   *     pattern could not be parsed
   */
  private synchronized IndexIterator wc(final byte[] token) {
    final FTWildcard wildcard = new FTWildcard(token);
    if (!wildcard.parse()) return FTIndexIterator.FTEMPTY;

    final IntList idList = new IntList();
    final IntList posList = new IntList();
    final byte[] prefix = wildcard.prefix();
    final int slots = tp.length;
    final int maxLen = Math.min(slots - 1, wildcard.max());

    for (int len = prefix.length; len <= maxLen; len++) {
      final int start = tp[len];
      // -1: no tokens of this length
      if (start == -1) continue;

      // right limit: first following slot that is set
      int end = -1;
      for (int n = len + 1; n < slots && end == -1; n++) end = tp[n];

      // jump to the first candidate and walk entry by entry
      final int step = len + ENTRY;
      for (int pos = find(prefix, start, end, len); pos < end; pos += step) {
        final byte[] cand = inY.readBytes(pos, len);
        // stop at the first entry that does not start with the prefix
        if (!startsWith(cand, prefix)) break;
        if (!wildcard.match(cand)) continue;

        // copy all number pairs stored for this token
        inZ.cursor(pointer(pos, len));
        for (int n = size(pos, len); n > 0; n--) {
          idList.add(inZ.readNum());
          posList.add(inZ.readNum());
        }
      }
    }
    return iter(new FTCache(idList, posList), token);
  }
Пример #2
0
  /*
   * Registers the value for later indexing (if enabled) and returns the
   * reference to be stored for it: either an inlined number flagged with
   * IO.OFFNUM, or the offset of the newly written store entry, flagged with
   * IO.OFFCOMP if the packed array differs from the input array.
   */
  @Override
  protected long index(final int pre, final int id, final byte[] value, final int kind) {
    // choose the target store and, if the matching index is enabled, its buffer
    final boolean attr = kind == ATTR;
    final DataAccess store = attr ? values : texts;
    final TokenObjMap<IntList> buffer;
    if (attr) {
      buffer = meta.attrindex ? atvBuffer : null;
    } else {
      // document names are not indexed
      buffer = meta.textindex && kind != DOC ? txtBuffer : null;
    }

    // remember the id under its value; values longer than maxlen are skipped
    if (buffer != null && meta.updindex && value.length <= meta.maxlen) {
      IntList list = buffer.get(value);
      if (list == null) {
        list = new IntList(1);
        buffer.put(value, list);
      }
      list.add(id);
    }

    // values accepted by toSimpleInt() are inlined instead of being written
    final long num = toSimpleInt(value);
    if (num != Integer.MIN_VALUE) return num | IO.OFFNUM;

    // append the (possibly packed) value to the end of the store
    final long offset = store.length();
    final byte[] packed = COMP.get().pack(value);
    store.writeToken(offset, packed);
    // identity check: the flag is only set if pack() returned another array
    if (packed == value) return offset;
    return offset | IO.OFFCOMP;
  }
Пример #3
0
 /**
  * Closes and deletes the input files.
  * The two open accessors ({@code str}, {@code dat}) are closed first;
  * afterwards {@code files}, {@code filed} and {@code sizes} are deleted.
  * The outcome of the delete calls is not inspected here.
  */
 private void close() {
   // release the open handles before the files are removed
   str.close();
   dat.close();
   // remove the files; nothing is done here if a deletion does not succeed
   files.delete();
   filed.delete();
   sizes.delete();
 }
Пример #4
0
  /**
   * Determines the pointer on a token.
   * The array {@code tp} is addressed by token length: the slot for the length of
   * the token yields the left limit, the first following slot that is not
   * {@code -1} the right limit. The entries in between are searched with a binary
   * search on entries of {@code token.length + ENTRY} bytes.
   *
   * @param token token looking for
   * @return int pointer or {@code -1} if token was not found; this includes tokens
   *     whose length has no slot with a right neighbor in {@code tp}
   */
  private int token(final byte[] token) {
    final int tl = token.length;
    final int sl = tp.length;
    // the lookup needs slot tl and at least one slot behind it; longer tokens have
    // no entries and must not run past the end of the array
    if (tl >= sl - 1) return -1;

    // left limit
    int l = tp[tl];
    if (l == -1) return -1;

    // find right limit: first following slot that is set (bounded by the array size)
    int r = -1;
    for (int n = tl + 1; n < sl && r == -1; n++) r = tp[n];
    if (r == -1) return -1;
    final int x = r;

    // binary search; the middle position is aligned to a multiple of the entry size
    final int o = tl + ENTRY;
    while (l < r) {
      final int m = l + (r - l >> 1) / o * o;
      final int c = diff(inY.readBytes(m, tl), token);
      if (c == 0) return m;
      if (c < 0) l = m + o;
      else r = m - o;
    }
    // accept entry if pointer is inside relevant tokens
    return r != x && l == r && eq(inY.readBytes(l, tl), token) ? l : -1;
  }
Пример #5
0
 /*
  * Returns the length of the text or attribute value of the given pre value.
  * Inlined numbers are answered without touching the stores.
  */
 @Override
 public int textLen(final int pre, final boolean text) {
   final long offset = textOff(pre);
   // inlined number: the length is the number of its digits
   if (number(offset)) return numDigits((int) offset);

   // first number found at the offset (flag bits masked out)
   final DataAccess store = text ? texts : values;
   final int length = store.readNum(offset & (IO.OFFCOMP - 1));
   // compressed entries: the number that follows is returned instead
   if (compressed(offset)) return store.readNum();
   return length;
 }
Пример #6
0
 /**
  * Creates an iterator for a single index entry by reading {@code size} pairs
  * of numbers from the data source, starting at the given offset.
  *
  * @param off offset on entries
  * @param size number of id/pos entries
  * @param da data source
  * @param token index token
  * @return iterator
  */
 private static FTIndexIterator iter(
     final long off, final int size, final DataAccess da, final byte[] token) {
   da.cursor(off);
   final IntList idList = new IntList(size);
   final IntList posList = new IntList(size);
   // each entry consists of two consecutive numbers
   for (int left = size; left > 0; left--) {
     idList.add(da.readNum());
     posList.add(da.readNum());
   }
   final FTCache cache = new FTCache(idList, posList);
   return iter(cache, token);
 }
Пример #7
0
  /*
   * Builds the info text of this index: included names, summed up size of the
   * three index files and the token statistics.
   */
  @Override
  public synchronized byte[] info(final MainOptions options) {
    // total size of all three index files
    long bytes = inX.length();
    bytes += inY.length();
    bytes += inZ.length();

    final TokenBuilder info = new TokenBuilder();
    info.add(LI_NAMES).add(data.meta.ftinclude).add(NL);
    info.add(LI_SIZE + Performance.format(bytes, true) + NL);

    // token statistics, limited by the MAXSTAT option
    final IndexStats occs = new IndexStats(options.get(MainOptions.MAXSTAT));
    addOccs(occs);
    occs.print(info);
    return info.finish();
  }
Пример #8
0
 /*
  * Flushes the table; if {@code all} is set, the result of write(), both value
  * stores and the existing text/attribute indexes are flushed as well.
  * I/O errors are passed on to Util.stack() and not rethrown.
  */
 @Override
 public synchronized void flush(final boolean all) {
   try {
     table.flush(all);
     // partial flush: the table is all that needs to be written
     if (!all) return;

     write();
     texts.flush();
     values.flush();
     if (textIndex != null) {
       final DiskValues index = (DiskValues) textIndex;
       index.flush();
     }
     if (attrIndex != null) {
       final DiskValues index = (DiskValues) attrIndex;
       index.flush();
     }
   } catch (final IOException ex) {
     Util.stack(ex);
   }
 }
Пример #9
0
 /*
  * Closes the table, the value stores and all indexes. Subsequent calls are
  * ignored. I/O errors are passed on to Util.stack() and not rethrown.
  * NOTE(review): the flag is set before anything is closed and all steps share
  * one try block, so an IOException in an early step skips the remaining ones.
  */
 @Override
 public synchronized void close() {
   if (closed) return;
   closed = true;

   final IndexType[] indexes = { IndexType.TEXT, IndexType.ATTRIBUTE, IndexType.FULLTEXT };
   try {
     write();
     table.close();
     texts.close();
     values.close();
     for (final IndexType index : indexes) close(index);
   } catch (final IOException ex) {
     Util.stack(ex);
   }
 }
Пример #10
0
  /**
   * Returns next token.
   * Reads {@code ctl} bytes at the current position {@code ptok}, stores the
   * 4-byte value found 5 bytes behind the token in {@code size} and moves
   * {@code ptok} to the cursor position reached after that read.
   *
   * @return byte[] token, or {@code EMPTY} if {@code ptok} equals the value in the
   *     last slot of {@code tp}
   */
  private byte[] token() {
    // current position equals the value in the last slot: nothing left to read
    if (tp[tp.length - 1] == ptok) return EMPTY;
    // position equals the offset stored for ntl, or ntl is still 0 (presumably the
    // initial state): advance ctl and ntl to the next slots that are not -1
    if (tp[ntl] == ptok || ntl == 0) {
      ++ctl;
      while (tp[ctl] == -1) ++ctl;
      ntl = ctl + 1;
      while (tp[ntl] == -1) ++ntl;
    }
    // NOTE(review): the scans above index tp without a bound check, so this test
    // looks unreachable unless ctl is modified elsewhere - confirm
    if (ctl == tp.length) return EMPTY;

    // token bytes: ctl bytes starting at ptok
    final byte[] t = str.readBytes(ptok, ctl);
    // skip pointer
    size = str.read4(str.cursor() + 5);
    // position will always fit in an integer...
    ptok = (int) str.cursor();
    return t;
  }
Пример #11
0
  /**
   * Constructor, initializing the index structure.
   * Opens the three index files ({@code y}, {@code z}, {@code x}) and caches the
   * token length index in {@code tp}: one slot per token length, {@code -1} for
   * lengths without an entry, and the length of the {@code y} file in the last
   * slot. If initialization fails, all files opened so far are closed again
   * before the exception is passed on.
   *
   * @param data data reference
   * @throws IOException I/O Exception
   */
  public FTIndex(final Data data) throws IOException {
    super(data, true);

    tp = new int[data.meta.maxlen + 3];
    final int tl = tp.length;
    for (int i = 0; i < tl; ++i) tp[i] = -1;

    final DataAccess y = new DataAccess(data.meta.dbfile(DATAFTX + 'y'));
    DataAccess z = null, x = null;
    boolean ok = false;
    try {
      z = new DataAccess(data.meta.dbfile(DATAFTX + 'z'));
      x = new DataAccess(data.meta.dbfile(DATAFTX + 'x'));

      // cache token length index: a counter, followed by (length, offset) pairs
      int is = x.readNum();
      while (--is >= 0) {
        int p = x.readNum();
        final int r;
        if (p < tl) {
          r = x.read4();
        } else {
          // legacy issue (7.0.2 -> 7.1)
          r = p << 24 | (x.read1() & 0xFF) << 16 | (x.read1() & 0xFF) << 8 | x.read1() & 0xFF;
          p = p >> 8 | 0x40;
        }
        tp[p] = r;
      }
      // end marker: length of the token file
      tp[tl - 1] = (int) y.length();
      ok = true;
    } finally {
      // do not leave files open if the index could not be initialized
      if (!ok) {
        y.close();
        if (z != null) z.close();
        if (x != null) x.close();
      }
    }
    inY = y;
    inZ = z;
    inX = x;
  }
Пример #12
0
  /*
   * Replaces the text or attribute value of the given pre value: updates the
   * matching value index (if updatable indexes are enabled) and stores the new
   * value, either inlined as number or as entry in the text/value store.
   */
  @Override
  protected void updateText(final int pre, final byte[] value, final int kind) {
    final boolean text = kind != ATTR;

    // keep the value index in sync
    if (meta.updindex) {
      final int id = id(pre);
      final byte[] previous = text(pre, text);
      final DiskValues index = (DiskValues) (text ? textIndex : attrIndex);
      // document names are not indexed
      if (index != null && kind != DOC) index.replace(previous, value, id);
    }

    // target store and its current length
    final DataAccess store = text ? texts : values;
    final long length = store.length();

    // values accepted by toSimpleInt() are inlined in the table
    final long num = toSimpleInt(value);
    if (num != Integer.MIN_VALUE) {
      textOff(pre, num | IO.OFFNUM);
      return;
    }

    // value to be written (possibly packed) and the reference it replaces
    final byte[] packed = COMP.get().pack(value);
    final long old = textOff(pre);

    // numeric old entry: append at the end of the store;
    // otherwise request a slot for the token and its length prefix
    final int size = packed.length;
    final long offset = number(old) ? length :
      store.free(old & IO.OFFCOMP - 1, size + Num.length(size));

    store.writeToken(offset, packed);
    // the compressed flag is only set if pack() returned another array
    textOff(pre, packed == value ? offset : offset | IO.OFFCOMP);
  }
Пример #13
0
 /**
  * Constructor, initializing the index structure.
  * Resolves the three files of the given prefix ({@code y}, {@code z},
  * {@code x}), opens the first two for reading and loads the token length
  * table from the third into {@code tp}; the last slot of {@code tp} receives
  * the length of the {@code y} file. Finally, the first token is read.
  * If opening or reading fails, the files opened so far are closed again
  * before the exception is passed on.
  *
  * @param data data
  * @param prefix prefix
  * @throws IOException I/O exception
  */
 FTList(final Data data, final int prefix) throws IOException {
   files = data.meta.dbfile(DATAFTX + prefix + 'y');
   filed = data.meta.dbfile(DATAFTX + prefix + 'z');
   sizes = data.meta.dbfile(DATAFTX + prefix + 'x');

   // one slot per token length, -1 marks lengths without entries
   tp = new int[data.meta.maxlen + 3];
   final int tl = tp.length;
   for (int t = 0; t < tl; t++) tp[t] = -1;

   final DataAccess tokens = new DataAccess(files);
   DataAccess entries = null;
   boolean ok = false;
   try {
     entries = new DataAccess(filed);
     // the length table is only needed here and is closed right away
     try (final DataAccess li = new DataAccess(sizes)) {
       int is = li.readNum();
       while (--is >= 0) {
         final int p = li.readNum();
         tp[p] = li.read4();
       }
     }
     // end marker: length of the token file
     tp[tl - 1] = (int) tokens.length();
     ok = true;
   } finally {
     // do not leave files open if the list could not be initialized
     if (!ok) {
       tokens.close();
       if (entries != null) entries.close();
     }
   }
   str = tokens;
   dat = entries;
   next();
 }
Пример #14
0
  /**
   * Walks through all tokens of the index structure and passes each token that
   * is accepted by the statistics on to it, together with its size.
   *
   * @param stats statistics
   */
  private void addOccs(final IndexStats stats) {
    final int slots = tp.length;

    // first token length with entries
    int len = 0;
    while (len < slots && tp[len] == -1) ++len;
    int pos = tp[len];
    // next slot that is set
    int next = len + 1;
    while (next < slots && tp[next] == -1) ++next;

    // the last slot holds the end of the token data
    final int end = tp[slots - 1];
    while (pos < end) {
      final int occ = size(pos, len);
      if (stats.adding(occ)) stats.add(inY.readBytes(pos, len), occ);
      pos += len + ENTRY;

      // start of the next length group reached: switch to its token length
      if (pos == tp[next]) {
        len = next;
        while (next + 1 < slots) {
          ++next;
          if (tp[next] != -1) break;
        }
      }
    }
  }
Пример #15
0
 /**
  * Binary search on entries of {@code ti + ENTRY} bytes. Tokens that were read
  * from disk are remembered in {@code ctext} and reused by later probes.
  * NOTE(review): the initial upper bound is the number of entries between
  * {@code start} and {@code end}, so a probe may address position {@code end}
  * itself - confirm that this is intended.
  *
  * @param token token to look for
  * @param start start position
  * @param end end position
  * @param ti entry length
  * @return position where the key was found, or would have been found
  */
 private int find(final byte[] token, final int start, final int end, final int ti) {
   final int step = ti + ENTRY;
   int lo = 0;
   int hi = (end - start) / step;
   while (lo <= hi) {
     final int mid = (lo + hi) >>> 1;
     final int pos = start + mid * step;

     // look up the cache first, read from disk on a miss
     byte[] entry = ctext.get(pos);
     if (entry == null) {
       entry = inY.readBytes(pos, ti);
       ctext.put(pos, entry);
     }

     final int cmp = diff(entry, token);
     if (cmp < 0) {
       lo = mid + 1;
     } else if (cmp > 0) {
       hi = mid - 1;
     } else {
       return pos;
     }
   }
   // not found: position at which the token would have been expected
   return start + lo * step;
 }
Пример #16
0
  /**
   * Moves on to the next token and loads its number pairs into {@code prv} and
   * {@code pov}. Once an empty token is returned, the list is marked as wasted,
   * both arrays are set to {@code NOINTS} and the files are closed; later calls
   * return immediately.
   */
  void next() {
    if (wasted) return;

    tok = token();
    if (tok.length == 0) {
      // end of list reached
      wasted = true;
      prv = NOINTS;
      pov = NOINTS;
      close();
      return;
    }

    // size has been refreshed by token()
    final int count = size;
    prv = new int[count];
    pov = new int[count];
    for (int n = 0; n < count; ++n) {
      prv[n] = dat.readNum();
      pov[n] = dat.readNum();
    }
  }
Пример #17
0
  /**
   * Runs a fuzzy search: all tokens whose length differs by at most {@code k}
   * from the length of the search token are compared with it, and the entries
   * of all similar tokens are united.
   *
   * @param token token to look for
   * @param k number of errors allowed
   * @return iterator
   */
  private synchronized IndexIterator fuzzy(final byte[] token, final int k) {
    final int slots = tp.length;
    final int length = token.length;
    final int maxLen = Math.min(slots - 1, length + k);

    FTIndexIterator result = FTIndexIterator.FTEMPTY;
    for (int len = Math.max(1, length - k); len <= maxLen; len++) {
      final int start = tp[len];
      // -1: no tokens of this length
      if (start == -1) continue;

      // right limit: first following slot that is set
      int end = -1;
      for (int n = len + 1; n < slots && end == -1; n++) end = tp[n];

      // compare every token of this length
      final int step = len + ENTRY;
      for (int pos = start; pos < end; pos += step) {
        if (!ls.similar(inY.readBytes(pos, len), token, k)) continue;
        final long off = pointer(pos, len);
        final int count = size(pos, len);
        result = FTIndexIterator.union(iter(off, count, inZ, token), result);
      }
    }
    return result;
  }
Пример #18
0
 /**
  * Reads the pointer on ftdata for a token: the 5-byte value stored directly
  * behind the token bytes.
  *
  * @param pt pointer on token
  * @param lt length of the token
  * @return pointer on ftdata
  */
 private long pointer(final long pt, final int lt) {
   // the pointer starts right after the lt token bytes
   final long at = pt + lt;
   return inY.read5(at);
 }
Пример #19
0
 /* Closes the three index files in the order x, y, z. */
 @Override
 public synchronized void close() {
   final DataAccess[] accesses = { inX, inY, inZ };
   for (final DataAccess access : accesses) access.close();
 }
Пример #20
0
 /**
  * Reads the size of ftdata from disk: the 4-byte value that is stored behind
  * the token bytes and the 5 bytes following them.
  *
  * @param pt pointer on token
  * @param lt length of the token
  * @return size of the ftdata
  */
 private int size(final long pt, final int lt) {
   // skip the token itself and the 5 bytes behind it
   final long at = pt + lt + 5;
   return inY.read4(at);
 }