/**
 * Evaluates a wildcard pattern against the index and gathers the data of all matching tokens.
 *
 * @param token wildcard pattern to look for
 * @return iterator over the collected entries, or {@code FTIndexIterator.FTEMPTY} if the
 *   pattern cannot be parsed
 */
private synchronized IndexIterator wc(final byte[] token) {
  final FTWildcard wildcard = new FTWildcard(token);
  if (!wildcard.parse()) return FTIndexIterator.FTEMPTY;

  final IntList first = new IntList();
  final IntList second = new IntList();
  final byte[] prefix = wildcard.prefix();
  final int slots = tp.length;
  final int last = Math.min(slots - 1, wildcard.max());

  // visit every token length between the prefix length and the allowed maximum
  for (int len = prefix.length; len <= last; len++) {
    final int begin = tp[len];
    if (begin == -1) continue;

    // the current block ends where the next populated length begins
    int end = -1;
    for (int n = len + 1; n < slots && end == -1; n++) end = tp[n];

    // start at the position located for the prefix and walk on while the prefix is shared
    for (int pos = find(prefix, begin, end, len); pos < end; pos += len + ENTRY) {
      final byte[] entry = inY.readBytes(pos, len);
      if (!startsWith(entry, prefix)) break;
      if (!wildcard.match(entry)) continue;

      // copy all number pairs of the matching token
      inZ.cursor(pointer(pos, len));
      final int pairs = size(pos, len);
      for (int p = 0; p < pairs; p++) {
        first.add(inZ.readNum());
        second.add(inZ.readNum());
      }
    }
  }
  return iter(new FTCache(first, second), token);
}
@Override protected long index(final int pre, final int id, final byte[] value, final int kind) { final DataAccess store; final TokenObjMap<IntList> map; if (kind == ATTR) { store = values; map = meta.attrindex ? atvBuffer : null; } else { store = texts; // don't index document names map = meta.textindex && kind != DOC ? txtBuffer : null; } // add text to map to index later if (meta.updindex && map != null && value.length <= meta.maxlen) { IntList ids = map.get(value); if (ids == null) { ids = new IntList(1); map.put(value, ids); } ids.add(id); } // add text to text file // inline integer value... final long v = toSimpleInt(value); if (v != Integer.MIN_VALUE) return v | IO.OFFNUM; // store text final long off = store.length(); final byte[] val = COMP.get().pack(value); store.writeToken(off, val); return val == value ? off : off | IO.OFFCOMP; }
/** Closes both input accessors and deletes the three files this list was built on. */
private void close() {
  // release the accessors first ...
  str.close();
  dat.close();
  // ... then remove the token, data and size files
  files.delete();
  filed.delete();
  sizes.delete();
}
/** * Determines the pointer on a token. * * @param token token looking for * @return int pointer or {@code -1} if token was not found */ private int token(final byte[] token) { final int tl = token.length; // left limit int l = tp[tl]; if (l == -1) return -1; int i = 1; int r; // find right limit do r = tp[tl + i++]; while (r == -1); final int x = r; // binary search final int o = tl + ENTRY; while (l < r) { final int m = l + (r - l >> 1) / o * o; final int c = diff(inY.readBytes(m, tl), token); if (c == 0) return m; if (c < 0) l = m + o; else r = m - o; } // accept entry if pointer is inside relevant tokens return r != x && l == r && eq(inY.readBytes(l, tl), token) ? l : -1; }
@Override public int textLen(final int pre, final boolean text) { final long o = textOff(pre); if (number(o)) return numDigits((int) o); final DataAccess da = text ? texts : values; final int l = da.readNum(o & IO.OFFCOMP - 1); // compressed: next number contains number of compressed bytes return compressed(o) ? da.readNum() : l; }
/**
 * Reads the number pairs of an index entry and wraps them into an iterator.
 *
 * @param off offset of the first pair
 * @param size number of id/pos pairs to read
 * @param da data source
 * @param token index token
 * @return iterator
 */
private static FTIndexIterator iter(
    final long off, final int size, final DataAccess da, final byte[] token) {
  final IntList first = new IntList(size);
  final IntList second = new IntList(size);
  da.cursor(off);
  // each entry consists of two consecutive numbers
  for (int left = size; left > 0; left--) {
    first.add(da.readNum());
    second.add(da.readNum());
  }
  return iter(new FTCache(first, second), token);
}
/**
 * Builds a textual summary of the index: the included names, the accumulated length of the
 * three index files and the token statistics.
 *
 * @param options main options ({@code MAXSTAT} is passed on to the statistics)
 * @return summary as token
 */
@Override
public synchronized byte[] info(final MainOptions options) {
  // total length of all three index files
  final long bytes = inX.length() + inY.length() + inZ.length();

  final TokenBuilder tb = new TokenBuilder();
  tb.add(LI_NAMES).add(data.meta.ftinclude).add(NL);
  tb.add(LI_SIZE + Performance.format(bytes, true) + NL);

  // collect and print the token statistics
  final IndexStats stats = new IndexStats(options.get(MainOptions.MAXSTAT));
  addOccs(stats);
  stats.print(tb);
  return tb.finish();
}
/**
 * Flushes the table and, if requested, all remaining structures.
 * An {@link IOException} is not propagated, but handed to {@code Util.stack}.
 *
 * @param all if {@code false}, only the table is flushed
 */
@Override
public synchronized void flush(final boolean all) {
  try {
    table.flush(all);
    if (!all) return;

    write();
    texts.flush();
    values.flush();
    // value indexes are optional
    if (textIndex != null) ((DiskValues) textIndex).flush();
    if (attrIndex != null) ((DiskValues) attrIndex).flush();
  } catch (final IOException ex) {
    Util.stack(ex);
  }
}
/**
 * Writes pending data and closes the table, the stores and the indexes.
 * Repeated calls are ignored; an {@link IOException} is handed to {@code Util.stack}.
 */
@Override
public synchronized void close() {
  if (closed) return;
  closed = true;

  try {
    write();
    table.close();
    texts.close();
    values.close();
    // close the indexes in the fixed order: text, attribute, full-text
    final IndexType[] types = { IndexType.TEXT, IndexType.ATTRIBUTE, IndexType.FULLTEXT };
    for (final IndexType type : types) close(type);
  } catch (final IOException ex) {
    Util.stack(ex);
  }
}
/** * Returns next token. * * @return byte[] token */ private byte[] token() { if (tp[tp.length - 1] == ptok) return EMPTY; if (tp[ntl] == ptok || ntl == 0) { ++ctl; while (tp[ctl] == -1) ++ctl; ntl = ctl + 1; while (tp[ntl] == -1) ++ntl; } if (ctl == tp.length) return EMPTY; final byte[] t = str.readBytes(ptok, ctl); // skip pointer size = str.read4(str.cursor() + 5); // position will always fit in an integer... ptok = (int) str.cursor(); return t; }
/** * Constructor, initializing the index structure. * * @param data data reference * @throws IOException I/O Exception */ public FTIndex(final Data data) throws IOException { super(data, true); // cache token length index inY = new DataAccess(data.meta.dbfile(DATAFTX + 'y')); inZ = new DataAccess(data.meta.dbfile(DATAFTX + 'z')); inX = new DataAccess(data.meta.dbfile(DATAFTX + 'x')); tp = new int[data.meta.maxlen + 3]; final int tl = tp.length; for (int i = 0; i < tl; ++i) tp[i] = -1; int is = inX.readNum(); while (--is >= 0) { int p = inX.readNum(); final int r; if (p < tl) { r = inX.read4(); } else { // legacy issue (7.0.2 -> 7.1) r = p << 24 | (inX.read1() & 0xFF) << 16 | (inX.read1() & 0xFF) << 8 | inX.read1() & 0xFF; p = p >> 8 | 0x40; } tp[p] = r; } tp[tl - 1] = (int) inY.length(); }
@Override protected void updateText(final int pre, final byte[] value, final int kind) { final boolean text = kind != ATTR; if (meta.updindex) { // update indexes final int id = id(pre); final byte[] oldval = text(pre, text); final DiskValues index = (DiskValues) (text ? textIndex : attrIndex); // don't index document names if (index != null && kind != DOC) index.replace(oldval, value, id); } // reference to text store final DataAccess store = text ? texts : values; // file length final long len = store.length(); // new entry (offset or value) final long v = toSimpleInt(value); if (v != Integer.MIN_VALUE) { // inline integer value textOff(pre, v | IO.OFFNUM); } else { // text to be stored (possibly packed) final byte[] val = COMP.get().pack(value); // old entry (offset or value) final long old = textOff(pre); // find text store offset final long off; if (number(old)) { // numeric entry: append new entry at the end off = len; } else { // text size (0 if value will be inlined) final int vl = val.length; off = store.free(old & IO.OFFCOMP - 1, vl + Num.length(vl)); } store.writeToken(off, val); textOff(pre, val == value ? off : off | IO.OFFCOMP); } }
/**
 * Opens the partial index files with the given prefix, caches the table of token block
 * positions and loads the first token.
 *
 * @param data data
 * @param prefix prefix that is part of the file names
 * @throws IOException I/O exception
 */
FTList(final Data data, final int prefix) throws IOException {
  files = data.meta.dbfile(DATAFTX + prefix + 'y');
  filed = data.meta.dbfile(DATAFTX + prefix + 'z');
  str = new DataAccess(files);
  dat = new DataAccess(filed);

  // unused lengths are marked with -1
  tp = new int[data.meta.maxlen + 3];
  final int slots = tp.length;
  for (int s = 0; s < slots; s++) tp[s] = -1;

  sizes = data.meta.dbfile(DATAFTX + prefix + 'x');
  try (DataAccess li = new DataAccess(sizes)) {
    // read the stored (length, position) pairs
    int pairs = li.readNum();
    while (--pairs >= 0) {
      final int len = li.readNum();
      tp[len] = li.read4();
    }
    // the last slot marks the end of the token file
    tp[slots - 1] = (int) str.length();
  }
  next();
}
/**
 * Walks through all token entries of the index and passes the tokens and their sizes on to
 * the statistics.
 *
 * @param stats statistics
 */
private void addOccs(final IndexStats stats) {
  final int slots = tp.length;

  // first populated token length
  int len = 0;
  while (len < slots && tp[len] == -1) ++len;
  int pos = tp[len];

  // next populated token length
  int next = len + 1;
  while (next < slots && tp[next] == -1) ++next;

  final int end = tp[slots - 1];
  while (pos < end) {
    final int occ = size(pos, len);
    if (stats.adding(occ)) stats.add(inY.readBytes(pos, len), occ);
    pos += len + ENTRY;

    // block boundary reached: continue with the next length and look for its successor
    if (pos == tp[next]) {
      len = next;
      while (next + 1 < slots) {
        ++next;
        if (tp[next] != -1) break;
      }
    }
  }
}
/**
 * Binary search over the equally sized entries of a token block.
 * Looked-up entries are cached in {@code ctext}.
 *
 * @param token token to look for
 * @param start position of the first entry
 * @param end position behind the last entry (exclusive)
 * @param ti token length of the entries
 * @return position where the key was found, or would have been found; for a non-empty range,
 *   the result lies between {@code start} and {@code end} (inclusive)
 */
private int find(final byte[] token, final int start, final int end, final int ti) {
  final int tl = ti + ENTRY;
  // highest valid entry index: the slot at 'end' is no longer part of the block and
  // must not be probed (it belongs to the next token length or lies behind the file)
  int l = 0, h = (end - start) / tl - 1;
  while (l <= h) {
    final int m = (l + h) >>> 1;
    final int p = start + m * tl;
    // consult the cache before touching the file
    byte[] txt = ctext.get(p);
    if (txt == null) {
      txt = inY.readBytes(p, ti);
      ctext.put(p, txt);
    }
    final int d = diff(txt, token);
    if (d == 0) return p;
    if (d < 0) l = m + 1;
    else h = m - 1;
  }
  // not found: position of the first entry that is larger than the token
  return start + l * tl;
}
/**
 * Advances to the next token and loads its number pairs into {@code prv} and {@code pov}.
 * If no token is left, the list is marked as wasted, both arrays are reset to
 * {@code NOINTS} and the input files are closed and deleted.
 */
void next() {
  if (wasted) return;

  tok = token();
  if (tok.length != 0) {
    // token() has updated 'size' for the current token
    prv = new int[size];
    pov = new int[size];
    for (int e = 0; e < size; ++e) {
      prv[e] = dat.readNum();
      pov[e] = dat.readNum();
    }
    return;
  }

  // an empty token signals the end of the list
  wasted = true;
  prv = NOINTS;
  pov = NOINTS;
  close();
}
/**
 * Runs a fuzzy search: all indexed tokens whose length differs from the query by at most
 * {@code k} are compared with the query, and the entries of similar tokens are merged.
 *
 * @param token token to look for
 * @param k number of errors allowed
 * @return iterator over the merged results ({@code FTIndexIterator.FTEMPTY} if nothing matched)
 */
private synchronized IndexIterator fuzzy(final byte[] token, final int k) {
  FTIndexIterator result = FTIndexIterator.FTEMPTY;
  final int length = token.length, slots = tp.length;
  final int max = Math.min(slots - 1, length + k);

  // candidate token lengths: at least 1, at most the query length plus k
  for (int len = Math.max(1, length - k); len <= max; len++) {
    int pos = tp[len];
    if (pos == -1) continue;

    // the current block ends where the next populated length begins
    int end = -1;
    for (int n = len + 1; n < slots && end == -1; n++) end = tp[n];

    // compare every token of the block
    for (; pos < end; pos += len + ENTRY) {
      if (!ls.similar(inY.readBytes(pos, len), token, k)) continue;
      final FTIndexIterator hit = iter(pointer(pos, len), size(pos, len), inZ, token);
      result = FTIndexIterator.union(hit, result);
    }
  }
  return result;
}
/**
 * Reads the 5-byte data pointer that is stored directly behind a token.
 *
 * @param pt position of the token
 * @param lt length of the token
 * @return pointer on ftdata
 */
private long pointer(final long pt, final int lt) {
  // the pointer starts right after the token bytes
  final long pos = pt + lt;
  return inY.read5(pos);
}
/** Closes the accessors of all three index files. */
@Override
public synchronized void close() {
  inX.close();
  inY.close();
  inZ.close();
}
/**
 * Reads the 4-byte size value of a token entry from disk.
 *
 * @param pt position of the token
 * @param lt length of the token
 * @return size of the ftdata
 */
private int size(final long pt, final int lt) {
  // skip the token bytes and the 5 bytes that follow them
  final long pos = pt + lt + 5;
  return inY.read4(pos);
}