/** * Determines the pointer on a token. * * @param token token looking for * @return int pointer or {@code -1} if token was not found */ private int token(final byte[] token) { final int tl = token.length; // left limit int l = tp[tl]; if (l == -1) return -1; int i = 1; int r; // find right limit do r = tp[tl + i++]; while (r == -1); final int x = r; // binary search final int o = tl + ENTRY; while (l < r) { final int m = l + (r - l >> 1) / o * o; final int c = diff(inY.readBytes(m, tl), token); if (c == 0) return m; if (c < 0) l = m + o; else r = m - o; } // accept entry if pointer is inside relevant tokens return r != x && l == r && eq(inY.readBytes(l, tl), token) ? l : -1; }
/**
 * Evaluates a wildcard expression against the index.
 *
 * <p>For every token length between the length of the wildcard's literal
 * prefix and the smaller of {@code wc.max()} and the last {@code tp} slot,
 * the entries starting with that prefix are scanned, and the occurrence data
 * of every entry accepted by the wildcard matcher is collected.
 *
 * @param token wildcard expression
 * @return iterator over all collected occurrences; the empty iterator if the
 *         expression cannot be parsed
 */
private synchronized IndexIterator wc(final byte[] token) {
  final FTIndexIterator empty = FTIndexIterator.FTEMPTY;
  final FTWildcard wildcard = new FTWildcard(token);
  if (!wildcard.parse()) {
    return empty;
  }

  // two numbers are stored per occurrence; they are gathered in parallel lists
  // (presumably node id and position - confirm against FTCache)
  final IntList firstNums = new IntList();
  final IntList secondNums = new IntList();

  final byte[] prefix = wildcard.prefix();
  final int slots = tp.length;
  final int maxLen = Math.min(slots - 1, wildcard.max());

  for (int len = prefix.length; len <= maxLen; len++) {
    final int first = tp[len];
    if (first == -1) {
      continue;
    }

    // end of this length's entries: next populated slot (-1 if there is none)
    int end = -1;
    for (int n = len + 1; n < slots && end == -1; n++) {
      end = tp[n];
    }

    // jump to the first candidate, then walk while entries share the prefix
    final int width = len + ENTRY;
    for (int pos = find(prefix, first, end, len); pos < end; pos += width) {
      final byte[] entry = inY.readBytes(pos, len);
      if (!startsWith(entry, prefix)) {
        break;
      }
      if (wildcard.match(entry)) {
        inZ.cursor(pointer(pos, len));
        final int count = size(pos, len);
        for (int d = 0; d < count; d++) {
          firstNums.add(inZ.readNum());
          secondNums.add(inZ.readNum());
        }
      }
    }
  }
  return iter(new FTCache(firstNums, secondNums), token);
}
/** * Returns next token. * * @return byte[] token */ private byte[] token() { if (tp[tp.length - 1] == ptok) return EMPTY; if (tp[ntl] == ptok || ntl == 0) { ++ctl; while (tp[ctl] == -1) ++ctl; ntl = ctl + 1; while (tp[ntl] == -1) ++ntl; } if (ctl == tp.length) return EMPTY; final byte[] t = str.readBytes(ptok, ctl); // skip pointer size = str.read4(str.cursor() + 5); // position will always fit in an integer... ptok = (int) str.cursor(); return t; }
/**
 * Walks over all index entries and reports each token together with its
 * size to the statistics collector.
 *
 * <p>Entries are visited in storage order, starting at the first populated
 * slot of {@code tp} and ending at the position stored in its last slot.
 * A token is only read from disk if {@code stats.adding} accepts its size.
 *
 * @param stats statistics to be filled
 */
private void addOccs(final IndexStats stats) {
  final int slots = tp.length;

  // current token length: first populated slot
  int len = 0;
  while (len < slots && tp[len] == -1) {
    len++;
  }
  int pos = tp[len];

  // following populated slot, where the next token length begins
  int next = len + 1;
  while (next < slots && tp[next] == -1) {
    next++;
  }

  final int end = tp[slots - 1];
  while (pos < end) {
    final int count = size(pos, len);
    if (stats.adding(count)) {
      stats.add(inY.readBytes(pos, len), count);
    }
    pos += len + ENTRY;

    // reached the start of the next length: adopt it and look ahead again
    if (pos == tp[next]) {
      len = next;
      while (next + 1 < slots) {
        next++;
        if (tp[next] != -1) {
          break;
        }
      }
    }
  }
}
/**
 * Binary search over the fixed-width entries of a single token length.
 *
 * <p>Token texts are looked up in {@code ctext} by position first and are
 * read from {@code inY} (and then stored in {@code ctext}) only on a miss.
 *
 * @param token token to look for
 * @param start position of the first entry
 * @param end upper limit of the search range
 * @param ti token length of the entries (entry width is {@code ti + ENTRY})
 * @return position where the key was found, or would have been found
 */
private int find(final byte[] token, final int start, final int end, final int ti) {
  final int width = ti + ENTRY;
  int lo = 0;
  int hi = (end - start) / width;

  while (lo <= hi) {
    final int mid = (lo + hi) >>> 1;
    final int pos = start + mid * width;

    byte[] text = ctext.get(pos);
    if (text == null) {
      text = inY.readBytes(pos, ti);
      ctext.put(pos, text);
    }

    final int cmp = diff(text, token);
    if (cmp > 0) {
      hi = mid - 1;
    } else if (cmp < 0) {
      lo = mid + 1;
    } else {
      return pos;
    }
  }
  // not found: lo is the index at which the search ended
  return start + lo * width;
}
/**
 * Evaluates a fuzzy search: collects the occurrences of all indexed tokens
 * that {@code ls.similar} accepts for the given token and error bound.
 *
 * <p>Only token lengths within {@code k} of the search token's length are
 * inspected (at least 1, at most the last slot of {@code tp}); every entry of
 * such a length is tested.
 *
 * @param token token to look for
 * @param k number of errors allowed
 * @return union of the iterators of all similar tokens; the empty iterator if
 *         there are none
 */
private synchronized IndexIterator fuzzy(final byte[] token, final int k) {
  FTIndexIterator result = FTIndexIterator.FTEMPTY;

  final int tokLen = token.length;
  final int slots = tp.length;
  final int maxLen = Math.min(slots - 1, tokLen + k);
  final int minLen = Math.max(1, tokLen - k);

  for (int len = minLen; len <= maxLen; len++) {
    final int first = tp[len];
    if (first == -1) {
      continue;
    }

    // end of this length's entries: next populated slot (-1 if there is none)
    int end = -1;
    for (int n = len + 1; n < slots && end == -1; n++) {
      end = tp[n];
    }

    final int width = len + ENTRY;
    for (int pos = first; pos < end; pos += width) {
      if (ls.similar(inY.readBytes(pos, len), token, k)) {
        result = FTIndexIterator.union(
            iter(pointer(pos, len), size(pos, len), inZ, token), result);
      }
    }
  }
  return result;
}