/** * Evaluates the full-text match. * * @param qc query context * @return number of tokens, used for scoring * @throws QueryException query exception */ private int contains(final QueryContext qc) throws QueryException { first = true; final FTLexer lexer = ftt.lexer(qc.ftToken); // use faster evaluation for default options int num = 0; if (fast) { for (final byte[] t : tokens) { final FTTokens qtok = ftt.cache(t); num = Math.max(num, ftt.contains(qtok, lexer) * qtok.length()); } return num; } // find and count all occurrences final boolean all = mode == FTMode.ALL || mode == FTMode.ALL_WORDS; int oc = 0; for (final byte[] w : unique(tokens(qc))) { final FTTokens qtok = ftt.cache(w); final int o = ftt.contains(qtok, lexer); if (all && o == 0) return 0; num = Math.max(num, o * qtok.length()); oc += o; } // check if occurrences are in valid range. if yes, return number of tokens final long mn = occ != null ? toLong(occ[0], qc) : 1; final long mx = occ != null ? toLong(occ[1], qc) : Long.MAX_VALUE; if (mn == 0 && oc == 0) matches = FTNot.not(matches); return oc >= mn && oc <= mx ? Math.max(1, num) : 0; }
@Override public boolean indexAccessible(final IndexInfo ii) { /* If the following conditions yield true, the index is accessed: * - all query terms are statically available * - no FTTimes option is specified * - explicitly set case, diacritics and stemming match options do not * conflict with index options. */ data = ii.ic.data; final MetaData md = data.meta; final FTOpt fto = ftt.opt; /* Index will be applied if no explicit match options have been set * that conflict with the index options. As a consequence, though, index- * based querying might yield other results than sequential scanning. */ if (occ != null || fto.cs != null && md.casesens == (fto.cs == FTCase.INSENSITIVE) || fto.isSet(DC) && md.diacritics != fto.is(DC) || fto.isSet(ST) && md.stemming != fto.is(ST) || fto.ln != null && !fto.ln.equals(md.language)) return false; // adopt database options to tokenizer fto.copy(md); // estimate costs if text is not known at compile time if (tokens == null) { ii.costs = Math.max(2, data.meta.size / 30); return true; } // summarize number of hits; break loop if no hits are expected final FTLexer ft = new FTLexer(fto); ii.costs = 0; for (byte[] t : tokens) { ft.init(t); while (ft.hasNext()) { final byte[] tok = ft.nextToken(); if (fto.sw != null && fto.sw.contains(tok)) continue; if (fto.is(WC)) { // don't use index if one of the terms starts with a wildcard t = ft.get(); if (t[0] == '.') return false; // don't use index if certain characters or more than 1 dot are found int d = 0; for (final byte w : t) { if (w == '{' || w == '\\' || w == '.' && ++d > 1) return false; } } // favor full-text index requests over exact queries final int costs = data.costs(ft); if (costs != 0) ii.costs += Math.max(2, costs / 100); } } return true; }
/** * Returns a pre value. * * @param id unique node id * @return pre value or -1 if id was not found */ final int preold(final int id) { // find pre value in table for (int p = Math.max(0, id); p < meta.size; ++p) if (id == id(p)) return p; final int ps = Math.min(meta.size, id); for (int p = 0; p < ps; ++p) if (id == id(p)) return p; // id not found return -1; }
@Override public synchronized int costs(final IndexToken it) { final byte[] tok = it.get(); if (tok.length > data.meta.maxlen) return Integer.MAX_VALUE; // estimate costs for queries which stretch over multiple index entries final FTOpt opt = ((FTLexer) it).ftOpt(); if (opt.is(FZ) || opt.is(WC)) return Math.max(1, data.meta.size >> 4); return entry(tok).size; }
/**
 * Performs a fuzzy search for the specified token with a maximum number of errors.
 *
 * <p>Only index entries whose length differs from the token length by at most
 * {@code k} are inspected. All entries accepted by {@code ls.similar} are merged
 * into a single iterator.</p>
 *
 * @param token token to look for
 * @param k number of errors allowed
 * @return iterator over all fuzzy hits; {@code FTIndexIterator.FTEMPTY} if there are none
 */
private synchronized IndexIterator fuzzy(final byte[] token, final int k) {
  final int tokenLen = token.length;
  final int slots = tp.length;
  // range of candidate lengths: at least 1, at most the last slot of 'tp'
  final int minLen = Math.max(1, tokenLen - k);
  final int maxLen = Math.min(slots - 1, tokenLen + k);

  FTIndexIterator result = FTIndexIterator.FTEMPTY;
  for (int len = minLen; len <= maxLen; ++len) {
    // -1 marks a length without entries
    final int start = tp[len];
    if (start == -1) continue;

    // the next assigned slot marks the end of the entries of this length
    // (presumably 'tp' maps token lengths to start offsets -- TODO confirm)
    int end = -1;
    for (int next = len + 1; next < slots && end == -1; ++next) end = tp[next];

    // visit all entries of the current length; nothing is visited if no end was found
    for (int pos = start; pos < end; pos += len + ENTRY) {
      if (ls.similar(inY.readBytes(pos, len), token, k)) {
        result = FTIndexIterator.union(
            iter(pointer(pos, len), size(pos, len), inZ, token), result);
      }
    }
  }
  return result;
}
/**
 * Prints the current stack trace to System.err.
 *
 * <p>The first two entries of the generated trace array are always skipped
 * (presumably the throwable header and the frame of this method itself). At most
 * {@code i} of the following entries are printed; zero or negative values print
 * no entries. Large values such as {@code Integer.MAX_VALUE} print the complete
 * remaining trace.</p>
 *
 * @param i number of steps to print
 */
public static void stack(final int i) {
  errln("You're here:");
  final String[] stack = toArray(new Throwable());
  // compute the upper bound with long arithmetic: 'i + 2' would overflow for
  // values close to Integer.MAX_VALUE and suppress the whole output
  final int l = (int) Math.min(Math.max(2L, i + 2L), stack.length);
  for (int s = 2; s < l; ++s) errln(stack[s]);
}
/** * Analyzes the specified patterns. * * @param patterns patterns * @return picture variables */ private Picture[] analyze(final byte[][] patterns) { // pictures final int picL = patterns.length; final Picture[] pics = new Picture[picL]; // analyze patterns for (int p = 0; p < picL; p++) { final byte[] pt = patterns[p]; final Picture pic = new Picture(); // position (integer/fractional) int pos = 0; // active character found boolean act = false; // number of characters after exponent int exp = -1; // number of optional characters final int[] opt = new int[2]; // loop through all characters final int pl = pt.length; for (int i = 0, cl; i < pl; i += cl) { final int ch = ch(pt, i); cl = cl(pt, i); boolean active = contains(actives, ch); if (ch == decimal) { ++pos; act = false; } else if (ch == optional) { opt[pos]++; } else if (ch == exponent) { if (act && containsActive(pt, i + cl)) { exp = 0; } else { active = false; } } else if (ch == grouping) { if (pos == 0) pic.group[pos] = Array.add(pic.group[pos], pic.min[pos] + opt[pos]); } else if (contains(digits, ch)) { if (exp == -1) pic.min[pos]++; else exp++; } if (active) { act = true; } else { // passive characters pic.pc |= ch == percent; pic.pm |= ch == permille; // prefixes/suffixes pic.prefSuf[pos == 0 && act ? pos + 1 : pos].add(ch); } } // finalize integer-part-grouping-positions final int[] igp = pic.group[0]; final int igl = igp.length; for (int g = 0; g < igl; ++g) igp[g] = pic.min[0] + opt[0] - igp[g]; // check if integer-part-grouping-positions are regular // if yes, they are replaced with a single position if (igl > 1) { boolean reg = true; final int i = igp[igl - 1]; for (int g = igl - 2; g >= 0; --g) reg &= i * igl == igp[g]; if (reg) pic.group[0] = new int[] {i}; } pic.maxFrac = pic.min[1] + opt[1]; pic.minExp = Math.max(0, exp); pics[p] = pic; } return pics; }