/** * Evaluates the full-text match. * * @param qc query context * @return number of tokens, used for scoring * @throws QueryException query exception */ private int contains(final QueryContext qc) throws QueryException { first = true; final FTLexer lexer = ftt.lexer(qc.ftToken); // use faster evaluation for default options int num = 0; if (fast) { for (final byte[] t : tokens) { final FTTokens qtok = ftt.cache(t); num = Math.max(num, ftt.contains(qtok, lexer) * qtok.length()); } return num; } // find and count all occurrences final boolean all = mode == FTMode.ALL || mode == FTMode.ALL_WORDS; int oc = 0; for (final byte[] w : unique(tokens(qc))) { final FTTokens qtok = ftt.cache(w); final int o = ftt.contains(qtok, lexer); if (all && o == 0) return 0; num = Math.max(num, o * qtok.length()); oc += o; } // check if occurrences are in valid range. if yes, return number of tokens final long mn = occ != null ? toLong(occ[0], qc) : 1; final long mx = occ != null ? toLong(occ[1], qc) : Long.MAX_VALUE; if (mn == 0 && oc == 0) matches = FTNot.not(matches); return oc >= mn && oc <= mx ? Math.max(1, num) : 0; }
@Override public boolean indexAccessible(final IndexInfo ii) { /* If the following conditions yield true, the index is accessed: * - all query terms are statically available * - no FTTimes option is specified * - explicitly set case, diacritics and stemming match options do not * conflict with index options. */ data = ii.ic.data; final MetaData md = data.meta; final FTOpt fto = ftt.opt; /* Index will be applied if no explicit match options have been set * that conflict with the index options. As a consequence, though, index- * based querying might yield other results than sequential scanning. */ if (occ != null || fto.cs != null && md.casesens == (fto.cs == FTCase.INSENSITIVE) || fto.isSet(DC) && md.diacritics != fto.is(DC) || fto.isSet(ST) && md.stemming != fto.is(ST) || fto.ln != null && !fto.ln.equals(md.language)) return false; // adopt database options to tokenizer fto.copy(md); // estimate costs if text is not known at compile time if (tokens == null) { ii.costs = Math.max(2, data.meta.size / 30); return true; } // summarize number of hits; break loop if no hits are expected final FTLexer ft = new FTLexer(fto); ii.costs = 0; for (byte[] t : tokens) { ft.init(t); while (ft.hasNext()) { final byte[] tok = ft.nextToken(); if (fto.sw != null && fto.sw.contains(tok)) continue; if (fto.is(WC)) { // don't use index if one of the terms starts with a wildcard t = ft.get(); if (t[0] == '.') return false; // don't use index if certain characters or more than 1 dot are found int d = 0; for (final byte w : t) { if (w == '{' || w == '\\' || w == '.' && ++d > 1) return false; } } // favor full-text index requests over exact queries final int costs = data.costs(ft); if (costs != 0) ii.costs += Math.max(2, costs / 100); } } return true; }
/** * Performs a wildcard search for the specified token. * * @param token token to look for * @return iterator */ private synchronized IndexIterator wc(final byte[] token) { final FTIndexIterator it = FTIndexIterator.FTEMPTY; final FTWildcard wc = new FTWildcard(token); if (!wc.parse()) return it; final IntList pr = new IntList(); final IntList ps = new IntList(); final byte[] pref = wc.prefix(); final int pl = pref.length, tl = tp.length; final int l = Math.min(tl - 1, wc.max()); for (int ti = pl; ti <= l; ti++) { int i = tp[ti]; if (i == -1) continue; int c = ti + 1; int e = -1; while (c < tl && e == -1) e = tp[c++]; i = find(pref, i, e, ti); while (i < e) { final byte[] t = inY.readBytes(i, ti); if (!startsWith(t, pref)) break; if (wc.match(t)) { inZ.cursor(pointer(i, ti)); final int s = size(i, ti); for (int d = 0; d < s; d++) { pr.add(inZ.readNum()); ps.add(inZ.readNum()); } } i += ti + ENTRY; } } return iter(new FTCache(pr, ps), token); }
/** * Merges two matches. * * @param i1 first item * @param i2 second item */ private static void and(final FTNode i1, final FTNode i2) { final FTMatches all = new FTMatches((byte) Math.max(i1.matches().pos, i2.matches().pos)); for (final FTMatch s1 : i1.matches()) { for (final FTMatch s2 : i2.matches()) { all.add(new FTMatch(s1.size() + s2.size()).add(s1).add(s2)); } } i1.score(Scoring.avg(i1.score() + i2.score(), 2)); i1.matches(all); }
/** * Performs a fuzzy search for the specified token with a maximum number of errors. * * @param token token to look for * @param k number of errors allowed * @return iterator */ private synchronized IndexIterator fuzzy(final byte[] token, final int k) { FTIndexIterator it = FTIndexIterator.FTEMPTY; final int tokl = token.length, tl = tp.length; final int e = Math.min(tl - 1, tokl + k); int s = Math.max(1, tokl - k) - 1; while (++s <= e) { int p = tp[s]; if (p == -1) continue; int t = s + 1, r = -1; while (t < tl && r == -1) r = tp[t++]; while (p < r) { if (ls.similar(inY.readBytes(p, s), token, k)) { it = FTIndexIterator.union(iter(pointer(p, s), size(p, s), inZ, token), it); } p += s + ENTRY; } } return it; }
@Override public synchronized int costs(final IndexToken it) { final byte[] tok = it.get(); if (tok.length > data.meta.maxlen) return Integer.MAX_VALUE; // estimate costs for queries which stretch over multiple index entries final FTOpt opt = ((FTLexer) it).ftOpt(); if (opt.is(FZ) || opt.is(WC)) return Math.max(1, data.meta.size >> 4); return entry(tok).size; }