@Override public boolean indexAccessible(final IndexInfo ii) { /* If the following conditions yield true, the index is accessed: * - all query terms are statically available * - no FTTimes option is specified * - explicitly set case, diacritics and stemming match options do not * conflict with index options. */ data = ii.ic.data; final MetaData md = data.meta; final FTOpt fto = ftt.opt; /* Index will be applied if no explicit match options have been set * that conflict with the index options. As a consequence, though, index- * based querying might yield other results than sequential scanning. */ if (occ != null || fto.cs != null && md.casesens == (fto.cs == FTCase.INSENSITIVE) || fto.isSet(DC) && md.diacritics != fto.is(DC) || fto.isSet(ST) && md.stemming != fto.is(ST) || fto.ln != null && !fto.ln.equals(md.language)) return false; // adopt database options to tokenizer fto.copy(md); // estimate costs if text is not known at compile time if (tokens == null) { ii.costs = Math.max(2, data.meta.size / 30); return true; } // summarize number of hits; break loop if no hits are expected final FTLexer ft = new FTLexer(fto); ii.costs = 0; for (byte[] t : tokens) { ft.init(t); while (ft.hasNext()) { final byte[] tok = ft.nextToken(); if (fto.sw != null && fto.sw.contains(tok)) continue; if (fto.is(WC)) { // don't use index if one of the terms starts with a wildcard t = ft.get(); if (t[0] == '.') return false; // don't use index if certain characters or more than 1 dot are found int d = 0; for (final byte w : t) { if (w == '{' || w == '\\' || w == '.' && ++d > 1) return false; } } // favor full-text index requests over exact queries final int costs = data.costs(ft); if (costs != 0) ii.costs += Math.max(2, costs / 100); } } return true; }
@Override public synchronized IndexIterator iter(final IndexToken it) { final byte[] tok = it.get(); // wildcard search final FTLexer lexer = (FTLexer) it; final FTOpt opt = lexer.ftOpt(); if (opt.is(WC)) return wc(tok); // fuzzy search if (opt.is(FZ)) return fuzzy(tok, lexer.lserror(tok)); // return cached or new result final IndexEntry e = entry(tok); return e.size > 0 ? iter(e.offset, e.size, inZ, tok) : FTIndexIterator.FTEMPTY; }
/** * Caches and returns all unique tokens specified in a query. * * @param list token list * @return token set */ private TokenSet unique(final TokenList list) { // cache all query tokens in a set (duplicates are removed) final TokenSet ts = new TokenSet(); switch (mode) { case ALL: case ANY: for (final byte[] t : list) ts.add(t); break; case ALL_WORDS: case ANY_WORD: final FTLexer l = new FTLexer(ftt.opt); for (final byte[] t : list) { l.init(t); while (l.hasNext()) ts.add(l.nextToken()); } break; case PHRASE: final TokenBuilder tb = new TokenBuilder(); for (final byte[] t : list) tb.add(t).add(' '); ts.add(tb.trim().finish()); } return ts; }
/** * Returns a scan-based index iterator. * * @param lex lexer, including the queried value * @return node iterator * @throws QueryException query exception */ private FTIndexIterator scan(final FTLexer lex) throws QueryException { final FTLexer input = new FTLexer(ftt.opt); final FTTokens fttokens = ftt.cache(lex.get()); return new FTIndexIterator() { final int sz = data.meta.size; int pre = -1, ps; @Override public int pre() { return pre; } @Override public boolean more() { while (++pre < sz) { if (data.kind(pre) != Data.TEXT) continue; input.init(data.text(pre, true)); matches.reset(ps); try { if (ftt.contains(fttokens, input) != 0) return true; } catch (final QueryException ignore) { // ignore exceptions } } return false; } @Override public FTMatches matches() { return matches; } @Override public void pos(final int p) { ps = p; } @Override public int size() { // worst case return Math.max(1, sz >>> 1); } }; }