Ejemplo n.º 1
0
  /**
   * Estimates the costs for looking up the specified token in this index.
   *
   * @param it index token; cast to {@link FTLexer} to read its full-text options
   * @return {@link Integer#MAX_VALUE} if the token is longer than
   *   {@code data.meta.maxlen}; a size-based estimate for fuzzy and wildcard
   *   requests; otherwise the size of the matching index entry
   */
  @Override
  public synchronized int costs(final IndexToken it) {
    final byte[] token = it.get();
    // tokens exceeding the maximum length stored in the meta data get the highest costs
    if (token.length > data.meta.maxlen) {
      return Integer.MAX_VALUE;
    }

    // fuzzy and wildcard queries stretch over multiple index entries:
    // use a sixteenth of the meta data size (but at least 1) as estimate
    final FTOpt options = ((FTLexer) it).ftOpt();
    final boolean multipleEntries = options.is(FZ) || options.is(WC);
    return multipleEntries ? Math.max(1, data.meta.size >> 4) : entry(token).size;
  }
Ejemplo n.º 2
0
  /**
   * Returns an iterator over the index results for the specified token.
   * Wildcard requests are checked first, then fuzzy requests; all other
   * tokens are resolved via a direct entry lookup.
   *
   * @param it index token; cast to {@link FTLexer} to read its full-text options
   * @return iterator over the hits, or {@link FTIndexIterator#FTEMPTY} if the
   *   looked-up entry has no results
   */
  @Override
  public synchronized IndexIterator iter(final IndexToken it) {
    final byte[] token = it.get();
    final FTLexer lex = (FTLexer) it;
    final FTOpt options = lex.ftOpt();

    // wildcard search takes precedence over fuzzy search
    if (options.is(WC)) {
      return wc(token);
    }
    if (options.is(FZ)) {
      return fuzzy(token, lex.lserror(token));
    }

    // plain lookup: entries without results yield the empty iterator
    final IndexEntry hit = entry(token);
    if (hit.size <= 0) {
      return FTIndexIterator.FTEMPTY;
    }
    return iter(hit.offset, hit.size, inZ, token);
  }
Ejemplo n.º 3
0
  /**
   * Checks if the full-text index can be used for this expression, and
   * stores a cost estimate in {@code ii.costs} if it can.
   *
   * The index is rejected if an occurrence (FTTimes) option is specified, if
   * explicitly set case, diacritics, stemming or language options conflict
   * with the index options, or if a wildcard term has a shape that is not
   * supported (see below). As the remaining options are adopted from the
   * index, index-based querying might yield other results than sequential
   * scanning.
   *
   * Side effects: assigns {@code data} from the index info and copies the
   * meta data options into the full-text options of this expression.
   *
   * @param ii index info; its {@code costs} field is updated
   * @return {@code true} if the index will be applied
   */
  @Override
  public boolean indexAccessible(final IndexInfo ii) {
    data = ii.ic.data;
    final MetaData md = data.meta;
    final FTOpt fto = ftt.opt;

    // no index access if an FTTimes option is specified
    if (occ != null) {
      return false;
    }
    // no index access if explicitly set match options conflict with the index options
    if (fto.cs != null && md.casesens == (fto.cs == FTCase.INSENSITIVE)) {
      return false;
    }
    if (fto.isSet(DC) && md.diacritics != fto.is(DC)) {
      return false;
    }
    if (fto.isSet(ST) && md.stemming != fto.is(ST)) {
      return false;
    }
    if (fto.ln != null && !fto.ln.equals(md.language)) {
      return false;
    }

    // adopt database options to tokenizer
    fto.copy(md);

    // text is not known at compile time: fall back to a size-based estimate
    if (tokens == null) {
      ii.costs = Math.max(2, data.meta.size / 30);
      return true;
    }

    // summarize the estimated costs of all query terms
    final FTLexer lexer = new FTLexer(fto);
    ii.costs = 0;
    for (final byte[] text : tokens) {
      lexer.init(text);
      while (lexer.hasNext()) {
        final byte[] term = lexer.nextToken();
        // stop words do not contribute to the costs
        final boolean stopWord = fto.sw != null && fto.sw.contains(term);
        if (!stopWord) {
          if (fto.is(WC)) {
            final byte[] pattern = lexer.get();
            // don't use index if the term starts with a wildcard
            if (pattern[0] == '.') {
              return false;
            }
            // don't use index if a curly brace, a backslash or more than one dot is found
            int dots = 0;
            for (int p = 0; p < pattern.length; p++) {
              final byte ch = pattern[p];
              if (ch == '{' || ch == '\\') {
                return false;
              }
              if (ch == '.' && ++dots > 1) {
                return false;
              }
            }
          }
          // favor full-text index requests over exact queries:
          // only a hundredth of the index costs (but at least 2) is added
          final int estimate = data.costs(lexer);
          if (estimate != 0) {
            ii.costs += Math.max(2, estimate / 100);
          }
        }
      }
    }
    return true;
  }
Ejemplo n.º 4
0
 /**
  * Constructor. Initializes the case, wildcard, diacritics and stemming
  * settings from the specified options. Without options, case defaults to
  * {@link FTCase#INSENSITIVE} and all flags are disabled.
  *
  * @param fto (optional) full-text options; may be {@code null}
  */
 JapaneseTokenizer(final FTOpt fto) {
   if (fto == null) {
     // no options supplied: fall back to defaults
     cs = FTCase.INSENSITIVE;
     wc = false;
     dc = false;
     st = false;
   } else {
     // an unset case option is treated as case-insensitive
     cs = fto.cs != null ? fto.cs : FTCase.INSENSITIVE;
     wc = fto.is(WC);
     dc = fto.is(DC);
     st = fto.is(ST);
   }
 }