Пример #1
0
  /**
   * Runs the full-text match against the current full-text token and reports
   * a token count that is used for scoring.
   *
   * <p>With default options ({@code fast}) only the best weighted hit of the
   * pre-computed tokens is determined. Otherwise, all occurrences of the
   * distinct query tokens are summed up and checked against the optional
   * occurrence range ({@code occ}).</p>
   *
   * @param qc query context
   * @return number of tokens, used for scoring; {@code 0} if there is no match
   * @throws QueryException query exception
   */
  private int contains(final QueryContext qc) throws QueryException {
    first = true;
    final FTLexer lexer = ftt.lexer(qc.ftToken);

    // default options: no occurrence counting required, keep the best hit only
    if (fast) {
      int best = 0;
      for (final byte[] token : tokens) {
        final FTTokens cached = ftt.cache(token);
        final int weight = ftt.contains(cached, lexer) * cached.length();
        if (weight > best) best = weight;
      }
      return best;
    }

    // count the occurrences of each distinct token
    final boolean needAll = mode == FTMode.ALL || mode == FTMode.ALL_WORDS;
    int top = 0;
    int total = 0;
    for (final byte[] term : unique(tokens(qc))) {
      final FTTokens cached = ftt.cache(term);
      final int hits = ftt.contains(cached, lexer);
      // in the "all" modes, a single missing token rejects the whole match
      if (needAll && hits == 0) return 0;
      final int weight = hits * cached.length();
      if (weight > top) top = weight;
      total += hits;
    }

    // determine the allowed occurrence range (default: at least one hit)
    final long min;
    final long max;
    if (occ == null) {
      min = 1;
      max = Long.MAX_VALUE;
    } else {
      min = toLong(occ[0], qc);
      max = toLong(occ[1], qc);
    }

    // zero occurrences were requested and found: negate the collected matches
    if (min == 0 && total == 0) matches = FTNot.not(matches);

    // outside the range: no match; otherwise report at least one token
    if (total < min || total > max) return 0;
    return Math.max(1, top);
  }
Пример #2
0
  /**
   * Checks whether the full-text index can be used for this expression and,
   * if so, stores a cost estimate in {@code ii.costs}.
   *
   * <p>The index is accessed if all of the following conditions hold:</p>
   * <ul>
   *   <li>no occurrence (FTTimes) option is specified,</li>
   *   <li>explicitly set case, diacritics, stemming and language options do
   *       not conflict with the index options,</li>
   *   <li>with wildcards enabled, no term starts with a dot, contains a curly
   *       brace or a backslash, or contains more than one dot.</li>
   * </ul>
   *
   * <p>As only conflicting options are rejected, index-based querying might
   * yield other results than sequential scanning.</p>
   *
   * @param ii index info; its {@code costs} field is updated
   * @return {@code true} if the index can be applied
   */
  @Override
  public boolean indexAccessible(final IndexInfo ii) {
    data = ii.ic.data;
    final MetaData meta = data.meta;
    final FTOpt opt = ftt.opt;

    // occurrence constraints cannot be answered by the index
    if (occ != null) return false;
    // reject explicit match options that conflict with the index options
    if (opt.cs != null && meta.casesens == (opt.cs == FTCase.INSENSITIVE)) return false;
    if (opt.isSet(DC) && meta.diacritics != opt.is(DC)) return false;
    if (opt.isSet(ST) && meta.stemming != opt.is(ST)) return false;
    if (opt.ln != null && !opt.ln.equals(meta.language)) return false;

    // adopt database options to tokenizer
    opt.copy(meta);

    // text is not known at compile time: use a rough estimate
    if (tokens == null) {
      ii.costs = Math.max(2, meta.size / 30);
      return true;
    }

    // summarize the estimated costs of all query terms;
    // terms with zero costs do not contribute to the sum
    final FTLexer lexer = new FTLexer(opt);
    ii.costs = 0;
    for (final byte[] input : tokens) {
      lexer.init(input);
      while (lexer.hasNext()) {
        final byte[] token = lexer.nextToken();
        // ignore stop words
        if (opt.sw != null && opt.sw.contains(token)) continue;

        if (opt.is(WC)) {
          final byte[] term = lexer.get();
          // don't use index if the term starts with a wildcard
          if (term[0] == '.') return false;
          // don't use index for curly braces, backslashes or a second dot
          int dots = 0;
          for (final byte b : term) {
            if (b == '{' || b == '\\') return false;
            if (b == '.' && ++dots > 1) return false;
          }
        }

        // favor full-text index requests over exact queries
        final int hits = data.costs(lexer);
        if (hits != 0) ii.costs += Math.max(2, hits / 100);
      }
    }
    return true;
  }
Пример #3
0
 /**
  * Merges the matches of the second item into the first one.
  *
  * <p>Every match of {@code i1} is combined with every match of {@code i2};
  * the resulting matches and the averaged score are assigned to {@code i1}.
  * {@code i2} is only read.</p>
  *
  * @param i1 first item; receives the merged matches and the new score
  * @param i2 second item
  */
 private static void and(final FTNode i1, final FTNode i2) {
   // the merged container adopts the larger position value of both inputs
   final byte pos = (byte) Math.max(i1.matches().pos, i2.matches().pos);
   final FTMatches merged = new FTMatches(pos);

   // build all pairwise combinations of the two match lists
   for (final FTMatch left : i1.matches()) {
     for (final FTMatch right : i2.matches()) {
       final int capacity = left.size() + right.size();
       merged.add(new FTMatch(capacity).add(left).add(right));
     }
   }

   i1.score(Scoring.avg(i1.score() + i2.score(), 2));
   i1.matches(merged);
 }
Пример #4
0
  /**
   * Compiles the filter expression, excluding the root node.
   *
   * <p>Derives the result size and type from the root expression and the
   * positional information ({@code pos}, {@code last}), and then chooses the
   * cheapest evaluation strategy: a pre-evaluated result, a simple iterator,
   * a positional iterator, or this expression itself.</p>
   *
   * @param ctx query context
   * @return compiled expression
   */
  private Expr opt(final QueryContext ctx) {
    final SeqType rootType = root.type();
    final long rootSize = root.size();

    if (rootSize == -1) {
      // size of root is unknown: only the occurrence indicator can be derived
      final Occ o = rootType.zeroOrOne() ? Occ.ZERO_ONE : Occ.ZERO_MORE;
      type = SeqType.get(rootType.type, o);
    } else {
      // size of root is known: compute the number of remaining results
      if (pos != null) {
        final long fromMin = Math.max(0, rootSize + 1 - pos.min);
        final long beyondMax = Math.max(0, rootSize - pos.max);
        size = fromMin - beyondMax;
      } else if (last) {
        size = rootSize > 0 ? 1 : 0;
      }
      // no results will remain: return empty sequence
      if (size == 0) return optPre(null, ctx);
      type = SeqType.get(rootType.type, size);
    }

    // no numeric predicates.. use simple iterator
    if (!super.has(Flag.FCS)) return new IterFilter(this);

    // one single position() or last() function specified: return single value
    final boolean single = preds.length == 1;
    if (single && (last || pos != null) && root.isValue() && rootType.one()
        && (last || pos.min == 1 && pos.max == 1)) return optPre(root, ctx);

    // only choose deterministic and context-independent offsets; e.g., skip:
    // (1 to 10)[random:integer(10)]  or  (1 to 10)[.]
    boolean off = false;
    if (single) {
      final Expr pred = preds[0];
      final SeqType predType = pred.type();
      off = predType.type.isNumber() && predType.zeroOrOne()
          && !(pred.has(Flag.CTX) || pred.has(Flag.NDT));
      // a numeric offset yields at most one result
      if (off) type = SeqType.get(type.type, Occ.ZERO_ONE);
    }

    // iterator for simple numeric predicate
    if (off || useIterator()) return new IterPosFilter(this, off);
    return this;
  }