Beispiel #1
0
  /**
   * Evaluates the full-text match.
   *
   * @param qc query context
   * @return number of tokens, used for scoring
   * @throws QueryException query exception
   */
  private int contains(final QueryContext qc) throws QueryException {
    first = true;
    final FTLexer lexer = ftt.lexer(qc.ftToken);

    // use faster evaluation for default options
    int num = 0;
    if (fast) {
      for (final byte[] t : tokens) {
        final FTTokens qtok = ftt.cache(t);
        num = Math.max(num, ftt.contains(qtok, lexer) * qtok.length());
      }
      return num;
    }

    // find and count all occurrences
    final boolean all = mode == FTMode.ALL || mode == FTMode.ALL_WORDS;
    int oc = 0;
    for (final byte[] w : unique(tokens(qc))) {
      final FTTokens qtok = ftt.cache(w);
      final int o = ftt.contains(qtok, lexer);
      if (all && o == 0) return 0;
      num = Math.max(num, o * qtok.length());
      oc += o;
    }

    // check if occurrences are in valid range. if yes, return number of tokens
    final long mn = occ != null ? toLong(occ[0], qc) : 1;
    final long mx = occ != null ? toLong(occ[1], qc) : Long.MAX_VALUE;
    if (mn == 0 && oc == 0) matches = FTNot.not(matches);
    return oc >= mn && oc <= mx ? Math.max(1, num) : 0;
  }
Beispiel #2
0
  @Override
  public boolean indexAccessible(final IndexInfo ii) {
    /* If the following conditions yield true, the index is accessed:
     * - all query terms are statically available
     * - no FTTimes option is specified
     * - explicitly set case, diacritics and stemming match options do not
     *   conflict with index options. */
    data = ii.ic.data;
    final MetaData md = data.meta;
    final FTOpt fto = ftt.opt;

    /* Index will be applied if no explicit match options have been set
     * that conflict with the index options. As a consequence, though, index-
     * based querying might yield other results than sequential scanning. */
    if (occ != null
        || fto.cs != null && md.casesens == (fto.cs == FTCase.INSENSITIVE)
        || fto.isSet(DC) && md.diacritics != fto.is(DC)
        || fto.isSet(ST) && md.stemming != fto.is(ST)
        || fto.ln != null && !fto.ln.equals(md.language)) return false;

    // adopt database options to tokenizer
    fto.copy(md);

    // estimate costs if text is not known at compile time
    if (tokens == null) {
      ii.costs = Math.max(2, data.meta.size / 30);
      return true;
    }

    // summarize number of hits; break loop if no hits are expected
    final FTLexer ft = new FTLexer(fto);
    ii.costs = 0;
    for (byte[] t : tokens) {
      ft.init(t);
      while (ft.hasNext()) {
        final byte[] tok = ft.nextToken();
        if (fto.sw != null && fto.sw.contains(tok)) continue;

        if (fto.is(WC)) {
          // don't use index if one of the terms starts with a wildcard
          t = ft.get();
          if (t[0] == '.') return false;
          // don't use index if certain characters or more than 1 dot are found
          int d = 0;
          for (final byte w : t) {
            if (w == '{' || w == '\\' || w == '.' && ++d > 1) return false;
          }
        }
        // favor full-text index requests over exact queries
        final int costs = data.costs(ft);
        if (costs != 0) ii.costs += Math.max(2, costs / 100);
      }
    }
    return true;
  }
Beispiel #3
0
  /**
   * Returns a pre value.
   *
   * @param id unique node id
   * @return pre value or -1 if id was not found
   */
  final int preold(final int id) {
    // find pre value in table
    for (int p = Math.max(0, id); p < meta.size; ++p) if (id == id(p)) return p;
    final int ps = Math.min(meta.size, id);
    for (int p = 0; p < ps; ++p) if (id == id(p)) return p;

    // id not found
    return -1;
  }
Beispiel #4
0
  @Override
  public synchronized int costs(final IndexToken it) {
    final byte[] tok = it.get();
    if (tok.length > data.meta.maxlen) return Integer.MAX_VALUE;

    // estimate costs for queries which stretch over multiple index entries
    final FTOpt opt = ((FTLexer) it).ftOpt();
    if (opt.is(FZ) || opt.is(WC)) return Math.max(1, data.meta.size >> 4);

    return entry(tok).size;
  }
Beispiel #5
0
  /**
   * Performs a fuzzy search for the specified token with a maximum number of errors.
   *
   * @param token token to look for
   * @param k number of errors allowed
   * @return iterator
   */
  private synchronized IndexIterator fuzzy(final byte[] token, final int k) {
    FTIndexIterator it = FTIndexIterator.FTEMPTY;
    final int tokl = token.length, tl = tp.length;
    final int e = Math.min(tl - 1, tokl + k);
    int s = Math.max(1, tokl - k) - 1;

    while (++s <= e) {
      int p = tp[s];
      if (p == -1) continue;
      int t = s + 1, r = -1;
      while (t < tl && r == -1) r = tp[t++];
      while (p < r) {
        if (ls.similar(inY.readBytes(p, s), token, k)) {
          it = FTIndexIterator.union(iter(pointer(p, s), size(p, s), inZ, token), it);
        }
        p += s + ENTRY;
      }
    }
    return it;
  }
Beispiel #6
0
 /**
  * Prints the current stack trace to System.err.
  *
  * @param i number of steps to print
  */
 public static void stack(final int i) {
   errln("You're here:");
   final String[] stack = toArray(new Throwable());
   final int l = Math.min(Math.max(2, i + 2), stack.length);
   for (int s = 2; s < l; ++s) errln(stack[s]);
 }
Beispiel #7
0
  /**
   * Analyzes the specified patterns.
   *
   * @param patterns patterns
   * @return picture variables
   */
  private Picture[] analyze(final byte[][] patterns) {
    // pictures
    final int picL = patterns.length;
    final Picture[] pics = new Picture[picL];

    // analyze patterns
    for (int p = 0; p < picL; p++) {
      final byte[] pt = patterns[p];
      final Picture pic = new Picture();

      // position (integer/fractional)
      int pos = 0;
      // active character found
      boolean act = false;
      // number of characters after exponent
      int exp = -1;
      // number of optional characters
      final int[] opt = new int[2];

      // loop through all characters
      final int pl = pt.length;
      for (int i = 0, cl; i < pl; i += cl) {
        final int ch = ch(pt, i);
        cl = cl(pt, i);
        boolean active = contains(actives, ch);

        if (ch == decimal) {
          ++pos;
          act = false;
        } else if (ch == optional) {
          opt[pos]++;
        } else if (ch == exponent) {
          if (act && containsActive(pt, i + cl)) {
            exp = 0;
          } else {
            active = false;
          }
        } else if (ch == grouping) {
          if (pos == 0) pic.group[pos] = Array.add(pic.group[pos], pic.min[pos] + opt[pos]);
        } else if (contains(digits, ch)) {
          if (exp == -1) pic.min[pos]++;
          else exp++;
        }

        if (active) {
          act = true;
        } else {
          // passive characters
          pic.pc |= ch == percent;
          pic.pm |= ch == permille;
          // prefixes/suffixes
          pic.prefSuf[pos == 0 && act ? pos + 1 : pos].add(ch);
        }
      }
      // finalize integer-part-grouping-positions
      final int[] igp = pic.group[0];
      final int igl = igp.length;
      for (int g = 0; g < igl; ++g) igp[g] = pic.min[0] + opt[0] - igp[g];

      // check if integer-part-grouping-positions are regular
      // if yes, they are replaced with a single position
      if (igl > 1) {
        boolean reg = true;
        final int i = igp[igl - 1];
        for (int g = igl - 2; g >= 0; --g) reg &= i * igl == igp[g];
        if (reg) pic.group[0] = new int[] {i};
      }

      pic.maxFrac = pic.min[1] + opt[1];
      pic.minExp = Math.max(0, exp);
      pics[p] = pic;
    }
    return pics;
  }