Пример #1
0
  /**
   * Builds the full-text index for the data reference: all nodes are visited,
   * the text of each text node is tokenized, accepted tokens are indexed, and
   * the collected tokens are finally written out.
   *
   * @throws IOException I/O Exception
   */
  final void index() throws IOException {
    // get rid of the old index first
    abort();

    final Performance timer = Prop.debug ? new Performance() : null;
    Util.debug(det());

    for (pre = 0; pre < size; ++pre) {
      // check() is invoked once per 0x10000 visited nodes
      if ((pre & 0xFFFF) == 0) check();

      final int kind = data.kind(pre);
      if (kind == Data.TEXT) {
        // scm == 2: text nodes are registered as units
        if (scm == 2) unit.add(pre);

        pos = -1;
        final StopWords stop = lex.ftOpt().sw;
        lex.init(data.text(pre, true));
        while (lex.hasNext()) {
          final byte[] term = lex.nextToken();
          ++pos;

          // tokens exceeding the maximum length are not indexed
          if (term.length > data.meta.maxlen) continue;
          // neither are stopwords (only looked up if stopwords exist)
          if (stop.size() != 0 && stop.contains(term)) continue;

          // every 0x1000 accepted tokens: test if main memory is exhausted;
          // currently no frequency support for tf/idf based scoring,
          // hence partial indexes are only written if scm == 0
          final boolean probe = (ntok++ & 0xFFF) == 0;
          if (probe && scm == 0 && memFull()) {
            writeIndex(csize++);
            Performance.gc(2);
          }
          index(term);
        }
      } else if (scm == 1 && kind == Data.DOC) {
        // scm == 1: document nodes are registered as units
        unit.add(pre);
      }
    }

    // term frequencies are only computed if scm is positive
    if (scm > 0) {
      maxfreq = new int[unit.size() + 1];
      ntoken = new int[nrTokens()];
      token = 0;
      calcFreq();
    }

    // reset the token counter and write the tokens
    token = 0;
    write();

    // update meta data; score bounds are only stored if scm is positive
    if (scm > 0) {
      data.meta.maxscore = max;
      data.meta.minscore = min;
    }
    data.meta.ftxtindex = true;
    Util.memory(timer);
  }
Пример #2
0
  /**
   * Returns an iterator for the specified index token. Depending on the
   * full-text options of the token, a wildcard search, a fuzzy search or a
   * plain lookup of the token is performed.
   *
   * @param it index token; cast to {@link FTLexer} without a type check
   * @return iterator on the results, or {@link FTIndexIterator#FTEMPTY} if the
   *   looked up entry has no positive size
   */
  @Override
  public synchronized IndexIterator iter(final IndexToken it) {
    final byte[] term = it.get();
    final FTLexer ftLexer = (FTLexer) it;
    final FTOpt options = ftLexer.ftOpt();

    // wildcard search takes precedence
    if (options.is(WC)) {
      return wc(term);
    }

    // fuzzy search, with the error bound supplied by the lexer
    if (options.is(FZ)) {
      return fuzzy(term, ftLexer.lserror(term));
    }

    // plain lookup: cached or new result
    final IndexEntry hit = entry(term);
    if (hit.size > 0) {
      return iter(hit.offset, hit.size, inZ, term);
    }
    return FTIndexIterator.FTEMPTY;
  }