Exemplo n.º 1
0
  /**
   * Filters the active entries file through the {@code acceptEntry} predicate, writing the
   * accepted records to a freshly created temporary file which then becomes the active entries
   * file.
   *
   * <p>The weights of all rejected records, together with the weights of any records that already
   * carry the id enumerated for {@code FILTERED_STRING}, are summed; if the sum is non-zero a
   * single record with that id and the summed weight is appended to the output. The ids of
   * rejected records are collected, and when at least one entry was rejected
   * {@code eventFilterRequired} is set and {@code acceptEvent} is and-ed with a predicate built
   * from {@code eventEntryId()} and the negated membership test on the rejected ids.
   *
   * <p>The source and sink are closed even when reading, filtering or writing fails. The
   * superseded active file is only deleted, and the filter state only updated, after the whole
   * pass completed without an exception.
   *
   * @throws FileNotFoundException propagated from opening the entries source or sink
   * @throws IOException propagated from reading, writing, flushing or closing the entries files
   */
  private void filterEntries() throws FileNotFoundException, IOException {

    final IntSet rejected = new IntOpenHashSet();
    File outputFile;

    final WeightedTokenSource entriesSource =
        BybloIO.openEntriesSource(activeEntriesFile, getCharset(), getIndexDeligate());
    try {
      // Created only after the source opened successfully, as in the original ordering, so a
      // missing input file does not leave an unused temporary file behind.
      outputFile = tempFiles.createFile();

      final WeightedTokenSink entriesSink =
          BybloIO.openEntriesSink(outputFile, getCharset(), getIndexDeligate());
      try {
        progress.setMessage("Filtering entries.");

        final int filteredEntry =
            getIndexDeligate().getEntryEnumerator().indexOf(FILTERED_STRING);
        double filteredWeight = 0;

        long inCount = 0;
        long outCount = 0;
        while (entriesSource.hasNext()) {
          ++inCount;
          Weighted<Token> record = entriesSource.read();

          if (record.record().id() == filteredEntry) {
            // Already the catch-all record: fold its weight in without marking it as rejected.
            filteredWeight += record.weight();
          } else if (acceptEntry.apply(record)) {
            entriesSink.write(record);
            ++outCount;
          } else {
            rejected.add(record.record().id());
            filteredWeight += record.weight();
          }

          // NOTE(review): the guard tests the info level while the log call is at debug level, and
          // the progress message is also skipped when info logging is off - confirm that this is
          // intended before changing it.
          if ((inCount % PROGRESS_INTERVAL == 0 || !entriesSource.hasNext())
              && LOG.isInfoEnabled()) {
            progress.setMessage(format("Accepted {0} of {1} entries.", outCount, inCount));
            LOG.debug(MiscUtil.memoryInfoString());
          }
        }

        if (filteredWeight != 0) {
          entriesSink.write(new Weighted<Token>(new Token(filteredEntry), filteredWeight));
        }

        entriesSink.flush();
      } finally {
        // Release the output handle even if the pass above failed part-way through.
        entriesSink.close();
      }
    } finally {
      // Release the input handle on every path, including failures while opening the sink.
      entriesSource.close();
    }

    // Only an intermediate file is removed; the original input file is never deleted.
    if (!activeEntriesFile.equals(inputEntriesFile) && !activeEntriesFile.delete()) {
      LOG.warn("Failed to delete superseded entries file: " + activeEntriesFile);
    }

    entryFilterRequired = false;
    activeEntriesFile = outputFile;

    // Update the feature acceptance predicate
    if (rejected.size() > 0) {
      eventFilterRequired = true;
      acceptEvent =
          Predicates2.and(
              acceptEvent,
              Predicates2.compose(Predicates2.not(Predicates2.in(rejected)), eventEntryId()));
    }
  }