private void filterEntries() throws FileNotFoundException, IOException { final IntSet rejected = new IntOpenHashSet(); WeightedTokenSource entriesSource = BybloIO.openEntriesSource(activeEntriesFile, getCharset(), getIndexDeligate()); File outputFile = tempFiles.createFile(); WeightedTokenSink entriesSink = BybloIO.openEntriesSink(outputFile, getCharset(), getIndexDeligate()); progress.setMessage("Filtering entries."); final int filteredEntry = getIndexDeligate().getEntryEnumerator().indexOf(FILTERED_STRING); double filteredWeight = 0; long inCount = 0; long outCount = 0; while (entriesSource.hasNext()) { ++inCount; Weighted<Token> record = entriesSource.read(); if (record.record().id() == filteredEntry) { filteredWeight += record.weight(); } else if (acceptEntry.apply(record)) { entriesSink.write(record); ++outCount; } else { rejected.add(record.record().id()); filteredWeight += record.weight(); } if ((inCount % PROGRESS_INTERVAL == 0 || !entriesSource.hasNext()) && LOG.isInfoEnabled()) { progress.setMessage(format("Accepted {0} of {1} entries.", outCount, inCount)); LOG.debug(MiscUtil.memoryInfoString()); } } if (filteredWeight != 0) { entriesSink.write(new Weighted<Token>(new Token(filteredEntry), filteredWeight)); } entriesSource.close(); entriesSink.flush(); entriesSink.close(); if (!activeEntriesFile.equals(inputEntriesFile)) { activeEntriesFile.delete(); } entryFilterRequired = false; activeEntriesFile = outputFile; // Update the feature acceptance predicate if (rejected.size() > 0) { eventFilterRequired = true; acceptEvent = Predicates2.and( acceptEvent, Predicates2.compose(Predicates2.not(Predicates2.in(rejected)), eventEntryId())); } }