/**
 * Serializes a bloom filter to {@code bloomOutput}, preferring the downsized
 * version offered by {@code bloomFilterFactory} when one is returned.
 *
 * @param bloomOutput destination the chosen filter is serialized to
 * @param bloomFilter the filter as built; written as-is when
 *        {@code bloomFilterFactory.downsize} returns {@code null}
 * @param fieldInfo the field the filter belongs to, handed to the factory
 * @throws IOException if serializing the filter fails
 */
private void saveAppropriatelySizedBloomFilter(
    IndexOutput bloomOutput, FuzzySet bloomFilter, FieldInfo fieldInfo) throws IOException {
  final FuzzySet downsized = bloomFilterFactory.downsize(fieldInfo, bloomFilter);
  // A null result from the factory means "no replacement"; keep the original.
  final FuzzySet filterToWrite = (downsized != null) ? downsized : bloomFilter;
  filterToWrite.serialize(bloomOutput);
}
/**
 * Opens the segment's bloom file and loads everything stored in it.
 *
 * <p>The file is read in this order: codec header, name of the delegate
 * postings format, number of bloom filters, then for each filter its field
 * number followed by the serialized {@link FuzzySet}. Each filter is
 * registered in {@code bloomsByFieldName} under the name of the field that
 * the field number resolves to in {@code state.fieldInfos}.
 *
 * @param state read state supplying the directory, segment name and suffix,
 *        IO context and field infos
 * @throws IOException if the bloom file cannot be opened or read, or the
 *         delegate fields producer cannot be created
 */
public BloomFilteredFieldsProducer(SegmentReadState state) throws IOException {
  final String bloomFile = IndexFileNames.segmentFileName(
      state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);

  boolean success = false;
  IndexInput bloomIn = null;
  try {
    bloomIn = state.directory.openInput(bloomFile, state.context);
    CodecUtil.checkHeader(bloomIn, BLOOM_CODEC_NAME, BLOOM_CODEC_VERSION, BLOOM_CODEC_VERSION);

    // The delegate postings format is stored by name right after the header.
    final String delegateName = bloomIn.readString();
    this.delegateFieldsProducer = PostingsFormat.forName(delegateName).fieldsProducer(state);

    // Each entry is a field number followed by that field's serialized filter.
    for (int remaining = bloomIn.readInt(); remaining > 0; remaining--) {
      final int fieldNum = bloomIn.readInt();
      final FuzzySet bloom = FuzzySet.deserialize(bloomIn);
      bloomsByFieldName.put(state.fieldInfos.fieldInfo(fieldNum).name, bloom);
    }

    IOUtils.close(bloomIn);
    success = true;
  } finally {
    if (!success) {
      // Something failed part-way through: release the input and the delegate
      // producer without masking the exception already in flight.
      IOUtils.closeWhileHandlingException(bloomIn, delegateFieldsProducer);
    }
  }
}
@Override public void finishTerm(BytesRef text, TermStats stats) throws IOException { // Record this term in our BloomFilter if (stats.docFreq > 0) { bloomFilter.addValue(text); } delegateTermsConsumer.finishTerm(text, stats); }
@Override public final boolean seekExact(BytesRef text, boolean useCache) throws IOException { // The magical fail-fast speed up that is the entire point of all of // this code - save a disk seek if there is a match on an in-memory // structure // that may occasionally give a false positive but guaranteed no false // negatives if (filter.contains(text) == ContainsResult.NO) { return false; } return delegate().seekExact(text, useCache); }