public BloomFilteredFieldsProducer(SegmentReadState state) throws IOException {
  String bloomFileName =
      IndexFileNames.segmentFileName(
          state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
  IndexInput bloomIn = null;
  boolean success = false;
  try {
    bloomIn = state.directory.openInput(bloomFileName, state.context);
    CodecUtil.checkHeader(bloomIn, BLOOM_CODEC_NAME, BLOOM_CODEC_VERSION, BLOOM_CODEC_VERSION);
    // // Load the hash function used in the BloomFilter
    // hashFunction = HashFunction.forName(bloomIn.readString());
    // Load the delegate postings format
    PostingsFormat delegatePostingsFormat = PostingsFormat.forName(bloomIn.readString());
    this.delegateFieldsProducer = delegatePostingsFormat.fieldsProducer(state);
    int numBlooms = bloomIn.readInt();
    for (int i = 0; i < numBlooms; i++) {
      int fieldNum = bloomIn.readInt();
      FuzzySet bloom = FuzzySet.deserialize(bloomIn);
      FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldNum);
      bloomsByFieldName.put(fieldInfo.name, bloom);
    }
    IOUtils.close(bloomIn);
    success = true;
  } finally {
    if (!success) {
      IOUtils.closeWhileHandlingException(bloomIn, delegateFieldsProducer);
    }
  }
}
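// A minimal, self-contained sketch of why the per-field bloom filters above
// are loaded: a bloom filter answers "definitely absent" vs. "maybe present",
// so a reader can skip the delegate postings entirely for terms that cannot
// exist. This is NOT Lucene's FuzzySet; it is a simplified single-hash
// illustration of the same idea.
import java.util.BitSet;

class TinyBloomFilterSketch {
  private final BitSet bits;
  private final int mask;

  TinyBloomFilterSketch(int sizePow2) { // size must be a power of two
    bits = new BitSet(sizePow2);
    mask = sizePow2 - 1;
  }

  void add(String term) {
    bits.set(term.hashCode() & mask);
  }

  // false => term is definitely absent; true => term *may* be present
  boolean mayContain(String term) {
    return bits.get(term.hashCode() & mask);
  }

  public static void main(String[] args) {
    TinyBloomFilterSketch bloom = new TinyBloomFilterSketch(1024);
    bloom.add("lucene");
    System.out.println(bloom.mayContain("lucene"));  // true
    System.out.println(bloom.mayContain("absent?")); // almost certainly false
  }
}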
private int readHeader(IndexInput in) throws IOException {
  return CodecUtil.checkHeader(
      in,
      FSTTermsWriter.TERMS_CODEC_NAME,
      FSTTermsWriter.TERMS_VERSION_START,
      FSTTermsWriter.TERMS_VERSION_CURRENT);
}
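// A simplified, self-contained illustration of the header contract that
// CodecUtil.checkHeader enforces in the methods above: the writer records a
// codec name and a version, and the reader rejects files whose name does not
// match or whose version falls outside the supported [min, max] range. This is
// NOT CodecUtil's actual wire format, just the idea behind it.
import java.io.*;

class HeaderCheckSketch {
  static void writeHeader(DataOutput out, String codec, int version) throws IOException {
    out.writeUTF(codec);
    out.writeInt(version);
  }

  static int checkHeader(DataInput in, String codec, int minVersion, int maxVersion)
      throws IOException {
    String actualCodec = in.readUTF();
    if (!actualCodec.equals(codec)) {
      throw new IOException("codec mismatch: expected " + codec + ", got " + actualCodec);
    }
    int actualVersion = in.readInt();
    if (actualVersion < minVersion || actualVersion > maxVersion) {
      throw new IOException("unsupported version: " + actualVersion);
    }
    return actualVersion; // callers branch on the version, as readHeader's callers do
  }

  public static void main(String[] args) throws IOException {
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    writeHeader(new DataOutputStream(bytes), "DemoCodec", 2);
    int version =
        checkHeader(
            new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())),
            "DemoCodec", /*minVersion=*/ 1, /*maxVersion=*/ 3);
    System.out.println("read version " + version); // read version 2
  }
}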
public CompletionFieldsProducer(SegmentReadState state) throws IOException {
  String suggestFSTFile =
      IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
  IndexInput input = state.directory.openInput(suggestFSTFile, state.context);
  if (state.segmentInfo.getVersion().onOrAfter(Version.LUCENE_6_2_0)) {
    // Lucene 6.2.0+ requires all index files to use an index header, but prior to that we used
    // an ordinary codec header:
    version =
        CodecUtil.checkIndexHeader(
            input,
            CODEC_NAME,
            SUGGEST_CODEC_VERSION,
            SUGGEST_VERSION_CURRENT,
            state.segmentInfo.getId(),
            state.segmentSuffix);
  } else {
    version =
        CodecUtil.checkHeader(input, CODEC_NAME, SUGGEST_CODEC_VERSION, SUGGEST_VERSION_CURRENT);
  }
  FieldsProducer delegateProducer = null;
  boolean success = false;
  try {
    PostingsFormat delegatePostingsFormat = PostingsFormat.forName(input.readString());
    String providerName = input.readString();
    CompletionLookupProvider completionLookupProvider = providers.get(providerName);
    if (completionLookupProvider == null) {
      throw new IllegalStateException("no provider with name [" + providerName + "] registered");
    }
    // TODO: we could clone the ReadState and make it always forward IOContext.MERGE to prevent
    // unnecessary heap usage?
    delegateProducer = delegatePostingsFormat.fieldsProducer(state);
    /*
     * If we are merging we don't load the FSTs at all such that we
     * don't consume so much memory during merge
     */
    if (state.context.context != Context.MERGE) {
      // TODO: maybe we can do this in a fully lazy fashion based on some configuration.
      // Eventually we should have some kind of circuit breaker that prevents us from going
      // OOM here.
      this.lookupFactory = completionLookupProvider.load(input);
    } else {
      this.lookupFactory = null;
    }
    this.delegateProducer = delegateProducer;
    success = true;
  } finally {
    if (!success) {
      IOUtils.closeWhileHandlingException(delegateProducer, input);
    } else {
      IOUtils.close(input);
    }
  }
}
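// A minimal sketch (not the actual CompletionFieldsProducer) of the
// merge-aware loading pattern above: when a segment is opened only to be
// merged, the expensive in-memory lookup structure is never queried, so the
// load is skipped and the field left null. All names here are illustrative.
import java.util.function.Supplier;

class MergeAwareLoaderSketch<T> {
  enum Context { DEFAULT, MERGE }

  private final T lookup; // null when opened for merge

  MergeAwareLoaderSketch(Context context, Supplier<T> expensiveLoad) {
    // During a merge the lookup is never used, so avoid the heap cost entirely.
    this.lookup = (context == Context.MERGE) ? null : expensiveLoad.get();
  }

  T lookup() {
    if (lookup == null) {
      throw new IllegalStateException("opened for merge; lookup was not loaded");
    }
    return lookup;
  }

  public static void main(String[] args) {
    MergeAwareLoaderSketch<String> forSearch =
        new MergeAwareLoaderSketch<>(Context.DEFAULT, () -> "expensive FST");
    System.out.println(forSearch.lookup()); // expensive FST

    MergeAwareLoaderSketch<String> forMerge =
        new MergeAwareLoaderSketch<>(Context.MERGE, () -> "expensive FST");
    System.out.println(forMerge.lookup == null); // true: nothing was loaded
  }
}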
@Override
public void init(IndexInput termsIn) throws IOException {
  // Make sure we are talking to the matching past writer; only VERSION_START
  // is supported, so it is passed as both the minimum and maximum version.
  CodecUtil.checkHeader(
      termsIn,
      SepPostingsWriter.CODEC,
      SepPostingsWriter.VERSION_START,
      SepPostingsWriter.VERSION_START);
  skipInterval = termsIn.readInt();
  maxSkipLevels = termsIn.readInt();
  skipMinimum = termsIn.readInt();
}
@Override
public FieldInfos read(
    Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext context)
    throws IOException {
  final String fileName =
      IndexFileNames.segmentFileName(
          segmentInfo.name, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION);
  try (ChecksumIndexInput input = directory.openChecksumInput(fileName, context)) {
    int codecVersion =
        CodecUtil.checkHeader(
            input,
            Lucene46FieldInfosFormat.CODEC_NAME,
            Lucene46FieldInfosFormat.FORMAT_START,
            Lucene46FieldInfosFormat.FORMAT_CURRENT);
    final int size = input.readVInt(); // read in the size
    FieldInfo[] infos = new FieldInfo[size];
    for (int i = 0; i < size; i++) {
      String name = input.readString();
      final int fieldNumber = input.readVInt();
      if (fieldNumber < 0) {
        throw new CorruptIndexException(
            "invalid field number for field: " + name + ", fieldNumber=" + fieldNumber, input);
      }
      byte bits = input.readByte();
      boolean isIndexed = (bits & Lucene46FieldInfosFormat.IS_INDEXED) != 0;
      boolean storeTermVector = (bits & Lucene46FieldInfosFormat.STORE_TERMVECTOR) != 0;
      boolean omitNorms = (bits & Lucene46FieldInfosFormat.OMIT_NORMS) != 0;
      boolean storePayloads = (bits & Lucene46FieldInfosFormat.STORE_PAYLOADS) != 0;
      final IndexOptions indexOptions;
      if (!isIndexed) {
        indexOptions = IndexOptions.NONE;
      } else if ((bits & Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
        indexOptions = IndexOptions.DOCS;
      } else if ((bits & Lucene46FieldInfosFormat.OMIT_POSITIONS) != 0) {
        indexOptions = IndexOptions.DOCS_AND_FREQS;
      } else if ((bits & Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0) {
        indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
      } else {
        indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
      }

      // DV types are packed in one byte: doc values type in the low nibble,
      // norms type in the high nibble
      byte val = input.readByte();
      final DocValuesType docValuesType = getDocValuesType(input, (byte) (val & 0x0F));
      final DocValuesType normsType = getDocValuesType(input, (byte) ((val >>> 4) & 0x0F));
      final long dvGen = input.readLong();
      final Map<String, String> attributes = input.readStringStringMap();
      if (isIndexed && omitNorms == false && normsType == DocValuesType.NONE) {
        // Undead norms! Lucene42NormsProducer will check this and bring norms back from the
        // dead:
        UndeadNormsProducer.setUndead(attributes);
      }
      infos[i] =
          new FieldInfo(
              name,
              fieldNumber,
              storeTermVector,
              omitNorms,
              storePayloads,
              indexOptions,
              docValuesType,
              dvGen,
              Collections.unmodifiableMap(attributes));
      infos[i].checkConsistency();
    }
    if (codecVersion >= Lucene46FieldInfosFormat.FORMAT_CHECKSUM) {
      CodecUtil.checkFooter(input);
    } else {
      CodecUtil.checkEOF(input);
    }
    return new FieldInfos(infos);
  }
}
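// A self-contained demo of the nibble packing read above: two 4-bit type codes
// share a single byte, with the doc-values type in the low nibble and the
// norms type in the high nibble. This mirrors the (val & 0x0F) and
// ((val >>> 4) & 0x0F) expressions in read(), outside of any Lucene types.
class NibblePackingDemo {
  static byte pack(int lowNibble, int highNibble) {
    // each value must fit in 4 bits (0..15)
    return (byte) ((lowNibble & 0x0F) | ((highNibble & 0x0F) << 4));
  }

  static int lowNibble(byte b) {
    return b & 0x0F;
  }

  static int highNibble(byte b) {
    return (b >>> 4) & 0x0F; // mask away the sign-extended high bits
  }

  public static void main(String[] args) {
    byte packed = pack(3, 9); // e.g. docValuesType code 3, normsType code 9
    System.out.println(lowNibble(packed));  // 3
    System.out.println(highNibble(packed)); // 9
  }
}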
Lucene42DocValuesProducer(
    SegmentReadState state,
    String dataCodec,
    String dataExtension,
    String metaCodec,
    String metaExtension)
    throws IOException {
  maxDoc = state.segmentInfo.maxDoc();
  merging = false;
  String metaName =
      IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
  // read in the entries from the metadata file
  ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context);
  boolean success = false;
  ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass()));
  try {
    version = CodecUtil.checkHeader(in, metaCodec, VERSION_START, VERSION_CURRENT);
    numerics = new HashMap<>();
    binaries = new HashMap<>();
    fsts = new HashMap<>();
    numEntries = readFields(in, state.fieldInfos);
    if (version >= VERSION_CHECKSUM) {
      CodecUtil.checkFooter(in);
    } else {
      CodecUtil.checkEOF(in);
    }
    success = true;
  } finally {
    if (success) {
      IOUtils.close(in);
    } else {
      IOUtils.closeWhileHandlingException(in);
    }
  }

  String dataName =
      IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
  this.data = state.directory.openInput(dataName, state.context);
  success = false;
  try {
    final int version2 = CodecUtil.checkHeader(data, dataCodec, VERSION_START, VERSION_CURRENT);
    if (version != version2) {
      throw new CorruptIndexException(
          "Format versions mismatch: meta=" + version + ", data=" + version2, data);
    }
    if (version >= VERSION_CHECKSUM) {
      // NOTE: the data file is too costly to verify checksum against all the bytes on open,
      // but for now we at least verify proper structure of the checksum footer, which looks
      // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
      // such as file truncation.
      CodecUtil.retrieveChecksum(data);
    }
    success = true;
  } finally {
    if (!success) {
      IOUtils.closeWhileHandlingException(this.data);
    }
  }
}
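// A simplified sketch of the checksum-footer trade-off noted in the comment
// above: the writer appends a magic marker plus a checksum of the payload; on
// open, the reader verifies only the footer's *structure* (cheap, catches
// truncation) rather than re-hashing the whole file. This is NOT CodecUtil's
// format; FOOTER_MAGIC and the layout here are hypothetical.
import java.io.*;
import java.util.zip.CRC32;

class FooterSketch {
  static final int FOOTER_MAGIC = 0xC0DEF00D; // hypothetical marker value

  static byte[] writeWithFooter(byte[] payload) throws IOException {
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(bytes);
    out.write(payload);
    out.writeInt(FOOTER_MAGIC); // 4 bytes
    CRC32 crc = new CRC32();
    crc.update(payload);
    out.writeLong(crc.getValue()); // 8 bytes
    return bytes.toByteArray();
  }

  // Cheap structural check: inspects only the last 12 bytes, without reading
  // (or re-hashing) the payload. Detects truncation and a missing footer.
  static long retrieveChecksum(byte[] file) throws IOException {
    if (file.length < 12) {
      throw new EOFException("file truncated: no room for footer");
    }
    DataInputStream in =
        new DataInputStream(new ByteArrayInputStream(file, file.length - 12, 12));
    if (in.readInt() != FOOTER_MAGIC) {
      throw new IOException("missing footer magic");
    }
    return in.readLong();
  }

  public static void main(String[] args) throws IOException {
    byte[] file = writeWithFooter("some index data".getBytes());
    System.out.println("stored checksum: " + retrieveChecksum(file));
  }
}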