@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
  PostingsReaderBase postingsReader = new SepPostingsReader(state.dir, state.segmentInfo,
      state.context, new MockIntFactory(blockSize), state.segmentSuffix);

  TermsIndexReaderBase indexReader;
  boolean success = false;
  try {
    indexReader = new FixedGapTermsIndexReader(state.dir,
        state.fieldInfos,
        state.segmentInfo.name,
        state.termsIndexDivisor,
        BytesRef.getUTF8SortedAsUnicodeComparator(),
        state.segmentSuffix, IOContext.DEFAULT);
    success = true;
  } finally {
    if (!success) {
      postingsReader.close();
    }
  }

  success = false;
  try {
    FieldsProducer ret = new BlockTermsReader(indexReader,
        state.dir,
        state.fieldInfos,
        state.segmentInfo.name,
        postingsReader,
        state.context,
        1024,
        state.segmentSuffix);
    success = true;
    return ret;
  } finally {
    if (!success) {
      try {
        postingsReader.close();
      } finally {
        indexReader.close();
      }
    }
  }
}
@Override
public long ramBytesUsed() {
  long ramBytesUsed = postingsReader.ramBytesUsed();
  for (TermsReader r : fields.values()) {
    ramBytesUsed += r.ramBytesUsed();
  }
  return ramBytesUsed;
}
@Override
public void checkIntegrity() throws IOException {
  // term dictionary
  CodecUtil.checksumEntireFile(termsIn);

  // postings
  postingsReader.checkIntegrity();
}
@Override
public long ramBytesUsed() {
  long sizeInBytes = postingsReader.ramBytesUsed();
  for (FieldReader reader : fields.values()) {
    sizeInBytes += reader.ramBytesUsed();
  }
  return sizeInBytes;
}
@Override
public void checkIntegrity() throws IOException {
  // verify terms
  CodecUtil.checksumEntireFile(in);
  // verify postings
  postingsReader.checkIntegrity();
}
@Override
public long ramBytesUsed() {
  long ramBytesUsed = BASE_RAM_BYTES_USED;
  ramBytesUsed += (postingsReader != null) ? postingsReader.ramBytesUsed() : 0;
  ramBytesUsed += (indexReader != null) ? indexReader.ramBytesUsed() : 0;
  ramBytesUsed += fields.size() * 2L * RamUsageEstimator.NUM_BYTES_OBJECT_REF;
  for (FieldReader reader : fields.values()) {
    ramBytesUsed += reader.ramBytesUsed();
  }
  return ramBytesUsed;
}
@Override
public void close() throws IOException {
  try {
    try {
      if (indexReader != null) {
        indexReader.close();
      }
    } finally {
      // null so if an app hangs on to us (ie, we are not
      // GCable, despite being closed) we still free most
      // ram
      indexReader = null;
      if (in != null) {
        in.close();
      }
    }
  } finally {
    if (postingsReader != null) {
      postingsReader.close();
    }
  }
}
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
  final String seedFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, SEED_EXT);
  final IndexInput in = state.dir.openInput(seedFileName, state.context);
  final long seed = in.readLong();
  if (LuceneTestCase.VERBOSE) {
    System.out.println("MockRandomCodec: reading from seg=" + state.segmentInfo.name + " formatID=" + state.segmentSuffix + " seed=" + seed);
  }
  in.close();

  final Random random = new Random(seed);

  int readBufferSize = _TestUtil.nextInt(random, 1, 4096);
  if (LuceneTestCase.VERBOSE) {
    System.out.println("MockRandomCodec: readBufferSize=" + readBufferSize);
  }

  PostingsReaderBase postingsReader;

  if (random.nextBoolean()) {
    if (LuceneTestCase.VERBOSE) {
      System.out.println("MockRandomCodec: reading Sep postings");
    }
    postingsReader = new SepPostingsReader(state.dir, state.segmentInfo,
        state.context, new MockIntStreamFactory(random), state.segmentSuffix);
  } else {
    if (LuceneTestCase.VERBOSE) {
      System.out.println("MockRandomCodec: reading Standard postings");
    }
    postingsReader = new Lucene40PostingsReader(state.dir, state.segmentInfo, state.context, state.segmentSuffix);
  }

  if (random.nextBoolean()) {
    final int totTFCutoff = _TestUtil.nextInt(random, 1, 20);
    if (LuceneTestCase.VERBOSE) {
      System.out.println("MockRandomCodec: reading pulsing postings with totTFCutoff=" + totTFCutoff);
    }
    postingsReader = new PulsingPostingsReader(postingsReader);
  }

  final FieldsProducer fields;

  if (random.nextBoolean()) {
    // Use BlockTree terms dict
    if (LuceneTestCase.VERBOSE) {
      System.out.println("MockRandomCodec: reading BlockTree terms dict");
    }

    boolean success = false;
    try {
      fields = new BlockTreeTermsReader(state.dir,
          state.fieldInfos,
          state.segmentInfo.name,
          postingsReader,
          state.context,
          state.segmentSuffix,
          state.termsIndexDivisor);
      success = true;
    } finally {
      if (!success) {
        postingsReader.close();
      }
    }
  } else {
    if (LuceneTestCase.VERBOSE) {
      System.out.println("MockRandomCodec: reading Block terms dict");
    }

    final TermsIndexReaderBase indexReader;
    boolean success = false;
    try {
      final boolean doFixedGap = random.nextBoolean();

      // randomness diverges from writer, here:
      // if termsIndexDivisor is set to -1, we should not touch it. It means a
      // test explicitly instructed not to load the terms index.
      if (state.termsIndexDivisor != -1) {
        state.termsIndexDivisor = _TestUtil.nextInt(random, 1, 10);
      }

      if (doFixedGap) {
        if (LuceneTestCase.VERBOSE) {
          System.out.println("MockRandomCodec: fixed-gap terms index (divisor=" + state.termsIndexDivisor + ")");
        }
        indexReader = new FixedGapTermsIndexReader(state.dir,
            state.fieldInfos,
            state.segmentInfo.name,
            state.termsIndexDivisor,
            BytesRef.getUTF8SortedAsUnicodeComparator(),
            state.segmentSuffix, state.context);
      } else {
        final int n2 = random.nextInt(3);
        if (n2 == 1) {
          random.nextInt();
        } else if (n2 == 2) {
          random.nextLong();
        }
        if (LuceneTestCase.VERBOSE) {
          System.out.println("MockRandomCodec: variable-gap terms index (divisor=" + state.termsIndexDivisor + ")");
        }
        indexReader = new VariableGapTermsIndexReader(state.dir,
            state.fieldInfos,
            state.segmentInfo.name,
            state.termsIndexDivisor,
            state.segmentSuffix, state.context);
      }

      success = true;
    } finally {
      if (!success) {
        postingsReader.close();
      }
    }

    final int termsCacheSize = _TestUtil.nextInt(random, 1, 1024);

    success = false;
    try {
      fields = new BlockTermsReader(indexReader,
          state.dir,
          state.fieldInfos,
          state.segmentInfo.name,
          postingsReader,
          state.context,
          termsCacheSize,
          state.segmentSuffix);
      success = true;
    } finally {
      if (!success) {
        try {
          postingsReader.close();
        } finally {
          indexReader.close();
        }
      }
    }
  }

  return fields;
}
@Override
public void checkIntegrity() throws IOException {
  postingsReader.checkIntegrity();
}
/** Sole constructor. */
public BlockTreeTermsReader(PostingsReaderBase postingsReader, SegmentReadState state) throws IOException {
  boolean success = false;
  IndexInput indexIn = null;

  this.postingsReader = postingsReader;
  this.segment = state.segmentInfo.name;

  String termsName = IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_EXTENSION);
  try {
    termsIn = state.directory.openInput(termsName, state.context);
    version = CodecUtil.checkIndexHeader(termsIn, TERMS_CODEC_NAME, VERSION_START, VERSION_CURRENT,
        state.segmentInfo.getId(), state.segmentSuffix);

    String indexName = IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_INDEX_EXTENSION);
    indexIn = state.directory.openInput(indexName, state.context);
    CodecUtil.checkIndexHeader(indexIn, TERMS_INDEX_CODEC_NAME, version, version,
        state.segmentInfo.getId(), state.segmentSuffix);
    CodecUtil.checksumEntireFile(indexIn);

    // Have PostingsReader init itself
    postingsReader.init(termsIn, state);

    // NOTE: data file is too costly to verify checksum against all the bytes on open,
    // but for now we at least verify proper structure of the checksum footer: which looks
    // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
    // such as file truncation.
    CodecUtil.retrieveChecksum(termsIn);

    // Read per-field details
    seekDir(termsIn, dirOffset);
    seekDir(indexIn, indexDirOffset);

    final int numFields = termsIn.readVInt();
    if (numFields < 0) {
      throw new CorruptIndexException("invalid numFields: " + numFields, termsIn);
    }

    for (int i = 0; i < numFields; ++i) {
      final int field = termsIn.readVInt();
      final long numTerms = termsIn.readVLong();
      if (numTerms <= 0) {
        throw new CorruptIndexException("Illegal numTerms for field number: " + field, termsIn);
      }
      final int numBytes = termsIn.readVInt();
      if (numBytes < 0) {
        throw new CorruptIndexException("invalid rootCode for field number: " + field + ", numBytes=" + numBytes, termsIn);
      }
      final BytesRef rootCode = new BytesRef(new byte[numBytes]);
      termsIn.readBytes(rootCode.bytes, 0, numBytes);
      rootCode.length = numBytes;
      final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
      if (fieldInfo == null) {
        throw new CorruptIndexException("invalid field number: " + field, termsIn);
      }
      final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1 : termsIn.readVLong();
      final long sumDocFreq = termsIn.readVLong();
      final int docCount = termsIn.readVInt();
      final int longsSize = termsIn.readVInt();
      if (longsSize < 0) {
        throw new CorruptIndexException("invalid longsSize for field: " + fieldInfo.name + ", longsSize=" + longsSize, termsIn);
      }
      BytesRef minTerm = readBytesRef(termsIn);
      BytesRef maxTerm = readBytesRef(termsIn);
      if (docCount < 0 || docCount > state.segmentInfo.getDocCount()) { // #docs with field must be <= #docs
        throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + state.segmentInfo.getDocCount(), termsIn);
      }
      if (sumDocFreq < docCount) { // #postings must be >= #docs with field
        throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, termsIn);
      }
      if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
        throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, termsIn);
      }
      final long indexStartFP = indexIn.readVLong();
      FieldReader previous = fields.put(fieldInfo.name,
          new FieldReader(this, fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount,
              indexStartFP, longsSize, indexIn, minTerm, maxTerm));
      if (previous != null) {
        throw new CorruptIndexException("duplicate field: " + fieldInfo.name, termsIn);
      }
    }

    indexIn.close();
    success = true;
  } finally {
    if (!success) {
      // this.close() will close in:
      IOUtils.closeWhileHandlingException(indexIn, this);
    }
  }
}
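// Minimal sketch (not from the original file): how a postings format's fieldsProducer might
// hand a PostingsReaderBase to the BlockTreeTermsReader constructor above. The use of
// Lucene50PostingsReader as the caller's postings implementation is an assumption; the
// pattern of closing the postings reader when construction fails mirrors the code above.
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
  PostingsReaderBase postingsReader = new Lucene50PostingsReader(state); // assumed postings impl
  boolean success = false;
  try {
    FieldsProducer ret = new BlockTreeTermsReader(postingsReader, state);
    success = true;
    return ret;
  } finally {
    if (!success) {
      // on failure the terms reader never took ownership, so close the postings reader here
      IOUtils.closeWhileHandlingException(postingsReader);
    }
  }
}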
public BlockTermsReader(TermsIndexReaderBase indexReader, PostingsReaderBase postingsReader, SegmentReadState state)
    throws IOException {

  this.postingsReader = postingsReader;

  String filename = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix,
      BlockTermsWriter.TERMS_EXTENSION);
  in = state.directory.openInput(filename, state.context);

  boolean success = false;
  try {
    CodecUtil.checkIndexHeader(in, BlockTermsWriter.CODEC_NAME, BlockTermsWriter.VERSION_START,
        BlockTermsWriter.VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);

    // Have PostingsReader init itself
    postingsReader.init(in, state);

    // NOTE: data file is too costly to verify checksum against all the bytes on open,
    // but for now we at least verify proper structure of the checksum footer: which looks
    // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
    // such as file truncation.
    CodecUtil.retrieveChecksum(in);

    // Read per-field details
    seekDir(in);

    final int numFields = in.readVInt();
    if (numFields < 0) {
      throw new CorruptIndexException("invalid number of fields: " + numFields, in);
    }
    for (int i = 0; i < numFields; i++) {
      final int field = in.readVInt();
      final long numTerms = in.readVLong();
      assert numTerms >= 0;
      final long termsStartPointer = in.readVLong();
      final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
      final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1 : in.readVLong();
      final long sumDocFreq = in.readVLong();
      final int docCount = in.readVInt();
      final int longsSize = in.readVInt();
      if (docCount < 0 || docCount > state.segmentInfo.maxDoc()) { // #docs with field must be <= #docs
        throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + state.segmentInfo.maxDoc(), in);
      }
      if (sumDocFreq < docCount) { // #postings must be >= #docs with field
        throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, in);
      }
      if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
        throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, in);
      }
      FieldReader previous = fields.put(fieldInfo.name,
          new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount, longsSize));
      if (previous != null) {
        throw new CorruptIndexException("duplicate fields: " + fieldInfo.name, in);
      }
    }
    success = true;
  } finally {
    if (!success) {
      in.close();
    }
  }

  this.indexReader = indexReader;
}