@Override protected int readSkipData(int level, IndexInput skipStream) throws IOException { int delta; if (currentFieldStoresPayloads || currentFieldStoresOffsets) { // the current field stores payloads and/or offsets. // if the doc delta is odd then we have // to read the current payload/offset lengths // because it differs from the lengths of the // previous payload/offset delta = skipStream.readVInt(); if ((delta & 1) != 0) { if (currentFieldStoresPayloads) { payloadLength[level] = skipStream.readVInt(); } if (currentFieldStoresOffsets) { offsetLength[level] = skipStream.readVInt(); } } delta >>>= 1; } else { delta = skipStream.readVInt(); } freqPointer[level] += skipStream.readVInt(); proxPointer[level] += skipStream.readVInt(); return delta; }
@Override protected int readSkipData(int level, IndexInput skipStream) throws IOException { int delta; if (currentFieldStoresPayloads) { // the current field stores payloads. // if the doc delta is odd then we have // to read the current payload length // because it differs from the length of the // previous payload delta = skipStream.readVInt(); if ((delta & 1) != 0) { payloadLength[level] = skipStream.readVInt(); } delta >>>= 1; } else { delta = skipStream.readVInt(); } if (!omitTF) { freqIndex[level].read(skipStream, false); } docIndex[level].read(skipStream, false); if (!omitTF) { posIndex[level].read(skipStream, false); payloadPointer[level] += skipStream.readVInt(); } return delta; }
/* Does initial decode of next block of terms; this doesn't actually decode the docFreq, totalTermFreq, postings details (frq/prx offset, etc.) metadata; it just loads them as byte[] blobs which are then decoded on-demand if the metadata is ever requested for any term in this block. This enables terms-only intensive consumes (eg certain MTQs, respelling) to not pay the price of decoding metadata they won't use. */ private boolean nextBlock() throws IOException { // TODO: we still lazy-decode the byte[] for each // term (the suffix), but, if we decoded // all N terms up front then seeking could do a fast // bsearch w/in the block... // System.out.println("BTR.nextBlock() fp=" + in.getFilePointer() + " this=" + this); state.blockFilePointer = in.getFilePointer(); blockTermCount = in.readVInt(); // System.out.println(" blockTermCount=" + blockTermCount); if (blockTermCount == 0) { return false; } termBlockPrefix = in.readVInt(); // term suffixes: int len = in.readVInt(); if (termSuffixes.length < len) { termSuffixes = new byte[ArrayUtil.oversize(len, 1)]; } // System.out.println(" termSuffixes len=" + len); in.readBytes(termSuffixes, 0, len); termSuffixesReader.reset(termSuffixes, 0, len); // docFreq, totalTermFreq len = in.readVInt(); if (docFreqBytes.length < len) { docFreqBytes = new byte[ArrayUtil.oversize(len, 1)]; } // System.out.println(" freq bytes len=" + len); in.readBytes(docFreqBytes, 0, len); freqReader.reset(docFreqBytes, 0, len); // metadata len = in.readVInt(); if (bytes == null) { bytes = new byte[ArrayUtil.oversize(len, 1)]; bytesReader = new ByteArrayDataInput(); } else if (bytes.length < len) { bytes = new byte[ArrayUtil.oversize(len, 1)]; } in.readBytes(bytes, 0, len); bytesReader.reset(bytes, 0, len); metaDataUpto = 0; state.termBlockOrd = 0; indexIsCurrent = false; // System.out.println(" indexIsCurrent=" + indexIsCurrent); return true; }
/**
 * Reads a length-prefixed byte sequence from {@code in} into a newly allocated {@link BytesRef}.
 *
 * <p>The length is stored as a VInt followed by that many bytes.
 *
 * @param in input to read from
 * @return a new {@code BytesRef} whose {@code bytes} array holds exactly the bytes read
 * @throws IOException if reading fails, or if the stored length is negative (which indicates
 *     corrupt input and would otherwise surface as an unchecked NegativeArraySizeException)
 */
private static BytesRef readBytesRef(IndexInput in) throws IOException {
  final int numBytes = in.readVInt();
  if (numBytes < 0) {
    // A negative VInt can only come from damaged data; fail with a checked I/O error instead of
    // letting the array allocation below blow up.
    throw new IOException("invalid bytes length: " + numBytes);
  }
  BytesRef bytes = new BytesRef();
  bytes.length = numBytes;
  bytes.bytes = new byte[numBytes];
  in.readBytes(bytes.bytes, 0, numBytes);
  return bytes;
}
private void addField( Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException { // we have a binary stored field, and it may be compressed if (binary) { int toRead = fieldsStream.readVInt(); final byte[] b = new byte[toRead]; fieldsStream.readBytes(b, 0, b.length); if (compressed) doc.add(new Field(fi.name, uncompress(b), Field.Store.COMPRESS)); else doc.add(new Field(fi.name, b, Field.Store.YES)); } else { Field.Store store = Field.Store.YES; Field.Index index = getIndexType(fi, tokenize); Field.TermVector termVector = getTermVectorType(fi); Fieldable f; if (compressed) { store = Field.Store.COMPRESS; int toRead = fieldsStream.readVInt(); final byte[] b = new byte[toRead]; fieldsStream.readBytes(b, 0, b.length); f = new Field( fi.name, // field name new String(uncompress(b), "UTF-8"), // uncompress the value and add as string store, index, termVector); f.setOmitNorms(fi.omitNorms); } else { f = new Field( fi.name, // name fieldsStream.readString(), // read value store, index, termVector); f.setOmitNorms(fi.omitNorms); } doc.add(f); } }
/**
 * Opens the terms file of the segment, validates its header (and checksum, for versions that
 * have one), initializes the postings reader, and loads the per-field summaries into
 * {@code fields}.
 *
 * <p>The terms input is always closed before the constructor returns; on failure it is closed
 * while suppressing secondary exceptions.
 *
 * @param state segment read state (directory, segment info, field infos, context)
 * @param postingsReader postings reader to initialize from the terms file
 * @throws IOException if the file cannot be opened or read
 */
public FSTTermsReader(SegmentReadState state, PostingsReaderBase postingsReader)
    throws IOException {
  final String termsFileName =
      IndexFileNames.segmentFileName(
          state.segmentInfo.name, state.segmentSuffix, FSTTermsWriter.TERMS_EXTENSION);

  this.postingsReader = postingsReader;
  final IndexInput in = state.directory.openInput(termsFileName, state.context);

  boolean success = false;
  try {
    version = readHeader(in);
    if (version >= FSTTermsWriter.TERMS_VERSION_CHECKSUM) {
      CodecUtil.checksumEntireFile(in);
    }
    this.postingsReader.init(in);
    seekDir(in);

    final FieldInfos fieldInfos = state.fieldInfos;
    final int numFields = in.readVInt();
    for (int remaining = numFields; remaining > 0; remaining--) {
      final int fieldNumber = in.readVInt();
      // NOTE(review): fieldInfo is dereferenced without a null check; an unknown field number
      // would presumably fail with a NullPointerException here -- confirm and consider guarding.
      final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
      final long numTerms = in.readVLong();
      final long sumTotalTermFreq;
      if (fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY) {
        // Not stored for docs-only fields.
        sumTotalTermFreq = -1;
      } else {
        sumTotalTermFreq = in.readVLong();
      }
      final long sumDocFreq = in.readVLong();
      final int docCount = in.readVInt();
      final int longsSize = in.readVInt();

      final TermsReader current =
          new TermsReader(
              fieldInfo, in, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize);
      final TermsReader previous = fields.put(fieldInfo.name, current);
      checkFieldSummary(state.segmentInfo, in, current, previous);
    }
    success = true;
  } finally {
    if (!success) {
      IOUtils.closeWhileHandlingException(in);
    } else {
      IOUtils.close(in);
    }
  }
}
public Term next() { assert hasNext(); try { int code = input.readVInt(); if ((code & 1) != 0) { // new field field = input.readString(); } int prefix = code >>> 1; int suffix = input.readVInt(); bytes.grow(prefix + suffix); input.readBytes(bytes.bytes, prefix, suffix); bytes.length = prefix + suffix; term.set(field, bytes); return term; } catch (IOException e) { throw new RuntimeException(e); } }
/**
 * Advances to the next posting.
 *
 * <p>The pointer is always incremented. If it is still below {@code docFreq}, the input is
 * positioned at that posting's offset, its frequency is read, and the position is reset to 0.
 *
 * @return {@code true} if a posting was loaded, {@code false} if the pointer ran past the last
 *     one
 * @throws IOException if seeking or reading fails
 */
public boolean next() throws IOException {
  pointer++;
  if (pointer >= docFreq) {
    return false;
  }

  in.seek(postingMaps[pointer].offset);
  freq = in.readVInt();
  position = 0;
  return true;
}
// Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order // byte first; char = 2 bytes) // Read just the size -- caller must skip the field content to continue reading fields // Return the size in bytes or chars, depending on field type private int addFieldSize(Document doc, FieldInfo fi, boolean binary, boolean compressed) throws IOException { int size = fieldsStream.readVInt(), bytesize = binary || compressed ? size : 2 * size; byte[] sizebytes = new byte[4]; sizebytes[0] = (byte) (bytesize >>> 24); sizebytes[1] = (byte) (bytesize >>> 16); sizebytes[2] = (byte) (bytesize >>> 8); sizebytes[3] = (byte) bytesize; doc.add(new Field(fi.name, sizebytes, Field.Store.YES)); return size; }
final Document doc(int n, FieldSelector fieldSelector) throws IOException { indexStream.seek(n * 8L); long position = indexStream.readLong(); fieldsStream.seek(position); Document doc = new Document(); int numFields = fieldsStream.readVInt(); for (int i = 0; i < numFields; i++) { int fieldNumber = fieldsStream.readVInt(); FieldInfo fi = fieldInfos.fieldInfo(fieldNumber); FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.accept(fi.name); byte bits = fieldsStream.readByte(); boolean compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0; boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0; boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0; // TODO: Find an alternative approach here if this list continues to grow beyond the // list of 5 or 6 currently here. See Lucene 762 for discussion if (acceptField.equals(FieldSelectorResult.LOAD)) { addField(doc, fi, binary, compressed, tokenize); } else if (acceptField.equals(FieldSelectorResult.LOAD_FOR_MERGE)) { addFieldForMerge(doc, fi, binary, compressed, tokenize); } else if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK)) { addField(doc, fi, binary, compressed, tokenize); break; // Get out of this loop } else if (acceptField.equals(FieldSelectorResult.LAZY_LOAD)) { addFieldLazy(doc, fi, binary, compressed, tokenize); } else if (acceptField.equals(FieldSelectorResult.SIZE)) { skipField(binary, compressed, addFieldSize(doc, fi, binary, compressed)); } else if (acceptField.equals(FieldSelectorResult.SIZE_AND_BREAK)) { addFieldSize(doc, fi, binary, compressed); break; } else { skipField(binary, compressed); } } return doc; }
private void addFieldLazy( Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException { if (binary == true) { int toRead = fieldsStream.readVInt(); long pointer = fieldsStream.getFilePointer(); if (compressed) { // was: doc.add(new Fieldable(fi.name, uncompress(b), Fieldable.Store.COMPRESS)); doc.add(new LazyField(fi.name, Field.Store.COMPRESS, toRead, pointer)); } else { // was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES)); doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer)); } // Need to move the pointer ahead by toRead positions fieldsStream.seek(pointer + toRead); } else { Field.Store store = Field.Store.YES; Field.Index index = getIndexType(fi, tokenize); Field.TermVector termVector = getTermVectorType(fi); Fieldable f; if (compressed) { store = Field.Store.COMPRESS; int toRead = fieldsStream.readVInt(); long pointer = fieldsStream.getFilePointer(); f = new LazyField(fi.name, store, toRead, pointer); // skip over the part that we aren't loading fieldsStream.seek(pointer + toRead); f.setOmitNorms(fi.omitNorms); } else { int length = fieldsStream.readVInt(); long pointer = fieldsStream.getFilePointer(); // Skip ahead of where we are by the length of what is stored fieldsStream.skipChars(length); f = new LazyField(fi.name, store, index, termVector, length, pointer); f.setOmitNorms(fi.omitNorms); } doc.add(f); } }
// in merge mode we don't uncompress the data of a compressed field private void addFieldForMerge( Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException { Object data; if (binary || compressed) { int toRead = fieldsStream.readVInt(); final byte[] b = new byte[toRead]; fieldsStream.readBytes(b, 0, b.length); data = b; } else { data = fieldsStream.readString(); } doc.add(new FieldForMerge(data, fi, binary, compressed, tokenize)); }
@Override public void readTermsBlock(IndexInput termsIn, FieldInfo fieldInfo, BlockTermState _termState) throws IOException { final SepTermState termState = (SepTermState) _termState; // System.out.println("SEPR: readTermsBlock termsIn.fp=" + termsIn.getFilePointer()); final int len = termsIn.readVInt(); // System.out.println(" numBytes=" + len); if (termState.bytes == null) { termState.bytes = new byte[ArrayUtil.oversize(len, 1)]; termState.bytesReader = new ByteArrayDataInput(termState.bytes); } else if (termState.bytes.length < len) { termState.bytes = new byte[ArrayUtil.oversize(len, 1)]; } termState.bytesReader.reset(termState.bytes, 0, len); termsIn.readBytes(termState.bytes, 0, len); }
private int readFields(IndexInput meta, FieldInfos infos) throws IOException { int numEntries = 0; int fieldNumber = meta.readVInt(); while (fieldNumber != -1) { numEntries++; FieldInfo info = infos.fieldInfo(fieldNumber); if (info == null) { // trickier to validate more: because we re-use for norms, because we use multiple entries // for "composite" types like sortedset, etc. throw new CorruptIndexException("Invalid field number: " + fieldNumber, meta); } int fieldType = meta.readByte(); if (fieldType == NUMBER) { NumericEntry entry = new NumericEntry(); entry.offset = meta.readLong(); entry.format = meta.readByte(); switch (entry.format) { case DELTA_COMPRESSED: case TABLE_COMPRESSED: case GCD_COMPRESSED: case UNCOMPRESSED: break; default: throw new CorruptIndexException("Unknown format: " + entry.format, meta); } if (entry.format != UNCOMPRESSED) { entry.packedIntsVersion = meta.readVInt(); } numerics.put(info.name, entry); } else if (fieldType == BYTES) { BinaryEntry entry = new BinaryEntry(); entry.offset = meta.readLong(); entry.numBytes = meta.readLong(); entry.minLength = meta.readVInt(); entry.maxLength = meta.readVInt(); if (entry.minLength != entry.maxLength) { entry.packedIntsVersion = meta.readVInt(); entry.blockSize = meta.readVInt(); } binaries.put(info.name, entry); } else if (fieldType == FST) { FSTEntry entry = new FSTEntry(); entry.offset = meta.readLong(); entry.numOrds = meta.readVLong(); fsts.put(info.name, entry); } else { throw new CorruptIndexException("invalid entry type: " + fieldType, meta); } fieldNumber = meta.readVInt(); } return numEntries; }
public BlockTermsReader( TermsIndexReaderBase indexReader, PostingsReaderBase postingsReader, SegmentReadState state) throws IOException { this.postingsReader = postingsReader; String filename = IndexFileNames.segmentFileName( state.segmentInfo.name, state.segmentSuffix, BlockTermsWriter.TERMS_EXTENSION); in = state.directory.openInput(filename, state.context); boolean success = false; try { CodecUtil.checkIndexHeader( in, BlockTermsWriter.CODEC_NAME, BlockTermsWriter.VERSION_START, BlockTermsWriter.VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix); // Have PostingsReader init itself postingsReader.init(in, state); // NOTE: data file is too costly to verify checksum against all the bytes on open, // but for now we at least verify proper structure of the checksum footer: which looks // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption // such as file truncation. CodecUtil.retrieveChecksum(in); // Read per-field details seekDir(in); final int numFields = in.readVInt(); if (numFields < 0) { throw new CorruptIndexException("invalid number of fields: " + numFields, in); } for (int i = 0; i < numFields; i++) { final int field = in.readVInt(); final long numTerms = in.readVLong(); assert numTerms >= 0; final long termsStartPointer = in.readVLong(); final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field); final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? 
-1 : in.readVLong(); final long sumDocFreq = in.readVLong(); final int docCount = in.readVInt(); final int longsSize = in.readVInt(); if (docCount < 0 || docCount > state.segmentInfo.maxDoc()) { // #docs with field must be <= #docs throw new CorruptIndexException( "invalid docCount: " + docCount + " maxDoc: " + state.segmentInfo.maxDoc(), in); } if (sumDocFreq < docCount) { // #postings must be >= #docs with field throw new CorruptIndexException( "invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, in); } if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings throw new CorruptIndexException( "invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, in); } FieldReader previous = fields.put( fieldInfo.name, new FieldReader( fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount, longsSize)); if (previous != null) { throw new CorruptIndexException("duplicate fields: " + fieldInfo.name, in); } } success = true; } finally { if (!success) { in.close(); } } this.indexReader = indexReader; }
public void testRead() throws IOException { IndexInput is = new MockIndexInput( new byte[] { (byte) 0x80, 0x01, (byte) 0xFF, 0x7F, (byte) 0x80, (byte) 0x80, 0x01, (byte) 0x81, (byte) 0x80, 0x01, 0x06, 'L', 'u', 'c', 'e', 'n', 'e', // 2-byte UTF-8 (U+00BF "INVERTED QUESTION MARK") 0x02, (byte) 0xC2, (byte) 0xBF, 0x0A, 'L', 'u', (byte) 0xC2, (byte) 0xBF, 'c', 'e', (byte) 0xC2, (byte) 0xBF, 'n', 'e', // 3-byte UTF-8 (U+2620 "SKULL AND CROSSBONES") 0x03, (byte) 0xE2, (byte) 0x98, (byte) 0xA0, 0x0C, 'L', 'u', (byte) 0xE2, (byte) 0x98, (byte) 0xA0, 'c', 'e', (byte) 0xE2, (byte) 0x98, (byte) 0xA0, 'n', 'e', // surrogate pairs // (U+1D11E "MUSICAL SYMBOL G CLEF") // (U+1D160 "MUSICAL SYMBOL EIGHTH NOTE") 0x04, (byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E, 0x08, (byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E, (byte) 0xF0, (byte) 0x9D, (byte) 0x85, (byte) 0xA0, 0x0E, 'L', 'u', (byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E, 'c', 'e', (byte) 0xF0, (byte) 0x9D, (byte) 0x85, (byte) 0xA0, 'n', 'e', // null bytes 0x01, 0x00, 0x08, 'L', 'u', 0x00, 'c', 'e', 0x00, 'n', 'e', }); assertEquals(128, is.readVInt()); assertEquals(16383, is.readVInt()); assertEquals(16384, is.readVInt()); assertEquals(16385, is.readVInt()); assertEquals("Lucene", is.readString()); assertEquals("\u00BF", is.readString()); assertEquals("Lu\u00BFce\u00BFne", is.readString()); assertEquals("\u2620", is.readString()); assertEquals("Lu\u2620ce\u2620ne", is.readString()); assertEquals("\uD834\uDD1E", is.readString()); assertEquals("\uD834\uDD1E\uD834\uDD60", is.readString()); assertEquals("Lu\uD834\uDD1Ece\uD834\uDD60ne", is.readString()); assertEquals("\u0000", is.readString()); assertEquals("Lu\u0000ce\u0000ne", is.readString()); }
/**
 * Returns the result of {@code delegate.readVInt()}; this override adds no logic of its own.
 *
 * @return the value read by the delegate
 * @throws IOException if the delegate's read fails
 */
@Override
public int readVInt() throws IOException {
  return delegate.readVInt();
}
/**
 * Loads the numeric doc values of {@code field} from {@code data}, decoding according to the
 * format recorded in the field's {@code NumericEntry}.
 *
 * <p>Unless {@code merging} is set, the memory used by the loaded structure is added to
 * {@code ramBytesUsed} and an entry is recorded in {@code numericInfo}.
 *
 * @param field field whose numeric values are loaded
 * @return a {@code NumericDocValues} view over the loaded values
 * @throws CorruptIndexException if a TABLE_COMPRESSED table declares more than 256 values
 * @throws IOException if reading from {@code data} fails
 */
private NumericDocValues loadNumeric(FieldInfo field) throws IOException {
  final NumericEntry entry = numerics.get(field.name);
  data.seek(entry.offset);

  switch (entry.format) {
    case TABLE_COMPRESSED: {
      // Lookup table of distinct values, followed by packed ordinals into that table.
      final int tableSize = data.readVInt();
      if (tableSize > 256) {
        throw new CorruptIndexException(
            "TABLE_COMPRESSED cannot have more than 256 distinct values, got=" + tableSize,
            data);
      }
      final long[] table = new long[tableSize];
      for (int slot = 0; slot < table.length; slot++) {
        table[slot] = data.readLong();
      }
      final int formatID = data.readVInt();
      final int bitsPerValue = data.readVInt();
      final PackedInts.Reader ords =
          PackedInts.getReaderNoHeader(
              data,
              PackedInts.Format.byId(formatID),
              entry.packedIntsVersion,
              maxDoc,
              bitsPerValue);
      if (!merging) {
        ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(table) + ords.ramBytesUsed());
        numericInfo.put(field.name, ords);
      }
      return new NumericDocValues() {
        @Override
        public long get(int docID) {
          return table[(int) ords.get(docID)];
        }
      };
    }

    case DELTA_COMPRESSED: {
      final int blockSize = data.readVInt();
      final BlockPackedReader deltas =
          new BlockPackedReader(data, entry.packedIntsVersion, blockSize, maxDoc, false);
      if (!merging) {
        ramBytesUsed.addAndGet(deltas.ramBytesUsed());
        numericInfo.put(field.name, deltas);
      }
      return deltas;
    }

    case UNCOMPRESSED: {
      // One raw byte per document.
      final byte[] values = new byte[maxDoc];
      data.readBytes(values, 0, values.length);
      if (!merging) {
        ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(values));
        numericInfo.put(field.name, Accountables.namedAccountable("byte array", maxDoc));
      }
      return new NumericDocValues() {
        @Override
        public long get(int docID) {
          return values[docID];
        }
      };
    }

    case GCD_COMPRESSED: {
      // Values are reconstructed as min + mult * quotient.
      final long min = data.readLong();
      final long mult = data.readLong();
      final int quotientBlockSize = data.readVInt();
      final BlockPackedReader quotients =
          new BlockPackedReader(data, entry.packedIntsVersion, quotientBlockSize, maxDoc, false);
      if (!merging) {
        ramBytesUsed.addAndGet(quotients.ramBytesUsed());
        numericInfo.put(field.name, quotients);
      }
      return new NumericDocValues() {
        @Override
        public long get(int docID) {
          return min + mult * quotients.get(docID);
        }
      };
    }

    default:
      throw new AssertionError();
  }
}
/**
 * Reads the next position increment from the input and advances the current position by it.
 *
 * @return the updated position
 * @throws IOException if reading the increment fails
 */
public int nextPosition() throws IOException {
  final int increment = in.readVInt();
  position = position + increment;
  return position;
}
/**
 * Skips over one stored field. The field's length still has to be read from the stream, but the
 * actual content is skipped rather than loaded; the payoff is largest for big fields.
 *
 * @param binary whether the stored value is binary
 * @param compressed whether the stored value is compressed
 * @throws IOException if reading the length or skipping fails
 */
private void skipField(boolean binary, boolean compressed) throws IOException {
  final int toRead = fieldsStream.readVInt();
  skipField(binary, compressed, toRead);
}
/**
 * Sole constructor.
 *
 * <p>Opens the terms and terms-index files of the segment, validates their headers (the index
 * file is fully checksummed, the terms file only has its checksum footer structure verified),
 * initializes the postings reader, and loads the per-field details into {@code fields}. The
 * index input is closed on success; on failure both the index input and this reader are closed
 * while suppressing secondary exceptions.
 *
 * @param postingsReader postings reader to initialize from the terms file
 * @param state segment read state (directory, segment info, field infos, context)
 * @throws CorruptIndexException if the per-field details are invalid or inconsistent
 * @throws IOException if a file cannot be opened or read
 */
public BlockTreeTermsReader(PostingsReaderBase postingsReader, SegmentReadState state)
    throws IOException {
  boolean success = false;
  IndexInput indexIn = null;

  this.postingsReader = postingsReader;
  this.segment = state.segmentInfo.name;

  final String termsName =
      IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_EXTENSION);
  try {
    termsIn = state.directory.openInput(termsName, state.context);
    version =
        CodecUtil.checkIndexHeader(
            termsIn,
            TERMS_CODEC_NAME,
            VERSION_START,
            VERSION_CURRENT,
            state.segmentInfo.getId(),
            state.segmentSuffix);

    final String indexName =
        IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_INDEX_EXTENSION);
    indexIn = state.directory.openInput(indexName, state.context);
    // The index file must carry exactly the version found in the terms file.
    CodecUtil.checkIndexHeader(
        indexIn,
        TERMS_INDEX_CODEC_NAME,
        version,
        version,
        state.segmentInfo.getId(),
        state.segmentSuffix);
    CodecUtil.checksumEntireFile(indexIn);

    // Have PostingsReader init itself
    postingsReader.init(termsIn, state);

    // NOTE: data file is too costly to verify checksum against all the bytes on open,
    // but for now we at least verify proper structure of the checksum footer: which looks
    // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
    // such as file truncation.
    CodecUtil.retrieveChecksum(termsIn);

    // Read per-field details
    seekDir(termsIn, dirOffset);
    seekDir(indexIn, indexDirOffset);

    final int numFields = termsIn.readVInt();
    if (numFields < 0) {
      throw new CorruptIndexException("invalid numFields: " + numFields, termsIn);
    }

    for (int fieldIdx = 0; fieldIdx < numFields; ++fieldIdx) {
      final int fieldNumber = termsIn.readVInt();
      final long numTerms = termsIn.readVLong();
      if (numTerms <= 0) {
        throw new CorruptIndexException(
            "Illegal numTerms for field number: " + fieldNumber, termsIn);
      }

      final int rootCodeLength = termsIn.readVInt();
      if (rootCodeLength < 0) {
        throw new CorruptIndexException(
            "invalid rootCode for field number: " + fieldNumber + ", numBytes=" + rootCodeLength,
            termsIn);
      }
      final BytesRef rootCode = new BytesRef(new byte[rootCodeLength]);
      termsIn.readBytes(rootCode.bytes, 0, rootCodeLength);
      rootCode.length = rootCodeLength;

      final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldNumber);
      if (fieldInfo == null) {
        throw new CorruptIndexException("invalid field number: " + fieldNumber, termsIn);
      }

      final long sumTotalTermFreq;
      if (fieldInfo.getIndexOptions() == IndexOptions.DOCS) {
        // Not stored for DOCS-only fields.
        sumTotalTermFreq = -1;
      } else {
        sumTotalTermFreq = termsIn.readVLong();
      }
      final long sumDocFreq = termsIn.readVLong();
      final int docCount = termsIn.readVInt();
      final int longsSize = termsIn.readVInt();
      if (longsSize < 0) {
        throw new CorruptIndexException(
            "invalid longsSize for field: " + fieldInfo.name + ", longsSize=" + longsSize,
            termsIn);
      }
      final BytesRef minTerm = readBytesRef(termsIn);
      final BytesRef maxTerm = readBytesRef(termsIn);

      // #docs with field must be <= #docs
      if (docCount < 0 || docCount > state.segmentInfo.getDocCount()) {
        throw new CorruptIndexException(
            "invalid docCount: " + docCount + " maxDoc: " + state.segmentInfo.getDocCount(),
            termsIn);
      }
      // #postings must be >= #docs with field
      if (sumDocFreq < docCount) {
        throw new CorruptIndexException(
            "invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, termsIn);
      }
      // #positions must be >= #postings
      if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) {
        throw new CorruptIndexException(
            "invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq,
            termsIn);
      }

      final long indexStartFP = indexIn.readVLong();
      final FieldReader previous =
          fields.put(
              fieldInfo.name,
              new FieldReader(
                  this,
                  fieldInfo,
                  numTerms,
                  rootCode,
                  sumTotalTermFreq,
                  sumDocFreq,
                  docCount,
                  indexStartFP,
                  longsSize,
                  indexIn,
                  minTerm,
                  maxTerm));
      if (previous != null) {
        throw new CorruptIndexException("duplicate field: " + fieldInfo.name, termsIn);
      }
    }

    indexIn.close();
    success = true;
  } finally {
    if (!success) {
      // this.close() will close in:
      IOUtils.closeWhileHandlingException(indexIn, this);
    }
  }
}