/** * Read a particular segmentFileName. Note that this may throw an IOException if a commit is in * process. * * @param directory -- directory containing the segments file * @param segmentFileName -- segment file to load * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ public static final SegmentInfos readCommit(Directory directory, String segmentFileName) throws IOException { long generation = generationFromSegmentsFileName(segmentFileName); try (ChecksumIndexInput input = directory.openChecksumInput(segmentFileName, IOContext.READ)) { // NOTE: as long as we want to throw indexformattooold (vs corruptindexexception), we need // to read the magic ourselves. int magic = input.readInt(); if (magic != CodecUtil.CODEC_MAGIC) { throw new IndexFormatTooOldException( input, magic, CodecUtil.CODEC_MAGIC, CodecUtil.CODEC_MAGIC); } // 4.0+ int format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_40, VERSION_CURRENT); // 5.0+ byte id[] = null; if (format >= VERSION_50) { id = new byte[StringHelper.ID_LENGTH]; input.readBytes(id, 0, id.length); CodecUtil.checkIndexHeaderSuffix(input, Long.toString(generation, Character.MAX_RADIX)); } SegmentInfos infos = new SegmentInfos(); infos.id = id; infos.generation = generation; infos.lastGeneration = generation; if (format >= VERSION_53) { // TODO: in the future (7.0? sigh) we can use this to throw IndexFormatTooOldException ... // or just rely on the // minSegmentLuceneVersion check instead: infos.luceneVersion = Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt()); } else { // else compute the min version down below in the for loop } infos.version = input.readLong(); infos.counter = input.readInt(); int numSegments = input.readInt(); if (numSegments < 0) { throw new CorruptIndexException("invalid segment count: " + numSegments, input); } if (format >= VERSION_53) { if (numSegments > 0) { infos.minSegmentLuceneVersion = Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt()); if (infos.minSegmentLuceneVersion.onOrAfter(Version.LUCENE_4_0_0_ALPHA) == false) { throw new IndexFormatTooOldException( input, "this index contains a too-old segment (version: " + infos.minSegmentLuceneVersion + ")"); } } else { // else leave as null: no segments } } else { // else we recompute it below as we visit segments; it can't be used for throwing // IndexFormatTooOldExc, but consumers of // SegmentInfos can maybe still use it for other reasons } long totalDocs = 0; for (int seg = 0; seg < numSegments; seg++) { String segName = input.readString(); final byte segmentID[]; if (format >= VERSION_50) { byte hasID = input.readByte(); if (hasID == 1) { segmentID = new byte[StringHelper.ID_LENGTH]; input.readBytes(segmentID, 0, segmentID.length); } else if (hasID == 0) { segmentID = null; // 4.x segment, doesn't have an ID } else { throw new CorruptIndexException("invalid hasID byte, got: " + hasID, input); } } else { segmentID = null; } Codec codec = readCodec(input, format < VERSION_53); SegmentInfo info = codec.segmentInfoFormat().read(directory, segName, segmentID, IOContext.READ); info.setCodec(codec); totalDocs += info.maxDoc(); long delGen = input.readLong(); int delCount = input.readInt(); if (delCount < 0 || delCount > info.maxDoc()) { throw new CorruptIndexException( "invalid deletion count: " + delCount + " vs maxDoc=" + info.maxDoc(), input); } long fieldInfosGen = -1; if (format >= VERSION_46) { fieldInfosGen = input.readLong(); } long dvGen = -1; if (format >= VERSION_49) { dvGen = input.readLong(); } else { dvGen = fieldInfosGen; } SegmentCommitInfo siPerCommit = new SegmentCommitInfo(info, delCount, delGen, fieldInfosGen, dvGen); if (format >= VERSION_46) { if (format < VERSION_49) { // Recorded per-generation files, which were buggy (see // LUCENE-5636). We need to read and keep them so we continue to // reference those files. Unfortunately it means that the files will // be referenced even if the fields are updated again, until the // segment is merged. final int numGensUpdatesFiles = input.readInt(); final Map<Long, Set<String>> genUpdatesFiles; if (numGensUpdatesFiles == 0) { genUpdatesFiles = Collections.emptyMap(); } else { genUpdatesFiles = new HashMap<>(numGensUpdatesFiles); for (int i = 0; i < numGensUpdatesFiles; i++) { genUpdatesFiles.put(input.readLong(), input.readStringSet()); } } siPerCommit.setGenUpdatesFiles(genUpdatesFiles); } else { if (format >= VERSION_51) { siPerCommit.setFieldInfosFiles(input.readSetOfStrings()); } else { siPerCommit.setFieldInfosFiles(Collections.unmodifiableSet(input.readStringSet())); } final Map<Integer, Set<String>> dvUpdateFiles; final int numDVFields = input.readInt(); if (numDVFields == 0) { dvUpdateFiles = Collections.emptyMap(); } else { Map<Integer, Set<String>> map = new HashMap<>(numDVFields); for (int i = 0; i < numDVFields; i++) { if (format >= VERSION_51) { map.put(input.readInt(), input.readSetOfStrings()); } else { map.put(input.readInt(), Collections.unmodifiableSet(input.readStringSet())); } } dvUpdateFiles = Collections.unmodifiableMap(map); } siPerCommit.setDocValuesUpdatesFiles(dvUpdateFiles); } } infos.add(siPerCommit); Version segmentVersion = info.getVersion(); if (format < VERSION_53) { if (infos.minSegmentLuceneVersion == null || segmentVersion.onOrAfter(infos.minSegmentLuceneVersion) == false) { infos.minSegmentLuceneVersion = segmentVersion; } } else if (segmentVersion.onOrAfter(infos.minSegmentLuceneVersion) == false) { throw new CorruptIndexException( "segments file recorded minSegmentLuceneVersion=" + infos.minSegmentLuceneVersion + " but segment=" + info + " has older version=" + segmentVersion, input); } } if (format >= VERSION_51) { infos.userData = input.readMapOfStrings(); } else { infos.userData = Collections.unmodifiableMap(input.readStringStringMap()); } if (format >= VERSION_48) { CodecUtil.checkFooter(input); } else { final long checksumNow = input.getChecksum(); final long checksumThen = input.readLong(); if (checksumNow != checksumThen) { throw new CorruptIndexException( "checksum failed (hardware problem?) : expected=" + Long.toHexString(checksumThen) + " actual=" + Long.toHexString(checksumNow), input); } CodecUtil.checkEOF(input); } // LUCENE-6299: check we are in bounds if (totalDocs > IndexWriter.getActualMaxDocs()) { throw new CorruptIndexException( "Too many documents: an index cannot exceed " + IndexWriter.getActualMaxDocs() + " but readers have total maxDoc=" + totalDocs, input); } return infos; } }
@Override public FieldInfos read( Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext context) throws IOException { final String fileName = IndexFileNames.segmentFileName( segmentInfo.name, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION); try (ChecksumIndexInput input = directory.openChecksumInput(fileName, context)) { int codecVersion = CodecUtil.checkHeader( input, Lucene46FieldInfosFormat.CODEC_NAME, Lucene46FieldInfosFormat.FORMAT_START, Lucene46FieldInfosFormat.FORMAT_CURRENT); final int size = input.readVInt(); // read in the size FieldInfo infos[] = new FieldInfo[size]; for (int i = 0; i < size; i++) { String name = input.readString(); final int fieldNumber = input.readVInt(); if (fieldNumber < 0) { throw new CorruptIndexException( "invalid field number for field: " + name + ", fieldNumber=" + fieldNumber, input); } byte bits = input.readByte(); boolean isIndexed = (bits & Lucene46FieldInfosFormat.IS_INDEXED) != 0; boolean storeTermVector = (bits & Lucene46FieldInfosFormat.STORE_TERMVECTOR) != 0; boolean omitNorms = (bits & Lucene46FieldInfosFormat.OMIT_NORMS) != 0; boolean storePayloads = (bits & Lucene46FieldInfosFormat.STORE_PAYLOADS) != 0; final IndexOptions indexOptions; if (!isIndexed) { indexOptions = IndexOptions.NONE; } else if ((bits & Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0) { indexOptions = IndexOptions.DOCS; } else if ((bits & Lucene46FieldInfosFormat.OMIT_POSITIONS) != 0) { indexOptions = IndexOptions.DOCS_AND_FREQS; } else if ((bits & Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0) { indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; } else { indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; } // DV Types are packed in one byte byte val = input.readByte(); final DocValuesType docValuesType = getDocValuesType(input, (byte) (val & 0x0F)); final DocValuesType normsType = getDocValuesType(input, (byte) ((val >>> 4) & 0x0F)); final long dvGen = input.readLong(); final Map<String, String> attributes = input.readStringStringMap(); if (isIndexed && omitNorms == false && normsType == DocValuesType.NONE) { // Undead norms! Lucene42NormsProducer will check this and bring norms back from the // dead: UndeadNormsProducer.setUndead(attributes); } infos[i] = new FieldInfo( name, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, docValuesType, dvGen, Collections.unmodifiableMap(attributes)); infos[i].checkConsistency(); } if (codecVersion >= Lucene46FieldInfosFormat.FORMAT_CHECKSUM) { CodecUtil.checkFooter(input); } else { CodecUtil.checkEOF(input); } return new FieldInfos(infos); } }
Lucene42DocValuesProducer( SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException { maxDoc = state.segmentInfo.maxDoc(); merging = false; String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); // read in the entries from the metadata file. ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context); boolean success = false; ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass())); try { version = CodecUtil.checkHeader(in, metaCodec, VERSION_START, VERSION_CURRENT); numerics = new HashMap<>(); binaries = new HashMap<>(); fsts = new HashMap<>(); numEntries = readFields(in, state.fieldInfos); if (version >= VERSION_CHECKSUM) { CodecUtil.checkFooter(in); } else { CodecUtil.checkEOF(in); } success = true; } finally { if (success) { IOUtils.close(in); } else { IOUtils.closeWhileHandlingException(in); } } String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension); this.data = state.directory.openInput(dataName, state.context); success = false; try { final int version2 = CodecUtil.checkHeader(data, dataCodec, VERSION_START, VERSION_CURRENT); if (version != version2) { throw new CorruptIndexException( "Format versions mismatch: meta=" + version + ", data=" + version2, data); } if (version >= VERSION_CHECKSUM) { // NOTE: data file is too costly to verify checksum against all the bytes on open, // but for now we at least verify proper structure of the checksum footer: which looks // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption // such as file truncation. CodecUtil.retrieveChecksum(data); } success = true; } finally { if (!success) { IOUtils.closeWhileHandlingException(this.data); } } }