/** * Read a particular segmentFileName. Note that this may throw an IOException if a commit is in * process. * * @param directory -- directory containing the segments file * @param segmentFileName -- segment file to load * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ public final void read(Directory directory, String segmentFileName) throws CorruptIndexException, IOException { boolean success = false; // Clear any previous segments: this.clear(); ChecksumIndexInput input = new ChecksumIndexInput(directory.openInput(segmentFileName)); generation = generationFromSegmentsFileName(segmentFileName); lastGeneration = generation; try { int format = input.readInt(); if (format < 0) { // file contains explicit format info // check that it is a format we can understand if (format < CURRENT_FORMAT) throw new CorruptIndexException("Unknown format version: " + format); version = input.readLong(); // read version counter = input.readInt(); // read counter } else { // file is in old format without explicit format info counter = format; } for (int i = input.readInt(); i > 0; i--) { // read segmentInfos SegmentInfo si = new SegmentInfo(directory, format, input); if (si.getVersion() == null) { // It's a pre-3.1 segment, upgrade its version to either 3.0 or 2.x Directory dir = directory; if (si.getDocStoreOffset() != -1) { if (si.getDocStoreIsCompoundFile()) { dir = new CompoundFileReader( dir, IndexFileNames.segmentFileName( si.getDocStoreSegment(), IndexFileNames.COMPOUND_FILE_STORE_EXTENSION), 1024); } } else if (si.getUseCompoundFile()) { dir = new CompoundFileReader( dir, IndexFileNames.segmentFileName(si.name, IndexFileNames.COMPOUND_FILE_EXTENSION), 1024); } try { String store = si.getDocStoreOffset() != -1 ? si.getDocStoreSegment() : si.name; si.setVersion(FieldsReader.detectCodeVersion(dir, store)); } finally { // If we opened the directory, close it if (dir != directory) dir.close(); } } add(si); } if (format >= 0) { // in old format the version number may be at the end of the file if (input.getFilePointer() >= input.length()) version = System.currentTimeMillis(); // old file format without version number else version = input.readLong(); // read version } if (format <= FORMAT_USER_DATA) { if (format <= FORMAT_DIAGNOSTICS) { userData = input.readStringStringMap(); } else if (0 != input.readByte()) { userData = Collections.singletonMap("userData", input.readString()); } else { userData = Collections.<String, String>emptyMap(); } } else { userData = Collections.<String, String>emptyMap(); } if (format <= FORMAT_CHECKSUM) { final long checksumNow = input.getChecksum(); final long checksumThen = input.readLong(); if (checksumNow != checksumThen) throw new CorruptIndexException("checksum mismatch in segments file"); } success = true; } finally { input.close(); if (!success) { // Clear any segment infos we had loaded so we // have a clean slate on retry: this.clear(); } } }
/** * Read a particular segmentFileName. Note that this may throw an IOException if a commit is in * process. * * @param directory -- directory containing the segments file * @param segmentFileName -- segment file to load * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ public static final SegmentInfos readCommit(Directory directory, String segmentFileName) throws IOException { long generation = generationFromSegmentsFileName(segmentFileName); try (ChecksumIndexInput input = directory.openChecksumInput(segmentFileName, IOContext.READ)) { // NOTE: as long as we want to throw indexformattooold (vs corruptindexexception), we need // to read the magic ourselves. int magic = input.readInt(); if (magic != CodecUtil.CODEC_MAGIC) { throw new IndexFormatTooOldException( input, magic, CodecUtil.CODEC_MAGIC, CodecUtil.CODEC_MAGIC); } // 4.0+ int format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_40, VERSION_CURRENT); // 5.0+ byte id[] = null; if (format >= VERSION_50) { id = new byte[StringHelper.ID_LENGTH]; input.readBytes(id, 0, id.length); CodecUtil.checkIndexHeaderSuffix(input, Long.toString(generation, Character.MAX_RADIX)); } SegmentInfos infos = new SegmentInfos(); infos.id = id; infos.generation = generation; infos.lastGeneration = generation; if (format >= VERSION_53) { // TODO: in the future (7.0? sigh) we can use this to throw IndexFormatTooOldException ... // or just rely on the // minSegmentLuceneVersion check instead: infos.luceneVersion = Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt()); } else { // else compute the min version down below in the for loop } infos.version = input.readLong(); infos.counter = input.readInt(); int numSegments = input.readInt(); if (numSegments < 0) { throw new CorruptIndexException("invalid segment count: " + numSegments, input); } if (format >= VERSION_53) { if (numSegments > 0) { infos.minSegmentLuceneVersion = Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt()); if (infos.minSegmentLuceneVersion.onOrAfter(Version.LUCENE_4_0_0_ALPHA) == false) { throw new IndexFormatTooOldException( input, "this index contains a too-old segment (version: " + infos.minSegmentLuceneVersion + ")"); } } else { // else leave as null: no segments } } else { // else we recompute it below as we visit segments; it can't be used for throwing // IndexFormatTooOldExc, but consumers of // SegmentInfos can maybe still use it for other reasons } long totalDocs = 0; for (int seg = 0; seg < numSegments; seg++) { String segName = input.readString(); final byte segmentID[]; if (format >= VERSION_50) { byte hasID = input.readByte(); if (hasID == 1) { segmentID = new byte[StringHelper.ID_LENGTH]; input.readBytes(segmentID, 0, segmentID.length); } else if (hasID == 0) { segmentID = null; // 4.x segment, doesn't have an ID } else { throw new CorruptIndexException("invalid hasID byte, got: " + hasID, input); } } else { segmentID = null; } Codec codec = readCodec(input, format < VERSION_53); SegmentInfo info = codec.segmentInfoFormat().read(directory, segName, segmentID, IOContext.READ); info.setCodec(codec); totalDocs += info.maxDoc(); long delGen = input.readLong(); int delCount = input.readInt(); if (delCount < 0 || delCount > info.maxDoc()) { throw new CorruptIndexException( "invalid deletion count: " + delCount + " vs maxDoc=" + info.maxDoc(), input); } long fieldInfosGen = -1; if (format >= VERSION_46) { fieldInfosGen = input.readLong(); } long dvGen = -1; if (format >= VERSION_49) { dvGen = input.readLong(); } else { dvGen = fieldInfosGen; } SegmentCommitInfo siPerCommit = new SegmentCommitInfo(info, delCount, delGen, fieldInfosGen, dvGen); if (format >= VERSION_46) { if (format < VERSION_49) { // Recorded per-generation files, which were buggy (see // LUCENE-5636). We need to read and keep them so we continue to // reference those files. Unfortunately it means that the files will // be referenced even if the fields are updated again, until the // segment is merged. final int numGensUpdatesFiles = input.readInt(); final Map<Long, Set<String>> genUpdatesFiles; if (numGensUpdatesFiles == 0) { genUpdatesFiles = Collections.emptyMap(); } else { genUpdatesFiles = new HashMap<>(numGensUpdatesFiles); for (int i = 0; i < numGensUpdatesFiles; i++) { genUpdatesFiles.put(input.readLong(), input.readStringSet()); } } siPerCommit.setGenUpdatesFiles(genUpdatesFiles); } else { if (format >= VERSION_51) { siPerCommit.setFieldInfosFiles(input.readSetOfStrings()); } else { siPerCommit.setFieldInfosFiles(Collections.unmodifiableSet(input.readStringSet())); } final Map<Integer, Set<String>> dvUpdateFiles; final int numDVFields = input.readInt(); if (numDVFields == 0) { dvUpdateFiles = Collections.emptyMap(); } else { Map<Integer, Set<String>> map = new HashMap<>(numDVFields); for (int i = 0; i < numDVFields; i++) { if (format >= VERSION_51) { map.put(input.readInt(), input.readSetOfStrings()); } else { map.put(input.readInt(), Collections.unmodifiableSet(input.readStringSet())); } } dvUpdateFiles = Collections.unmodifiableMap(map); } siPerCommit.setDocValuesUpdatesFiles(dvUpdateFiles); } } infos.add(siPerCommit); Version segmentVersion = info.getVersion(); if (format < VERSION_53) { if (infos.minSegmentLuceneVersion == null || segmentVersion.onOrAfter(infos.minSegmentLuceneVersion) == false) { infos.minSegmentLuceneVersion = segmentVersion; } } else if (segmentVersion.onOrAfter(infos.minSegmentLuceneVersion) == false) { throw new CorruptIndexException( "segments file recorded minSegmentLuceneVersion=" + infos.minSegmentLuceneVersion + " but segment=" + info + " has older version=" + segmentVersion, input); } } if (format >= VERSION_51) { infos.userData = input.readMapOfStrings(); } else { infos.userData = Collections.unmodifiableMap(input.readStringStringMap()); } if (format >= VERSION_48) { CodecUtil.checkFooter(input); } else { final long checksumNow = input.getChecksum(); final long checksumThen = input.readLong(); if (checksumNow != checksumThen) { throw new CorruptIndexException( "checksum failed (hardware problem?) : expected=" + Long.toHexString(checksumThen) + " actual=" + Long.toHexString(checksumNow), input); } CodecUtil.checkEOF(input); } // LUCENE-6299: check we are in bounds if (totalDocs > IndexWriter.getActualMaxDocs()) { throw new CorruptIndexException( "Too many documents: an index cannot exceed " + IndexWriter.getActualMaxDocs() + " but readers have total maxDoc=" + totalDocs, input); } return infos; } }