Lucene50NormsConsumer( SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException { boolean success = false; try { String dataName = IndexFileNames.segmentFileName( state.segmentInfo.name, state.segmentSuffix, dataExtension); data = state.directory.createOutput(dataName, state.context); CodecUtil.writeIndexHeader( data, dataCodec, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix); String metaName = IndexFileNames.segmentFileName( state.segmentInfo.name, state.segmentSuffix, metaExtension); meta = state.directory.createOutput(metaName, state.context); CodecUtil.writeIndexHeader( meta, metaCodec, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix); success = true; } finally { if (!success) { IOUtils.closeWhileHandlingException(this); } } }
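The constructor above stamps both the data and meta files with an index header; on the read side a producer must check the same codec name, version bounds, segment id, and segment suffix before trusting the stream. A minimal reader-side sketch using only CodecUtil calls that appear elsewhere in this section — the class and method names here are illustrative, not Lucene's:

import java.io.IOException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.store.ChecksumIndexInput;

final class NormsMetaCheckSketch {
  // Reads and validates the meta file's header and footer; metaCodec/metaExtension
  // mirror the arguments the writer received. Names are illustrative, not Lucene's.
  static int checkMeta(SegmentReadState state, String metaCodec, String metaExtension,
                       int versionStart, int versionCurrent) throws IOException {
    String metaName = IndexFileNames.segmentFileName(
        state.segmentInfo.name, state.segmentSuffix, metaExtension);
    try (ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context)) {
      int version = CodecUtil.checkIndexHeader(
          in, metaCodec, versionStart, versionCurrent,
          state.segmentInfo.getId(), state.segmentSuffix);
      // ... per-field norms metadata would be read here ...
      CodecUtil.checkFooter(in); // verifies the checksum written by writeFooter
      return version;
    }
  }
}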
public CompletionFieldsProducer(SegmentReadState state) throws IOException { String suggestFSTFile = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION); IndexInput input = state.directory.openInput(suggestFSTFile, state.context); if (state.segmentInfo.getVersion().onOrAfter(Version.LUCENE_6_2_0)) { // Lucene 6.2.0+ requires all index files to use an index header, but prior to that we used an // ordinary codec header: version = CodecUtil.checkIndexHeader( input, CODEC_NAME, SUGGEST_CODEC_VERSION, SUGGEST_VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix); } else { version = CodecUtil.checkHeader( input, CODEC_NAME, SUGGEST_CODEC_VERSION, SUGGEST_VERSION_CURRENT); } FieldsProducer delegateProducer = null; boolean success = false; try { PostingsFormat delegatePostingsFormat = PostingsFormat.forName(input.readString()); String providerName = input.readString(); CompletionLookupProvider completionLookupProvider = providers.get(providerName); if (completionLookupProvider == null) { throw new IllegalStateException( "no provider with name [" + providerName + "] registered"); } // TODO: we could clone the ReadState and make it always forward IOContext.MERGE to prevent // unnecessary heap usage? delegateProducer = delegatePostingsFormat.fieldsProducer(state); /* * If we are merging we don't load the FSTs at all such that we * don't consume so much memory during merge */ if (state.context.context != Context.MERGE) { // TODO: maybe we can do this in a fully lazy fashion based on some configuration // eventually we should have some kind of circuit breaker that prevents us from going OOM // here with some configuration this.lookupFactory = completionLookupProvider.load(input); } else { this.lookupFactory = null; } this.delegateProducer = delegateProducer; success = true; } finally { if (!success) { IOUtils.closeWhileHandlingException(delegateProducer, input); } else { IOUtils.close(input); } } }
@Override public void write( Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException { final String fileName = IndexFileNames.segmentFileName( segmentInfo.name, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION); try (IndexOutput output = directory.createOutput(fileName, context)) { CodecUtil.writeHeader( output, Lucene46FieldInfosFormat.CODEC_NAME, Lucene46FieldInfosFormat.FORMAT_CURRENT); output.writeVInt(infos.size()); for (FieldInfo fi : infos) { IndexOptions indexOptions = fi.getIndexOptions(); byte bits = 0x0; if (fi.hasVectors()) bits |= Lucene46FieldInfosFormat.STORE_TERMVECTOR; if (fi.omitsNorms()) bits |= Lucene46FieldInfosFormat.OMIT_NORMS; if (fi.hasPayloads()) bits |= Lucene46FieldInfosFormat.STORE_PAYLOADS; if (fi.getIndexOptions() != IndexOptions.NONE) { bits |= Lucene46FieldInfosFormat.IS_INDEXED; assert indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.hasPayloads(); if (indexOptions == IndexOptions.DOCS) { bits |= Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS; } else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) { bits |= Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS; } else if (indexOptions == IndexOptions.DOCS_AND_FREQS) { bits |= Lucene46FieldInfosFormat.OMIT_POSITIONS; } } output.writeString(fi.name); output.writeVInt(fi.number); output.writeByte(bits); // pack the DV types in one byte final byte dv = docValuesByte(fi.getDocValuesType()); final byte nrm = docValuesByte(fi.hasNorms() ? DocValuesType.NUMERIC : DocValuesType.NONE); assert (dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0; byte val = (byte) (0xff & ((nrm << 4) | dv)); output.writeByte(val); output.writeLong(fi.getDocValuesGen()); output.writeStringStringMap(fi.attributes()); } CodecUtil.writeFooter(output); } }
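The "pack the DV types in one byte" step above stores the doc-values type code in the low nibble and the norms type code in the high nibble, which is why the asserts require both codes to fit in four bits. A self-contained round trip of that packing (the concrete code values are made up for the demo; the unpack lines match what the Lucene46 reader later in this section does):

// Run with java -ea NibblePackingDemo to enable the assert.
public class NibblePackingDemo {
  public static void main(String[] args) {
    byte dv = 0x2;  // made-up low-nibble code for the doc-values type
    byte nrm = 0x1; // made-up high-nibble code for the norms type
    byte val = (byte) (0xff & ((nrm << 4) | dv));  // pack, exactly as in the writer
    byte dvOut = (byte) (val & 0x0F);              // unpack, as in the Lucene46 reader
    byte nrmOut = (byte) ((val >>> 4) & 0x0F);
    System.out.printf("packed=0x%02x dv=%d nrm=%d%n", val, dvOut, nrmOut);
    assert dvOut == dv && nrmOut == nrm;
  }
}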
/** Full constructor */ public Lucene60PointsWriter( SegmentWriteState writeState, int maxPointsInLeafNode, double maxMBSortInHeap) throws IOException { assert writeState.fieldInfos.hasPointValues(); this.writeState = writeState; this.maxPointsInLeafNode = maxPointsInLeafNode; this.maxMBSortInHeap = maxMBSortInHeap; String dataFileName = IndexFileNames.segmentFileName( writeState.segmentInfo.name, writeState.segmentSuffix, Lucene60PointsFormat.DATA_EXTENSION); dataOut = writeState.directory.createOutput(dataFileName, writeState.context); boolean success = false; try { CodecUtil.writeIndexHeader( dataOut, Lucene60PointsFormat.DATA_CODEC_NAME, Lucene60PointsFormat.DATA_VERSION_CURRENT, writeState.segmentInfo.getId(), writeState.segmentSuffix); success = true; } finally { if (success == false) { IOUtils.closeWhileHandlingException(dataOut); } } }
@Test public void testCleanUpWithLegacyChecksums() throws IOException { Map<String, StoreFileMetaData> metaDataMap = new HashMap<>(); metaDataMap.put( "segments_1", new StoreFileMetaData("segments_1", 50, null, null, new BytesRef(new byte[] {1}))); metaDataMap.put( "_0_1.del", new StoreFileMetaData("_0_1.del", 42, "foobarbaz", null, new BytesRef())); Store.MetadataSnapshot snapshot = new Store.MetadataSnapshot(metaDataMap); final ShardId shardId = new ShardId(new Index("index"), 1); DirectoryService directoryService = new LuceneManagedDirectoryService(random()); Store store = new Store( shardId, ImmutableSettings.EMPTY, directoryService, randomDistributor(directoryService), new DummyShardLock(shardId)); for (String file : metaDataMap.keySet()) { try (IndexOutput output = store.directory().createOutput(file, IOContext.DEFAULT)) { BytesRef bytesRef = new BytesRef(TestUtil.randomRealisticUnicodeString(random(), 10, 1024)); output.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length); CodecUtil.writeFooter(output); } } store.verifyAfterCleanup(snapshot, snapshot); store.deleteContent(); IOUtils.close(store); }
private int readHeader(IndexInput in) throws IOException { return CodecUtil.checkHeader( in, FSTTermsWriter.TERMS_CODEC_NAME, FSTTermsWriter.TERMS_VERSION_START, FSTTermsWriter.TERMS_VERSION_CURRENT); }
public BloomFilteredFieldsProducer(SegmentReadState state) throws IOException { String bloomFileName = IndexFileNames.segmentFileName( state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION); IndexInput bloomIn = null; boolean success = false; try { bloomIn = state.directory.openInput(bloomFileName, state.context); CodecUtil.checkHeader(bloomIn, BLOOM_CODEC_NAME, BLOOM_CODEC_VERSION, BLOOM_CODEC_VERSION); // // Load the hash function used in the BloomFilter // hashFunction = HashFunction.forName(bloomIn.readString()); // Load the delegate postings format PostingsFormat delegatePostingsFormat = PostingsFormat.forName(bloomIn.readString()); this.delegateFieldsProducer = delegatePostingsFormat.fieldsProducer(state); int numBlooms = bloomIn.readInt(); for (int i = 0; i < numBlooms; i++) { int fieldNum = bloomIn.readInt(); FuzzySet bloom = FuzzySet.deserialize(bloomIn); FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldNum); bloomsByFieldName.put(fieldInfo.name, bloom); } IOUtils.close(bloomIn); success = true; } finally { if (!success) { IOUtils.closeWhileHandlingException(bloomIn, delegateFieldsProducer); } } }
public CompletionFieldsConsumer(SegmentWriteState state) throws IOException { this.delegatesFieldsConsumer = delegatePostingsFormat.fieldsConsumer(state); String suggestFSTFile = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION); IndexOutput output = null; boolean success = false; try { output = state.directory.createOutput(suggestFSTFile, state.context); CodecUtil.writeIndexHeader( output, CODEC_NAME, SUGGEST_VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix); /* * we write the delegate postings format name so we can load it * without getting an instance in the ctor */ output.writeString(delegatePostingsFormat.getName()); output.writeString(writeProvider.getName()); this.suggestFieldsConsumer = writeProvider.consumer(output); success = true; } finally { if (!success) { IOUtils.closeWhileHandlingException(output); } } }
@Override public void checkIntegrity() throws IOException { // verify terms CodecUtil.checksumEntireFile(in); // verify postings postingsReader.checkIntegrity(); }
@Override public void checkIntegrity() throws IOException { // term dictionary CodecUtil.checksumEntireFile(termsIn); // postings postingsReader.checkIntegrity(); }
private void seekDir(IndexInput in) throws IOException { if (version >= FSTTermsWriter.TERMS_VERSION_CHECKSUM) { in.seek(in.length() - CodecUtil.footerLength() - 8); } else { in.seek(in.length() - 8); } in.seek(in.readLong()); }
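seekDir() above depends on the file's trailer layout: the writer emits an 8-byte directory pointer as the very last value before the footer, so seeking to length() - footerLength() - 8 lands exactly on that pointer (the pre-checksum branch has no footer, hence length() - 8). A hedged writer-side sketch of that trailer, assuming the checksum-era FSTTermsWriter layout:

import java.io.IOException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.IndexOutput;

final class DirTrailerSketch {
  // Writes the trailer that the checksum-era branch of seekDir() expects:
  // an 8-byte pointer to the field directory, immediately followed by the footer.
  static void writeDirTrailer(IndexOutput out, long dirStart) throws IOException {
    out.writeLong(dirStart);
    CodecUtil.writeFooter(out); // footerLength() bytes: magic + algorithm id + checksum
  }
}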
@Override public void init(IndexInput termsIn, SegmentReadState state) throws IOException { // Make sure we are talking to the matching postings writer CodecUtil.checkIndexHeader( termsIn, IDVersionPostingsWriter.TERMS_CODEC, IDVersionPostingsWriter.VERSION_START, IDVersionPostingsWriter.VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix); }
@Test public void testVerifyingIndexOutput() throws IOException { Directory dir = newDirectory(); IndexOutput output = dir.createOutput("foo.bar", IOContext.DEFAULT); int iters = scaledRandomIntBetween(10, 100); for (int i = 0; i < iters; i++) { BytesRef bytesRef = new BytesRef(TestUtil.randomRealisticUnicodeString(random(), 10, 1024)); output.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length); } CodecUtil.writeFooter(output); output.close(); IndexInput indexInput = dir.openInput("foo.bar", IOContext.DEFAULT); String checksum = Store.digestToString(CodecUtil.retrieveChecksum(indexInput)); indexInput.seek(0); BytesRef ref = new BytesRef(scaledRandomIntBetween(1, 1024)); long length = indexInput.length(); IndexOutput verifyingOutput = new Store.LuceneVerifyingIndexOutput( new StoreFileMetaData("foo1.bar", length, checksum), dir.createOutput("foo1.bar", IOContext.DEFAULT)); while (length > 0) { if (random().nextInt(10) == 0) { verifyingOutput.writeByte(indexInput.readByte()); length--; } else { int min = (int) Math.min(length, ref.bytes.length); indexInput.readBytes(ref.bytes, ref.offset, min); verifyingOutput.writeBytes(ref.bytes, ref.offset, min); length -= min; } } Store.verify(verifyingOutput); verifyingOutput.writeByte((byte) 0x0); try { Store.verify(verifyingOutput); fail("should be a corrupted index"); } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) { // ok } IOUtils.close(indexInput, verifyingOutput, dir); }
@Override public void close() throws IOException { boolean success = false; try { if (meta != null) { meta.writeVInt(-1); // write EOF marker CodecUtil.writeFooter(meta); // write checksum } if (data != null) { CodecUtil.writeFooter(data); // write checksum } success = true; } finally { if (success) { IOUtils.close(data, meta); } else { IOUtils.closeWhileHandlingException(data, meta); } meta = data = null; } }
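close() above terminates the meta stream with a -1 field-number marker before the checksum footer, so a reader can consume per-field entries without knowing the count up front. A hypothetical reader loop for that layout (the real producer reads concrete per-field metadata where the ellipsis comment sits):

import java.io.IOException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.ChecksumIndexInput;

final class MetaEofSketch {
  // Consumes per-field entries until the -1 EOF marker written by close(),
  // then verifies the footer that close() appended with writeFooter.
  static void readFields(ChecksumIndexInput meta) throws IOException {
    int fieldNumber = meta.readVInt();
    while (fieldNumber != -1) {
      // ... this field's metadata would be read here ...
      fieldNumber = meta.readVInt();
    }
    CodecUtil.checkFooter(meta);
  }
}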
@Override public void init(IndexInput termsIn) throws IOException { // Make sure we are talking to the matching postings writer CodecUtil.checkHeader( termsIn, SepPostingsWriter.CODEC, SepPostingsWriter.VERSION_START, SepPostingsWriter.VERSION_START); skipInterval = termsIn.readInt(); maxSkipLevels = termsIn.readInt(); skipMinimum = termsIn.readInt(); }
@Test public void testVerifyingIndexInput() throws IOException { Directory dir = newDirectory(); IndexOutput output = dir.createOutput("foo.bar", IOContext.DEFAULT); int iters = scaledRandomIntBetween(10, 100); for (int i = 0; i < iters; i++) { BytesRef bytesRef = new BytesRef(TestUtil.randomRealisticUnicodeString(random(), 10, 1024)); output.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length); } CodecUtil.writeFooter(output); output.close(); // Check file IndexInput indexInput = dir.openInput("foo.bar", IOContext.DEFAULT); long checksum = CodecUtil.retrieveChecksum(indexInput); indexInput.seek(0); IndexInput verifyingIndexInput = new Store.VerifyingIndexInput(dir.openInput("foo.bar", IOContext.DEFAULT)); readIndexInputFullyWithRandomSeeks(verifyingIndexInput); Store.verify(verifyingIndexInput); assertThat(checksum, equalTo(((ChecksumIndexInput) verifyingIndexInput).getChecksum())); IOUtils.close(indexInput, verifyingIndexInput); // Corrupt file and check again corruptFile(dir, "foo.bar", "foo1.bar"); verifyingIndexInput = new Store.VerifyingIndexInput(dir.openInput("foo1.bar", IOContext.DEFAULT)); readIndexInputFullyWithRandomSeeks(verifyingIndexInput); try { Store.verify(verifyingIndexInput); fail("should be a corrupted index"); } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) { // ok } IOUtils.close(verifyingIndexInput); IOUtils.close(dir); }
@Override public void finish() throws IOException { if (finished) { throw new IllegalStateException("already finished"); } finished = true; CodecUtil.writeFooter(dataOut); String indexFileName = IndexFileNames.segmentFileName( writeState.segmentInfo.name, writeState.segmentSuffix, Lucene60PointsFormat.INDEX_EXTENSION); // Write index file try (IndexOutput indexOut = writeState.directory.createOutput(indexFileName, writeState.context)) { CodecUtil.writeIndexHeader( indexOut, Lucene60PointsFormat.META_CODEC_NAME, Lucene60PointsFormat.INDEX_VERSION_CURRENT, writeState.segmentInfo.getId(), writeState.segmentSuffix); int count = indexFPs.size(); indexOut.writeVInt(count); for (Map.Entry<String, Long> ent : indexFPs.entrySet()) { FieldInfo fieldInfo = writeState.fieldInfos.fieldInfo(ent.getKey()); if (fieldInfo == null) { throw new IllegalStateException( "wrote field=\"" + ent.getKey() + "\" but that field doesn't exist in FieldInfos"); } indexOut.writeVInt(fieldInfo.number); indexOut.writeVLong(ent.getValue()); } CodecUtil.writeFooter(indexOut); } }
public FSTTermsReader(SegmentReadState state, PostingsReaderBase postingsReader) throws IOException { final String termsFileName = IndexFileNames.segmentFileName( state.segmentInfo.name, state.segmentSuffix, FSTTermsWriter.TERMS_EXTENSION); this.postingsReader = postingsReader; final IndexInput in = state.directory.openInput(termsFileName, state.context); boolean success = false; try { version = readHeader(in); if (version >= FSTTermsWriter.TERMS_VERSION_CHECKSUM) { CodecUtil.checksumEntireFile(in); } this.postingsReader.init(in); seekDir(in); final FieldInfos fieldInfos = state.fieldInfos; final int numFields = in.readVInt(); for (int i = 0; i < numFields; i++) { int fieldNumber = in.readVInt(); FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber); long numTerms = in.readVLong(); long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : in.readVLong(); long sumDocFreq = in.readVLong(); int docCount = in.readVInt(); int longsSize = in.readVInt(); TermsReader current = new TermsReader( fieldInfo, in, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize); TermsReader previous = fields.put(fieldInfo.name, current); checkFieldSummary(state.segmentInfo, in, current, previous); } success = true; } finally { if (success) { IOUtils.close(in); } else { IOUtils.closeWhileHandlingException(in); } } }
@Override public void close() throws IOException { delegateFieldsConsumer.close(); // Now we are done accumulating values for these fields List<Entry<FieldInfo, FuzzySet>> nonSaturatedBlooms = new ArrayList<Map.Entry<FieldInfo, FuzzySet>>(); for (Entry<FieldInfo, FuzzySet> entry : bloomFilters.entrySet()) { FuzzySet bloomFilter = entry.getValue(); if (!bloomFilterFactory.isSaturated(bloomFilter, entry.getKey())) { nonSaturatedBlooms.add(entry); } } String bloomFileName = IndexFileNames.segmentFileName( state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION); IndexOutput bloomOutput = null; try { bloomOutput = state.directory.createOutput(bloomFileName, state.context); CodecUtil.writeHeader(bloomOutput, BLOOM_CODEC_NAME, BLOOM_CODEC_VERSION); // remember the name of the postings format we will delegate to bloomOutput.writeString(delegatePostingsFormat.getName()); // First field in the output file is the number of fields+blooms saved bloomOutput.writeInt(nonSaturatedBlooms.size()); for (Entry<FieldInfo, FuzzySet> entry : nonSaturatedBlooms) { FieldInfo fieldInfo = entry.getKey(); FuzzySet bloomFilter = entry.getValue(); bloomOutput.writeInt(fieldInfo.number); saveAppropriatelySizedBloomFilter(bloomOutput, bloomFilter, fieldInfo); } } finally { IOUtils.close(bloomOutput); } // We are done with large bitsets so no need to keep them hanging around bloomFilters.clear(); }
@Test public void testRenameFile() throws IOException { final ShardId shardId = new ShardId(new Index("index"), 1); DirectoryService directoryService = new LuceneManagedDirectoryService(random(), false); Store store = new Store( shardId, ImmutableSettings.EMPTY, directoryService, randomDistributor(directoryService), new DummyShardLock(shardId)); { IndexOutput output = store.directory().createOutput("foo.bar", IOContext.DEFAULT); int iters = scaledRandomIntBetween(10, 100); for (int i = 0; i < iters; i++) { BytesRef bytesRef = new BytesRef(TestUtil.randomRealisticUnicodeString(random(), 10, 1024)); output.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length); } CodecUtil.writeFooter(output); output.close(); } store.renameFile("foo.bar", "bar.foo"); assertThat(store.directory().listAll().length, is(1)); final long lastChecksum; try (IndexInput input = store.directory().openInput("bar.foo", IOContext.DEFAULT)) { lastChecksum = CodecUtil.checksumEntireFile(input); } try { store.directory().openInput("foo.bar", IOContext.DEFAULT); fail("file was renamed"); } catch (FileNotFoundException | NoSuchFileException ex) { // expected } { IndexOutput output = store.directory().createOutput("foo.bar", IOContext.DEFAULT); int iters = scaledRandomIntBetween(10, 100); for (int i = 0; i < iters; i++) { BytesRef bytesRef = new BytesRef(TestUtil.randomRealisticUnicodeString(random(), 10, 1024)); output.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length); } CodecUtil.writeFooter(output); output.close(); } DistributorDirectory distributorDirectory = DirectoryUtils.getLeaf(store.directory(), DistributorDirectory.class); if (distributorDirectory != null && distributorDirectory.getDirectory("foo.bar") != distributorDirectory.getDirectory("bar.foo")) { try { store.renameFile("foo.bar", "bar.foo"); fail("target file already exists in a different directory"); } catch (IOException ex) { // expected } try (IndexInput input = store.directory().openInput("bar.foo", IOContext.DEFAULT)) { assertThat(lastChecksum, equalTo(CodecUtil.checksumEntireFile(input))); } assertThat(store.directory().listAll().length, is(2)); assertDeleteContent(store, directoryService); IOUtils.close(store); } else { store.renameFile("foo.bar", "bar.foo"); assertThat(store.directory().listAll().length, is(1)); assertDeleteContent(store, directoryService); IOUtils.close(store); } }
/** * Given a translog file, return an ImmutableTranslogReader based on an optionally-existing header in the * file. If the file is empty, or the header identifies version 0 of the translog file format, a * legacy reader is returned. */ public static ImmutableTranslogReader open( ChannelReference channelReference, Checkpoint checkpoint, String translogUUID) throws IOException { final FileChannel channel = channelReference.getChannel(); final Path path = channelReference.getPath(); assert channelReference.getGeneration() == checkpoint.generation : "expected generation: " + channelReference.getGeneration() + " but got: " + checkpoint.generation; try { if (checkpoint.offset == 0 && checkpoint.numOps == TranslogReader.UNKNOWN_OP_COUNT) { // only old files can be empty return new LegacyTranslogReader(channelReference.getGeneration(), channelReference, 0); } InputStreamStreamInput headerStream = new InputStreamStreamInput(Channels.newInputStream(channel)); // don't close // Lucene's CodecUtil writes a magic number of 0x3FD76C17 with the // header, in binary this looks like: // // binary: 0011 1111 1101 0111 0110 1100 0001 0111 // hex : 3 f d 7 6 c 1 7 // // With version 0 of the translog, the first byte is the // Operation.Type, which will always be between 0-4, so we know if // we grab the first byte, it can be: // 0x3f => Lucene's magic number, so we can assume it's version 1 or later // 0x00 => version 0 of the translog // // otherwise the first byte of the translog is corrupted and we // should bail byte b1 = headerStream.readByte(); if (b1 == LUCENE_CODEC_HEADER_BYTE) { // Read 3 more bytes, meaning a whole integer has been read byte b2 = headerStream.readByte(); byte b3 = headerStream.readByte(); byte b4 = headerStream.readByte(); // Convert the 4 bytes that were read into an integer int header = ((b1 & 0xFF) << 24) + ((b2 & 0xFF) << 16) + ((b3 & 0xFF) << 8) + ((b4 & 0xFF) << 0); // We confirm CodecUtil's CODEC_MAGIC number (0x3FD76C17) // ourselves here, because it allows us to read the first // byte separately if (header != CodecUtil.CODEC_MAGIC) { throw new TranslogCorruptedException( "translog looks like version 1 or later, but has corrupted header"); } // Confirm the rest of the header using CodecUtil, extracting // the translog version int version = CodecUtil.checkHeaderNoMagic( new InputStreamDataInput(headerStream), TranslogWriter.TRANSLOG_CODEC, 1, Integer.MAX_VALUE); switch (version) { case TranslogWriter.VERSION_CHECKSUMS: assert checkpoint.numOps == TranslogReader.UNKNOWN_OP_COUNT : "expected unknown op count but got: " + checkpoint.numOps; assert checkpoint.offset == Files.size(path) : "offset(" + checkpoint.offset + ") != file_size(" + Files.size(path) + ") for: " + path; // legacy - we still have to support it somehow return new LegacyTranslogReaderBase( channelReference.getGeneration(), channelReference, CodecUtil.headerLength(TranslogWriter.TRANSLOG_CODEC), checkpoint.offset); case TranslogWriter.VERSION_CHECKPOINTS: assert path.getFileName().toString().endsWith(Translog.TRANSLOG_FILE_SUFFIX) : "new file ends with old suffix: " + path; assert checkpoint.numOps > TranslogReader.UNKNOWN_OP_COUNT : "expected at least 0 operations but got: " + checkpoint.numOps; assert checkpoint.offset <= channel.size() : "checkpoint is inconsistent with channel length: " + channel.size() + " " + checkpoint; int len = headerStream.readInt(); if (len > channel.size()) { throw new TranslogCorruptedException("uuid length can't be larger than the translog"); } BytesRef 
ref = new BytesRef(len); ref.length = len; headerStream.read(ref.bytes, ref.offset, ref.length); BytesRef uuidBytes = new BytesRef(translogUUID); if (uuidBytes.bytesEquals(ref) == false) { throw new TranslogCorruptedException( "expected shard UUID [" + uuidBytes + "] but got: [" + ref + "] this translog file belongs to a different translog"); } return new ImmutableTranslogReader( channelReference.getGeneration(), channelReference, ref.length + CodecUtil.headerLength(TranslogWriter.TRANSLOG_CODEC) + RamUsageEstimator.NUM_BYTES_INT, checkpoint.offset, checkpoint.numOps); default: throw new TranslogCorruptedException( "No known translog stream version: " + version + " path:" + path); } } else if (b1 == UNVERSIONED_TRANSLOG_HEADER_BYTE) { assert checkpoint.numOps == TranslogReader.UNKNOWN_OP_COUNT : "expected unknown op count but got: " + checkpoint.numOps; assert checkpoint.offset == Files.size(path) : "offset(" + checkpoint.offset + ") != file_size(" + Files.size(path) + ") for: " + path; return new LegacyTranslogReader( channelReference.getGeneration(), channelReference, checkpoint.offset); } else { throw new TranslogCorruptedException( "Invalid first byte in translog file, got: " + Long.toHexString(b1) + ", expected 0x00 or 0x3f"); } } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException e) { throw new TranslogCorruptedException("Translog header corrupted", e); } }
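The header probe in open() above reads the first byte separately and then reassembles the full magic int by hand. A tiny self-contained check that the arithmetic in that comment really reproduces CodecUtil's CODEC_MAGIC value of 0x3FD76C17:

// Run with java -ea MagicBytesDemo; prints 0x3fd76c17.
public class MagicBytesDemo {
  public static void main(String[] args) {
    byte b1 = 0x3f, b2 = (byte) 0xd7, b3 = 0x6c, b4 = 0x17;
    int header =
        ((b1 & 0xFF) << 24) + ((b2 & 0xFF) << 16) + ((b3 & 0xFF) << 8) + ((b4 & 0xFF) << 0);
    System.out.printf("0x%08x%n", header);
    assert header == 0x3FD76C17; // CodecUtil.CODEC_MAGIC
  }
}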
/** Sole constructor. */ public BlockTreeTermsReader(PostingsReaderBase postingsReader, SegmentReadState state) throws IOException { boolean success = false; IndexInput indexIn = null; this.postingsReader = postingsReader; this.segment = state.segmentInfo.name; String termsName = IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_EXTENSION); try { termsIn = state.directory.openInput(termsName, state.context); version = CodecUtil.checkIndexHeader( termsIn, TERMS_CODEC_NAME, VERSION_START, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix); String indexName = IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_INDEX_EXTENSION); indexIn = state.directory.openInput(indexName, state.context); CodecUtil.checkIndexHeader( indexIn, TERMS_INDEX_CODEC_NAME, version, version, state.segmentInfo.getId(), state.segmentSuffix); CodecUtil.checksumEntireFile(indexIn); // Have PostingsReader init itself postingsReader.init(termsIn, state); // NOTE: data file is too costly to verify checksum against all the bytes on open, // but for now we at least verify proper structure of the checksum footer: which looks // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption // such as file truncation. CodecUtil.retrieveChecksum(termsIn); // Read per-field details seekDir(termsIn, dirOffset); seekDir(indexIn, indexDirOffset); final int numFields = termsIn.readVInt(); if (numFields < 0) { throw new CorruptIndexException("invalid numFields: " + numFields, termsIn); } for (int i = 0; i < numFields; ++i) { final int field = termsIn.readVInt(); final long numTerms = termsIn.readVLong(); if (numTerms <= 0) { throw new CorruptIndexException("Illegal numTerms for field number: " + field, termsIn); } final int numBytes = termsIn.readVInt(); if (numBytes < 0) { throw new CorruptIndexException( "invalid rootCode for field number: " + field + ", numBytes=" + numBytes, termsIn); } final BytesRef rootCode = new BytesRef(new byte[numBytes]); termsIn.readBytes(rootCode.bytes, 0, numBytes); rootCode.length = numBytes; final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field); if (fieldInfo == null) { throw new CorruptIndexException("invalid field number: " + field, termsIn); } final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? 
-1 : termsIn.readVLong(); final long sumDocFreq = termsIn.readVLong(); final int docCount = termsIn.readVInt(); final int longsSize = termsIn.readVInt(); if (longsSize < 0) { throw new CorruptIndexException( "invalid longsSize for field: " + fieldInfo.name + ", longsSize=" + longsSize, termsIn); } BytesRef minTerm = readBytesRef(termsIn); BytesRef maxTerm = readBytesRef(termsIn); if (docCount < 0 || docCount > state.segmentInfo.getDocCount()) { // #docs with field must be <= #docs throw new CorruptIndexException( "invalid docCount: " + docCount + " maxDoc: " + state.segmentInfo.getDocCount(), termsIn); } if (sumDocFreq < docCount) { // #postings must be >= #docs with field throw new CorruptIndexException( "invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, termsIn); } if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings throw new CorruptIndexException( "invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, termsIn); } final long indexStartFP = indexIn.readVLong(); FieldReader previous = fields.put( fieldInfo.name, new FieldReader( this, fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount, indexStartFP, longsSize, indexIn, minTerm, maxTerm)); if (previous != null) { throw new CorruptIndexException("duplicate field: " + fieldInfo.name, termsIn); } } indexIn.close(); success = true; } finally { if (!success) { // this.close() will close in: IOUtils.closeWhileHandlingException(indexIn, this); } } }
private boolean doRestore() throws Exception { Path backupPath = Paths.get(backupLocation).resolve(backupName); SimpleDateFormat dateFormat = new SimpleDateFormat(SnapShooter.DATE_FMT, Locale.ROOT); String restoreIndexName = "restore." + dateFormat.format(new Date()); String restoreIndexPath = core.getDataDir() + restoreIndexName; Directory restoreIndexDir = null; Directory indexDir = null; try (Directory backupDir = FSDirectory.open(backupPath)) { final Version version = IndexFetcher.checkOldestVersion(SegmentInfos.readLatestCommit(backupDir)); restoreIndexDir = core.getDirectoryFactory() .get( restoreIndexPath, DirectoryFactory.DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType); // Prefer local copy. indexDir = core.getDirectoryFactory() .get( core.getIndexDir(), DirectoryFactory.DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType); // Move all files from backupDir to restoreIndexDir for (String filename : backupDir.listAll()) { checkInterrupted(); log.info("Copying file {} to restore directory ", filename); try (IndexInput indexInput = backupDir.openInput(filename, IOContext.READONCE)) { Long checksum = null; try { checksum = CodecUtil.retrieveChecksum(indexInput); } catch (Exception e) { log.warn("Could not read checksum from index file: " + filename, e); } long length = indexInput.length(); IndexFetcher.CompareResult compareResult = IndexFetcher.compareFile(indexDir, version, filename, length, checksum); if (!compareResult.equal || (!compareResult.checkSummed && (filename.endsWith(".si") || filename.endsWith(".liv") || filename.startsWith("segments_")))) { restoreIndexDir.copyFrom(backupDir, filename, filename, IOContext.READONCE); } else { // prefer local copy restoreIndexDir.copyFrom(indexDir, filename, filename, IOContext.READONCE); } } catch (Exception e) { throw new SolrException( SolrException.ErrorCode.UNKNOWN, "Exception while restoring the backup index", e); } } log.debug("Switching directories"); IndexFetcher.modifyIndexProps(core, restoreIndexName); boolean success; try { core.getUpdateHandler().newIndexWriter(false); openNewSearcher(); success = true; log.info("Successfully restored to the backup index"); } catch (Exception e) { // Rollback to the old index directory. Delete the restore index directory and mark the // restore as failed. log.warn("Could not switch to restored index. Rolling back to the current index"); Directory dir = null; try { dir = core.getDirectoryFactory() .get( core.getDataDir(), DirectoryFactory.DirContext.META_DATA, core.getSolrConfig().indexConfig.lockType); dir.deleteFile(IndexFetcher.INDEX_PROPERTIES); } finally { if (dir != null) { core.getDirectoryFactory().release(dir); } } core.getDirectoryFactory().doneWithDirectory(restoreIndexDir); core.getDirectoryFactory().remove(restoreIndexDir); core.getUpdateHandler().newIndexWriter(false); openNewSearcher(); throw new SolrException( SolrException.ErrorCode.UNKNOWN, "Exception while restoring the backup index", e); } if (success) { core.getDirectoryFactory().doneWithDirectory(indexDir); core.getDirectoryFactory().remove(indexDir); } return true; } finally { if (restoreIndexDir != null) { core.getDirectoryFactory().release(restoreIndexDir); } if (indexDir != null) { core.getDirectoryFactory().release(indexDir); } } }
/** * Read a particular segmentFileName. Note that this may throw an IOException if a commit is in * process. * * @param directory -- directory containing the segments file * @param segmentFileName -- segment file to load * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ public static final SegmentInfos readCommit(Directory directory, String segmentFileName) throws IOException { long generation = generationFromSegmentsFileName(segmentFileName); try (ChecksumIndexInput input = directory.openChecksumInput(segmentFileName, IOContext.READ)) { // NOTE: as long as we want to throw indexformattooold (vs corruptindexexception), we need // to read the magic ourselves. int magic = input.readInt(); if (magic != CodecUtil.CODEC_MAGIC) { throw new IndexFormatTooOldException( input, magic, CodecUtil.CODEC_MAGIC, CodecUtil.CODEC_MAGIC); } // 4.0+ int format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_40, VERSION_CURRENT); // 5.0+ byte id[] = null; if (format >= VERSION_50) { id = new byte[StringHelper.ID_LENGTH]; input.readBytes(id, 0, id.length); CodecUtil.checkIndexHeaderSuffix(input, Long.toString(generation, Character.MAX_RADIX)); } SegmentInfos infos = new SegmentInfos(); infos.id = id; infos.generation = generation; infos.lastGeneration = generation; if (format >= VERSION_53) { // TODO: in the future (7.0? sigh) we can use this to throw IndexFormatTooOldException ... // or just rely on the // minSegmentLuceneVersion check instead: infos.luceneVersion = Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt()); } else { // else compute the min version down below in the for loop } infos.version = input.readLong(); infos.counter = input.readInt(); int numSegments = input.readInt(); if (numSegments < 0) { throw new CorruptIndexException("invalid segment count: " + numSegments, input); } if (format >= VERSION_53) { if (numSegments > 0) { infos.minSegmentLuceneVersion = Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt()); if (infos.minSegmentLuceneVersion.onOrAfter(Version.LUCENE_4_0_0_ALPHA) == false) { throw new IndexFormatTooOldException( input, "this index contains a too-old segment (version: " + infos.minSegmentLuceneVersion + ")"); } } else { // else leave as null: no segments } } else { // else we recompute it below as we visit segments; it can't be used for throwing // IndexFormatTooOldExc, but consumers of // SegmentInfos can maybe still use it for other reasons } long totalDocs = 0; for (int seg = 0; seg < numSegments; seg++) { String segName = input.readString(); final byte segmentID[]; if (format >= VERSION_50) { byte hasID = input.readByte(); if (hasID == 1) { segmentID = new byte[StringHelper.ID_LENGTH]; input.readBytes(segmentID, 0, segmentID.length); } else if (hasID == 0) { segmentID = null; // 4.x segment, doesn't have an ID } else { throw new CorruptIndexException("invalid hasID byte, got: " + hasID, input); } } else { segmentID = null; } Codec codec = readCodec(input, format < VERSION_53); SegmentInfo info = codec.segmentInfoFormat().read(directory, segName, segmentID, IOContext.READ); info.setCodec(codec); totalDocs += info.maxDoc(); long delGen = input.readLong(); int delCount = input.readInt(); if (delCount < 0 || delCount > info.maxDoc()) { throw new CorruptIndexException( "invalid deletion count: " + delCount + " vs maxDoc=" + info.maxDoc(), input); } long fieldInfosGen = -1; if (format >= VERSION_46) { fieldInfosGen = input.readLong(); } long dvGen = -1; if (format >= 
VERSION_49) { dvGen = input.readLong(); } else { dvGen = fieldInfosGen; } SegmentCommitInfo siPerCommit = new SegmentCommitInfo(info, delCount, delGen, fieldInfosGen, dvGen); if (format >= VERSION_46) { if (format < VERSION_49) { // Recorded per-generation files, which were buggy (see // LUCENE-5636). We need to read and keep them so we continue to // reference those files. Unfortunately it means that the files will // be referenced even if the fields are updated again, until the // segment is merged. final int numGensUpdatesFiles = input.readInt(); final Map<Long, Set<String>> genUpdatesFiles; if (numGensUpdatesFiles == 0) { genUpdatesFiles = Collections.emptyMap(); } else { genUpdatesFiles = new HashMap<>(numGensUpdatesFiles); for (int i = 0; i < numGensUpdatesFiles; i++) { genUpdatesFiles.put(input.readLong(), input.readStringSet()); } } siPerCommit.setGenUpdatesFiles(genUpdatesFiles); } else { if (format >= VERSION_51) { siPerCommit.setFieldInfosFiles(input.readSetOfStrings()); } else { siPerCommit.setFieldInfosFiles(Collections.unmodifiableSet(input.readStringSet())); } final Map<Integer, Set<String>> dvUpdateFiles; final int numDVFields = input.readInt(); if (numDVFields == 0) { dvUpdateFiles = Collections.emptyMap(); } else { Map<Integer, Set<String>> map = new HashMap<>(numDVFields); for (int i = 0; i < numDVFields; i++) { if (format >= VERSION_51) { map.put(input.readInt(), input.readSetOfStrings()); } else { map.put(input.readInt(), Collections.unmodifiableSet(input.readStringSet())); } } dvUpdateFiles = Collections.unmodifiableMap(map); } siPerCommit.setDocValuesUpdatesFiles(dvUpdateFiles); } } infos.add(siPerCommit); Version segmentVersion = info.getVersion(); if (format < VERSION_53) { if (infos.minSegmentLuceneVersion == null || segmentVersion.onOrAfter(infos.minSegmentLuceneVersion) == false) { infos.minSegmentLuceneVersion = segmentVersion; } } else if (segmentVersion.onOrAfter(infos.minSegmentLuceneVersion) == false) { throw new CorruptIndexException( "segments file recorded minSegmentLuceneVersion=" + infos.minSegmentLuceneVersion + " but segment=" + info + " has older version=" + segmentVersion, input); } } if (format >= VERSION_51) { infos.userData = input.readMapOfStrings(); } else { infos.userData = Collections.unmodifiableMap(input.readStringStringMap()); } if (format >= VERSION_48) { CodecUtil.checkFooter(input); } else { final long checksumNow = input.getChecksum(); final long checksumThen = input.readLong(); if (checksumNow != checksumThen) { throw new CorruptIndexException( "checksum failed (hardware problem?) : expected=" + Long.toHexString(checksumThen) + " actual=" + Long.toHexString(checksumNow), input); } CodecUtil.checkEOF(input); } // LUCENE-6299: check we are in bounds if (totalDocs > IndexWriter.getActualMaxDocs()) { throw new CorruptIndexException( "Too many documents: an index cannot exceed " + IndexWriter.getActualMaxDocs() + " but readers have total maxDoc=" + totalDocs, input); } return infos; } }
@Override public FieldInfos read( Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext context) throws IOException { final String fileName = IndexFileNames.segmentFileName( segmentInfo.name, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION); try (ChecksumIndexInput input = directory.openChecksumInput(fileName, context)) { int codecVersion = CodecUtil.checkHeader( input, Lucene46FieldInfosFormat.CODEC_NAME, Lucene46FieldInfosFormat.FORMAT_START, Lucene46FieldInfosFormat.FORMAT_CURRENT); final int size = input.readVInt(); // read in the size FieldInfo infos[] = new FieldInfo[size]; for (int i = 0; i < size; i++) { String name = input.readString(); final int fieldNumber = input.readVInt(); if (fieldNumber < 0) { throw new CorruptIndexException( "invalid field number for field: " + name + ", fieldNumber=" + fieldNumber, input); } byte bits = input.readByte(); boolean isIndexed = (bits & Lucene46FieldInfosFormat.IS_INDEXED) != 0; boolean storeTermVector = (bits & Lucene46FieldInfosFormat.STORE_TERMVECTOR) != 0; boolean omitNorms = (bits & Lucene46FieldInfosFormat.OMIT_NORMS) != 0; boolean storePayloads = (bits & Lucene46FieldInfosFormat.STORE_PAYLOADS) != 0; final IndexOptions indexOptions; if (!isIndexed) { indexOptions = IndexOptions.NONE; } else if ((bits & Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0) { indexOptions = IndexOptions.DOCS; } else if ((bits & Lucene46FieldInfosFormat.OMIT_POSITIONS) != 0) { indexOptions = IndexOptions.DOCS_AND_FREQS; } else if ((bits & Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0) { indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; } else { indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; } // DV Types are packed in one byte byte val = input.readByte(); final DocValuesType docValuesType = getDocValuesType(input, (byte) (val & 0x0F)); final DocValuesType normsType = getDocValuesType(input, (byte) ((val >>> 4) & 0x0F)); final long dvGen = input.readLong(); final Map<String, String> attributes = input.readStringStringMap(); if (isIndexed && omitNorms == false && normsType == DocValuesType.NONE) { // Undead norms! Lucene42NormsProducer will check this and bring norms back from the // dead: UndeadNormsProducer.setUndead(attributes); } infos[i] = new FieldInfo( name, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, docValuesType, dvGen, Collections.unmodifiableMap(attributes)); infos[i].checkConsistency(); } if (codecVersion >= Lucene46FieldInfosFormat.FORMAT_CHECKSUM) { CodecUtil.checkFooter(input); } else { CodecUtil.checkEOF(input); } return new FieldInfos(infos); } }
private void write(Directory directory) throws IOException { long nextGeneration = getNextPendingGeneration(); String segmentFileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.PENDING_SEGMENTS, "", nextGeneration); // Always advance the generation on write: generation = nextGeneration; IndexOutput segnOutput = null; boolean success = false; try { segnOutput = directory.createOutput(segmentFileName, IOContext.DEFAULT); CodecUtil.writeIndexHeader( segnOutput, "segments", VERSION_CURRENT, StringHelper.randomId(), Long.toString(nextGeneration, Character.MAX_RADIX)); segnOutput.writeVInt(Version.LATEST.major); segnOutput.writeVInt(Version.LATEST.minor); segnOutput.writeVInt(Version.LATEST.bugfix); segnOutput.writeLong(version); segnOutput.writeInt(counter); // write counter segnOutput.writeInt(size()); if (size() > 0) { Version minSegmentVersion = null; // We do a separate loop up front so we can write the minSegmentVersion before // any SegmentInfo; this makes it cleaner to throw IndexFormatTooOldExc at read time: for (SegmentCommitInfo siPerCommit : this) { Version segmentVersion = siPerCommit.info.getVersion(); if (minSegmentVersion == null || segmentVersion.onOrAfter(minSegmentVersion) == false) { minSegmentVersion = segmentVersion; } } segnOutput.writeVInt(minSegmentVersion.major); segnOutput.writeVInt(minSegmentVersion.minor); segnOutput.writeVInt(minSegmentVersion.bugfix); } // write infos for (SegmentCommitInfo siPerCommit : this) { SegmentInfo si = siPerCommit.info; segnOutput.writeString(si.name); byte segmentID[] = si.getId(); // TODO: remove this in lucene 6, we don't need to include 4.x segments in commits anymore if (segmentID == null) { segnOutput.writeByte((byte) 0); } else { if (segmentID.length != StringHelper.ID_LENGTH) { throw new IllegalStateException( "cannot write segment: invalid id segment=" + si.name + " id=" + StringHelper.idToString(segmentID)); } segnOutput.writeByte((byte) 1); segnOutput.writeBytes(segmentID, segmentID.length); } segnOutput.writeString(si.getCodec().getName()); segnOutput.writeLong(siPerCommit.getDelGen()); int delCount = siPerCommit.getDelCount(); if (delCount < 0 || delCount > si.maxDoc()) { throw new IllegalStateException( "cannot write segment: invalid maxDoc segment=" + si.name + " maxDoc=" + si.maxDoc() + " delCount=" + delCount); } segnOutput.writeInt(delCount); segnOutput.writeLong(siPerCommit.getFieldInfosGen()); segnOutput.writeLong(siPerCommit.getDocValuesGen()); segnOutput.writeSetOfStrings(siPerCommit.getFieldInfosFiles()); final Map<Integer, Set<String>> dvUpdatesFiles = siPerCommit.getDocValuesUpdatesFiles(); segnOutput.writeInt(dvUpdatesFiles.size()); for (Entry<Integer, Set<String>> e : dvUpdatesFiles.entrySet()) { segnOutput.writeInt(e.getKey()); segnOutput.writeSetOfStrings(e.getValue()); } } segnOutput.writeMapOfStrings(userData); CodecUtil.writeFooter(segnOutput); segnOutput.close(); directory.sync(Collections.singleton(segmentFileName)); success = true; } finally { if (success) { pendingCommit = true; } else { // We hit an exception above; try to close the file // but suppress any exception: IOUtils.closeWhileHandlingException(segnOutput); // Try not to leave a truncated segments_N file in // the index: IOUtils.deleteFilesIgnoringExceptions(directory, segmentFileName); } } }
/** Seek {@code input} to the directory offset. */ private void seekDir(IndexInput input, long dirOffset) throws IOException { input.seek(input.length() - CodecUtil.footerLength() - 8); dirOffset = input.readLong(); input.seek(dirOffset); }
public void testCheckIntegrity() throws IOException { Directory dir = newDirectory(); long luceneFileLength = 0; try (IndexOutput output = dir.createOutput("lucene_checksum.bin", IOContext.DEFAULT)) { int iters = scaledRandomIntBetween(10, 100); for (int i = 0; i < iters; i++) { BytesRef bytesRef = new BytesRef(TestUtil.randomRealisticUnicodeString(random(), 10, 1024)); output.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length); luceneFileLength += bytesRef.length; } CodecUtil.writeFooter(output); luceneFileLength += CodecUtil.footerLength(); } final Adler32 adler32 = new Adler32(); long legacyFileLength = 0; try (IndexOutput output = dir.createOutput("legacy.bin", IOContext.DEFAULT)) { int iters = scaledRandomIntBetween(10, 100); for (int i = 0; i < iters; i++) { BytesRef bytesRef = new BytesRef(TestUtil.randomRealisticUnicodeString(random(), 10, 1024)); output.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length); adler32.update(bytesRef.bytes, bytesRef.offset, bytesRef.length); legacyFileLength += bytesRef.length; } } final long luceneChecksum; final long adler32LegacyChecksum = adler32.getValue(); try (IndexInput indexInput = dir.openInput("lucene_checksum.bin", IOContext.DEFAULT)) { assertEquals(luceneFileLength, indexInput.length()); luceneChecksum = CodecUtil.retrieveChecksum(indexInput); } { // positive check StoreFileMetaData lucene = new StoreFileMetaData( "lucene_checksum.bin", luceneFileLength, Store.digestToString(luceneChecksum), Version.LUCENE_4_8_0); StoreFileMetaData legacy = new StoreFileMetaData( "legacy.bin", legacyFileLength, Store.digestToString(adler32LegacyChecksum)); assertTrue(legacy.hasLegacyChecksum()); assertFalse(lucene.hasLegacyChecksum()); assertTrue(Store.checkIntegrityNoException(lucene, dir)); assertTrue(Store.checkIntegrityNoException(legacy, dir)); } { // negative check - wrong checksum StoreFileMetaData lucene = new StoreFileMetaData( "lucene_checksum.bin", luceneFileLength, Store.digestToString(luceneChecksum + 1), Version.LUCENE_4_8_0); StoreFileMetaData legacy = new StoreFileMetaData( "legacy.bin", legacyFileLength, Store.digestToString(adler32LegacyChecksum + 1)); assertTrue(legacy.hasLegacyChecksum()); assertFalse(lucene.hasLegacyChecksum()); assertFalse(Store.checkIntegrityNoException(lucene, dir)); assertFalse(Store.checkIntegrityNoException(legacy, dir)); } { // negative check - wrong length StoreFileMetaData lucene = new StoreFileMetaData( "lucene_checksum.bin", luceneFileLength + 1, Store.digestToString(luceneChecksum), Version.LUCENE_4_8_0); StoreFileMetaData legacy = new StoreFileMetaData( "legacy.bin", legacyFileLength + 1, Store.digestToString(adler32LegacyChecksum)); assertTrue(legacy.hasLegacyChecksum()); assertFalse(lucene.hasLegacyChecksum()); assertFalse(Store.checkIntegrityNoException(lucene, dir)); assertFalse(Store.checkIntegrityNoException(legacy, dir)); } { // negative check - wrong file StoreFileMetaData lucene = new StoreFileMetaData( "legacy.bin", luceneFileLength, Store.digestToString(luceneChecksum), Version.LUCENE_4_8_0); StoreFileMetaData legacy = new StoreFileMetaData( "lucene_checksum.bin", legacyFileLength, Store.digestToString(adler32LegacyChecksum)); assertTrue(legacy.hasLegacyChecksum()); assertFalse(lucene.hasLegacyChecksum()); assertFalse(Store.checkIntegrityNoException(lucene, dir)); assertFalse(Store.checkIntegrityNoException(legacy, dir)); } dir.close(); }
@Test public void testMixedChecksums() throws IOException { final ShardId shardId = new ShardId(new Index("index"), 1); DirectoryService directoryService = new LuceneManagedDirectoryService(random()); Store store = new Store( shardId, ImmutableSettings.EMPTY, directoryService, randomDistributor(directoryService), new DummyShardLock(shardId)); // this time random codec.... IndexWriter writer = new IndexWriter( store.directory(), newIndexWriterConfig(random(), new MockAnalyzer(random())) .setCodec(actualDefaultCodec())); int docs = 1 + random().nextInt(100); for (int i = 0; i < docs; i++) { Document doc = new Document(); doc.add( new TextField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); doc.add( new TextField( "body", TestUtil.randomRealisticUnicodeString(random()), random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); doc.add( new SortedDocValuesField( "dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random())))); writer.addDocument(doc); } if (random().nextBoolean()) { for (int i = 0; i < docs; i++) { if (random().nextBoolean()) { Document doc = new Document(); doc.add( new TextField( "id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); doc.add( new TextField( "body", TestUtil.randomRealisticUnicodeString(random()), random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); writer.updateDocument(new Term("id", "" + i), doc); } } } if (random().nextBoolean()) { DirectoryReader.open(writer, random().nextBoolean()).close(); // flush } Store.MetadataSnapshot metadata; // check before we committed try { store.getMetadata(); fail("no index present - expected exception"); } catch (IndexNotFoundException ex) { // expected } assertThat(store.getMetadataOrEmpty(), is(Store.MetadataSnapshot.EMPTY)); // nothing committed writer.commit(); writer.close(); Store.LegacyChecksums checksums = new Store.LegacyChecksums(); metadata = store.getMetadata(); assertThat(metadata.asMap().isEmpty(), is(false)); for (StoreFileMetaData meta : metadata) { try (IndexInput input = store.directory().openInput(meta.name(), IOContext.DEFAULT)) { if (meta.checksum() == null) { String checksum = null; try { CodecUtil.retrieveChecksum(input); fail("expected a corrupt index - postings format has no checksums"); } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) { try (ChecksumIndexInput checksumIndexInput = store.directory().openChecksumInput(meta.name(), IOContext.DEFAULT)) { checksumIndexInput.seek(meta.length()); checksum = Store.digestToString(checksumIndexInput.getChecksum()); } // fine - it's a postings format without checksums checksums.add(new StoreFileMetaData(meta.name(), meta.length(), checksum, null)); } } else { String checksum = Store.digestToString(CodecUtil.retrieveChecksum(input)); assertThat( "File: " + meta.name() + " has a different checksum", meta.checksum(), equalTo(checksum)); assertThat(meta.hasLegacyChecksum(), equalTo(false)); assertThat(meta.writtenBy(), equalTo(Version.LATEST)); } } } assertConsistent(store, metadata); checksums.write(store); metadata = store.getMetadata(); assertThat(metadata.asMap().isEmpty(), is(false)); for (StoreFileMetaData meta : metadata) { assertThat( "file: " + meta.name() + " has a null checksum", meta.checksum(), not(nullValue())); if (meta.hasLegacyChecksum()) { try (ChecksumIndexInput checksumIndexInput = store.directory().openChecksumInput(meta.name(), IOContext.DEFAULT)) { checksumIndexInput.seek(meta.length()); assertThat( meta.checksum(), 
equalTo(Store.digestToString(checksumIndexInput.getChecksum()))); } } else { try (IndexInput input = store.directory().openInput(meta.name(), IOContext.DEFAULT)) { String checksum = Store.digestToString(CodecUtil.retrieveChecksum(input)); assertThat( "File: " + meta.name() + " has a different checksum", meta.checksum(), equalTo(checksum)); assertThat(meta.hasLegacyChecksum(), equalTo(false)); assertThat(meta.writtenBy(), equalTo(Version.LATEST)); } } } assertConsistent(store, metadata); TestUtil.checkIndex(store.directory()); assertDeleteContent(store, directoryService); IOUtils.close(store); }
@Test public void testNewChecksums() throws IOException { final ShardId shardId = new ShardId(new Index("index"), 1); DirectoryService directoryService = new LuceneManagedDirectoryService(random()); Store store = new Store( shardId, ImmutableSettings.EMPTY, directoryService, randomDistributor(directoryService), new DummyShardLock(shardId)); // set default codec - all segments need checksums IndexWriter writer = new IndexWriter( store.directory(), newIndexWriterConfig(random(), new MockAnalyzer(random())) .setCodec(actualDefaultCodec())); int docs = 1 + random().nextInt(100); for (int i = 0; i < docs; i++) { Document doc = new Document(); doc.add( new TextField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); doc.add( new TextField( "body", TestUtil.randomRealisticUnicodeString(random()), random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); doc.add( new SortedDocValuesField( "dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random())))); writer.addDocument(doc); } if (random().nextBoolean()) { for (int i = 0; i < docs; i++) { if (random().nextBoolean()) { Document doc = new Document(); doc.add( new TextField( "id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); doc.add( new TextField( "body", TestUtil.randomRealisticUnicodeString(random()), random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); writer.updateDocument(new Term("id", "" + i), doc); } } } if (random().nextBoolean()) { DirectoryReader.open(writer, random().nextBoolean()).close(); // flush } Store.MetadataSnapshot metadata; // check before we committed try { store.getMetadata(); fail("no index present - expected exception"); } catch (IndexNotFoundException ex) { // expected } assertThat(store.getMetadataOrEmpty(), is(Store.MetadataSnapshot.EMPTY)); // nothing committed writer.commit(); writer.close(); metadata = store.getMetadata(); assertThat(metadata.asMap().isEmpty(), is(false)); for (StoreFileMetaData meta : metadata) { try (IndexInput input = store.directory().openInput(meta.name(), IOContext.DEFAULT)) { String checksum = Store.digestToString(CodecUtil.retrieveChecksum(input)); assertThat( "File: " + meta.name() + " has a different checksum", meta.checksum(), equalTo(checksum)); assertThat(meta.hasLegacyChecksum(), equalTo(false)); assertThat(meta.writtenBy(), equalTo(Version.LATEST)); if (meta.name().endsWith(".si") || meta.name().startsWith("segments_")) { assertThat(meta.hash().length, greaterThan(0)); } } } assertConsistent(store, metadata); TestUtil.checkIndex(store.directory()); assertDeleteContent(store, directoryService); IOUtils.close(store); }