private void testOn(Directory dir, int writeSize, int readSize, Cache cache) throws IOException {
  if (cache != null) {
    // needed to make sure no chunks are left over in case of the Infinispan implementation
    cache.clear();
  }
  final String filename = "chunkTest";
  IndexOutput indexOutput = dir.createOutput(filename);
  byte[] toWrite = fillBytes(writeSize);
  indexOutput.writeBytes(toWrite, writeSize);
  indexOutput.close();
  if (cache != null) {
    AssertJUnit.assertEquals(
        writeSize,
        DirectoryIntegrityCheck.deepCountFileSize(new FileCacheKey(INDEXNAME, filename), cache));
  }
  AssertJUnit.assertEquals(writeSize, indexOutput.length());
  byte[] results = new byte[readSize];
  IndexInput openInput = dir.openInput(filename);
  try {
    openInput.readBytes(results, 0, readSize);
    for (int i = 0; i < writeSize && i < readSize; i++) {
      AssertJUnit.assertEquals(results[i], toWrite[i]);
    }
    if (readSize > writeSize) {
      AssertJUnit.fail("should have thrown an IOException for reading past EOF");
    }
  } catch (IOException ioe) {
    if (readSize <= writeSize) {
      AssertJUnit.fail("should not have thrown an IOException: " + ioe.getMessage());
    }
  }
}
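// The fillBytes(int) helper used by testOn and the flush tests below is not included in this
// listing. A minimal sketch is given here, assuming it only needs to produce a deterministic,
// repeatable byte pattern so that reads can later be compared against the written buffer; the
// modulo constant is an illustrative assumption, not taken from the original test code.
private byte[] fillBytes(int size) {
  byte[] bytes = new byte[size];
  for (int i = 0; i < size; i++) {
    // repeatable pattern: the value depends only on the position
    bytes[i] = (byte) (i % 119);
  }
  return bytes;
}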
public void multipleFlushTest() throws IOException {
  final String filename = "longFile.writtenInMultipleFlushes";
  final int bufferSize = 300;
  Cache cache = cacheManager.getCache();
  cache.clear();
  Directory dir =
      DirectoryBuilder.newDirectoryInstance(cache, cache, cache, INDEXNAME)
          .chunkSize(13)
          .create();
  byte[] manyBytes = fillBytes(bufferSize);
  IndexOutput indexOutput = dir.createOutput(filename);
  for (int i = 0; i < 10; i++) {
    indexOutput.writeBytes(manyBytes, bufferSize);
    indexOutput.flush();
  }
  indexOutput.close();
  IndexInput input = dir.openInput(filename);
  final int finalSize = (10 * bufferSize);
  AssertJUnit.assertEquals(finalSize, input.length());
  final byte[] resultingBuffer = new byte[finalSize];
  input.readBytes(resultingBuffer, 0, finalSize);
  int index = 0;
  for (int i = 0; i < 10; i++) {
    for (int j = 0; j < bufferSize; j++) {
      AssertJUnit.assertEquals(resultingBuffer[index++], manyBytes[j]);
    }
  }
}
@Test
public void testCleanUpWithLegacyChecksums() throws IOException {
  Map<String, StoreFileMetaData> metaDataMap = new HashMap<>();
  metaDataMap.put(
      "segments_1",
      new StoreFileMetaData("segments_1", 50, null, null, new BytesRef(new byte[] {1})));
  metaDataMap.put(
      "_0_1.del", new StoreFileMetaData("_0_1.del", 42, "foobarbaz", null, new BytesRef()));
  Store.MetadataSnapshot snapshot = new Store.MetadataSnapshot(metaDataMap);
  final ShardId shardId = new ShardId(new Index("index"), 1);
  DirectoryService directoryService = new LuceneManagedDirectoryService(random());
  Store store =
      new Store(
          shardId,
          ImmutableSettings.EMPTY,
          directoryService,
          randomDistributor(directoryService),
          new DummyShardLock(shardId));
  for (String file : metaDataMap.keySet()) {
    try (IndexOutput output = store.directory().createOutput(file, IOContext.DEFAULT)) {
      BytesRef bytesRef = new BytesRef(TestUtil.randomRealisticUnicodeString(random(), 10, 1024));
      output.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length);
      CodecUtil.writeFooter(output);
    }
  }
  store.verifyAfterCleanup(snapshot, snapshot);
  store.deleteContent();
  IOUtils.close(store);
}
@Override
public void close() throws IOException {
  IOException ioe = null;
  try {
    final long dirStart = out.getFilePointer();
    final long indexDirStart = indexOut.getFilePointer();
    out.writeVInt(fields.size());
    for (FieldMetaData field : fields) {
      // System.out.println("  field " + field.fieldInfo.name + " " + field.numTerms + " terms");
      out.writeVInt(field.fieldInfo.number);
      out.writeVLong(field.numTerms);
      out.writeVInt(field.rootCode.length);
      out.writeBytes(field.rootCode.bytes, field.rootCode.offset, field.rootCode.length);
      if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
        out.writeVLong(field.sumTotalTermFreq);
      }
      out.writeVLong(field.sumDocFreq);
      out.writeVInt(field.docCount);
      indexOut.writeVLong(field.indexStartFP);
    }
    writeTrailer(out, dirStart);
    writeIndexTrailer(indexOut, indexDirStart);
  } catch (IOException ioe2) {
    ioe = ioe2;
  } finally {
    IOUtils.closeWhileHandlingException(ioe, out, indexOut, postingsWriter);
  }
}
private void flush() throws IOException {
  encoder.encode(nextValues, 0, nextBlocks, 0, iterations);
  final int blockCount =
      (int) PackedInts.Format.PACKED.byteCount(PackedInts.VERSION_CURRENT, off, bitsPerValue);
  output.writeBytes(nextBlocks, blockCount);
  Arrays.fill(nextValues, 0L);
  off = 0;
}
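// Worked example for the flush() above, assuming PackedInts.Format.PACKED stores values back to
// back with no padding: with off = 128 pending values at bitsPerValue = 5, the block holds
// 128 * 5 = 640 bits, so byteCount(...) yields ceil(640 / 8) = 80 and exactly 80 bytes of
// nextBlocks are written to the output. The concrete numbers are illustrative, not taken from
// the original code.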
@Override
public void messageReceived(final RecoveryFileChunkRequest request, TransportChannel channel)
    throws Exception {
  try (RecoveriesCollection.StatusRef statusRef =
      onGoingRecoveries.getStatusSafe(request.recoveryId(), request.shardId())) {
    final RecoveryStatus recoveryStatus = statusRef.status();
    final Store store = recoveryStatus.store();
    recoveryStatus.state().getTranslog().totalOperations(request.totalTranslogOps());
    final RecoveryState.Index indexState = recoveryStatus.state().getIndex();
    if (request.sourceThrottleTimeInNanos() != RecoveryState.Index.UNKNOWN) {
      indexState.addSourceThrottling(request.sourceThrottleTimeInNanos());
    }
    IndexOutput indexOutput;
    if (request.position() == 0) {
      indexOutput = recoveryStatus.openAndPutIndexOutput(request.name(), request.metadata(), store);
    } else {
      indexOutput = recoveryStatus.getOpenIndexOutput(request.name());
    }
    BytesReference content = request.content();
    if (!content.hasArray()) {
      content = content.toBytesArray();
    }
    RateLimiter rl = recoverySettings.rateLimiter();
    if (rl != null) {
      long bytes = bytesSinceLastPause.addAndGet(content.length());
      if (bytes > rl.getMinPauseCheckBytes()) {
        // Time to pause
        bytesSinceLastPause.addAndGet(-bytes);
        long throttleTimeInNanos = rl.pause(bytes);
        indexState.addTargetThrottling(throttleTimeInNanos);
        recoveryStatus.indexShard().recoveryStats().addThrottleTime(throttleTimeInNanos);
      }
    }
    indexOutput.writeBytes(content.array(), content.arrayOffset(), content.length());
    indexState.addRecoveredBytesToFile(request.name(), content.length());
    if (indexOutput.getFilePointer() >= request.length() || request.lastChunk()) {
      try {
        Store.verify(indexOutput);
      } finally {
        // we are done
        indexOutput.close();
      }
      // write the checksum
      recoveryStatus.legacyChecksums().add(request.metadata());
      final String temporaryFileName = recoveryStatus.getTempNameForFile(request.name());
      assert Arrays.asList(store.directory().listAll()).contains(temporaryFileName);
      store.directory().sync(Collections.singleton(temporaryFileName));
      IndexOutput remove = recoveryStatus.removeOpenIndexOutputs(request.name());
      assert remove == null || remove == indexOutput; // remove may be null if we already finished
    }
  }
  channel.sendResponse(TransportResponse.Empty.INSTANCE);
}
private void insertData(CoherenceDirectory dir, String fileName) throws IOException {
  byte[] test = new byte[] {1, 2, 3, 4, 5, 6, 7, 8};
  IndexOutput indexOutput = dir.createOutput(fileName);
  indexOutput.writeBytes(new byte[] {2, 4, 6, 7, 8}, 5);
  indexOutput.writeInt(-1);
  indexOutput.writeLong(10);
  indexOutput.writeInt(0);
  indexOutput.writeInt(0);
  indexOutput.writeBytes(test, 8);
  indexOutput.writeBytes(test, 5);
  indexOutput.seek(0);
  indexOutput.writeByte((byte) 8);
  if (dir.getBucketSize() > 4) {
    indexOutput.seek(2);
    indexOutput.writeBytes(new byte[] {1, 2}, 2);
  }
  indexOutput.close();
}
private void insertData(ByteBufferDirectory dir, int bufferSizeInBytes) throws IOException {
  byte[] test = new byte[] {1, 2, 3, 4, 5, 6, 7, 8};
  IndexOutput indexOutput = dir.createOutput("value1", IOContext.DEFAULT);
  indexOutput.writeBytes(new byte[] {2, 4, 6, 7, 8}, 5);
  indexOutput.writeInt(-1);
  indexOutput.writeLong(10);
  indexOutput.writeInt(0);
  indexOutput.writeInt(0);
  indexOutput.writeBytes(test, 8);
  indexOutput.writeBytes(test, 5);
  indexOutput.seek(0);
  indexOutput.writeByte((byte) 8);
  if (bufferSizeInBytes > 4) {
    indexOutput.seek(2);
    indexOutput.writeBytes(new byte[] {1, 2}, 2);
  }
  indexOutput.close();
}
@Test
public void testVerifyingIndexOutput() throws IOException {
  Directory dir = newDirectory();
  IndexOutput output = dir.createOutput("foo.bar", IOContext.DEFAULT);
  int iters = scaledRandomIntBetween(10, 100);
  for (int i = 0; i < iters; i++) {
    BytesRef bytesRef = new BytesRef(TestUtil.randomRealisticUnicodeString(random(), 10, 1024));
    output.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length);
  }
  CodecUtil.writeFooter(output);
  output.close();
  IndexInput indexInput = dir.openInput("foo.bar", IOContext.DEFAULT);
  String checksum = Store.digestToString(CodecUtil.retrieveChecksum(indexInput));
  indexInput.seek(0);
  BytesRef ref = new BytesRef(scaledRandomIntBetween(1, 1024));
  long length = indexInput.length();
  IndexOutput verifyingOutput =
      new Store.LuceneVerifyingIndexOutput(
          new StoreFileMetaData("foo1.bar", length, checksum),
          dir.createOutput("foo1.bar", IOContext.DEFAULT));
  while (length > 0) {
    if (random().nextInt(10) == 0) {
      verifyingOutput.writeByte(indexInput.readByte());
      length--;
    } else {
      int min = (int) Math.min(length, ref.bytes.length);
      indexInput.readBytes(ref.bytes, ref.offset, min);
      verifyingOutput.writeBytes(ref.bytes, ref.offset, min);
      length -= min;
    }
  }
  Store.verify(verifyingOutput);
  verifyingOutput.writeByte((byte) 0x0);
  try {
    Store.verify(verifyingOutput);
    fail("should be a corrupted index");
  } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
    // ok
  }
  IOUtils.close(indexInput, verifyingOutput, dir);
}
@Override
protected void finishInternal(int docCount) throws IOException {
  final int numValues = hash.size();
  final IndexOutput datOut = getOrCreateDataOut();
  datOut.writeInt(size);
  if (size != -1) {
    final BytesRef bytesRef = new BytesRef(size);
    for (int i = 0; i < numValues; i++) {
      hash.get(i, bytesRef);
      datOut.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length);
    }
  }
  final IndexOutput idxOut = getOrCreateIndexOut();
  idxOut.writeInt(numValues);
  writeIndex(idxOut, docCount, numValues, docToEntry);
}
public void testWriteChunks() throws Exception {
  final int BUFFER_SIZE = 64;
  Cache cache = cacheManager.getCache();
  Directory dir =
      DirectoryBuilder.newDirectoryInstance(cache, cache, cache, INDEXNAME)
          .chunkSize(BUFFER_SIZE)
          .create();
  IndexOutput io = dir.createOutput("MyNewFile.txt");
  io.writeByte((byte) 66);
  io.writeByte((byte) 69);
  io.flush();
  io.close();
  assert dir.fileExists("MyNewFile.txt");
  assert null != cache.get(new ChunkCacheKey(INDEXNAME, "MyNewFile.txt", 0, BUFFER_SIZE));
  // test contents by reading:
  byte[] buf = new byte[9];
  IndexInput ii = dir.openInput("MyNewFile.txt");
  ii.readBytes(buf, 0, (int) ii.length());
  ii.close();
  assert new String(new byte[] {66, 69}).equals(new String(buf).trim());
  String testText =
      "This is some rubbish again that will span more than one chunk - one hopes. Who knows, maybe even three or four chunks.";
  io = dir.createOutput("MyNewFile.txt");
  io.seek(0);
  io.writeBytes(testText.getBytes(), 0, testText.length());
  io.close();
  // now compare.
  byte[] chunk1 = (byte[]) cache.get(new ChunkCacheKey(INDEXNAME, "MyNewFile.txt", 0, BUFFER_SIZE));
  byte[] chunk2 = (byte[]) cache.get(new ChunkCacheKey(INDEXNAME, "MyNewFile.txt", 1, BUFFER_SIZE));
  assert null != chunk1;
  assert null != chunk2;
  assert testText.equals(new String(chunk1) + new String(chunk2).trim());
  dir.close();
  DirectoryIntegrityCheck.verifyDirectoryStructure(cache, INDEXNAME);
}
/**
 * Verifies that writes larger than the output buffer size correctly advance the file pointer.
 */
public void testLargeWrites() throws IOException {
  IndexOutput os = dir.createOutput("testBufferStart.txt");
  byte[] largeBuf = new byte[2048];
  for (int i = 0; i < largeBuf.length; i++) {
    largeBuf[i] = (byte) (Math.random() * 256);
  }
  long currentPos = os.getFilePointer();
  os.writeBytes(largeBuf, largeBuf.length);
  try {
    assertEquals(currentPos + largeBuf.length, os.getFilePointer());
  } finally {
    os.close();
  }
}
private void corruptFile(Directory dir, String fileIn, String fileOut) throws IOException {
  IndexInput input = dir.openInput(fileIn, IOContext.READONCE);
  IndexOutput output = dir.createOutput(fileOut, IOContext.DEFAULT);
  long len = input.length();
  byte[] b = new byte[1024];
  long broken = randomInt((int) len);
  long pos = 0;
  while (pos < len) {
    int min = (int) Math.min(input.length() - pos, b.length);
    input.readBytes(b, 0, min);
    if (broken >= pos && broken < pos + min) {
      // Flip one byte
      int flipPos = (int) (broken - pos);
      b[flipPos] = (byte) (b[flipPos] ^ 42);
    }
    output.writeBytes(b, min);
    pos += min;
  }
  IOUtils.close(input, output);
}
@Test
public void testWriteChunksDefaultChunks() throws Exception {
  Cache cache = cacheManager.getCache();
  Directory dir = DirectoryBuilder.newDirectoryInstance(cache, cache, cache, INDEXNAME).create();
  final String testText = "This is some rubbish";
  final byte[] testTextAsBytes = testText.getBytes();
  IndexOutput io = dir.createOutput("MyNewFile.txt");
  io.writeByte((byte) 1);
  io.writeByte((byte) 2);
  io.writeByte((byte) 3);
  io.writeBytes(testTextAsBytes, testTextAsBytes.length);
  io.close();
  DirectoryIntegrityCheck.verifyDirectoryStructure(cache, INDEXNAME);
  FileCacheKey fileCacheKey = new FileCacheKey(INDEXNAME, "MyNewFile.txt");
  assert null != cache.get(fileCacheKey);
  FileMetadata metadata = (FileMetadata) cache.get(fileCacheKey);
  AssertJUnit.assertEquals(testTextAsBytes.length + 3, metadata.getSize());
  assert null
      != cache.get(
          new ChunkCacheKey(
              INDEXNAME, "MyNewFile.txt", 0, DirectoryBuilderImpl.DEFAULT_BUFFER_SIZE));
  // test contents by reading:
  IndexInput ii = dir.openInput("MyNewFile.txt");
  assert ii.readByte() == 1;
  assert ii.readByte() == 2;
  assert ii.readByte() == 3;
  byte[] buf = new byte[testTextAsBytes.length];
  ii.readBytes(buf, 0, testTextAsBytes.length);
  ii.close();
  assert testText.equals(new String(buf).trim());
  dir.close();
  DirectoryIntegrityCheck.verifyDirectoryStructure(cache, INDEXNAME);
}
public void multipleFlushTest() throws IOException {
  final String filename = "longFile.writtenInMultipleFlushes";
  final int bufferSize = 300;
  Cache cache = cacheManager.getCache();
  cache.clear();
  InfinispanDirectory dir = new InfinispanDirectory(cache, cache, cache, INDEXNAME, 13);
  byte[] manyBytes = fillBytes(bufferSize);
  IndexOutput indexOutput = dir.createOutput(filename);
  for (int i = 0; i < 10; i++) {
    indexOutput.writeBytes(manyBytes, bufferSize);
    indexOutput.flush();
  }
  indexOutput.close();
  IndexInput input = dir.openInput(filename);
  final int finalSize = (10 * bufferSize);
  assert input.length() == finalSize;
  final byte[] resultingBuffer = new byte[finalSize];
  input.readBytes(resultingBuffer, 0, finalSize);
  int index = 0;
  for (int i = 0; i < 10; i++) {
    for (int j = 0; j < bufferSize; j++) {
      assert resultingBuffer[index++] == manyBytes[j];
    }
  }
}
@Test
public void testVerifyingIndexInput() throws IOException {
  Directory dir = newDirectory();
  IndexOutput output = dir.createOutput("foo.bar", IOContext.DEFAULT);
  int iters = scaledRandomIntBetween(10, 100);
  for (int i = 0; i < iters; i++) {
    BytesRef bytesRef = new BytesRef(TestUtil.randomRealisticUnicodeString(random(), 10, 1024));
    output.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length);
  }
  CodecUtil.writeFooter(output);
  output.close();
  // Check file
  IndexInput indexInput = dir.openInput("foo.bar", IOContext.DEFAULT);
  long checksum = CodecUtil.retrieveChecksum(indexInput);
  indexInput.seek(0);
  IndexInput verifyingIndexInput =
      new Store.VerifyingIndexInput(dir.openInput("foo.bar", IOContext.DEFAULT));
  readIndexInputFullyWithRandomSeeks(verifyingIndexInput);
  Store.verify(verifyingIndexInput);
  assertThat(checksum, equalTo(((ChecksumIndexInput) verifyingIndexInput).getChecksum()));
  IOUtils.close(indexInput, verifyingIndexInput);
  // Corrupt file and check again
  corruptFile(dir, "foo.bar", "foo1.bar");
  verifyingIndexInput = new Store.VerifyingIndexInput(dir.openInput("foo1.bar", IOContext.DEFAULT));
  readIndexInputFullyWithRandomSeeks(verifyingIndexInput);
  try {
    Store.verify(verifyingIndexInput);
    fail("should be a corrupted index");
  } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
    // ok
  }
  IOUtils.close(verifyingIndexInput);
  IOUtils.close(dir);
}
@Override
protected void writeHeader(IndexOutput out) throws IOException {
  out.writeBytes(LZFCompressor.LUCENE_HEADER, LZFCompressor.LUCENE_HEADER.length);
}
private void write(Directory directory) throws IOException {
  long nextGeneration = getNextPendingGeneration();
  String segmentFileName =
      IndexFileNames.fileNameFromGeneration(IndexFileNames.PENDING_SEGMENTS, "", nextGeneration);

  // Always advance the generation on write:
  generation = nextGeneration;

  IndexOutput segnOutput = null;
  boolean success = false;

  try {
    segnOutput = directory.createOutput(segmentFileName, IOContext.DEFAULT);
    CodecUtil.writeIndexHeader(
        segnOutput,
        "segments",
        VERSION_CURRENT,
        StringHelper.randomId(),
        Long.toString(nextGeneration, Character.MAX_RADIX));
    segnOutput.writeVInt(Version.LATEST.major);
    segnOutput.writeVInt(Version.LATEST.minor);
    segnOutput.writeVInt(Version.LATEST.bugfix);

    segnOutput.writeLong(version);
    segnOutput.writeInt(counter); // write counter
    segnOutput.writeInt(size());

    if (size() > 0) {
      Version minSegmentVersion = null;

      // We do a separate loop up front so we can write the minSegmentVersion before
      // any SegmentInfo; this makes it cleaner to throw IndexFormatTooOldExc at read time:
      for (SegmentCommitInfo siPerCommit : this) {
        Version segmentVersion = siPerCommit.info.getVersion();
        if (minSegmentVersion == null || segmentVersion.onOrAfter(minSegmentVersion) == false) {
          minSegmentVersion = segmentVersion;
        }
      }

      segnOutput.writeVInt(minSegmentVersion.major);
      segnOutput.writeVInt(minSegmentVersion.minor);
      segnOutput.writeVInt(minSegmentVersion.bugfix);
    }

    // write infos
    for (SegmentCommitInfo siPerCommit : this) {
      SegmentInfo si = siPerCommit.info;
      segnOutput.writeString(si.name);
      byte[] segmentID = si.getId();
      // TODO: remove this in lucene 6, we don't need to include 4.x segments in commits anymore
      if (segmentID == null) {
        segnOutput.writeByte((byte) 0);
      } else {
        if (segmentID.length != StringHelper.ID_LENGTH) {
          throw new IllegalStateException(
              "cannot write segment: invalid id segment="
                  + si.name
                  + " id="
                  + StringHelper.idToString(segmentID));
        }
        segnOutput.writeByte((byte) 1);
        segnOutput.writeBytes(segmentID, segmentID.length);
      }
      segnOutput.writeString(si.getCodec().getName());
      segnOutput.writeLong(siPerCommit.getDelGen());
      int delCount = siPerCommit.getDelCount();
      if (delCount < 0 || delCount > si.maxDoc()) {
        throw new IllegalStateException(
            "cannot write segment: invalid maxDoc segment="
                + si.name
                + " maxDoc="
                + si.maxDoc()
                + " delCount="
                + delCount);
      }
      segnOutput.writeInt(delCount);
      segnOutput.writeLong(siPerCommit.getFieldInfosGen());
      segnOutput.writeLong(siPerCommit.getDocValuesGen());
      segnOutput.writeSetOfStrings(siPerCommit.getFieldInfosFiles());
      final Map<Integer, Set<String>> dvUpdatesFiles = siPerCommit.getDocValuesUpdatesFiles();
      segnOutput.writeInt(dvUpdatesFiles.size());
      for (Entry<Integer, Set<String>> e : dvUpdatesFiles.entrySet()) {
        segnOutput.writeInt(e.getKey());
        segnOutput.writeSetOfStrings(e.getValue());
      }
    }
    segnOutput.writeMapOfStrings(userData);
    CodecUtil.writeFooter(segnOutput);
    segnOutput.close();
    directory.sync(Collections.singleton(segmentFileName));
    success = true;
  } finally {
    if (success) {
      pendingCommit = true;
    } else {
      // We hit an exception above; try to close the file
      // but suppress any exception:
      IOUtils.closeWhileHandlingException(segnOutput);
      // Try not to leave a truncated segments_N file in
      // the index:
      IOUtils.deleteFilesIgnoringExceptions(directory, segmentFileName);
    }
  }
}
@Override
public void writeBytes(byte[] b, int offset, int length) throws IOException {
  delegate.writeBytes(b, offset, length);
}
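// The one-line override above only makes sense inside a delegating wrapper around another
// IndexOutput. A minimal sketch of such a wrapper follows, assuming a Lucene 4.x-era
// IndexOutput whose abstract surface is writeByte/writeBytes/getFilePointer/length/flush/close.
// The class name StatsIndexOutput and the byte counter are hypothetical additions for
// illustration only, not part of the original code.
static class StatsIndexOutput extends IndexOutput {
  private final IndexOutput delegate;
  private long bytesWritten;

  StatsIndexOutput(IndexOutput delegate) {
    this.delegate = delegate;
  }

  @Override
  public void writeByte(byte b) throws IOException {
    delegate.writeByte(b);
    bytesWritten++;
  }

  @Override
  public void writeBytes(byte[] b, int offset, int length) throws IOException {
    // forward the write unchanged, then account for it
    delegate.writeBytes(b, offset, length);
    bytesWritten += length;
  }

  @Override
  public long getFilePointer() {
    return delegate.getFilePointer();
  }

  @Override
  public long length() throws IOException {
    return delegate.length();
  }

  @Override
  public void flush() throws IOException {
    delegate.flush();
  }

  @Override
  public void close() throws IOException {
    delegate.close();
  }

  long bytesWritten() {
    return bytesWritten;
  }
}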
@Test
public void testRenameFile() throws IOException {
  final ShardId shardId = new ShardId(new Index("index"), 1);
  DirectoryService directoryService = new LuceneManagedDirectoryService(random(), false);
  Store store =
      new Store(
          shardId,
          ImmutableSettings.EMPTY,
          directoryService,
          randomDistributor(directoryService),
          new DummyShardLock(shardId));
  {
    IndexOutput output = store.directory().createOutput("foo.bar", IOContext.DEFAULT);
    int iters = scaledRandomIntBetween(10, 100);
    for (int i = 0; i < iters; i++) {
      BytesRef bytesRef = new BytesRef(TestUtil.randomRealisticUnicodeString(random(), 10, 1024));
      output.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length);
    }
    CodecUtil.writeFooter(output);
    output.close();
  }
  store.renameFile("foo.bar", "bar.foo");
  assertThat(store.directory().listAll().length, is(1));
  final long lastChecksum;
  try (IndexInput input = store.directory().openInput("bar.foo", IOContext.DEFAULT)) {
    lastChecksum = CodecUtil.checksumEntireFile(input);
  }
  try {
    store.directory().openInput("foo.bar", IOContext.DEFAULT);
    fail("file was renamed");
  } catch (FileNotFoundException | NoSuchFileException ex) {
    // expected
  }
  {
    IndexOutput output = store.directory().createOutput("foo.bar", IOContext.DEFAULT);
    int iters = scaledRandomIntBetween(10, 100);
    for (int i = 0; i < iters; i++) {
      BytesRef bytesRef = new BytesRef(TestUtil.randomRealisticUnicodeString(random(), 10, 1024));
      output.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length);
    }
    CodecUtil.writeFooter(output);
    output.close();
  }
  DistributorDirectory distributorDirectory =
      DirectoryUtils.getLeaf(store.directory(), DistributorDirectory.class);
  if (distributorDirectory != null
      && distributorDirectory.getDirectory("foo.bar")
          != distributorDirectory.getDirectory("bar.foo")) {
    try {
      store.renameFile("foo.bar", "bar.foo");
      fail("target file already exists in a different directory");
    } catch (IOException ex) {
      // expected
    }
    try (IndexInput input = store.directory().openInput("bar.foo", IOContext.DEFAULT)) {
      assertThat(lastChecksum, equalTo(CodecUtil.checksumEntireFile(input)));
    }
    assertThat(store.directory().listAll().length, is(2));
    assertDeleteContent(store, directoryService);
    IOUtils.close(store);
  } else {
    store.renameFile("foo.bar", "bar.foo");
    assertThat(store.directory().listAll().length, is(1));
    assertDeleteContent(store, directoryService);
    IOUtils.close(store);
  }
}
@Test
public void testWriteUsingSeekMethod() throws IOException {
  final int BUFFER_SIZE = 64;
  Cache cache = cacheManager.getCache();
  Directory dir =
      DirectoryBuilder.newDirectoryInstance(cache, cache, cache, INDEXNAME)
          .chunkSize(BUFFER_SIZE)
          .create();
  String fileName = "SomeText.txt";
  IndexOutput io = dir.createOutput(fileName);
  RepeatableLongByteSequence bytesGenerator = new RepeatableLongByteSequence();
  // write a repeatable byte sequence
  final int REPEATABLE_BUFFER_SIZE = 1501;
  for (int i = 0; i < REPEATABLE_BUFFER_SIZE; i++) {
    io.writeByte(bytesGenerator.nextByte());
  }
  io.flush();
  assert io.length() == REPEATABLE_BUFFER_SIZE;
  // text to write into the file holding the repeatable sequence
  final String someText = "This is some text";
  final byte[] someTextAsBytes = someText.getBytes();
  // 4 points, in random order, at which someText is written: at the beginning of the file,
  // at the end of the file, within a single chunk, and spanning the boundary between 2 chunks
  final int[] pointers = {0, 635, REPEATABLE_BUFFER_SIZE, 135};
  for (int i = 0; i < pointers.length; i++) {
    io.seek(pointers[i]);
    io.writeBytes(someTextAsBytes, someTextAsBytes.length);
  }
  io.close();
  bytesGenerator.reset();
  final long finalSize = REPEATABLE_BUFFER_SIZE + someTextAsBytes.length;
  assert io.length() == finalSize;
  assert io.length()
      == DirectoryIntegrityCheck.deepCountFileSize(new FileCacheKey(INDEXNAME, fileName), cache);
  int indexPointer = 0;
  Arrays.sort(pointers);
  byte[] buffer = null;
  int chunkIndex = -1;
  // now verify the stream equals the repeatable sequence, including the edits at the
  // pointed positions
  for (int i = 0; i < REPEATABLE_BUFFER_SIZE + someTextAsBytes.length; i++) {
    if (i % BUFFER_SIZE == 0) {
      buffer =
          (byte[]) cache.get(new ChunkCacheKey(INDEXNAME, fileName, ++chunkIndex, BUFFER_SIZE));
    }
    byte predictableByte = bytesGenerator.nextByte();
    if (i < pointers[indexPointer]) {
      // assert the predictable sequence
      AssertJUnit.assertEquals(predictableByte, buffer[i % BUFFER_SIZE]);
    } else if (pointers[indexPointer] <= i && i < pointers[indexPointer] + someTextAsBytes.length) {
      // assert someText
      AssertJUnit.assertEquals(
          someTextAsBytes[i - pointers[indexPointer]], buffer[i % BUFFER_SIZE]);
    }
    if (i == pointers[indexPointer] + someTextAsBytes.length) {
      // advance to the next pointer
      indexPointer++;
    }
  }
  dir.close();
  DirectoryIntegrityCheck.verifyDirectoryStructure(cache, INDEXNAME);
}
public void testCheckIntegrity() throws IOException {
  Directory dir = newDirectory();
  long luceneFileLength = 0;

  try (IndexOutput output = dir.createOutput("lucene_checksum.bin", IOContext.DEFAULT)) {
    int iters = scaledRandomIntBetween(10, 100);
    for (int i = 0; i < iters; i++) {
      BytesRef bytesRef = new BytesRef(TestUtil.randomRealisticUnicodeString(random(), 10, 1024));
      output.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length);
      luceneFileLength += bytesRef.length;
    }
    CodecUtil.writeFooter(output);
    luceneFileLength += CodecUtil.footerLength();
  }

  final Adler32 adler32 = new Adler32();
  long legacyFileLength = 0;
  try (IndexOutput output = dir.createOutput("legacy.bin", IOContext.DEFAULT)) {
    int iters = scaledRandomIntBetween(10, 100);
    for (int i = 0; i < iters; i++) {
      BytesRef bytesRef = new BytesRef(TestUtil.randomRealisticUnicodeString(random(), 10, 1024));
      output.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length);
      adler32.update(bytesRef.bytes, bytesRef.offset, bytesRef.length);
      legacyFileLength += bytesRef.length;
    }
  }

  final long luceneChecksum;
  final long adler32LegacyChecksum = adler32.getValue();
  try (IndexInput indexInput = dir.openInput("lucene_checksum.bin", IOContext.DEFAULT)) {
    assertEquals(luceneFileLength, indexInput.length());
    luceneChecksum = CodecUtil.retrieveChecksum(indexInput);
  }

  { // positive check
    StoreFileMetaData lucene =
        new StoreFileMetaData(
            "lucene_checksum.bin",
            luceneFileLength,
            Store.digestToString(luceneChecksum),
            Version.LUCENE_4_8_0);
    StoreFileMetaData legacy =
        new StoreFileMetaData(
            "legacy.bin", legacyFileLength, Store.digestToString(adler32LegacyChecksum));
    assertTrue(legacy.hasLegacyChecksum());
    assertFalse(lucene.hasLegacyChecksum());
    assertTrue(Store.checkIntegrityNoException(lucene, dir));
    assertTrue(Store.checkIntegrityNoException(legacy, dir));
  }

  { // negative check - wrong checksum
    StoreFileMetaData lucene =
        new StoreFileMetaData(
            "lucene_checksum.bin",
            luceneFileLength,
            Store.digestToString(luceneChecksum + 1),
            Version.LUCENE_4_8_0);
    StoreFileMetaData legacy =
        new StoreFileMetaData(
            "legacy.bin", legacyFileLength, Store.digestToString(adler32LegacyChecksum + 1));
    assertTrue(legacy.hasLegacyChecksum());
    assertFalse(lucene.hasLegacyChecksum());
    assertFalse(Store.checkIntegrityNoException(lucene, dir));
    assertFalse(Store.checkIntegrityNoException(legacy, dir));
  }

  { // negative check - wrong length
    StoreFileMetaData lucene =
        new StoreFileMetaData(
            "lucene_checksum.bin",
            luceneFileLength + 1,
            Store.digestToString(luceneChecksum),
            Version.LUCENE_4_8_0);
    StoreFileMetaData legacy =
        new StoreFileMetaData(
            "legacy.bin", legacyFileLength + 1, Store.digestToString(adler32LegacyChecksum));
    assertTrue(legacy.hasLegacyChecksum());
    assertFalse(lucene.hasLegacyChecksum());
    assertFalse(Store.checkIntegrityNoException(lucene, dir));
    assertFalse(Store.checkIntegrityNoException(legacy, dir));
  }

  { // negative check - wrong file
    StoreFileMetaData lucene =
        new StoreFileMetaData(
            "legacy.bin",
            luceneFileLength,
            Store.digestToString(luceneChecksum),
            Version.LUCENE_4_8_0);
    StoreFileMetaData legacy =
        new StoreFileMetaData(
            "lucene_checksum.bin", legacyFileLength, Store.digestToString(adler32LegacyChecksum));
    assertTrue(legacy.hasLegacyChecksum());
    assertFalse(lucene.hasLegacyChecksum());
    assertFalse(Store.checkIntegrityNoException(lucene, dir));
    assertFalse(Store.checkIntegrityNoException(legacy, dir));
  }

  dir.close();
}
/**
 * Called once per field per document if term vectors are enabled, to write the vectors to
 * RAMOutputStream, which is then quickly flushed to the real term vectors files in the
 * Directory.
 */
@Override
void finish() throws IOException {
  assert docState.testPoint("TermVectorsTermsWriterPerField.finish start");

  final int numPostings = termsHashPerField.bytesHash.size();

  final BytesRef flushTerm = perThread.flushTerm;

  assert numPostings >= 0;

  if (!doVectors || numPostings == 0) {
    return;
  }

  if (numPostings > maxNumPostings) {
    maxNumPostings = numPostings;
  }

  final IndexOutput tvf = perThread.doc.perDocTvf;

  // This is called once, after inverting all occurrences
  // of a given field in the doc. At this point we flush
  // our hash into the DocWriter.

  assert fieldInfo.storeTermVector;
  assert perThread.vectorFieldsInOrder(fieldInfo);

  perThread.doc.addField(termsHashPerField.fieldInfo.number);
  TermVectorsPostingsArray postings = (TermVectorsPostingsArray) termsHashPerField.postingsArray;

  // TODO: we may want to make this sort in same order
  // as Codec's terms dict?
  final int[] termIDs =
      termsHashPerField.sortPostings(BytesRef.getUTF8SortedAsUnicodeComparator());

  tvf.writeVInt(numPostings);
  byte bits = 0x0;
  if (doVectorPositions) {
    bits |= TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR;
  }
  if (doVectorOffsets) {
    bits |= TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR;
  }
  tvf.writeByte(bits);

  int lastLen = 0;
  byte[] lastBytes = null;
  int lastStart = 0;

  final ByteSliceReader reader = perThread.vectorSliceReader;
  final ByteBlockPool termBytePool = perThread.termsHashPerThread.termBytePool;

  for (int j = 0; j < numPostings; j++) {
    final int termID = termIDs[j];
    final int freq = postings.freqs[termID];

    // Get BytesRef
    termBytePool.setBytesRef(flushTerm, postings.textStarts[termID]);

    // Compute common byte prefix between last term and
    // this term
    int prefix = 0;
    if (j > 0) {
      while (prefix < lastLen && prefix < flushTerm.length) {
        if (lastBytes[lastStart + prefix] != flushTerm.bytes[flushTerm.offset + prefix]) {
          break;
        }
        prefix++;
      }
    }

    lastLen = flushTerm.length;
    lastBytes = flushTerm.bytes;
    lastStart = flushTerm.offset;

    final int suffix = flushTerm.length - prefix;
    tvf.writeVInt(prefix);
    tvf.writeVInt(suffix);
    tvf.writeBytes(flushTerm.bytes, lastStart + prefix, suffix);
    tvf.writeVInt(freq);

    if (doVectorPositions) {
      termsHashPerField.initReader(reader, termID, 0);
      reader.writeTo(tvf);
    }

    if (doVectorOffsets) {
      termsHashPerField.initReader(reader, termID, 1);
      reader.writeTo(tvf);
    }
  }

  termsHashPerField.reset();

  // NOTE: we clear, per-field, at the thread level,
  // because term vectors fully write themselves on each
  // field; this saves RAM (eg if large doc has two large
  // fields w/ term vectors on) because we recycle/reuse
  // all RAM after each field:
  perThread.termsHashPerThread.reset(false);
}
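// Small self-contained illustration of the prefix/suffix delta encoding used in finish() above:
// for consecutive sorted terms only the length of the shared prefix and the remaining suffix
// bytes are written. The helper name and the sample terms below are made up for illustration.
static int commonPrefixLength(byte[] last, byte[] current) {
  int prefix = 0;
  int max = Math.min(last.length, current.length);
  while (prefix < max && last[prefix] == current[prefix]) {
    prefix++;
  }
  return prefix;
}
// e.g. last = "apple", current = "applet": prefix = 5, suffix = 1, so only 't' is written;
//      last = "applet", current = "apply": prefix = 4, suffix = 1, so only 'y' is written.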