private void verifyData(CoherenceDirectory dir, String fileName) throws IOException {
  byte[] test = new byte[] {1, 2, 3, 4, 5, 6, 7, 8};
  assertTrue(dir.fileExists(fileName));
  assertEquals(38, dir.fileLength(fileName));

  IndexInput indexInput = dir.openInput(fileName);
  indexInput.readBytes(test, 0, 5);
  assertEquals(8, test[0]);
  assertEquals(-1, indexInput.readInt());
  assertEquals(10, indexInput.readLong());
  assertEquals(0, indexInput.readInt());
  assertEquals(0, indexInput.readInt());
  indexInput.readBytes(test, 0, 8);
  assertEquals((byte) 1, test[0]);
  assertEquals((byte) 8, test[7]);
  indexInput.readBytes(test, 0, 5);
  assertEquals((byte) 1, test[0]);
  assertEquals((byte) 5, test[4]);
  indexInput.seek(28);
  assertEquals((byte) 4, indexInput.readByte());
  indexInput.seek(30);
  assertEquals((byte) 6, indexInput.readByte());
  indexInput.close();
}
public void testReadPastEOF() throws IOException {
  setUp_2();
  CompoundFileReader cr = new CompoundFileReader(dir, "f.comp");
  IndexInput is = cr.openInput("f2");
  is.seek(is.length() - 10);
  byte b[] = new byte[100];
  is.readBytes(b, 0, 10);

  try {
    is.readByte();
    fail("Single byte read past end of file");
  } catch (IOException e) {
    /* success */
    // System.out.println("SUCCESS: single byte read past end of file: " + e);
  }

  is.seek(is.length() - 10);
  try {
    is.readBytes(b, 0, 50);
    fail("Block read past end of file");
  } catch (IOException e) {
    /* success */
    // System.out.println("SUCCESS: block read past end of file: " + e);
  }

  is.close();
  cr.close();
}
/* Does initial decode of next block of terms; this doesn't actually decode the
   docFreq, totalTermFreq, postings details (frq/prx offset, etc.) metadata;
   it just loads them as byte[] blobs which are then decoded on-demand if the
   metadata is ever requested for any term in this block. This enables terms-only
   intensive consumers (eg certain MTQs, respelling) to not pay the price of
   decoding metadata they won't use. */
private boolean nextBlock() throws IOException {

  // TODO: we still lazy-decode the byte[] for each
  // term (the suffix), but, if we decoded
  // all N terms up front then seeking could do a fast
  // bsearch w/in the block...

  // System.out.println("BTR.nextBlock() fp=" + in.getFilePointer() + " this=" + this);
  state.blockFilePointer = in.getFilePointer();
  blockTermCount = in.readVInt();
  // System.out.println("  blockTermCount=" + blockTermCount);
  if (blockTermCount == 0) {
    return false;
  }
  termBlockPrefix = in.readVInt();

  // term suffixes:
  int len = in.readVInt();
  if (termSuffixes.length < len) {
    termSuffixes = new byte[ArrayUtil.oversize(len, 1)];
  }
  // System.out.println("  termSuffixes len=" + len);
  in.readBytes(termSuffixes, 0, len);
  termSuffixesReader.reset(termSuffixes, 0, len);

  // docFreq, totalTermFreq
  len = in.readVInt();
  if (docFreqBytes.length < len) {
    docFreqBytes = new byte[ArrayUtil.oversize(len, 1)];
  }
  // System.out.println("  freq bytes len=" + len);
  in.readBytes(docFreqBytes, 0, len);
  freqReader.reset(docFreqBytes, 0, len);

  // metadata
  len = in.readVInt();
  if (bytes == null) {
    bytes = new byte[ArrayUtil.oversize(len, 1)];
    bytesReader = new ByteArrayDataInput();
  } else if (bytes.length < len) {
    bytes = new byte[ArrayUtil.oversize(len, 1)];
  }
  in.readBytes(bytes, 0, len);
  bytesReader.reset(bytes, 0, len);

  metaDataUpto = 0;
  state.termBlockOrd = 0;
  indexIsCurrent = false;
  // System.out.println("  indexIsCurrent=" + indexIsCurrent);

  return true;
}
@Override
public BytesRef getPayload() throws IOException {
  if (!payloadPending) {
    return null;
  }
  if (pendingPayloadBytes == 0) {
    return payload;
  }

  assert pendingPayloadBytes >= payloadLength;

  if (pendingPayloadBytes > payloadLength) {
    payloadIn.seek(payloadIn.getFilePointer() + (pendingPayloadBytes - payloadLength));
  }

  if (payload == null) {
    payload = new BytesRef();
    payload.bytes = new byte[payloadLength];
  } else if (payload.bytes.length < payloadLength) {
    payload.grow(payloadLength);
  }

  payloadIn.readBytes(payload.bytes, 0, payloadLength);
  payload.length = payloadLength;
  pendingPayloadBytes = 0;
  return payload;
}
private void testOn(Directory dir, int writeSize, int readSize, Cache cache) throws IOException {
  if (cache != null) {
    // needed to make sure no chunks are left over in case of the Infinispan implementation
    cache.clear();
  }
  final String filename = "chunkTest";
  IndexOutput indexOutput = dir.createOutput(filename);
  byte[] toWrite = fillBytes(writeSize);
  indexOutput.writeBytes(toWrite, writeSize);
  indexOutput.close();
  if (cache != null) {
    AssertJUnit.assertEquals(
        writeSize,
        DirectoryIntegrityCheck.deepCountFileSize(new FileCacheKey(INDEXNAME, filename), cache));
  }
  AssertJUnit.assertEquals(writeSize, indexOutput.length());
  byte[] results = new byte[readSize];
  IndexInput openInput = dir.openInput(filename);
  try {
    openInput.readBytes(results, 0, readSize);
    for (int i = 0; i < writeSize && i < readSize; i++) {
      AssertJUnit.assertEquals(results[i], toWrite[i]);
    }
    if (readSize > writeSize) {
      AssertJUnit.fail("should have thrown an IOException for reading past EOF");
    }
  } catch (IOException ioe) {
    if (readSize <= writeSize) {
      AssertJUnit.fail("should not have thrown an IOException" + ioe.getMessage());
    }
  }
}
/**
 * Verifies that the IndexInput.readBytes method correctly reads the whole file content,
 * comparing the result with the expected sequence of bytes.
 *
 * @param dir The Directory containing the file to verify
 * @param fileName The name of the file to read
 * @param contentFileSizeExpected The expected size of the file content
 * @param arrayLengthToRead The length of the byte array to read
 * @throws IOException
 */
private void assertReadBytesWorkingCorrectly(
    Directory dir, String fileName, final int contentFileSizeExpected, final int arrayLengthToRead)
    throws IOException {
  IndexInput indexInput = dir.openInput(fileName);
  AssertJUnit.assertEquals(contentFileSizeExpected, indexInput.length());

  RepeatableLongByteSequence bytesGenerator = new RepeatableLongByteSequence();
  byte[] readBytes = new byte[arrayLengthToRead];
  byte[] expectedBytes = new byte[arrayLengthToRead];

  long toRead = contentFileSizeExpected;
  while (toRead > 0) {
    // the condition is satisfied when the file is close to the end
    if (toRead < arrayLengthToRead) {
      readBytes = new byte[(int) toRead];
      expectedBytes = new byte[(int) toRead];
    }
    int nextBytesToRead = (int) Math.min(toRead, arrayLengthToRead);

    bytesGenerator.nextBytes(expectedBytes);
    indexInput.readBytes(readBytes, 0, nextBytesToRead);

    assert Arrays.equals(expectedBytes, readBytes);

    toRead = toRead - nextBytesToRead;
  }
  indexInput.close();
}
public void multipleFlushTest() throws IOException {
  final String filename = "longFile.writtenInMultipleFlushes";
  final int bufferSize = 300;
  Cache cache = cacheManager.getCache();
  cache.clear();
  Directory dir =
      DirectoryBuilder.newDirectoryInstance(cache, cache, cache, INDEXNAME)
          .chunkSize(13)
          .create();
  byte[] manyBytes = fillBytes(bufferSize);
  IndexOutput indexOutput = dir.createOutput(filename);
  for (int i = 0; i < 10; i++) {
    indexOutput.writeBytes(manyBytes, bufferSize);
    indexOutput.flush();
  }
  indexOutput.close();
  IndexInput input = dir.openInput(filename);
  final int finalSize = (10 * bufferSize);
  AssertJUnit.assertEquals(finalSize, input.length());
  final byte[] resultingBuffer = new byte[finalSize];
  input.readBytes(resultingBuffer, 0, finalSize);
  int index = 0;
  for (int i = 0; i < 10; i++) {
    for (int j = 0; j < bufferSize; j++) {
      AssertJUnit.assertEquals(resultingBuffer[index++], manyBytes[j]);
    }
  }
}
private static BytesRef readBytesRef(IndexInput in) throws IOException {
  BytesRef bytes = new BytesRef();
  bytes.length = in.readVInt();
  bytes.bytes = new byte[bytes.length];
  in.readBytes(bytes.bytes, 0, bytes.length);
  return bytes;
}
private void addField(
    Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize)
    throws IOException {

  // we have a binary stored field, and it may be compressed
  if (binary) {
    int toRead = fieldsStream.readVInt();
    final byte[] b = new byte[toRead];
    fieldsStream.readBytes(b, 0, b.length);
    if (compressed) doc.add(new Field(fi.name, uncompress(b), Field.Store.COMPRESS));
    else doc.add(new Field(fi.name, b, Field.Store.YES));
  } else {
    Field.Store store = Field.Store.YES;
    Field.Index index = getIndexType(fi, tokenize);
    Field.TermVector termVector = getTermVectorType(fi);

    Fieldable f;
    if (compressed) {
      store = Field.Store.COMPRESS;
      int toRead = fieldsStream.readVInt();

      final byte[] b = new byte[toRead];
      fieldsStream.readBytes(b, 0, b.length);
      f =
          new Field(
              fi.name, // field name
              new String(uncompress(b), "UTF-8"), // uncompress the value and add as string
              store,
              index,
              termVector);
      f.setOmitNorms(fi.omitNorms);
    } else {
      f =
          new Field(
              fi.name, // name
              fieldsStream.readString(), // read value
              store,
              index,
              termVector);
      f.setOmitNorms(fi.omitNorms);
    }

    doc.add(f);
  }
}
private void assertSameStreams(String msg, IndexInput expected, IndexInput test)
    throws IOException {
  assertNotNull(msg + " null expected", expected);
  assertNotNull(msg + " null test", test);
  assertEquals(msg + " length", expected.length(), test.length());
  assertEquals(msg + " position", expected.getFilePointer(), test.getFilePointer());

  byte expectedBuffer[] = new byte[512];
  byte testBuffer[] = new byte[expectedBuffer.length];
  long remainder = expected.length() - expected.getFilePointer();
  while (remainder > 0) {
    int readLen = (int) Math.min(remainder, expectedBuffer.length);
    expected.readBytes(expectedBuffer, 0, readLen);
    test.readBytes(testBuffer, 0, readLen);
    assertEqualArrays(msg + ", remainder " + remainder, expectedBuffer, testBuffer, 0, readLen);
    remainder -= readLen;
  }
}
private void verifyData(ByteBufferDirectory dir) throws IOException {
  byte[] test = new byte[] {1, 2, 3, 4, 5, 6, 7, 8};
  assertThat(dir.fileExists("value1"), equalTo(true));
  assertThat(dir.fileLength("value1"), equalTo(38L));

  IndexInput indexInput = dir.openInput("value1", IOContext.DEFAULT);
  indexInput.readBytes(test, 0, 5);
  assertThat(test[0], equalTo((byte) 8));
  assertThat(indexInput.readInt(), equalTo(-1));
  assertThat(indexInput.readLong(), equalTo((long) 10));
  assertThat(indexInput.readInt(), equalTo(0));
  assertThat(indexInput.readInt(), equalTo(0));
  indexInput.readBytes(test, 0, 8);
  assertThat(test[0], equalTo((byte) 1));
  assertThat(test[7], equalTo((byte) 8));
  indexInput.readBytes(test, 0, 5);
  assertThat(test[0], equalTo((byte) 1));
  assertThat(test[4], equalTo((byte) 5));

  indexInput.seek(28);
  assertThat(indexInput.readByte(), equalTo((byte) 4));
  indexInput.seek(30);
  assertThat(indexInput.readByte(), equalTo((byte) 6));

  indexInput.seek(0);
  indexInput.readBytes(test, 0, 5);
  assertThat(test[0], equalTo((byte) 8));

  indexInput.close();

  indexInput = dir.openInput("value1", IOContext.DEFAULT);
  // iterate over all the data
  for (int i = 0; i < 38; i++) {
    indexInput.readByte();
  }
  indexInput.close();
}
@Override
public void readTermsBlock(IndexInput termsIn, FieldInfo fieldInfo, BlockTermState _termState)
    throws IOException {
  final SepTermState termState = (SepTermState) _termState;
  // System.out.println("SEPR: readTermsBlock termsIn.fp=" + termsIn.getFilePointer());
  final int len = termsIn.readVInt();
  // System.out.println("  numBytes=" + len);
  if (termState.bytes == null) {
    termState.bytes = new byte[ArrayUtil.oversize(len, 1)];
    termState.bytesReader = new ByteArrayDataInput(termState.bytes);
  } else if (termState.bytes.length < len) {
    termState.bytes = new byte[ArrayUtil.oversize(len, 1)];
  }
  termState.bytesReader.reset(termState.bytes, 0, len);
  termsIn.readBytes(termState.bytes, 0, len);
}
// in merge mode we don't uncompress the data of a compressed field
private void addFieldForMerge(
    Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize)
    throws IOException {
  Object data;

  if (binary || compressed) {
    int toRead = fieldsStream.readVInt();
    final byte[] b = new byte[toRead];
    fieldsStream.readBytes(b, 0, b.length);
    data = b;
  } else {
    data = fieldsStream.readString();
  }

  doc.add(new FieldForMerge(data, fi, binary, compressed, tokenize));
}
public void testWriteChunks() throws Exception {
  final int BUFFER_SIZE = 64;
  Cache cache = cacheManager.getCache();
  Directory dir =
      DirectoryBuilder.newDirectoryInstance(cache, cache, cache, INDEXNAME)
          .chunkSize(BUFFER_SIZE)
          .create();

  IndexOutput io = dir.createOutput("MyNewFile.txt");
  io.writeByte((byte) 66);
  io.writeByte((byte) 69);
  io.flush();
  io.close();

  assert dir.fileExists("MyNewFile.txt");
  assert null != cache.get(new ChunkCacheKey(INDEXNAME, "MyNewFile.txt", 0, BUFFER_SIZE));

  // test contents by reading:
  byte[] buf = new byte[9];
  IndexInput ii = dir.openInput("MyNewFile.txt");
  ii.readBytes(buf, 0, (int) ii.length());
  ii.close();

  assert new String(new byte[] {66, 69}).equals(new String(buf).trim());

  String testText =
      "This is some rubbish again that will span more than one chunk - one hopes. Who knows, maybe even three or four chunks.";
  io = dir.createOutput("MyNewFile.txt");
  io.seek(0);
  io.writeBytes(testText.getBytes(), 0, testText.length());
  io.close();

  // now compare.
  byte[] chunk1 =
      (byte[]) cache.get(new ChunkCacheKey(INDEXNAME, "MyNewFile.txt", 0, BUFFER_SIZE));
  byte[] chunk2 =
      (byte[]) cache.get(new ChunkCacheKey(INDEXNAME, "MyNewFile.txt", 1, BUFFER_SIZE));
  assert null != chunk1;
  assert null != chunk2;

  assert testText.equals(new String(chunk1) + new String(chunk2).trim());

  dir.close();
  DirectoryIntegrityCheck.verifyDirectoryStructure(cache, INDEXNAME);
}
public Term next() {
  assert hasNext();
  try {
    int code = input.readVInt();
    if ((code & 1) != 0) {
      // new field
      field = input.readString();
    }
    int prefix = code >>> 1;
    int suffix = input.readVInt();
    bytes.grow(prefix + suffix);
    input.readBytes(bytes.bytes, prefix, suffix);
    bytes.length = prefix + suffix;
    term.set(field, bytes);
    return term;
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
private void corruptFile(Directory dir, String fileIn, String fileOut) throws IOException {
  IndexInput input = dir.openInput(fileIn, IOContext.READONCE);
  IndexOutput output = dir.createOutput(fileOut, IOContext.DEFAULT);
  long len = input.length();
  byte[] b = new byte[1024];
  long broken = randomInt((int) len);
  long pos = 0;
  while (pos < len) {
    int min = (int) Math.min(input.length() - pos, b.length);
    input.readBytes(b, 0, min);
    if (broken >= pos && broken < pos + min) {
      // Flip one byte
      int flipPos = (int) (broken - pos);
      b[flipPos] = (byte) (b[flipPos] ^ 42);
    }
    output.writeBytes(b, min);
    pos += min;
  }
  IOUtils.close(input, output);
}
private void readIndexInputFullyWithRandomSeeks(IndexInput indexInput) throws IOException {
  BytesRef ref = new BytesRef(scaledRandomIntBetween(1, 1024));
  long pos = 0;
  while (pos < indexInput.length()) {
    assertEquals(pos, indexInput.getFilePointer());
    int op = random().nextInt(5);
    if (op == 0) {
      int shift = 100 - randomIntBetween(0, 200);
      pos = Math.min(indexInput.length() - 1, Math.max(0, pos + shift));
      indexInput.seek(pos);
    } else if (op == 1) {
      indexInput.readByte();
      pos++;
    } else {
      int min = (int) Math.min(indexInput.length() - pos, ref.bytes.length);
      indexInput.readBytes(ref.bytes, ref.offset, min);
      pos += min;
    }
  }
}
/**
 * Loads the actual byte array from a segment, in the range of a specific chunkSize. Note that
 * while the chunkSize is specified in this case, it's likely derived from the invocations of
 * other loading methods.
 */
private byte[] loadIntern(final ChunkCacheKey key) throws IOException {
  final String fileName = key.getFileName();
  final long chunkId = key.getChunkId(); // needs to be long to upcast following operations
  int bufferSize = key.getBufferSize();
  final long seekTo = chunkId * bufferSize;
  final byte[] buffer;
  final IndexInput input = directory.openInput(fileName, IOContext.READ);
  final long length = input.length();
  try {
    if (seekTo != 0) {
      input.seek(seekTo);
    }
    bufferSize = (int) Math.min(length - seekTo, (long) bufferSize);
    buffer = new byte[bufferSize];
    input.readBytes(buffer, 0, bufferSize);
  } finally {
    input.close();
  }
  return buffer;
}
@Test
public void testVerifyingIndexOutput() throws IOException {
  Directory dir = newDirectory();
  IndexOutput output = dir.createOutput("foo.bar", IOContext.DEFAULT);
  int iters = scaledRandomIntBetween(10, 100);
  for (int i = 0; i < iters; i++) {
    BytesRef bytesRef = new BytesRef(TestUtil.randomRealisticUnicodeString(random(), 10, 1024));
    output.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length);
  }
  CodecUtil.writeFooter(output);
  output.close();

  IndexInput indexInput = dir.openInput("foo.bar", IOContext.DEFAULT);
  String checksum = Store.digestToString(CodecUtil.retrieveChecksum(indexInput));
  indexInput.seek(0);
  BytesRef ref = new BytesRef(scaledRandomIntBetween(1, 1024));
  long length = indexInput.length();
  IndexOutput verifyingOutput =
      new Store.LuceneVerifyingIndexOutput(
          new StoreFileMetaData("foo1.bar", length, checksum),
          dir.createOutput("foo1.bar", IOContext.DEFAULT));
  while (length > 0) {
    if (random().nextInt(10) == 0) {
      verifyingOutput.writeByte(indexInput.readByte());
      length--;
    } else {
      int min = (int) Math.min(length, ref.bytes.length);
      indexInput.readBytes(ref.bytes, ref.offset, min);
      verifyingOutput.writeBytes(ref.bytes, ref.offset, min);
      length -= min;
    }
  }
  Store.verify(verifyingOutput);
  verifyingOutput.writeByte((byte) 0x0);
  try {
    Store.verify(verifyingOutput);
    fail("should be a corrupted index");
  } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
    // ok
  }
  IOUtils.close(indexInput, verifyingOutput, dir);
}
@Test
public void testWriteChunksDefaultChunks() throws Exception {
  Cache cache = cacheManager.getCache();
  Directory dir = DirectoryBuilder.newDirectoryInstance(cache, cache, cache, INDEXNAME).create();

  final String testText = "This is some rubbish";
  final byte[] testTextAsBytes = testText.getBytes();

  IndexOutput io = dir.createOutput("MyNewFile.txt");
  io.writeByte((byte) 1);
  io.writeByte((byte) 2);
  io.writeByte((byte) 3);
  io.writeBytes(testTextAsBytes, testTextAsBytes.length);
  io.close();
  DirectoryIntegrityCheck.verifyDirectoryStructure(cache, INDEXNAME);

  FileCacheKey fileCacheKey = new FileCacheKey(INDEXNAME, "MyNewFile.txt");
  assert null != cache.get(fileCacheKey);
  FileMetadata metadata = (FileMetadata) cache.get(fileCacheKey);
  AssertJUnit.assertEquals(testTextAsBytes.length + 3, metadata.getSize());
  assert null
      != cache.get(
          new ChunkCacheKey(
              INDEXNAME, "MyNewFile.txt", 0, DirectoryBuilderImpl.DEFAULT_BUFFER_SIZE));

  // test contents by reading:
  IndexInput ii = dir.openInput("MyNewFile.txt");
  assert ii.readByte() == 1;
  assert ii.readByte() == 2;
  assert ii.readByte() == 3;
  byte[] buf = new byte[testTextAsBytes.length];
  ii.readBytes(buf, 0, testTextAsBytes.length);
  ii.close();

  assert testText.equals(new String(buf).trim());

  dir.close();
  DirectoryIntegrityCheck.verifyDirectoryStructure(cache, INDEXNAME);
}
/**
 * The value of the field in Binary, or null. If null, the Reader or String value is used.
 * Exactly one of stringValue(), readerValue() and binaryValue() must be set.
 */
public byte[] binaryValue() {
  if (fieldsData == null) {
    final byte[] b = new byte[toRead];
    IndexInput localFieldsStream = getFieldStream();
    // Throw this IOException since IndexReader.document does so anyway, so probably not that
    // big of a change for people since they are already handling this exception when getting
    // the document
    try {
      localFieldsStream.seek(pointer);
      localFieldsStream.readBytes(b, 0, b.length);
      if (isCompressed == true) {
        fieldsData = uncompress(b);
      } else {
        fieldsData = b;
      }
    } catch (IOException e) {
      throw new FieldReaderException(e);
    }
  }
  return fieldsData instanceof byte[] ? (byte[]) fieldsData : null;
}
/**
 * The value of the field as a String, or null. If null, the Reader value or binary value is
 * used. Exactly one of stringValue(), readerValue(), and binaryValue() must be set.
 */
public String stringValue() {
  if (fieldsData == null) {
    IndexInput localFieldsStream = getFieldStream();
    try {
      localFieldsStream.seek(pointer);
      if (isCompressed) {
        final byte[] b = new byte[toRead];
        localFieldsStream.readBytes(b, 0, b.length);
        fieldsData = new String(uncompress(b), "UTF-8");
      } else {
        // read in chars b/c we already know the length we need to read
        char[] chars = new char[toRead];
        localFieldsStream.readChars(chars, 0, toRead);
        fieldsData = new String(chars);
      }
    } catch (IOException e) {
      throw new FieldReaderException(e);
    }
  }
  return fieldsData instanceof String ? (String) fieldsData : null;
}
/**
 * Closes the temporary file, compresses the data, and removes the temporary file.
 *
 * @throws IOException
 */
@Override
public void close() throws IOException {
  byte[] buffer = new byte[chunkSize];
  tempOut.close();
  // directory with offsets of the compressed chunks mapped to their
  // real position in the decompressed stream
  IndexInput in = tempDirectory.openInput(tmpName);
  long len = closeLength = in.length();
  // write length of the file at the beginning for easier retrieval
  output.writeLong(-1);

  // write configuration
  writeConfig();
  int toRead;
  // read all data and compress it in variable block chunks
  while (len > 0) {
    if (len > buffer.length) {
      toRead = buffer.length;
    } else {
      toRead = (int) len;
    }
    // just for safety --- can be improved
    long bufferPos = in.getFilePointer();
    // read original data
    in.readBytes(buffer, 0, toRead);

    writeChunk(buffer, bufferPos, toRead);
    len -= toRead;
  }
  // now let's create the directory entry of all chunks and their original
  // position in the inflated stream
  in.close();
  if (tempDirectory.fileExists(tmpName)) {
    tempDirectory.deleteFile(tmpName);
  }
  super.close();
}
public void multipleFlushTest() throws IOException {
  final String filename = "longFile.writtenInMultipleFlushes";
  final int bufferSize = 300;
  Cache cache = cacheManager.getCache();
  cache.clear();
  InfinispanDirectory dir = new InfinispanDirectory(cache, cache, cache, INDEXNAME, 13);
  byte[] manyBytes = fillBytes(bufferSize);
  IndexOutput indexOutput = dir.createOutput(filename);
  for (int i = 0; i < 10; i++) {
    indexOutput.writeBytes(manyBytes, bufferSize);
    indexOutput.flush();
  }
  indexOutput.close();
  IndexInput input = dir.openInput(filename);
  final int finalSize = (10 * bufferSize);
  assert input.length() == finalSize;
  final byte[] resultingBuffer = new byte[finalSize];
  input.readBytes(resultingBuffer, 0, finalSize);
  int index = 0;
  for (int i = 0; i < 10; i++) {
    for (int j = 0; j < bufferSize; j++) {
      assert resultingBuffer[index++] == manyBytes[j];
    }
  }
}
/** Sole constructor. */
public BlockTreeTermsReader(PostingsReaderBase postingsReader, SegmentReadState state)
    throws IOException {
  boolean success = false;
  IndexInput indexIn = null;

  this.postingsReader = postingsReader;
  this.segment = state.segmentInfo.name;

  String termsName =
      IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_EXTENSION);
  try {
    termsIn = state.directory.openInput(termsName, state.context);
    version =
        CodecUtil.checkIndexHeader(
            termsIn,
            TERMS_CODEC_NAME,
            VERSION_START,
            VERSION_CURRENT,
            state.segmentInfo.getId(),
            state.segmentSuffix);

    String indexName =
        IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_INDEX_EXTENSION);
    indexIn = state.directory.openInput(indexName, state.context);
    CodecUtil.checkIndexHeader(
        indexIn,
        TERMS_INDEX_CODEC_NAME,
        version,
        version,
        state.segmentInfo.getId(),
        state.segmentSuffix);
    CodecUtil.checksumEntireFile(indexIn);

    // Have PostingsReader init itself
    postingsReader.init(termsIn, state);

    // NOTE: data file is too costly to verify checksum against all the bytes on open,
    // but for now we at least verify proper structure of the checksum footer: which looks
    // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
    // such as file truncation.
    CodecUtil.retrieveChecksum(termsIn);

    // Read per-field details
    seekDir(termsIn, dirOffset);
    seekDir(indexIn, indexDirOffset);

    final int numFields = termsIn.readVInt();
    if (numFields < 0) {
      throw new CorruptIndexException("invalid numFields: " + numFields, termsIn);
    }

    for (int i = 0; i < numFields; ++i) {
      final int field = termsIn.readVInt();
      final long numTerms = termsIn.readVLong();
      if (numTerms <= 0) {
        throw new CorruptIndexException("Illegal numTerms for field number: " + field, termsIn);
      }
      final int numBytes = termsIn.readVInt();
      if (numBytes < 0) {
        throw new CorruptIndexException(
            "invalid rootCode for field number: " + field + ", numBytes=" + numBytes, termsIn);
      }
      final BytesRef rootCode = new BytesRef(new byte[numBytes]);
      termsIn.readBytes(rootCode.bytes, 0, numBytes);
      rootCode.length = numBytes;
      final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
      if (fieldInfo == null) {
        throw new CorruptIndexException("invalid field number: " + field, termsIn);
      }
      final long sumTotalTermFreq =
          fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1 : termsIn.readVLong();
      final long sumDocFreq = termsIn.readVLong();
      final int docCount = termsIn.readVInt();
      final int longsSize = termsIn.readVInt();
      if (longsSize < 0) {
        throw new CorruptIndexException(
            "invalid longsSize for field: " + fieldInfo.name + ", longsSize=" + longsSize,
            termsIn);
      }
      BytesRef minTerm = readBytesRef(termsIn);
      BytesRef maxTerm = readBytesRef(termsIn);
      if (docCount < 0 || docCount > state.segmentInfo.getDocCount()) {
        // #docs with field must be <= #docs
        throw new CorruptIndexException(
            "invalid docCount: " + docCount + " maxDoc: " + state.segmentInfo.getDocCount(),
            termsIn);
      }
      if (sumDocFreq < docCount) {
        // #postings must be >= #docs with field
        throw new CorruptIndexException(
            "invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, termsIn);
      }
      if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) {
        // #positions must be >= #postings
        throw new CorruptIndexException(
            "invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq,
            termsIn);
      }
      final long indexStartFP = indexIn.readVLong();
      FieldReader previous =
          fields.put(
              fieldInfo.name,
              new FieldReader(
                  this,
                  fieldInfo,
                  numTerms,
                  rootCode,
                  sumTotalTermFreq,
                  sumDocFreq,
                  docCount,
                  indexStartFP,
                  longsSize,
                  indexIn,
                  minTerm,
                  maxTerm));
      if (previous != null) {
        throw new CorruptIndexException("duplicate field: " + fieldInfo.name, termsIn);
      }
    }

    indexIn.close();
    success = true;
  } finally {
    if (!success) {
      // this.close() will close in:
      IOUtils.closeWhileHandlingException(indexIn, this);
    }
  }
}
@Override
public void readBytes(byte[] b, int offset, int len, boolean useBuffer) throws IOException {
  delegate.readBytes(b, offset, len, useBuffer);
}
@Override
public void readBytes(byte[] b, int offset, int len) throws IOException {
  delegate.readBytes(b, offset, len);
}
@Override
public void readInternal(byte[] b, int offset, int length) throws IOException {
  simOutage();
  delegate.seek(getFilePointer());
  delegate.readBytes(b, offset, length);
}
@Test(enabled = false)
public void testReadChunks() throws Exception {
  final int BUFFER_SIZE = 64;
  Cache cache = cacheManager.getCache();
  Directory dir =
      DirectoryBuilder.newDirectoryInstance(cache, cache, cache, INDEXNAME)
          .chunkSize(BUFFER_SIZE)
          .create();

  // create file headers
  FileMetadata file1 = new FileMetadata(5);
  FileCacheKey key1 = new FileCacheKey(INDEXNAME, "Hello.txt");
  cache.put(key1, file1);

  FileMetadata file2 = new FileMetadata(5);
  FileCacheKey key2 = new FileCacheKey(INDEXNAME, "World.txt");
  cache.put(key2, file2);

  // byte array for Hello.txt
  String helloText = "Hello world. This is some text.";
  cache.put(new ChunkCacheKey(INDEXNAME, "Hello.txt", 0, BUFFER_SIZE), helloText.getBytes());

  // byte array for World.txt - should be in at least 2 chunks.
  String worldText =
      "This String should contain more than sixty four characters but less than one hundred and twenty eight.";
  assert worldText.getBytes().length > BUFFER_SIZE;
  assert worldText.getBytes().length < (2 * BUFFER_SIZE);

  byte[] buf = new byte[BUFFER_SIZE];
  System.arraycopy(worldText.getBytes(), 0, buf, 0, BUFFER_SIZE);
  cache.put(new ChunkCacheKey(INDEXNAME, "World.txt", 0, BUFFER_SIZE), buf);
  String part1 = new String(buf);

  buf = new byte[BUFFER_SIZE];
  System.arraycopy(worldText.getBytes(), BUFFER_SIZE, buf, 0, worldText.length() - BUFFER_SIZE);
  cache.put(new ChunkCacheKey(INDEXNAME, "World.txt", 1, BUFFER_SIZE), buf);
  String part2 = new String(buf);

  // make sure the generated bytes do add up!
  AssertJUnit.assertEquals(part1 + part2.trim(), worldText);

  file1.setSize(helloText.length());
  file2.setSize(worldText.length());

  Set<String> s = new HashSet<String>();
  s.add("Hello.txt");
  s.add("World.txt");
  Set other = new HashSet(Arrays.asList(dir.listAll()));

  // ok, file listing works.
  AssertJUnit.assertEquals(s, other);

  IndexInput ii = dir.openInput("Hello.txt");
  assert ii.length() == helloText.length();

  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  for (int i = 0; i < ii.length(); i++) {
    baos.write(ii.readByte());
  }
  assert new String(baos.toByteArray()).equals(helloText);

  ii = dir.openInput("World.txt");
  assert ii.length() == worldText.length();

  baos = new ByteArrayOutputStream();
  for (int i = 0; i < ii.length(); i++) {
    baos.write(ii.readByte());
  }
  assert new String(baos.toByteArray()).equals(worldText);

  // now with buffered reading
  ii = dir.openInput("Hello.txt");
  assert ii.length() == helloText.length();

  baos = new ByteArrayOutputStream();
  long toRead = ii.length();
  while (toRead > 0) {
    buf = new byte[19]; // suitably arbitrary
    int bytesRead = (int) Math.min(toRead, 19);
    ii.readBytes(buf, 0, bytesRead);
    toRead = toRead - bytesRead;
    baos.write(buf, 0, bytesRead);
  }
  assert new String(baos.toByteArray()).equals(helloText);

  ii = dir.openInput("World.txt");
  assert ii.length() == worldText.length();

  baos = new ByteArrayOutputStream();
  toRead = ii.length();
  while (toRead > 0) {
    buf = new byte[19]; // suitably arbitrary
    int bytesRead = (int) Math.min(toRead, 19);
    ii.readBytes(buf, 0, bytesRead);
    toRead = toRead - bytesRead;
    baos.write(buf, 0, bytesRead);
  }
  assert new String(baos.toByteArray()).equals(worldText);

  dir.deleteFile("Hello.txt");
  assert null == cache.get(new FileCacheKey(INDEXNAME, "Hello.txt"));
  assert null == cache.get(new ChunkCacheKey(INDEXNAME, "Hello.txt", 0, BUFFER_SIZE));

  Object ob1 = cache.get(new FileCacheKey(INDEXNAME, "World.txt"));
  Object ob2 = cache.get(new ChunkCacheKey(INDEXNAME, "World.txt", 0, BUFFER_SIZE));
  Object ob3 = cache.get(new ChunkCacheKey(INDEXNAME, "World.txt", 1, BUFFER_SIZE));

  ((DirectoryExtensions) dir).renameFile("World.txt", "HelloWorld.txt");
  assert null == cache.get(new FileCacheKey(INDEXNAME, "Hello.txt"));
  assert null == cache.get(new ChunkCacheKey(INDEXNAME, "Hello.txt", 0, BUFFER_SIZE));
  assert null == cache.get(new ChunkCacheKey(INDEXNAME, "Hello.txt", 1, BUFFER_SIZE));

  assert cache.get(new FileCacheKey(INDEXNAME, "HelloWorld.txt")).equals(ob1);
  assert cache.get(new ChunkCacheKey(INDEXNAME, "HelloWorld.txt", 0, BUFFER_SIZE)).equals(ob2);
  assert cache.get(new ChunkCacheKey(INDEXNAME, "HelloWorld.txt", 1, BUFFER_SIZE)).equals(ob3);

  // test that contents survive a move
  ii = dir.openInput("HelloWorld.txt");
  assert ii.length() == worldText.length();

  baos = new ByteArrayOutputStream();
  toRead = ii.length();
  while (toRead > 0) {
    buf = new byte[19]; // suitably arbitrary
    int bytesRead = (int) Math.min(toRead, 19);
    ii.readBytes(buf, 0, bytesRead);
    toRead = toRead - bytesRead;
    baos.write(buf, 0, bytesRead);
  }
  assert new String(baos.toByteArray()).equals(worldText);

  dir.close();
  DirectoryIntegrityCheck.verifyDirectoryStructure(cache, INDEXNAME);
}
private NumericDocValues loadNumeric(FieldInfo field) throws IOException {
  NumericEntry entry = numerics.get(field.name);
  data.seek(entry.offset);
  switch (entry.format) {
    case TABLE_COMPRESSED:
      int size = data.readVInt();
      if (size > 256) {
        throw new CorruptIndexException(
            "TABLE_COMPRESSED cannot have more than 256 distinct values, got=" + size, data);
      }
      final long decode[] = new long[size];
      for (int i = 0; i < decode.length; i++) {
        decode[i] = data.readLong();
      }
      final int formatID = data.readVInt();
      final int bitsPerValue = data.readVInt();
      final PackedInts.Reader ordsReader =
          PackedInts.getReaderNoHeader(
              data,
              PackedInts.Format.byId(formatID),
              entry.packedIntsVersion,
              maxDoc,
              bitsPerValue);
      if (!merging) {
        ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(decode) + ordsReader.ramBytesUsed());
        numericInfo.put(field.name, ordsReader);
      }
      return new NumericDocValues() {
        @Override
        public long get(int docID) {
          return decode[(int) ordsReader.get(docID)];
        }
      };
    case DELTA_COMPRESSED:
      final int blockSize = data.readVInt();
      final BlockPackedReader reader =
          new BlockPackedReader(data, entry.packedIntsVersion, blockSize, maxDoc, false);
      if (!merging) {
        ramBytesUsed.addAndGet(reader.ramBytesUsed());
        numericInfo.put(field.name, reader);
      }
      return reader;
    case UNCOMPRESSED:
      final byte bytes[] = new byte[maxDoc];
      data.readBytes(bytes, 0, bytes.length);
      if (!merging) {
        ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(bytes));
        numericInfo.put(field.name, Accountables.namedAccountable("byte array", maxDoc));
      }
      return new NumericDocValues() {
        @Override
        public long get(int docID) {
          return bytes[docID];
        }
      };
    case GCD_COMPRESSED:
      final long min = data.readLong();
      final long mult = data.readLong();
      final int quotientBlockSize = data.readVInt();
      final BlockPackedReader quotientReader =
          new BlockPackedReader(data, entry.packedIntsVersion, quotientBlockSize, maxDoc, false);
      if (!merging) {
        ramBytesUsed.addAndGet(quotientReader.ramBytesUsed());
        numericInfo.put(field.name, quotientReader);
      }
      return new NumericDocValues() {
        @Override
        public long get(int docID) {
          return min + mult * quotientReader.get(docID);
        }
      };
    default:
      throw new AssertionError();
  }
}