private void addUncompressed(Iterable<Number> values, int count) throws IOException {
  meta.writeVInt(count);
  meta.writeByte(UNCOMPRESSED); // uncompressed byte[]
  meta.writeLong(data.getFilePointer());
  for (Number nv : values) {
    data.writeByte(nv.byteValue());
  }
}
@Override
public void write(
    Directory directory,
    SegmentInfo segmentInfo,
    String segmentSuffix,
    FieldInfos infos,
    IOContext context)
    throws IOException {
  final String fileName =
      IndexFileNames.segmentFileName(
          segmentInfo.name, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION);
  try (IndexOutput output = directory.createOutput(fileName, context)) {
    CodecUtil.writeHeader(
        output, Lucene46FieldInfosFormat.CODEC_NAME, Lucene46FieldInfosFormat.FORMAT_CURRENT);
    output.writeVInt(infos.size());
    for (FieldInfo fi : infos) {
      IndexOptions indexOptions = fi.getIndexOptions();
      byte bits = 0x0;
      if (fi.hasVectors()) bits |= Lucene46FieldInfosFormat.STORE_TERMVECTOR;
      if (fi.omitsNorms()) bits |= Lucene46FieldInfosFormat.OMIT_NORMS;
      if (fi.hasPayloads()) bits |= Lucene46FieldInfosFormat.STORE_PAYLOADS;
      if (fi.getIndexOptions() != IndexOptions.NONE) {
        bits |= Lucene46FieldInfosFormat.IS_INDEXED;
        assert indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0
            || !fi.hasPayloads();
        if (indexOptions == IndexOptions.DOCS) {
          bits |= Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS;
        } else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
          bits |= Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS;
        } else if (indexOptions == IndexOptions.DOCS_AND_FREQS) {
          bits |= Lucene46FieldInfosFormat.OMIT_POSITIONS;
        }
      }
      output.writeString(fi.name);
      output.writeVInt(fi.number);
      output.writeByte(bits);

      // pack the DV types in one byte
      final byte dv = docValuesByte(fi.getDocValuesType());
      final byte nrm = docValuesByte(fi.hasNorms() ? DocValuesType.NUMERIC : DocValuesType.NONE);
      assert (dv & (~0xF)) == 0 && (nrm & (~0xF)) == 0;
      byte val = (byte) (0xff & ((nrm << 4) | dv));
      output.writeByte(val);
      output.writeLong(fi.getDocValuesGen());
      output.writeStringStringMap(fi.attributes());
    }
    CodecUtil.writeFooter(output);
  }
}
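// The "pack the DV types in one byte" step above stores two 4-bit codes in a
// single byte: the norms type in the upper nibble and the doc-values type in
// the lower nibble. A minimal sketch of that packing arithmetic follows; the
// class and method names are illustrative, not Lucene's API.
public final class NibblePacking {
  static byte pack(byte high, byte low) {
    assert (high & ~0xF) == 0 && (low & ~0xF) == 0; // each value must fit in 4 bits
    return (byte) (0xFF & ((high << 4) | low));
  }

  static byte unpackHigh(byte packed) {
    return (byte) ((packed >>> 4) & 0xF); // upper nibble, e.g. the norms type
  }

  static byte unpackLow(byte packed) {
    return (byte) (packed & 0xF); // lower nibble, e.g. the doc-values type
  }

  public static void main(String[] args) {
    byte packed = pack((byte) 0x2, (byte) 0x1);
    System.out.println(unpackHigh(packed) + " " + unpackLow(packed)); // prints "2 1"
  }
}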
// Make sure we don't somehow use more than 1 descriptor
// when reading a CFS with many subs:
public void testManySubFiles() throws IOException {
  final Directory d = newFSDirectory(_TestUtil.getTempDir("CFSManySubFiles"));
  final int FILE_COUNT = 10000;

  for (int fileIdx = 0; fileIdx < FILE_COUNT; fileIdx++) {
    IndexOutput out = d.createOutput("file." + fileIdx);
    out.writeByte((byte) fileIdx);
    out.close();
  }

  final CompoundFileWriter cfw = new CompoundFileWriter(d, "c.cfs");
  for (int fileIdx = 0; fileIdx < FILE_COUNT; fileIdx++) {
    cfw.addFile("file." + fileIdx);
  }
  cfw.close();

  final IndexInput[] ins = new IndexInput[FILE_COUNT];
  final CompoundFileReader cfr = new CompoundFileReader(d, "c.cfs");
  for (int fileIdx = 0; fileIdx < FILE_COUNT; fileIdx++) {
    ins[fileIdx] = cfr.openInput("file." + fileIdx);
  }

  for (int fileIdx = 0; fileIdx < FILE_COUNT; fileIdx++) {
    assertEquals((byte) fileIdx, ins[fileIdx].readByte());
  }

  for (int fileIdx = 0; fileIdx < FILE_COUNT; fileIdx++) {
    ins[fileIdx].close();
  }
  cfr.close();
  d.close();
}
private void demo_FSIndexInputBug(Directory fsdir, String file) throws IOException {
  // Setup the test file - we need more than 1024 bytes
  IndexOutput os = fsdir.createOutput(file);
  for (int i = 0; i < 2000; i++) {
    os.writeByte((byte) i);
  }
  os.close();

  IndexInput in = fsdir.openInput(file);

  // This read primes the buffer in IndexInput
  in.readByte();

  // Close the file
  in.close();

  // ERROR: this call should fail, but succeeds because the buffer
  // is still filled
  in.readByte();

  // ERROR: this call should fail, but succeeds for some reason as well
  in.seek(1099);

  try {
    // OK: this call correctly fails. We are now past the 1024 internal
    // buffer, so an actual IO is attempted, which fails
    in.readByte();
    fail("expected readByte() to throw exception");
  } catch (IOException e) {
    // expected exception
  }
}
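// The demo above exercises a stale-buffer failure mode: a buffered input keeps
// serving reads from memory after close(), and only a read that misses the
// buffer notices the file is gone. The hypothetical class below is a minimal
// sketch of that control-flow bug; it is not Lucene's BufferedIndexInput.
class NaiveBufferedInput {
  private final byte[] buffer = new byte[1024]; // filled from the file on first read
  private int pos;
  private boolean closed;

  byte readByte() throws IOException {
    // BUG: the in-buffer check runs before the closed check, so reads that hit
    // the buffer succeed even after close().
    if (pos < buffer.length) {
      return buffer[pos++]; // served from memory, closed or not
    }
    if (closed) {
      throw new IOException("already closed");
    }
    // A real implementation would refill the buffer from the underlying file here.
    throw new IOException("refill not implemented in this sketch");
  }

  void close() {
    closed = true; // note: the buffer is not invalidated
  }
}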
private void writeTable(
    Iterable<Number> values,
    FormatAndBits compression,
    int count,
    NormMap uniqueValues,
    int numOrds)
    throws IOException {
  data.writeVInt(PackedInts.VERSION_CURRENT);
  data.writeVInt(compression.format.getId());
  data.writeVInt(compression.bitsPerValue);

  data.writeVInt(numOrds);
  for (int i = 0; i < numOrds; i++) {
    data.writeByte(uniqueValues.values[i]);
  }

  final PackedInts.Writer writer =
      PackedInts.getWriterNoHeader(
          data,
          compression.format,
          count,
          compression.bitsPerValue,
          PackedInts.DEFAULT_BUFFER_SIZE);
  for (Number nv : values) {
    int ord = uniqueValues.ord(nv.byteValue());
    if (ord < numOrds) {
      writer.add(ord);
    } else {
      writer.add(numOrds); // collapses all ords >= numOrds into a single value
    }
  }
  writer.finish();
}
// encodes values as sparse array: keys[] and values[]
// access is log(N) where N = keys.length (slow!)
// so this is only appropriate as an exception table for patched, or when the common value is 0
// (won't be accessed by searching)
private void addIndirect(
    FieldInfo field,
    final Iterable<Number> values,
    int count,
    final NormMap uniqueValues,
    final int minOrd)
    throws IOException {
  int commonCount = uniqueValues.freqs[minOrd];

  meta.writeVInt(count - commonCount);
  meta.writeByte(INDIRECT);
  meta.writeLong(data.getFilePointer());

  // write docs with value
  writeDocsWithValue(values, uniqueValues, minOrd);

  // write actual values
  writeNormsField(
      field,
      new Iterable<Number>() {
        @Override
        public Iterator<Number> iterator() {
          return new FilterIterator<Number, Number>(values.iterator()) {
            @Override
            protected boolean predicateFunction(Number value) {
              return uniqueValues.ord(value.byteValue()) > minOrd;
            }
          };
        }
      },
      1);
}
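// The comment above notes that the sparse keys[]/values[] representation costs
// log(N) per access. A minimal sketch of that lookup, assuming keys are sorted
// doc IDs; the class is illustrative only, not the codec's actual reader.
import java.util.Arrays;

final class SparseValues {
  private final int[] keys;    // sorted doc IDs that have a non-common value
  private final byte[] values; // value for each key, parallel to keys

  SparseValues(int[] sortedKeys, byte[] values) {
    this.keys = sortedKeys;
    this.values = values;
  }

  // Returns the stored value for docID, or the common default otherwise.
  byte get(int docID, byte commonValue) {
    int slot = Arrays.binarySearch(keys, docID); // O(log N)
    return slot >= 0 ? values[slot] : commonValue;
  }
}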
/** Creates a file of the specified size with random data. */
private void createRandomFile(Directory dir, String name, int size) throws IOException {
  IndexOutput os = dir.createOutput(name);
  for (int i = 0; i < size; i++) {
    byte b = (byte) (Math.random() * 256);
    os.writeByte(b);
  }
  os.close();
}
/**
 * Creates a file of the specified size with sequential data. The first byte is the given start
 * byte; each subsequent byte is the previous value plus one, i.e. start + offset (wrapping on
 * byte overflow), where offset is the position of the byte within the file.
 */
private void createSequenceFile(Directory dir, String name, byte start, int size)
    throws IOException {
  IndexOutput os = dir.createOutput(name);
  for (int i = 0; i < size; i++) {
    os.writeByte(start);
    start++;
  }
  os.close();
}
private void addTableCompressed(
    Iterable<Number> values, FormatAndBits compression, int count, NormMap uniqueValues)
    throws IOException {
  meta.writeVInt(count);
  meta.writeByte(TABLE_COMPRESSED); // table-compressed
  meta.writeLong(data.getFilePointer());
  writeTable(values, compression, count, uniqueValues, uniqueValues.size);
}
/**
 * Creates a file of fixed size, using a RepeatableLongByteSequence to generate repeatable
 * content.
 *
 * @param dir the Directory in which to create the file
 * @param fileName the name of the file to create
 * @param contentFileSize the size of the content to create
 * @throws IOException if writing fails
 */
private void createFileWithRepeatableContent(
    Directory dir, String fileName, final int contentFileSize) throws IOException {
  IndexOutput indexOutput = dir.createOutput(fileName);
  RepeatableLongByteSequence bytesGenerator = new RepeatableLongByteSequence();
  for (int i = 0; i < contentFileSize; i++) {
    indexOutput.writeByte(bytesGenerator.nextByte());
  }
  indexOutput.close();
}
private void writeField() throws IOException {
  // remember where this field is written
  currentField.tvfPointer = tvf.getFilePointer();
  // System.out.println("Field Pointer: " + currentField.tvfPointer);

  final int size = terms.size();
  tvf.writeVInt(size);

  boolean storePositions = currentField.storePositions;
  boolean storeOffsets = currentField.storeOffsets;
  byte bits = 0x0;
  if (storePositions) bits |= STORE_POSITIONS_WITH_TERMVECTOR;
  if (storeOffsets) bits |= STORE_OFFSET_WITH_TERMVECTOR;
  tvf.writeByte(bits);

  String lastTermText = "";
  for (int i = 0; i < size; i++) {
    TVTerm term = (TVTerm) terms.elementAt(i);
    int start = StringHelper.stringDifference(lastTermText, term.termText);
    int length = term.termText.length() - start;
    tvf.writeVInt(start); // write shared prefix length
    tvf.writeVInt(length); // write delta length
    tvf.writeChars(term.termText, start, length); // write delta chars
    tvf.writeVInt(term.freq);
    lastTermText = term.termText;

    if (storePositions) {
      if (term.positions == null)
        throw new IllegalStateException("Trying to write positions that are null!");
      // use delta encoding for positions
      int position = 0;
      for (int j = 0; j < term.freq; j++) {
        tvf.writeVInt(term.positions[j] - position);
        position = term.positions[j];
      }
    }

    if (storeOffsets) {
      if (term.offsets == null)
        throw new IllegalStateException("Trying to write offsets that are null!");
      // use delta encoding for offsets
      int position = 0;
      for (int j = 0; j < term.freq; j++) {
        tvf.writeVInt(term.offsets[j].getStartOffset() - position);
        // Save the diff between the two.
        tvf.writeVInt(term.offsets[j].getEndOffset() - term.offsets[j].getStartOffset());
        position = term.offsets[j].getEndOffset();
      }
    }
  }
}
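// The position and offset loops above use delta encoding: each value is written
// as the difference from the previous one, which keeps VInts small for ascending
// sequences. A minimal round-trip sketch of the idea, independent of the
// term-vector format; the class name is illustrative.
final class DeltaCoding {
  // [3, 7, 7, 12] -> [3, 4, 0, 5]: ascending values become small deltas.
  static int[] encode(int[] ascending) {
    int[] deltas = new int[ascending.length];
    int previous = 0;
    for (int i = 0; i < ascending.length; i++) {
      deltas[i] = ascending[i] - previous;
      previous = ascending[i];
    }
    return deltas;
  }

  static int[] decode(int[] deltas) {
    int[] values = new int[deltas.length];
    int previous = 0;
    for (int i = 0; i < deltas.length; i++) {
      previous += deltas[i];
      values[i] = previous;
    }
    return values;
  }
}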
@Test
public void testVerifyingIndexOutput() throws IOException {
  Directory dir = newDirectory();
  IndexOutput output = dir.createOutput("foo.bar", IOContext.DEFAULT);
  int iters = scaledRandomIntBetween(10, 100);
  for (int i = 0; i < iters; i++) {
    BytesRef bytesRef = new BytesRef(TestUtil.randomRealisticUnicodeString(random(), 10, 1024));
    output.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length);
  }
  CodecUtil.writeFooter(output);
  output.close();

  IndexInput indexInput = dir.openInput("foo.bar", IOContext.DEFAULT);
  String checksum = Store.digestToString(CodecUtil.retrieveChecksum(indexInput));
  indexInput.seek(0);
  BytesRef ref = new BytesRef(scaledRandomIntBetween(1, 1024));
  long length = indexInput.length();
  IndexOutput verifyingOutput =
      new Store.LuceneVerifyingIndexOutput(
          new StoreFileMetaData("foo1.bar", length, checksum),
          dir.createOutput("foo1.bar", IOContext.DEFAULT));
  while (length > 0) {
    if (random().nextInt(10) == 0) {
      verifyingOutput.writeByte(indexInput.readByte());
      length--;
    } else {
      int min = (int) Math.min(length, ref.bytes.length);
      indexInput.readBytes(ref.bytes, ref.offset, min);
      verifyingOutput.writeBytes(ref.bytes, ref.offset, min);
      length -= min;
    }
  }
  Store.verify(verifyingOutput);
  verifyingOutput.writeByte((byte) 0x0);
  try {
    Store.verify(verifyingOutput);
    fail("should be a corrupted index");
  } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
    // ok
  }
  IOUtils.close(indexInput, verifyingOutput, dir);
}
@Test
public void testWriteChunksDefaultChunks() throws Exception {
  Cache cache = cacheManager.getCache();
  Directory dir = DirectoryBuilder.newDirectoryInstance(cache, cache, cache, INDEXNAME).create();

  final String testText = "This is some rubbish";
  final byte[] testTextAsBytes = testText.getBytes();

  IndexOutput io = dir.createOutput("MyNewFile.txt");
  io.writeByte((byte) 1);
  io.writeByte((byte) 2);
  io.writeByte((byte) 3);
  io.writeBytes(testTextAsBytes, testTextAsBytes.length);
  io.close();
  DirectoryIntegrityCheck.verifyDirectoryStructure(cache, INDEXNAME);

  FileCacheKey fileCacheKey = new FileCacheKey(INDEXNAME, "MyNewFile.txt");
  assert null != cache.get(fileCacheKey);
  FileMetadata metadata = (FileMetadata) cache.get(fileCacheKey);
  AssertJUnit.assertEquals(testTextAsBytes.length + 3, metadata.getSize());
  assert null != cache.get(
      new ChunkCacheKey(INDEXNAME, "MyNewFile.txt", 0, DirectoryBuilderImpl.DEFAULT_BUFFER_SIZE));

  // test contents by reading:
  IndexInput ii = dir.openInput("MyNewFile.txt");
  assert ii.readByte() == 1;
  assert ii.readByte() == 2;
  assert ii.readByte() == 3;
  byte[] buf = new byte[testTextAsBytes.length];
  ii.readBytes(buf, 0, testTextAsBytes.length);
  ii.close();

  assert testText.equals(new String(buf).trim());

  dir.close();
  DirectoryIntegrityCheck.verifyDirectoryStructure(cache, INDEXNAME);
}
/** finishes writing */
public void finish() throws IOException {
  if (count != numValues) {
    throw new IllegalStateException(
        "Wrong number of values added, expected: " + numValues + ", got: " + count);
  }
  assert !finished;
  flush();
  // pad for fast io: we actually only need this for certain BPV, but it's just 3 bytes...
  for (int i = 0; i < 3; i++) {
    output.writeByte((byte) 0);
  }
  finished = true;
}
private void addDeltaCompressed(Iterable<Number> values, int count) throws IOException {
  meta.writeVInt(count);
  meta.writeByte(DELTA_COMPRESSED); // delta-compressed
  meta.writeLong(data.getFilePointer());
  data.writeVInt(PackedInts.VERSION_CURRENT);
  data.writeVInt(BLOCK_SIZE);

  final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE);
  for (Number nv : values) {
    writer.add(nv.longValue());
  }
  writer.finish();
}
public void testWriteChunks() throws Exception {
  final int BUFFER_SIZE = 64;
  Cache cache = cacheManager.getCache();
  Directory dir =
      DirectoryBuilder.newDirectoryInstance(cache, cache, cache, INDEXNAME)
          .chunkSize(BUFFER_SIZE)
          .create();

  IndexOutput io = dir.createOutput("MyNewFile.txt");
  io.writeByte((byte) 66);
  io.writeByte((byte) 69);
  io.flush();
  io.close();

  assert dir.fileExists("MyNewFile.txt");
  assert null != cache.get(new ChunkCacheKey(INDEXNAME, "MyNewFile.txt", 0, BUFFER_SIZE));

  // test contents by reading:
  byte[] buf = new byte[9];
  IndexInput ii = dir.openInput("MyNewFile.txt");
  ii.readBytes(buf, 0, (int) ii.length());
  ii.close();

  assert new String(new byte[] {66, 69}).equals(new String(buf).trim());

  String testText = "This is some rubbish again that will span more than one chunk - one hopes. Who knows, maybe even three or four chunks.";
  io = dir.createOutput("MyNewFile.txt");
  io.seek(0);
  io.writeBytes(testText.getBytes(), 0, testText.length());
  io.close();

  // now compare.
  byte[] chunk1 =
      (byte[]) cache.get(new ChunkCacheKey(INDEXNAME, "MyNewFile.txt", 0, BUFFER_SIZE));
  byte[] chunk2 =
      (byte[]) cache.get(new ChunkCacheKey(INDEXNAME, "MyNewFile.txt", 1, BUFFER_SIZE));
  assert null != chunk1;
  assert null != chunk2;
  assert testText.equals(new String(chunk1) + new String(chunk2).trim());

  dir.close();
  DirectoryIntegrityCheck.verifyDirectoryStructure(cache, INDEXNAME);
}
@Test
public void testVerifyingIndexOutputWithBogusInput() throws IOException {
  Directory dir = newDirectory();
  int length = scaledRandomIntBetween(10, 1024);
  IndexOutput verifyingOutput =
      new Store.LuceneVerifyingIndexOutput(
          new StoreFileMetaData("foo1.bar", length, ""),
          dir.createOutput("foo1.bar", IOContext.DEFAULT));
  try {
    while (length > 0) {
      verifyingOutput.writeByte((byte) random().nextInt());
      length--;
    }
    fail("should be a corrupted index");
  } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
    // ok
  }
  IOUtils.close(verifyingOutput, dir);
}
private void insertData(CoherenceDirectory dir, String fileName) throws IOException {
  byte[] test = new byte[] {1, 2, 3, 4, 5, 6, 7, 8};
  IndexOutput indexOutput = dir.createOutput(fileName);
  indexOutput.writeBytes(new byte[] {2, 4, 6, 7, 8}, 5);
  indexOutput.writeInt(-1);
  indexOutput.writeLong(10);
  indexOutput.writeInt(0);
  indexOutput.writeInt(0);
  indexOutput.writeBytes(test, 8);
  indexOutput.writeBytes(test, 5);

  indexOutput.seek(0);
  indexOutput.writeByte((byte) 8);
  if (dir.getBucketSize() > 4) {
    indexOutput.seek(2);
    indexOutput.writeBytes(new byte[] {1, 2}, 2);
  }

  indexOutput.close();
}
// encodes common values in a table, and the rest of the values as exceptions using INDIRECT.
// the exceptions should not be accessed very often, since the values are uncommon
private void addPatchedTable(
    FieldInfo field,
    final Iterable<Number> values,
    final int numCommonValues,
    int commonValuesCount,
    int count,
    final NormMap uniqueValues)
    throws IOException {
  meta.writeVInt(count);
  meta.writeByte(PATCHED_TABLE);
  meta.writeLong(data.getFilePointer());

  assert numCommonValues == 3 || numCommonValues == 15;
  FormatAndBits compression = fastestFormatAndBits(numCommonValues);
  writeTable(values, compression, count, uniqueValues, numCommonValues);

  meta.writeVInt(field.number);
  addIndirect(field, values, count - commonValuesCount, uniqueValues, numCommonValues);
}
private void insertData(ByteBufferDirectory dir, int bufferSizeInBytes) throws IOException {
  byte[] test = new byte[] {1, 2, 3, 4, 5, 6, 7, 8};
  IndexOutput indexOutput = dir.createOutput("value1", IOContext.DEFAULT);
  indexOutput.writeBytes(new byte[] {2, 4, 6, 7, 8}, 5);
  indexOutput.writeInt(-1);
  indexOutput.writeLong(10);
  indexOutput.writeInt(0);
  indexOutput.writeInt(0);
  indexOutput.writeBytes(test, 8);
  indexOutput.writeBytes(test, 5);

  indexOutput.seek(0);
  indexOutput.writeByte((byte) 8);
  if (bufferSizeInBytes > 4) {
    indexOutput.seek(2);
    indexOutput.writeBytes(new byte[] {1, 2}, 2);
  }

  indexOutput.close();
}
// encodes only uncommon values in a sparse bitset
// access is constant time, and the common case is predictable
// exceptions nest either to CONST (if there are only 2 values), or INDIRECT (if there are > 2
// values)
private void addPatchedBitset(
    FieldInfo field, final Iterable<Number> values, int count, NormMap uniqueValues)
    throws IOException {
  int commonCount = uniqueValues.freqs[0];

  meta.writeVInt(count - commonCount);
  meta.writeByte(PATCHED_BITSET);
  meta.writeLong(data.getFilePointer());

  // write docs with value
  writeDocsWithValue(values, uniqueValues, 0);

  // write exceptions: only two cases make sense
  // bpv = 1 (folded into sparse bitset already)
  // bpv > 1 (add indirect exception table)
  meta.writeVInt(field.number);
  if (uniqueValues.size == 2) {
    // special case: implicit in bitset
    addConstant(uniqueValues.values[1]);
  } else {
    // exception table
    addIndirect(field, values, count, uniqueValues, 0);
  }
}
@Override
public void write(int b) throws IOException {
  output.writeByte((byte) b);
}
@Override
public void writeByte(byte b) throws IOException {
  delegate.writeByte(b);
}
private void write(Directory directory) throws IOException {
  long nextGeneration = getNextPendingGeneration();
  String segmentFileName =
      IndexFileNames.fileNameFromGeneration(IndexFileNames.PENDING_SEGMENTS, "", nextGeneration);

  // Always advance the generation on write:
  generation = nextGeneration;

  IndexOutput segnOutput = null;
  boolean success = false;

  try {
    segnOutput = directory.createOutput(segmentFileName, IOContext.DEFAULT);
    CodecUtil.writeIndexHeader(
        segnOutput,
        "segments",
        VERSION_CURRENT,
        StringHelper.randomId(),
        Long.toString(nextGeneration, Character.MAX_RADIX));
    segnOutput.writeVInt(Version.LATEST.major);
    segnOutput.writeVInt(Version.LATEST.minor);
    segnOutput.writeVInt(Version.LATEST.bugfix);

    segnOutput.writeLong(version);
    segnOutput.writeInt(counter); // write counter
    segnOutput.writeInt(size());

    if (size() > 0) {
      Version minSegmentVersion = null;

      // We do a separate loop up front so we can write the minSegmentVersion before
      // any SegmentInfo; this makes it cleaner to throw IndexFormatTooOldExc at read time:
      for (SegmentCommitInfo siPerCommit : this) {
        Version segmentVersion = siPerCommit.info.getVersion();
        if (minSegmentVersion == null || segmentVersion.onOrAfter(minSegmentVersion) == false) {
          minSegmentVersion = segmentVersion;
        }
      }

      segnOutput.writeVInt(minSegmentVersion.major);
      segnOutput.writeVInt(minSegmentVersion.minor);
      segnOutput.writeVInt(minSegmentVersion.bugfix);
    }

    // write infos
    for (SegmentCommitInfo siPerCommit : this) {
      SegmentInfo si = siPerCommit.info;
      segnOutput.writeString(si.name);
      byte[] segmentID = si.getId();
      // TODO: remove this in lucene 6, we don't need to include 4.x segments in commits anymore
      if (segmentID == null) {
        segnOutput.writeByte((byte) 0);
      } else {
        if (segmentID.length != StringHelper.ID_LENGTH) {
          throw new IllegalStateException(
              "cannot write segment: invalid id segment="
                  + si.name
                  + " id="
                  + StringHelper.idToString(segmentID));
        }
        segnOutput.writeByte((byte) 1);
        segnOutput.writeBytes(segmentID, segmentID.length);
      }
      segnOutput.writeString(si.getCodec().getName());
      segnOutput.writeLong(siPerCommit.getDelGen());
      int delCount = siPerCommit.getDelCount();
      if (delCount < 0 || delCount > si.maxDoc()) {
        throw new IllegalStateException(
            "cannot write segment: invalid maxDoc segment="
                + si.name
                + " maxDoc="
                + si.maxDoc()
                + " delCount="
                + delCount);
      }
      segnOutput.writeInt(delCount);
      segnOutput.writeLong(siPerCommit.getFieldInfosGen());
      segnOutput.writeLong(siPerCommit.getDocValuesGen());
      segnOutput.writeSetOfStrings(siPerCommit.getFieldInfosFiles());
      final Map<Integer, Set<String>> dvUpdatesFiles = siPerCommit.getDocValuesUpdatesFiles();
      segnOutput.writeInt(dvUpdatesFiles.size());
      for (Entry<Integer, Set<String>> e : dvUpdatesFiles.entrySet()) {
        segnOutput.writeInt(e.getKey());
        segnOutput.writeSetOfStrings(e.getValue());
      }
    }
    segnOutput.writeMapOfStrings(userData);
    CodecUtil.writeFooter(segnOutput);
    segnOutput.close();
    directory.sync(Collections.singleton(segmentFileName));
    success = true;
  } finally {
    if (success) {
      pendingCommit = true;
    } else {
      // We hit an exception above; try to close the file
      // but suppress any exception:
      IOUtils.closeWhileHandlingException(segnOutput);

      // Try not to leave a truncated segments_N file in
      // the index:
      IOUtils.deleteFilesIgnoringExceptions(directory, segmentFileName);
    }
  }
}
@Override
public void writeByte(byte b) throws IOException {
  wrapped.writeByte(b);
}
private void addConstant(byte constant) throws IOException {
  meta.writeVInt(0);
  meta.writeByte(CONST_COMPRESSED);
  meta.writeLong(constant);
}
@Test
public void testWriteUsingSeekMethod() throws IOException {
  final int BUFFER_SIZE = 64;
  Cache cache = cacheManager.getCache();
  Directory dir =
      DirectoryBuilder.newDirectoryInstance(cache, cache, cache, INDEXNAME)
          .chunkSize(BUFFER_SIZE)
          .create();

  String fileName = "SomeText.txt";
  IndexOutput io = dir.createOutput(fileName);
  RepeatableLongByteSequence bytesGenerator = new RepeatableLongByteSequence();
  // write repeatable content
  final int REPEATABLE_BUFFER_SIZE = 1501;
  for (int i = 0; i < REPEATABLE_BUFFER_SIZE; i++) {
    io.writeByte(bytesGenerator.nextByte());
  }
  io.flush();
  assert io.length() == REPEATABLE_BUFFER_SIZE;

  // text to write into the file with repeatable content
  final String someText = "This is some text";
  final byte[] someTextAsBytes = someText.getBytes();

  // four positions, in random order, at which someText is written: at the beginning of the
  // file, at the end of the file, within a single chunk, and spanning two chunks
  final int[] pointers = {0, 635, REPEATABLE_BUFFER_SIZE, 135};
  for (int i = 0; i < pointers.length; i++) {
    io.seek(pointers[i]);
    io.writeBytes(someTextAsBytes, someTextAsBytes.length);
  }
  io.close();
  bytesGenerator.reset();

  final long finalSize = REPEATABLE_BUFFER_SIZE + someTextAsBytes.length;
  assert io.length() == finalSize;
  assert io.length()
      == DirectoryIntegrityCheck.deepCountFileSize(new FileCacheKey(INDEXNAME, fileName), cache);

  int indexPointer = 0;
  Arrays.sort(pointers);
  byte[] buffer = null;
  int chunkIndex = -1;
  // verify that the stream equals the generated repeatable content, including the edits at the
  // pointed positions
  for (int i = 0; i < REPEATABLE_BUFFER_SIZE + someTextAsBytes.length; i++) {
    if (i % BUFFER_SIZE == 0) {
      buffer =
          (byte[]) cache.get(new ChunkCacheKey(INDEXNAME, fileName, ++chunkIndex, BUFFER_SIZE));
    }

    byte predictableByte = bytesGenerator.nextByte();
    if (i < pointers[indexPointer]) {
      // assert the predictable content
      AssertJUnit.assertEquals(predictableByte, buffer[i % BUFFER_SIZE]);
    } else if (pointers[indexPointer] <= i
        && i < pointers[indexPointer] + someTextAsBytes.length) {
      // assert someText
      AssertJUnit.assertEquals(
          someTextAsBytes[i - pointers[indexPointer]], buffer[i % BUFFER_SIZE]);
    }

    if (i == pointers[indexPointer] + someTextAsBytes.length) {
      // advance to the next pointer
      indexPointer++;
    }
  }

  dir.close();
  DirectoryIntegrityCheck.verifyDirectoryStructure(cache, INDEXNAME);
}
@Override
public void writeByte(byte b) throws IOException {
  tempOut.writeByte(b);
}
/**
 * Called once per field per document if term vectors are enabled, to write the vectors to
 * RAMOutputStream, which is then quickly flushed to the real term vectors files in the Directory.
 */
@Override
void finish() throws IOException {
  assert docState.testPoint("TermVectorsTermsWriterPerField.finish start");

  final int numPostings = termsHashPerField.bytesHash.size();
  final BytesRef flushTerm = perThread.flushTerm;

  assert numPostings >= 0;

  if (!doVectors || numPostings == 0) return;

  if (numPostings > maxNumPostings) maxNumPostings = numPostings;

  final IndexOutput tvf = perThread.doc.perDocTvf;

  // This is called once, after inverting all occurrences
  // of a given field in the doc. At this point we flush
  // our hash into the DocWriter.
  assert fieldInfo.storeTermVector;
  assert perThread.vectorFieldsInOrder(fieldInfo);

  perThread.doc.addField(termsHashPerField.fieldInfo.number);
  TermVectorsPostingsArray postings = (TermVectorsPostingsArray) termsHashPerField.postingsArray;

  // TODO: we may want to make this sort in same order
  // as Codec's terms dict?
  final int[] termIDs =
      termsHashPerField.sortPostings(BytesRef.getUTF8SortedAsUnicodeComparator());

  tvf.writeVInt(numPostings);
  byte bits = 0x0;
  if (doVectorPositions) bits |= TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR;
  if (doVectorOffsets) bits |= TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR;
  tvf.writeByte(bits);

  int lastLen = 0;
  byte[] lastBytes = null;
  int lastStart = 0;

  final ByteSliceReader reader = perThread.vectorSliceReader;
  final ByteBlockPool termBytePool = perThread.termsHashPerThread.termBytePool;

  for (int j = 0; j < numPostings; j++) {
    final int termID = termIDs[j];
    final int freq = postings.freqs[termID];

    // Get BytesRef
    termBytePool.setBytesRef(flushTerm, postings.textStarts[termID]);

    // Compute common byte prefix between last term and
    // this term
    int prefix = 0;
    if (j > 0) {
      while (prefix < lastLen && prefix < flushTerm.length) {
        if (lastBytes[lastStart + prefix] != flushTerm.bytes[flushTerm.offset + prefix]) {
          break;
        }
        prefix++;
      }
    }

    lastLen = flushTerm.length;
    lastBytes = flushTerm.bytes;
    lastStart = flushTerm.offset;

    final int suffix = flushTerm.length - prefix;
    tvf.writeVInt(prefix);
    tvf.writeVInt(suffix);
    tvf.writeBytes(flushTerm.bytes, lastStart + prefix, suffix);
    tvf.writeVInt(freq);

    if (doVectorPositions) {
      termsHashPerField.initReader(reader, termID, 0);
      reader.writeTo(tvf);
    }

    if (doVectorOffsets) {
      termsHashPerField.initReader(reader, termID, 1);
      reader.writeTo(tvf);
    }
  }

  termsHashPerField.reset();

  // NOTE: we clear, per-field, at the thread level,
  // because term vectors fully write themselves on each
  // field; this saves RAM (eg if large doc has two large
  // fields w/ term vectors on) because we recycle/reuse
  // all RAM after each field:
  perThread.termsHashPerThread.reset(false);
}
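// The prefix/suffix writes above implement front coding: since terms are sorted,
// each term stores only the byte count it shares with the previous term plus its
// remaining suffix bytes. A minimal standalone sketch of the common-prefix
// computation; the class name is illustrative.
final class FrontCoding {
  // Returns the number of leading bytes that a and b share.
  static int commonPrefix(byte[] a, byte[] b) {
    int limit = Math.min(a.length, b.length);
    int prefix = 0;
    while (prefix < limit && a[prefix] == b[prefix]) {
      prefix++;
    }
    return prefix;
  }

  public static void main(String[] args) {
    byte[] last = "apple".getBytes();
    byte[] next = "apply".getBytes();
    int prefix = commonPrefix(last, next); // 4
    // A writer would emit: prefix=4, suffix=1, then the byte "y".
    System.out.println(prefix + " shared byte(s), " + (next.length - prefix) + " suffix byte(s)");
  }
}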