@Override
public void close() throws IOException {
  IOException ioe = null;
  try {
    final long dirStart = out.getFilePointer();
    final long indexDirStart = indexOut.getFilePointer();

    out.writeVInt(fields.size());

    for (FieldMetaData field : fields) {
      // System.out.println("  field " + field.fieldInfo.name + " " + field.numTerms + " terms");
      out.writeVInt(field.fieldInfo.number);
      out.writeVLong(field.numTerms);
      out.writeVInt(field.rootCode.length);
      out.writeBytes(field.rootCode.bytes, field.rootCode.offset, field.rootCode.length);
      if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
        out.writeVLong(field.sumTotalTermFreq);
      }
      out.writeVLong(field.sumDocFreq);
      out.writeVInt(field.docCount);
      indexOut.writeVLong(field.indexStartFP);
    }
    writeTrailer(out, dirStart);
    writeIndexTrailer(indexOut, indexDirStart);
  } catch (IOException ioe2) {
    ioe = ioe2;
  } finally {
    IOUtils.closeWhileHandlingException(ioe, out, indexOut, postingsWriter);
  }
}
@Override
public void write(Directory dir, SegmentInfo si, IOContext context) throws IOException {
  String dataFile = IndexFileNames.segmentFileName(si.name, "", DATA_EXTENSION);

  int numFiles = si.files().size();
  String[] names = si.files().toArray(new String[numFiles]);
  Arrays.sort(names);
  long[] startOffsets = new long[numFiles];
  long[] endOffsets = new long[numFiles];

  BytesRefBuilder scratch = new BytesRefBuilder();

  try (IndexOutput out = dir.createOutput(dataFile, context)) {
    for (int i = 0; i < names.length; i++) {
      // write header for file
      SimpleTextUtil.write(out, HEADER);
      SimpleTextUtil.write(out, names[i], scratch);
      SimpleTextUtil.writeNewline(out);

      // write bytes for file
      startOffsets[i] = out.getFilePointer();
      try (IndexInput in = dir.openInput(names[i], IOContext.READONCE)) {
        out.copyBytes(in, in.length());
      }
      endOffsets[i] = out.getFilePointer();
    }

    long tocPos = out.getFilePointer();

    // write CFS table
    SimpleTextUtil.write(out, TABLE);
    SimpleTextUtil.write(out, Integer.toString(numFiles), scratch);
    SimpleTextUtil.writeNewline(out);

    for (int i = 0; i < names.length; i++) {
      SimpleTextUtil.write(out, TABLENAME);
      SimpleTextUtil.write(out, names[i], scratch);
      SimpleTextUtil.writeNewline(out);

      SimpleTextUtil.write(out, TABLESTART);
      SimpleTextUtil.write(out, Long.toString(startOffsets[i]), scratch);
      SimpleTextUtil.writeNewline(out);

      SimpleTextUtil.write(out, TABLEEND);
      SimpleTextUtil.write(out, Long.toString(endOffsets[i]), scratch);
      SimpleTextUtil.writeNewline(out);
    }

    DecimalFormat df =
        new DecimalFormat(OFFSETPATTERN, DecimalFormatSymbols.getInstance(Locale.ROOT));
    SimpleTextUtil.write(out, TABLEPOS);
    SimpleTextUtil.write(out, df.format(tocPos), scratch);
    SimpleTextUtil.writeNewline(out);
  }
}
@Override
public void resetSkip() {
  super.resetSkip();
  Arrays.fill(lastSkipDoc, 0);
  Arrays.fill(lastSkipPayloadLength, -1); // we don't have to write the first length in the skip list
  Arrays.fill(lastSkipOffsetLength, -1); // we don't have to write the first length in the skip list
  Arrays.fill(lastSkipFreqPointer, freqOutput.getFilePointer());
  if (proxOutput != null) {
    Arrays.fill(lastSkipProxPointer, proxOutput.getFilePointer());
  }
}
/** Sets the values for the current skip data. */
public void setSkipData(
    int doc, boolean storePayloads, int payloadLength, boolean storeOffsets, int offsetLength) {
  assert storePayloads || payloadLength == -1;
  assert storeOffsets || offsetLength == -1;
  this.curDoc = doc;
  this.curStorePayloads = storePayloads;
  this.curPayloadLength = payloadLength;
  this.curStoreOffsets = storeOffsets;
  this.curOffsetLength = offsetLength;
  this.curFreqPointer = freqOutput.getFilePointer();
  if (proxOutput != null) {
    this.curProxPointer = proxOutput.getFilePointer();
  }
}
// Encodes values as a sparse array: keys[] and values[].
// Access is log(N) where N = keys.length (slow!), so this is only appropriate as an
// exception table for PATCHED, or when the common value is 0 (won't be accessed by searching).
private void addIndirect(
    FieldInfo field,
    final Iterable<Number> values,
    int count,
    final NormMap uniqueValues,
    final int minOrd)
    throws IOException {
  int commonCount = uniqueValues.freqs[minOrd];

  meta.writeVInt(count - commonCount);
  meta.writeByte(INDIRECT);
  meta.writeLong(data.getFilePointer());

  // write docs with value
  writeDocsWithValue(values, uniqueValues, minOrd);

  // write actual values
  writeNormsField(
      field,
      new Iterable<Number>() {
        @Override
        public Iterator<Number> iterator() {
          return new FilterIterator<Number, Number>(values.iterator()) {
            @Override
            protected boolean predicateFunction(Number value) {
              return uniqueValues.ord(value.byteValue()) > minOrd;
            }
          };
        }
      },
      1);
}
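// A minimal, hypothetical reader-side sketch (not part of the actual codec) illustrating why
// INDIRECT lookups cost O(log N): keys[] holds the sorted doc ids that carry an uncommon value,
// values[] the matching norms, and a miss falls back to the common value. The method name and
// parameters below are illustrative assumptions, not real Lucene API.
static byte indirectLookup(int[] keys, byte[] values, byte commonValue, int doc) {
  int idx = java.util.Arrays.binarySearch(keys, doc); // log(keys.length) probes
  return idx >= 0 ? values[idx] : commonValue;        // doc not present -> common value
}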
public void close() throws IOException {
  try {
    final long dirStart = out.getFilePointer();
    final int fieldCount = fields.size();

    int nonNullFieldCount = 0;
    for (int i = 0; i < fieldCount; i++) {
      FSTFieldWriter field = fields.get(i);
      if (field.fst != null) {
        nonNullFieldCount++;
      }
    }

    out.writeVInt(nonNullFieldCount);
    for (int i = 0; i < fieldCount; i++) {
      FSTFieldWriter field = fields.get(i);
      if (field.fst != null) {
        out.writeVInt(field.fieldInfo.number);
        out.writeVLong(field.indexStart);
      }
    }
    writeTrailer(dirStart);
  } finally {
    out.close();
  }
}
private void addUncompressed(Iterable<Number> values, int count) throws IOException {
  meta.writeVInt(count);
  meta.writeByte(UNCOMPRESSED); // uncompressed byte[]
  meta.writeLong(data.getFilePointer());
  for (Number nv : values) {
    data.writeByte(nv.byteValue());
  }
}
/**
 * This test verifies that writes larger than the size of the output buffer correctly increment
 * the file pointer.
 */
public void testLargeWrites() throws IOException {
  IndexOutput os = dir.createOutput("testBufferStart.txt");
  byte[] largeBuf = new byte[2048];
  for (int i = 0; i < largeBuf.length; i++) {
    largeBuf[i] = (byte) (Math.random() * 256);
  }
  long currentPos = os.getFilePointer();
  os.writeBytes(largeBuf, largeBuf.length);
  try {
    assertEquals(currentPos + largeBuf.length, os.getFilePointer());
  } finally {
    os.close();
  }
}
@Override
public void flushTermsBlock(int start, int count) throws IOException {
  if (DEBUG) {
    System.out.println(
        "PW: flushTermsBlock start=" + start + " count=" + count
            + " pendingTerms.size()=" + pendingTerms.size());
  }

  int wrappedCount = 0;
  assert buffer.getFilePointer() == 0;
  assert start >= count;

  final int limit = pendingTerms.size() - start + count;

  for (int idx = pendingTerms.size() - start; idx < limit; idx++) {
    final PendingTerm term = pendingTerms.get(idx);
    if (term == null) {
      wrappedCount++;
    } else {
      buffer.writeVInt(term.bytes.length);
      buffer.writeBytes(term.bytes, 0, term.bytes.length);
    }
  }

  termsOut.writeVInt((int) buffer.getFilePointer());
  buffer.writeTo(termsOut);
  buffer.reset();

  // TODO: this could be somewhat costly since
  // pendingTerms.size() could be biggish?
  int futureWrappedCount = 0;
  final int limit2 = pendingTerms.size();
  for (int idx = limit; idx < limit2; idx++) {
    if (pendingTerms.get(idx) == null) {
      futureWrappedCount++;
    }
  }

  // Remove the terms we just wrote:
  pendingTerms.subList(pendingTerms.size() - start, limit).clear();

  if (DEBUG) {
    System.out.println(
        "PW: len=" + buffer.getFilePointer() + " fp=" + termsOut.getFilePointer()
            + " futureWrappedCount=" + futureWrappedCount + " wrappedCount=" + wrappedCount);
  }

  // TODO: can we avoid calling this if all terms
  // were inlined...?  Eg for a "primary key" field, the
  // wrapped codec is never invoked...
  wrappedPostingsWriter.flushTermsBlock(futureWrappedCount + wrappedCount, wrappedCount);
}
private void addTableCompressed(
    Iterable<Number> values, FormatAndBits compression, int count, NormMap uniqueValues)
    throws IOException {
  meta.writeVInt(count);
  meta.writeByte(TABLE_COMPRESSED); // table-compressed
  meta.writeLong(data.getFilePointer());
  writeTable(values, compression, count, uniqueValues, uniqueValues.size);
}
public void testEncodeDecode() throws IOException {
  final int iterations = RandomInts.randomIntBetween(random(), 1, 1000);
  final float acceptableOverheadRatio = random().nextFloat();
  final int[] values = new int[(iterations - 1) * BLOCK_SIZE + ForUtil.MAX_DATA_SIZE];
  for (int i = 0; i < iterations; ++i) {
    final int bpv = random().nextInt(32);
    if (bpv == 0) {
      final int value = RandomInts.randomIntBetween(random(), 0, Integer.MAX_VALUE);
      for (int j = 0; j < BLOCK_SIZE; ++j) {
        values[i * BLOCK_SIZE + j] = value;
      }
    } else {
      for (int j = 0; j < BLOCK_SIZE; ++j) {
        values[i * BLOCK_SIZE + j] =
            RandomInts.randomIntBetween(random(), 0, (int) PackedInts.maxValue(bpv));
      }
    }
  }

  final Directory d = new RAMDirectory();
  final long endPointer;

  { // encode
    IndexOutput out = d.createOutput("test.bin", IOContext.DEFAULT);
    final ForUtil forUtil = new ForUtil(acceptableOverheadRatio, out);
    for (int i = 0; i < iterations; ++i) {
      forUtil.writeBlock(
          Arrays.copyOfRange(values, i * BLOCK_SIZE, values.length),
          new byte[MAX_ENCODED_SIZE],
          out);
    }
    endPointer = out.getFilePointer();
    out.close();
  }

  { // decode
    IndexInput in = d.openInput("test.bin", IOContext.READONCE);
    final ForUtil forUtil = new ForUtil(in);
    for (int i = 0; i < iterations; ++i) {
      if (random().nextBoolean()) {
        forUtil.skipBlock(in);
        continue;
      }
      final int[] restored = new int[MAX_DATA_SIZE];
      forUtil.readBlock(in, new byte[MAX_ENCODED_SIZE], restored);
      assertArrayEquals(
          Arrays.copyOfRange(values, i * BLOCK_SIZE, (i + 1) * BLOCK_SIZE),
          Arrays.copyOf(restored, BLOCK_SIZE));
    }
    assertEquals(endPointer, in.getFilePointer());
    in.close();
  }
}
private void writeField() throws IOException {
  // remember where this field is written
  currentField.tvfPointer = tvf.getFilePointer();
  // System.out.println("Field Pointer: " + currentField.tvfPointer);

  final int size = terms.size();
  tvf.writeVInt(size);

  boolean storePositions = currentField.storePositions;
  boolean storeOffsets = currentField.storeOffsets;
  byte bits = 0x0;
  if (storePositions) bits |= STORE_POSITIONS_WITH_TERMVECTOR;
  if (storeOffsets) bits |= STORE_OFFSET_WITH_TERMVECTOR;
  tvf.writeByte(bits);

  String lastTermText = "";
  for (int i = 0; i < size; i++) {
    TVTerm term = (TVTerm) terms.elementAt(i);
    int start = StringHelper.stringDifference(lastTermText, term.termText);
    int length = term.termText.length() - start;
    tvf.writeVInt(start); // write shared prefix length
    tvf.writeVInt(length); // write delta length
    tvf.writeChars(term.termText, start, length); // write delta chars
    tvf.writeVInt(term.freq);
    lastTermText = term.termText;

    if (storePositions) {
      if (term.positions == null) {
        throw new IllegalStateException("Trying to write positions that are null!");
      }
      // use delta encoding for positions
      int position = 0;
      for (int j = 0; j < term.freq; j++) {
        tvf.writeVInt(term.positions[j] - position);
        position = term.positions[j];
      }
    }

    if (storeOffsets) {
      if (term.offsets == null) {
        throw new IllegalStateException("Trying to write offsets that are null!");
      }
      // use delta encoding for offsets
      int position = 0;
      for (int j = 0; j < term.freq; j++) {
        tvf.writeVInt(term.offsets[j].getStartOffset() - position);
        // save the diff between the end and start offsets
        tvf.writeVInt(term.offsets[j].getEndOffset() - term.offsets[j].getStartOffset());
        position = term.offsets[j].getEndOffset();
      }
    }
  }
}
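// Hedged reader-side sketch (illustrative only, not the actual TermVectorsReader): positions
// were written above as deltas from the previous position, so decoding accumulates the VInts
// back into absolute positions. Assumes the same Lucene store imports as the writer above.
static int[] decodePositions(IndexInput tvf, int freq) throws IOException {
  int[] positions = new int[freq];
  int position = 0;
  for (int j = 0; j < freq; j++) {
    position += tvf.readVInt(); // undo the delta encoding used by writeField()
    positions[j] = position;
  }
  return positions;
}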
@Override
public void messageReceived(final RecoveryFileChunkRequest request, TransportChannel channel)
    throws Exception {
  try (RecoveriesCollection.StatusRef statusRef =
      onGoingRecoveries.getStatusSafe(request.recoveryId(), request.shardId())) {
    final RecoveryStatus recoveryStatus = statusRef.status();
    final Store store = recoveryStatus.store();
    recoveryStatus.state().getTranslog().totalOperations(request.totalTranslogOps());
    final RecoveryState.Index indexState = recoveryStatus.state().getIndex();
    if (request.sourceThrottleTimeInNanos() != RecoveryState.Index.UNKNOWN) {
      indexState.addSourceThrottling(request.sourceThrottleTimeInNanos());
    }

    IndexOutput indexOutput;
    if (request.position() == 0) {
      indexOutput = recoveryStatus.openAndPutIndexOutput(request.name(), request.metadata(), store);
    } else {
      indexOutput = recoveryStatus.getOpenIndexOutput(request.name());
    }

    BytesReference content = request.content();
    if (!content.hasArray()) {
      content = content.toBytesArray();
    }

    RateLimiter rl = recoverySettings.rateLimiter();
    if (rl != null) {
      long bytes = bytesSinceLastPause.addAndGet(content.length());
      if (bytes > rl.getMinPauseCheckBytes()) {
        // Time to pause
        bytesSinceLastPause.addAndGet(-bytes);
        long throttleTimeInNanos = rl.pause(bytes);
        indexState.addTargetThrottling(throttleTimeInNanos);
        recoveryStatus.indexShard().recoveryStats().addThrottleTime(throttleTimeInNanos);
      }
    }

    indexOutput.writeBytes(content.array(), content.arrayOffset(), content.length());
    indexState.addRecoveredBytesToFile(request.name(), content.length());

    if (indexOutput.getFilePointer() >= request.length() || request.lastChunk()) {
      try {
        Store.verify(indexOutput);
      } finally {
        // we are done
        indexOutput.close();
      }
      // write the checksum
      recoveryStatus.legacyChecksums().add(request.metadata());
      final String temporaryFileName = recoveryStatus.getTempNameForFile(request.name());
      assert Arrays.asList(store.directory().listAll()).contains(temporaryFileName);
      store.directory().sync(Collections.singleton(temporaryFileName));
      IndexOutput remove = recoveryStatus.removeOpenIndexOutputs(request.name());
      assert remove == null || remove == indexOutput; // remove may be null if we already finished
    }
  }
  channel.sendResponse(TransportResponse.Empty.INSTANCE);
}
private void addDeltaCompressed(Iterable<Number> values, int count) throws IOException {
  meta.writeVInt(count);
  meta.writeByte(DELTA_COMPRESSED); // delta-compressed
  meta.writeLong(data.getFilePointer());
  data.writeVInt(PackedInts.VERSION_CURRENT);
  data.writeVInt(BLOCK_SIZE);

  final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE);
  for (Number nv : values) {
    writer.add(nv.longValue());
  }
  writer.finish();
}
// Encodes common values in a table, and the rest of the values as exceptions using INDIRECT.
// The exceptions should not be accessed very often, since the values are uncommon.
private void addPatchedTable(
    FieldInfo field,
    final Iterable<Number> values,
    final int numCommonValues,
    int commonValuesCount,
    int count,
    final NormMap uniqueValues)
    throws IOException {
  meta.writeVInt(count);
  meta.writeByte(PATCHED_TABLE);
  meta.writeLong(data.getFilePointer());

  assert numCommonValues == 3 || numCommonValues == 15;
  FormatAndBits compression = fastestFormatAndBits(numCommonValues);

  writeTable(values, compression, count, uniqueValues, numCommonValues);

  meta.writeVInt(field.number);
  addIndirect(field, values, count - commonValuesCount, uniqueValues, numCommonValues);
}
// Encodes only uncommon values in a sparse bitset.
// Access is constant time, and the common case is predictable.
// Exceptions nest either to CONST (if there are only 2 values) or INDIRECT (if there are > 2 values).
private void addPatchedBitset(
    FieldInfo field, final Iterable<Number> values, int count, NormMap uniqueValues)
    throws IOException {
  int commonCount = uniqueValues.freqs[0];

  meta.writeVInt(count - commonCount);
  meta.writeByte(PATCHED_BITSET);
  meta.writeLong(data.getFilePointer());

  // write docs with value
  writeDocsWithValue(values, uniqueValues, 0);

  // write exceptions: only two cases make sense
  // bpv = 1 (folded into sparse bitset already)
  // bpv > 1 (add indirect exception table)
  meta.writeVInt(field.number);
  if (uniqueValues.size == 2) {
    // special case: implicit in bitset
    addConstant(uniqueValues.values[1]);
  } else {
    // exception table
    addIndirect(field, values, count, uniqueValues, 0);
  }
}
@Override
public long getFilePointer() {
  return tempOut.getFilePointer();
}
@Override
public long getFilePointer() {
  return wrapped.getFilePointer();
}
@Override
public long getFilePointer() {
  return delegate.getFilePointer();
}
public final void openDocument() throws IOException {
  closeDocument();
  currentDocPointer = tvd.getFilePointer();
}