private void writeDoc() throws IOException {
  if (isFieldOpen()) {
    throw new IllegalStateException("Field is still open while writing document");
  }
  // System.out.println("Writing doc pointer: " + currentDocPointer);

  // write document index record
  tvx.writeLong(currentDocPointer);

  // write document data record
  final int size = fields.size();

  // write the number of fields
  tvd.writeVInt(size);

  // write field numbers
  for (int i = 0; i < size; i++) {
    TVField field = (TVField) fields.elementAt(i);
    tvd.writeVInt(field.number);
  }

  // write field pointers, delta-encoded against the previous pointer
  long lastFieldPointer = 0;
  for (int i = 0; i < size; i++) {
    TVField field = (TVField) fields.elementAt(i);
    tvd.writeVLong(field.tvfPointer - lastFieldPointer);
    lastFieldPointer = field.tvfPointer;
  }
  // System.out.println("After writing doc pointer: " + tvx.getFilePointer());
}
// Encodes values as a sparse array: keys[] and values[].
// Access is O(log N) where N = keys.length (slow!), so this is only appropriate
// as an exception table for the patched encodings, or when the common value is 0
// (and therefore won't be accessed by searching).
private void addIndirect(
    FieldInfo field,
    final Iterable<Number> values,
    int count,
    final NormMap uniqueValues,
    final int minOrd)
    throws IOException {
  int commonCount = uniqueValues.freqs[minOrd];
  meta.writeVInt(count - commonCount);
  meta.writeByte(INDIRECT);
  meta.writeLong(data.getFilePointer());

  // write docs with value
  writeDocsWithValue(values, uniqueValues, minOrd);

  // write actual values
  writeNormsField(
      field,
      new Iterable<Number>() {
        @Override
        public Iterator<Number> iterator() {
          return new FilterIterator<Number, Number>(values.iterator()) {
            @Override
            protected boolean predicateFunction(Number value) {
              return uniqueValues.ord(value.byteValue()) > minOrd;
            }
          };
        }
      },
      1);
}
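// To make the cost model above concrete, here is a minimal, self-contained sketch
// of querying a sparse keys[]/values[] table: a binary search over the sorted doc
// IDs, falling back to the common value on a miss. This is an illustration in
// plain Java, not Lucene's actual reader; the class and field names are made up.
final class SparseNormLookupSketch {
  private final int[] docsWithValue;    // sorted doc IDs that carry an exception value
  private final byte[] exceptionValues; // parallel array of those values
  private final byte commonValue;       // returned for every other document

  SparseNormLookupSketch(int[] docsWithValue, byte[] exceptionValues, byte commonValue) {
    this.docsWithValue = docsWithValue;
    this.exceptionValues = exceptionValues;
    this.commonValue = commonValue;
  }

  /** O(log N) per lookup, which is why INDIRECT suits rarely accessed values. */
  byte get(int docID) {
    int idx = java.util.Arrays.binarySearch(docsWithValue, docID);
    return idx >= 0 ? exceptionValues[idx] : commonValue;
  }
}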
private void addUncompressed(Iterable<Number> values, int count) throws IOException {
  meta.writeVInt(count);
  meta.writeByte(UNCOMPRESSED); // uncompressed byte[]
  meta.writeLong(data.getFilePointer());
  for (Number nv : values) {
    data.writeByte(nv.byteValue());
  }
}
private void addTableCompressed(
    Iterable<Number> values, FormatAndBits compression, int count, NormMap uniqueValues)
    throws IOException {
  meta.writeVInt(count);
  meta.writeByte(TABLE_COMPRESSED); // table-compressed
  meta.writeLong(data.getFilePointer());
  writeTable(values, compression, count, uniqueValues, uniqueValues.size);
}
final void finishCommit(Directory dir) throws IOException {
  if (pendingSegnOutput == null) {
    throw new IllegalStateException("prepareCommit was not called");
  }
  boolean success = false;
  try {
    pendingSegnOutput.finishCommit();
    pendingSegnOutput.close();
    pendingSegnOutput = null;
    success = true;
  } finally {
    if (!success) {
      rollbackCommit(dir);
    }
  }

  // NOTE: if we crash here, we have left a segments_N
  // file in the directory in a possibly corrupt state (if
  // some bytes made it to stable storage and others
  // didn't). But, the segments_N file includes a checksum
  // at the end, which should catch this case. So when a
  // reader tries to read it, it will throw a
  // CorruptIndexException, which should cause the retry
  // logic in SegmentInfos to kick in and load the last
  // good (previous) segments_N-1 file.

  final String fileName =
      IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", generation);
  success = false;
  try {
    dir.sync(Collections.singleton(fileName));
    success = true;
  } finally {
    if (!success) {
      try {
        dir.deleteFile(fileName);
      } catch (Throwable t) {
        // Suppress so we keep throwing the original exception
      }
    }
  }

  lastGeneration = generation;

  try {
    IndexOutput genOutput = dir.createOutput(IndexFileNames.SEGMENTS_GEN);
    try {
      genOutput.writeInt(FORMAT_LOCKLESS);
      // The generation is written twice so a reader can detect a truncated or
      // partially written segments.gen file (both copies must match).
      genOutput.writeLong(generation);
      genOutput.writeLong(generation);
    } finally {
      genOutput.close();
    }
  } catch (ThreadInterruptedException t) {
    throw t;
  } catch (Throwable t) {
    // It's OK if we fail to write this file since it's
    // used only as one of the retry fallbacks.
  }
}
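// The NOTE above relies on a trailing checksum to turn a torn write into a clean
// read-time failure. A standalone illustration of that idea, using plain
// java.util.zip.CRC32 rather than Lucene's CodecUtil; names and layout are made up:
final class ChecksumFooterSketch {
  /** Appends a CRC32 of the payload so a reader can detect partially flushed bytes. */
  static byte[] writeWithFooter(byte[] payload) {
    java.util.zip.CRC32 crc = new java.util.zip.CRC32();
    crc.update(payload, 0, payload.length);
    java.nio.ByteBuffer buf = java.nio.ByteBuffer.allocate(payload.length + 8);
    buf.put(payload).putLong(crc.getValue()); // checksum lives at the very end
    return buf.array();
  }

  /** Returns true only if the trailing checksum matches the payload. */
  static boolean verify(byte[] file) {
    if (file.length < 8) return false;
    long stored = java.nio.ByteBuffer.wrap(file).getLong(file.length - 8);
    java.util.zip.CRC32 crc = new java.util.zip.CRC32();
    crc.update(file, 0, file.length - 8);
    return stored == crc.getValue();
  }

  public static void main(String[] args) {
    byte[] ok = writeWithFooter(new byte[] {1, 2, 3});
    System.out.println(verify(ok)); // true
    ok[1] ^= 0x7f; // simulate bytes that never reached stable storage
    System.out.println(verify(ok)); // false -> reader falls back to the previous generation
  }
}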
private void addDeltaCompressed(Iterable<Number> values, int count) throws IOException {
  meta.writeVInt(count);
  meta.writeByte(DELTA_COMPRESSED); // delta-compressed
  meta.writeLong(data.getFilePointer());
  data.writeVInt(PackedInts.VERSION_CURRENT);
  data.writeVInt(BLOCK_SIZE);

  final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE);
  for (Number nv : values) {
    writer.add(nv.longValue());
  }
  writer.finish();
}
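// BlockPackedWriter's win comes from storing each block as a minimum plus
// bit-packed deltas. A toy sketch of just the per-block sizing arithmetic
// (greatly simplified; the real writer also zig-zag encodes the minimum and
// writes a per-block header):
final class BlockPackingSketch {
  /** Bits needed per value when each value is stored as (value - blockMin). */
  static int bitsPerValue(long[] block) {
    long min = Long.MAX_VALUE, max = Long.MIN_VALUE;
    for (long v : block) {
      min = Math.min(min, v);
      max = Math.max(max, v);
    }
    long span = max - min;
    return span == 0 ? 0 : 64 - Long.numberOfLeadingZeros(span);
  }

  public static void main(String[] args) {
    // Norms clustered around a single magnitude pack down to a few bits per value.
    System.out.println(bitsPerValue(new long[] {120, 121, 124, 122})); // 3
    System.out.println(bitsPerValue(new long[] {7, 7, 7, 7}));         // 0
  }
}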
@Override
public void write(
    Directory directory,
    SegmentInfo segmentInfo,
    String segmentSuffix,
    FieldInfos infos,
    IOContext context)
    throws IOException {
  final String fileName =
      IndexFileNames.segmentFileName(
          segmentInfo.name, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION);
  try (IndexOutput output = directory.createOutput(fileName, context)) {
    CodecUtil.writeHeader(
        output, Lucene46FieldInfosFormat.CODEC_NAME, Lucene46FieldInfosFormat.FORMAT_CURRENT);
    output.writeVInt(infos.size());
    for (FieldInfo fi : infos) {
      IndexOptions indexOptions = fi.getIndexOptions();
      byte bits = 0x0;
      if (fi.hasVectors()) bits |= Lucene46FieldInfosFormat.STORE_TERMVECTOR;
      if (fi.omitsNorms()) bits |= Lucene46FieldInfosFormat.OMIT_NORMS;
      if (fi.hasPayloads()) bits |= Lucene46FieldInfosFormat.STORE_PAYLOADS;
      if (fi.getIndexOptions() != IndexOptions.NONE) {
        bits |= Lucene46FieldInfosFormat.IS_INDEXED;
        assert indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0
            || !fi.hasPayloads();
        if (indexOptions == IndexOptions.DOCS) {
          bits |= Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS;
        } else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
          bits |= Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS;
        } else if (indexOptions == IndexOptions.DOCS_AND_FREQS) {
          bits |= Lucene46FieldInfosFormat.OMIT_POSITIONS;
        }
      }
      output.writeString(fi.name);
      output.writeVInt(fi.number);
      output.writeByte(bits);

      // pack the DV types in one byte
      final byte dv = docValuesByte(fi.getDocValuesType());
      final byte nrm = docValuesByte(fi.hasNorms() ? DocValuesType.NUMERIC : DocValuesType.NONE);
      assert (dv & (~0xF)) == 0 && (nrm & (~0xF)) == 0;
      byte val = (byte) (0xff & ((nrm << 4) | dv));
      output.writeByte(val);
      output.writeLong(fi.getDocValuesGen());
      output.writeStringStringMap(fi.attributes());
    }
    CodecUtil.writeFooter(output);
  }
}
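// The doc-values byte above holds two 4-bit type codes, norms in the high nibble
// and doc values in the low nibble. A standalone sketch of the pack/unpack
// arithmetic (the concrete type codes here are made up for illustration):
final class NibblePackSketch {
  static byte pack(int nrm, int dv) {
    assert (dv & ~0xF) == 0 && (nrm & ~0xF) == 0; // each code must fit in 4 bits
    return (byte) (0xFF & ((nrm << 4) | dv));
  }

  static int unpackNorms(byte b) {
    return (b >>> 4) & 0xF;
  }

  static int unpackDocValues(byte b) {
    return b & 0xF;
  }

  public static void main(String[] args) {
    byte packed = pack(1, 2); // hypothetical codes, e.g. 1 = NUMERIC, 2 = BINARY
    System.out.println(unpackNorms(packed) + " " + unpackDocValues(packed)); // 1 2
  }
}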
private void writeDocsWithValue(final Iterable<Number> values, NormMap uniqueValues, int minOrd)
    throws IOException {
  data.writeLong(uniqueValues.values[minOrd]);
  data.writeVInt(PackedInts.VERSION_CURRENT);
  data.writeVInt(BLOCK_SIZE);

  // write docs with value
  final MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(data, BLOCK_SIZE);
  int doc = 0;
  for (Number n : values) {
    int ord = uniqueValues.ord(n.byteValue());
    if (ord > minOrd) {
      writer.add(doc);
    }
    doc++;
  }
  writer.finish();
}
// Encodes common values in a table, and the rest of the values as exceptions using INDIRECT.
// The exceptions should not be accessed very often, since the values are uncommon.
private void addPatchedTable(
    FieldInfo field,
    final Iterable<Number> values,
    final int numCommonValues,
    int commonValuesCount,
    int count,
    final NormMap uniqueValues)
    throws IOException {
  meta.writeVInt(count);
  meta.writeByte(PATCHED_TABLE);
  meta.writeLong(data.getFilePointer());

  assert numCommonValues == 3 || numCommonValues == 15;

  FormatAndBits compression = fastestFormatAndBits(numCommonValues);
  writeTable(values, compression, count, uniqueValues, numCommonValues);

  meta.writeVInt(field.number);
  addIndirect(field, values, count - commonValuesCount, uniqueValues, numCommonValues);
}
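// Why exactly 3 or 15 common values? That restriction is consistent with reserving
// one extra table ord as an "exception" marker so the table stays at a power-of-two
// size (4 or 16 entries, i.e. 2 or 4 bits per value). This sketch of the arithmetic
// is an inference from the assert above, not code from the Lucene sources:
final class PatchedTableMathSketch {
  static int bitsPerValue(int numCommonValues) {
    int tableSize = numCommonValues + 1; // assumed +1 sentinel ord for "see exception table"
    return 32 - Integer.numberOfLeadingZeros(tableSize - 1); // ceil(log2(tableSize))
  }

  public static void main(String[] args) {
    System.out.println(bitsPerValue(3));  // 2 bits -> 4-entry table
    System.out.println(bitsPerValue(15)); // 4 bits -> 16-entry table
  }
}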
private void insertData(CoherenceDirectory dir, String fileName) throws IOException {
  byte[] test = new byte[] {1, 2, 3, 4, 5, 6, 7, 8};
  IndexOutput indexOutput = dir.createOutput(fileName);
  indexOutput.writeBytes(new byte[] {2, 4, 6, 7, 8}, 5);
  indexOutput.writeInt(-1);
  indexOutput.writeLong(10);
  indexOutput.writeInt(0);
  indexOutput.writeInt(0);
  indexOutput.writeBytes(test, 8);
  indexOutput.writeBytes(test, 5);

  indexOutput.seek(0);
  indexOutput.writeByte((byte) 8);
  if (dir.getBucketSize() > 4) {
    indexOutput.seek(2);
    indexOutput.writeBytes(new byte[] {1, 2}, 2);
  }

  indexOutput.close();
}
private void insertData(ByteBufferDirectory dir, int bufferSizeInBytes) throws IOException {
  byte[] test = new byte[] {1, 2, 3, 4, 5, 6, 7, 8};
  IndexOutput indexOutput = dir.createOutput("value1", IOContext.DEFAULT);
  indexOutput.writeBytes(new byte[] {2, 4, 6, 7, 8}, 5);
  indexOutput.writeInt(-1);
  indexOutput.writeLong(10);
  indexOutput.writeInt(0);
  indexOutput.writeInt(0);
  indexOutput.writeBytes(test, 8);
  indexOutput.writeBytes(test, 5);

  indexOutput.seek(0);
  indexOutput.writeByte((byte) 8);
  if (bufferSizeInBytes > 4) {
    indexOutput.seek(2);
    indexOutput.writeBytes(new byte[] {1, 2}, 2);
  }

  indexOutput.close();
}
// Encodes only uncommon values in a sparse bitset.
// Access is constant time, and the common case is predictable.
// Exceptions nest either to CONST (if there are only 2 values) or INDIRECT
// (if there are > 2 values).
private void addPatchedBitset(
    FieldInfo field, final Iterable<Number> values, int count, NormMap uniqueValues)
    throws IOException {
  int commonCount = uniqueValues.freqs[0];
  meta.writeVInt(count - commonCount);
  meta.writeByte(PATCHED_BITSET);
  meta.writeLong(data.getFilePointer());

  // write docs with value
  writeDocsWithValue(values, uniqueValues, 0);

  // write exceptions: only two cases make sense
  // bpv = 1 (folded into sparse bitset already)
  // bpv > 1 (add indirect exception table)
  meta.writeVInt(field.number);
  if (uniqueValues.size == 2) {
    // special case: implicit in bitset
    addConstant(uniqueValues.values[1]);
  } else {
    // exception table
    addIndirect(field, values, count, uniqueValues, 0);
  }
}
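// For contrast with INDIRECT's O(log N) search, a bitset test is a single word
// load and mask. A plain-Java sketch of the constant-time lookup for the CONST
// nesting case (illustrative only; Lucene's reader uses its own bitset and
// nested CONST/INDIRECT decoding):
final class BitsetPatchSketch {
  private final long[] words; // bit i set => doc i carries the exception value
  private final byte commonValue;
  private final byte exceptionValue; // the CONST case: exactly one uncommon value

  BitsetPatchSketch(long[] words, byte commonValue, byte exceptionValue) {
    this.words = words;
    this.commonValue = commonValue;
    this.exceptionValue = exceptionValue;
  }

  /** O(1) per lookup: one array index, one shift, one mask. */
  byte get(int docID) {
    boolean isException = (words[docID >>> 6] & (1L << (docID & 63))) != 0;
    return isException ? exceptionValue : commonValue;
  }
}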
protected void writeTrailer(long dirStart) throws IOException {
  // Seek back to the placeholder written by writeHeader and fill in the real
  // directory offset, which is only known once all terms data has been written.
  out.seek(CodecUtil.headerLength(CODEC_NAME));
  out.writeLong(dirStart);
}
protected void writeHeader(IndexOutput out) throws IOException {
  CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
  // Placeholder for dir offset
  out.writeLong(0);
}
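// writeHeader reserves 8 bytes that writeTrailer later overwrites with the real
// directory offset, which is unknown until the body has been written. A
// self-contained sketch of that reserve-then-backpatch pattern using
// java.io.RandomAccessFile (the file layout here is illustrative, not the
// codec's actual format):
final class BackpatchSketch {
  public static void main(String[] args) throws java.io.IOException {
    try (java.io.RandomAccessFile out = new java.io.RandomAccessFile("demo.bin", "rw")) {
      out.writeInt(0xCAFEBABE); // stand-in codec header
      long placeholderPos = out.getFilePointer();
      out.writeLong(0L); // placeholder for the directory offset

      out.writeBytes("...terms data of unknown length...");
      long dirStart = out.getFilePointer();
      out.writeBytes("...directory...");

      // Backpatch: seek to the placeholder and write the now-known offset.
      out.seek(placeholderPos);
      out.writeLong(dirStart);
    }
  }
}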
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
  // We pull this before the seed intentionally, because it's not consumed at runtime
  // (the skipInterval is written into the postings header).
  int skipInterval = _TestUtil.nextInt(seedRandom, 2, 10);

  if (LuceneTestCase.VERBOSE) {
    System.out.println("MockRandomCodec: skipInterval=" + skipInterval);
  }

  final long seed = seedRandom.nextLong();

  if (LuceneTestCase.VERBOSE) {
    System.out.println(
        "MockRandomCodec: writing to seg="
            + state.segmentName
            + " formatID="
            + state.segmentSuffix
            + " seed="
            + seed);
  }

  final String seedFileName =
      IndexFileNames.segmentFileName(state.segmentName, state.segmentSuffix, SEED_EXT);
  final IndexOutput out = state.directory.createOutput(seedFileName, state.context);
  try {
    out.writeLong(seed);
  } finally {
    out.close();
  }

  final Random random = new Random(seed);

  random.nextInt(); // consume a random for buffersize

  PostingsWriterBase postingsWriter;
  if (random.nextBoolean()) {
    postingsWriter = new SepPostingsWriter(state, new MockIntStreamFactory(random), skipInterval);
  } else {
    if (LuceneTestCase.VERBOSE) {
      System.out.println("MockRandomCodec: writing Standard postings");
    }
    postingsWriter = new Lucene40PostingsWriter(state, skipInterval);
  }

  if (random.nextBoolean()) {
    final int totTFCutoff = _TestUtil.nextInt(random, 1, 20);
    if (LuceneTestCase.VERBOSE) {
      System.out.println(
          "MockRandomCodec: writing pulsing postings with totTFCutoff=" + totTFCutoff);
    }
    postingsWriter = new PulsingPostingsWriter(totTFCutoff, postingsWriter);
  }

  final FieldsConsumer fields;

  if (random.nextBoolean()) {
    // Use BlockTree terms dict
    if (LuceneTestCase.VERBOSE) {
      System.out.println("MockRandomCodec: writing BlockTree terms dict");
    }

    // TODO: would be nice to allow 1 but this is very
    // slow to write
    final int minTermsInBlock = _TestUtil.nextInt(random, 2, 100);
    final int maxTermsInBlock = Math.max(2, (minTermsInBlock - 1) * 2 + random.nextInt(100));

    boolean success = false;
    try {
      fields = new BlockTreeTermsWriter(state, postingsWriter, minTermsInBlock, maxTermsInBlock);
      success = true;
    } finally {
      if (!success) {
        postingsWriter.close();
      }
    }
  } else {
    if (LuceneTestCase.VERBOSE) {
      System.out.println("MockRandomCodec: writing Block terms dict");
    }

    boolean success = false;

    final TermsIndexWriterBase indexWriter;
    try {
      if (random.nextBoolean()) {
        state.termIndexInterval = _TestUtil.nextInt(random, 1, 100);
        if (LuceneTestCase.VERBOSE) {
          System.out.println(
              "MockRandomCodec: fixed-gap terms index (tii=" + state.termIndexInterval + ")");
        }
        indexWriter = new FixedGapTermsIndexWriter(state);
      } else {
        final VariableGapTermsIndexWriter.IndexTermSelector selector;
        final int n2 = random.nextInt(3);
        if (n2 == 0) {
          final int tii = _TestUtil.nextInt(random, 1, 100);
          selector = new VariableGapTermsIndexWriter.EveryNTermSelector(tii);
          if (LuceneTestCase.VERBOSE) {
            System.out.println("MockRandomCodec: variable-gap terms index (tii=" + tii + ")");
          }
        } else if (n2 == 1) {
          final int docFreqThresh = _TestUtil.nextInt(random, 2, 100);
          final int tii = _TestUtil.nextInt(random, 1, 100);
          selector =
              new VariableGapTermsIndexWriter.EveryNOrDocFreqTermSelector(docFreqThresh, tii);
        } else {
          final long seed2 = random.nextLong();
          final int gap = _TestUtil.nextInt(random, 2, 40);
          if (LuceneTestCase.VERBOSE) {
            System.out.println("MockRandomCodec: random-gap terms index (max gap=" + gap + ")");
          }
          selector =
              new VariableGapTermsIndexWriter.IndexTermSelector() {
                final Random rand = new Random(seed2);

                @Override
                public boolean isIndexTerm(BytesRef term, TermStats stats) {
                  return rand.nextInt(gap) == gap / 2;
                }

                @Override
                public void newField(FieldInfo fieldInfo) {}
              };
        }
        indexWriter = new VariableGapTermsIndexWriter(state, selector);
      }
      success = true;
    } finally {
      if (!success) {
        postingsWriter.close();
      }
    }

    success = false;
    try {
      fields = new BlockTermsWriter(indexWriter, state, postingsWriter);
      success = true;
    } finally {
      if (!success) {
        try {
          postingsWriter.close();
        } finally {
          indexWriter.close();
        }
      }
    }
  }

  return fields;
}
/** Writes the index file trailer. */
private void writeIndexTrailer(IndexOutput indexOut, long dirStart) throws IOException {
  indexOut.writeLong(dirStart);
}
private void addConstant(byte constant) throws IOException {
  meta.writeVInt(0);
  meta.writeByte(CONST_COMPRESSED);
  // Nothing is written to the data file: the single shared value lives inline
  // in the metadata.
  meta.writeLong(constant);
}
@Override
public void writeLong(long i) throws IOException {
  delegate.writeLong(i);
}
private void write(Directory directory) throws IOException {
  long nextGeneration = getNextPendingGeneration();
  String segmentFileName =
      IndexFileNames.fileNameFromGeneration(IndexFileNames.PENDING_SEGMENTS, "", nextGeneration);

  // Always advance the generation on write:
  generation = nextGeneration;

  IndexOutput segnOutput = null;
  boolean success = false;

  try {
    segnOutput = directory.createOutput(segmentFileName, IOContext.DEFAULT);
    CodecUtil.writeIndexHeader(
        segnOutput,
        "segments",
        VERSION_CURRENT,
        StringHelper.randomId(),
        Long.toString(nextGeneration, Character.MAX_RADIX));
    segnOutput.writeVInt(Version.LATEST.major);
    segnOutput.writeVInt(Version.LATEST.minor);
    segnOutput.writeVInt(Version.LATEST.bugfix);

    segnOutput.writeLong(version);
    segnOutput.writeInt(counter); // write counter
    segnOutput.writeInt(size());

    if (size() > 0) {
      Version minSegmentVersion = null;

      // We do a separate loop up front so we can write the minSegmentVersion before
      // any SegmentInfo; this makes it cleaner to throw IndexFormatTooOldExc at read time:
      for (SegmentCommitInfo siPerCommit : this) {
        Version segmentVersion = siPerCommit.info.getVersion();
        if (minSegmentVersion == null || segmentVersion.onOrAfter(minSegmentVersion) == false) {
          minSegmentVersion = segmentVersion;
        }
      }

      segnOutput.writeVInt(minSegmentVersion.major);
      segnOutput.writeVInt(minSegmentVersion.minor);
      segnOutput.writeVInt(minSegmentVersion.bugfix);
    }

    // write infos
    for (SegmentCommitInfo siPerCommit : this) {
      SegmentInfo si = siPerCommit.info;
      segnOutput.writeString(si.name);
      byte[] segmentID = si.getId();
      // TODO: remove this in lucene 6, we don't need to include 4.x segments in commits anymore
      if (segmentID == null) {
        segnOutput.writeByte((byte) 0);
      } else {
        if (segmentID.length != StringHelper.ID_LENGTH) {
          throw new IllegalStateException(
              "cannot write segment: invalid id segment="
                  + si.name
                  + " id="
                  + StringHelper.idToString(segmentID));
        }
        segnOutput.writeByte((byte) 1);
        segnOutput.writeBytes(segmentID, segmentID.length);
      }
      segnOutput.writeString(si.getCodec().getName());
      segnOutput.writeLong(siPerCommit.getDelGen());
      int delCount = siPerCommit.getDelCount();
      if (delCount < 0 || delCount > si.maxDoc()) {
        throw new IllegalStateException(
            "cannot write segment: invalid maxDoc segment="
                + si.name
                + " maxDoc="
                + si.maxDoc()
                + " delCount="
                + delCount);
      }
      segnOutput.writeInt(delCount);
      segnOutput.writeLong(siPerCommit.getFieldInfosGen());
      segnOutput.writeLong(siPerCommit.getDocValuesGen());
      segnOutput.writeSetOfStrings(siPerCommit.getFieldInfosFiles());
      final Map<Integer, Set<String>> dvUpdatesFiles = siPerCommit.getDocValuesUpdatesFiles();
      segnOutput.writeInt(dvUpdatesFiles.size());
      for (Entry<Integer, Set<String>> e : dvUpdatesFiles.entrySet()) {
        segnOutput.writeInt(e.getKey());
        segnOutput.writeSetOfStrings(e.getValue());
      }
    }
    segnOutput.writeMapOfStrings(userData);
    CodecUtil.writeFooter(segnOutput);
    segnOutput.close();
    directory.sync(Collections.singleton(segmentFileName));
    success = true;
  } finally {
    if (success) {
      pendingCommit = true;
    } else {
      // We hit an exception above; try to close the file
      // but suppress any exception:
      IOUtils.closeWhileHandlingException(segnOutput);

      // Try not to leave a truncated segments_N file in
      // the index:
      IOUtils.deleteFilesIgnoringExceptions(directory, segmentFileName);
    }
  }
}