Lucene50NormsConsumer( SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException { boolean success = false; try { String dataName = IndexFileNames.segmentFileName( state.segmentInfo.name, state.segmentSuffix, dataExtension); data = state.directory.createOutput(dataName, state.context); CodecUtil.writeIndexHeader( data, dataCodec, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix); String metaName = IndexFileNames.segmentFileName( state.segmentInfo.name, state.segmentSuffix, metaExtension); meta = state.directory.createOutput(metaName, state.context); CodecUtil.writeIndexHeader( meta, metaCodec, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix); success = true; } finally { if (!success) { IOUtils.closeWhileHandlingException(this); } } }
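Several constructors in this section, including the one above, share the same resource-safety idiom: open the output(s), and if anything throws before construction completes, close whatever was opened while suppressing secondary exceptions so the original one propagates. A minimal sketch of that idiom follows; the file names and local variables are hypothetical and not part of any Lucene codec.

// Sketch only: hypothetical data/meta outputs illustrating the "success flag" pattern above.
IndexOutput data = null;
IndexOutput meta = null;
boolean success = false;
try {
  data = directory.createOutput("example.dat", context); // hypothetical file name
  meta = directory.createOutput("example.met", context); // hypothetical file name
  // ... write headers and any other per-segment state here ...
  success = true;
} finally {
  if (!success) {
    // Close partially opened files without masking the exception already in flight.
    IOUtils.closeWhileHandlingException(data, meta);
  }
}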
public SepPostingsReader( Directory dir, FieldInfos fieldInfos, SegmentInfo segmentInfo, IOContext context, IntStreamFactory intFactory, String segmentSuffix) throws IOException { boolean success = false; try { final String docFileName = IndexFileNames.segmentFileName( segmentInfo.name, segmentSuffix, SepPostingsWriter.DOC_EXTENSION); docIn = intFactory.openInput(dir, docFileName, context); skipIn = dir.openInput( IndexFileNames.segmentFileName( segmentInfo.name, segmentSuffix, SepPostingsWriter.SKIP_EXTENSION), context); if (fieldInfos.hasFreq()) { freqIn = intFactory.openInput( dir, IndexFileNames.segmentFileName( segmentInfo.name, segmentSuffix, SepPostingsWriter.FREQ_EXTENSION), context); } else { freqIn = null; } if (fieldInfos.hasProx()) { posIn = intFactory.openInput( dir, IndexFileNames.segmentFileName( segmentInfo.name, segmentSuffix, SepPostingsWriter.POS_EXTENSION), context); payloadIn = dir.openInput( IndexFileNames.segmentFileName( segmentInfo.name, segmentSuffix, SepPostingsWriter.PAYLOAD_EXTENSION), context); } else { posIn = null; payloadIn = null; } success = true; } finally { if (!success) { close(); } } }
public CompletionFieldsConsumer(SegmentWriteState state) throws IOException { this.delegatesFieldsConsumer = delegatePostingsFormat.fieldsConsumer(state); String suggestFSTFile = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION); IndexOutput output = null; boolean success = false; try { output = state.directory.createOutput(suggestFSTFile, state.context); CodecUtil.writeIndexHeader( output, CODEC_NAME, SUGGEST_VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix); /* * we write the delegate postings format name so we can load it * without getting an instance in the ctor */ output.writeString(delegatePostingsFormat.getName()); output.writeString(writeProvider.getName()); this.suggestFieldsConsumer = writeProvider.consumer(output); success = true; } finally { if (!success) { IOUtils.closeWhileHandlingException(output); } } }
/** Full constructor */ public Lucene60PointsWriter( SegmentWriteState writeState, int maxPointsInLeafNode, double maxMBSortInHeap) throws IOException { assert writeState.fieldInfos.hasPointValues(); this.writeState = writeState; this.maxPointsInLeafNode = maxPointsInLeafNode; this.maxMBSortInHeap = maxMBSortInHeap; String dataFileName = IndexFileNames.segmentFileName( writeState.segmentInfo.name, writeState.segmentSuffix, Lucene60PointsFormat.DATA_EXTENSION); dataOut = writeState.directory.createOutput(dataFileName, writeState.context); boolean success = false; try { CodecUtil.writeIndexHeader( dataOut, Lucene60PointsFormat.DATA_CODEC_NAME, Lucene60PointsFormat.DATA_VERSION_CURRENT, writeState.segmentInfo.getId(), writeState.segmentSuffix); success = true; } finally { if (success == false) { IOUtils.closeWhileHandlingException(dataOut); } } }
public BloomFilteredFieldsProducer(SegmentReadState state) throws IOException {
  String bloomFileName =
      IndexFileNames.segmentFileName(
          state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
  IndexInput bloomIn = null;
  boolean success = false;
  try {
    bloomIn = state.directory.openInput(bloomFileName, state.context);
    CodecUtil.checkHeader(bloomIn, BLOOM_CODEC_NAME, BLOOM_CODEC_VERSION, BLOOM_CODEC_VERSION);
    // Load the hash function used in the BloomFilter
    hashFunction = HashFunction.forName(bloomIn.readString());
    // Load the delegate postings format
    PostingsFormat delegatePostingsFormat = PostingsFormat.forName(bloomIn.readString());
    this.delegateFieldsProducer = delegatePostingsFormat.fieldsProducer(state);
    int numBlooms = bloomIn.readInt();
    for (int i = 0; i < numBlooms; i++) {
      int fieldNum = bloomIn.readInt();
      FuzzySet bloom = FuzzySet.deserialize(bloomIn);
      FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldNum);
      bloomsByFieldName.put(fieldInfo.name, bloom);
    }
    IOUtils.close(bloomIn);
    success = true;
  } finally {
    if (!success) {
      IOUtils.closeWhileHandlingException(bloomIn, delegateFieldsProducer);
    }
  }
}
@Override public DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException { String filename = IndexFileNames.segmentFileName( state.segmentInfo.name, "nrm", IndexFileNames.COMPOUND_FILE_EXTENSION); return new Lucene40DocValuesWriter( state, filename, Lucene40FieldInfosReader.LEGACY_NORM_TYPE_KEY); }
public CompletionFieldsProducer(SegmentReadState state) throws IOException {
  String suggestFSTFile =
      IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
  IndexInput input = state.directory.openInput(suggestFSTFile, state.context);
  if (state.segmentInfo.getVersion().onOrAfter(Version.LUCENE_6_2_0)) {
    // Lucene 6.2.0+ requires all index files to use index header, but prior to that we used an
    // ordinary codec header:
    version =
        CodecUtil.checkIndexHeader(
            input,
            CODEC_NAME,
            SUGGEST_CODEC_VERSION,
            SUGGEST_VERSION_CURRENT,
            state.segmentInfo.getId(),
            state.segmentSuffix);
  } else {
    version =
        CodecUtil.checkHeader(input, CODEC_NAME, SUGGEST_CODEC_VERSION, SUGGEST_VERSION_CURRENT);
  }
  FieldsProducer delegateProducer = null;
  boolean success = false;
  try {
    PostingsFormat delegatePostingsFormat = PostingsFormat.forName(input.readString());
    String providerName = input.readString();
    CompletionLookupProvider completionLookupProvider = providers.get(providerName);
    if (completionLookupProvider == null) {
      throw new IllegalStateException("no provider with name [" + providerName + "] registered");
    }
    // TODO: we could clone the ReadState and make it always forward IOContext.MERGE to prevent
    // unnecessary heap usage?
    delegateProducer = delegatePostingsFormat.fieldsProducer(state);
    /*
     * If we are merging we don't load the FSTs at all such that we
     * don't consume so much memory during merge
     */
    if (state.context.context != Context.MERGE) {
      // TODO: maybe we can do this in a fully lazy fashion based on some configuration
      // eventually we should have some kind of circuit breaker that prevents us from going OOM
      // here with some configuration
      this.lookupFactory = completionLookupProvider.load(input);
    } else {
      this.lookupFactory = null;
    }
    this.delegateProducer = delegateProducer;
    success = true;
  } finally {
    if (!success) {
      IOUtils.closeWhileHandlingException(delegateProducer, input);
    } else {
      IOUtils.close(input);
    }
  }
}
@Override public void write(Directory dir, SegmentInfo si, IOContext context) throws IOException { String dataFile = IndexFileNames.segmentFileName(si.name, "", DATA_EXTENSION); int numFiles = si.files().size(); String names[] = si.files().toArray(new String[numFiles]); Arrays.sort(names); long startOffsets[] = new long[numFiles]; long endOffsets[] = new long[numFiles]; BytesRefBuilder scratch = new BytesRefBuilder(); try (IndexOutput out = dir.createOutput(dataFile, context)) { for (int i = 0; i < names.length; i++) { // write header for file SimpleTextUtil.write(out, HEADER); SimpleTextUtil.write(out, names[i], scratch); SimpleTextUtil.writeNewline(out); // write bytes for file startOffsets[i] = out.getFilePointer(); try (IndexInput in = dir.openInput(names[i], IOContext.READONCE)) { out.copyBytes(in, in.length()); } endOffsets[i] = out.getFilePointer(); } long tocPos = out.getFilePointer(); // write CFS table SimpleTextUtil.write(out, TABLE); SimpleTextUtil.write(out, Integer.toString(numFiles), scratch); SimpleTextUtil.writeNewline(out); for (int i = 0; i < names.length; i++) { SimpleTextUtil.write(out, TABLENAME); SimpleTextUtil.write(out, names[i], scratch); SimpleTextUtil.writeNewline(out); SimpleTextUtil.write(out, TABLESTART); SimpleTextUtil.write(out, Long.toString(startOffsets[i]), scratch); SimpleTextUtil.writeNewline(out); SimpleTextUtil.write(out, TABLEEND); SimpleTextUtil.write(out, Long.toString(endOffsets[i]), scratch); SimpleTextUtil.writeNewline(out); } DecimalFormat df = new DecimalFormat(OFFSETPATTERN, DecimalFormatSymbols.getInstance(Locale.ROOT)); SimpleTextUtil.write(out, TABLEPOS); SimpleTextUtil.write(out, df.format(tocPos), scratch); SimpleTextUtil.writeNewline(out); } }
@Override public void write( Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException { final String fileName = IndexFileNames.segmentFileName( segmentInfo.name, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION); try (IndexOutput output = directory.createOutput(fileName, context)) { CodecUtil.writeHeader( output, Lucene46FieldInfosFormat.CODEC_NAME, Lucene46FieldInfosFormat.FORMAT_CURRENT); output.writeVInt(infos.size()); for (FieldInfo fi : infos) { IndexOptions indexOptions = fi.getIndexOptions(); byte bits = 0x0; if (fi.hasVectors()) bits |= Lucene46FieldInfosFormat.STORE_TERMVECTOR; if (fi.omitsNorms()) bits |= Lucene46FieldInfosFormat.OMIT_NORMS; if (fi.hasPayloads()) bits |= Lucene46FieldInfosFormat.STORE_PAYLOADS; if (fi.getIndexOptions() != IndexOptions.NONE) { bits |= Lucene46FieldInfosFormat.IS_INDEXED; assert indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.hasPayloads(); if (indexOptions == IndexOptions.DOCS) { bits |= Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS; } else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) { bits |= Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS; } else if (indexOptions == IndexOptions.DOCS_AND_FREQS) { bits |= Lucene46FieldInfosFormat.OMIT_POSITIONS; } } output.writeString(fi.name); output.writeVInt(fi.number); output.writeByte(bits); // pack the DV types in one byte final byte dv = docValuesByte(fi.getDocValuesType()); final byte nrm = docValuesByte(fi.hasNorms() ? DocValuesType.NUMERIC : DocValuesType.NONE); assert (dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0; byte val = (byte) (0xff & ((nrm << 4) | dv)); output.writeByte(val); output.writeLong(fi.getDocValuesGen()); output.writeStringStringMap(fi.attributes()); } CodecUtil.writeFooter(output); } }
public VariableGapTermsIndexWriter(SegmentWriteState state, IndexTermSelector policy) throws IOException { final String indexFileName = IndexFileNames.segmentFileName( state.segmentName, state.segmentSuffix, TERMS_INDEX_EXTENSION); out = state.directory.createOutput(indexFileName, state.context); boolean success = false; try { fieldInfos = state.fieldInfos; this.policy = policy; writeHeader(out); success = true; } finally { if (!success) { IOUtils.closeWhileHandlingException(out); } } }
public SimpleTextTermVectorsReader(Directory directory, SegmentInfo si, IOContext context)
    throws IOException {
  boolean success = false;
  try {
    in =
        directory.openInput(
            IndexFileNames.segmentFileName(si.name, "", VECTORS_EXTENSION), context);
    success = true;
  } finally {
    if (!success) {
      try {
        close();
      } catch (Throwable t) {} // ensure we throw our original exception
    }
  }
  readIndex(si.maxDoc());
}
public FSTTermsReader(SegmentReadState state, PostingsReaderBase postingsReader) throws IOException { final String termsFileName = IndexFileNames.segmentFileName( state.segmentInfo.name, state.segmentSuffix, FSTTermsWriter.TERMS_EXTENSION); this.postingsReader = postingsReader; final IndexInput in = state.directory.openInput(termsFileName, state.context); boolean success = false; try { version = readHeader(in); if (version >= FSTTermsWriter.TERMS_VERSION_CHECKSUM) { CodecUtil.checksumEntireFile(in); } this.postingsReader.init(in); seekDir(in); final FieldInfos fieldInfos = state.fieldInfos; final int numFields = in.readVInt(); for (int i = 0; i < numFields; i++) { int fieldNumber = in.readVInt(); FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber); long numTerms = in.readVLong(); long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : in.readVLong(); long sumDocFreq = in.readVLong(); int docCount = in.readVInt(); int longsSize = in.readVInt(); TermsReader current = new TermsReader( fieldInfo, in, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize); TermsReader previous = fields.put(fieldInfo.name, current); checkFieldSummary(state.segmentInfo, in, current, previous); } success = true; } finally { if (success) { IOUtils.close(in); } else { IOUtils.closeWhileHandlingException(in); } } }
public SimpleTextStoredFieldsReader(
    Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException {
  this.fieldInfos = fn;
  boolean success = false;
  try {
    in =
        directory.openInput(
            IndexFileNames.segmentFileName(
                si.name, "", SimpleTextStoredFieldsWriter.FIELDS_EXTENSION),
            context);
    success = true;
  } finally {
    if (!success) {
      try {
        close();
      } catch (Throwable t) {} // ensure we throw our original exception
    }
  }
  readIndex(si.maxDoc());
}
@Override
public void files(SegmentInfo segmentInfo, String segmentSuffix, Set<String> files)
    throws IOException {
  final String seedFileName =
      IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, SEED_EXT);
  files.add(seedFileName);
  SepPostingsReader.files(segmentInfo, segmentSuffix, files);
  Lucene40PostingsReader.files(segmentInfo, segmentSuffix, files);
  BlockTermsReader.files(segmentInfo, segmentSuffix, files);
  BlockTreeTermsReader.files(segmentInfo, segmentSuffix, files);
  FixedGapTermsIndexReader.files(segmentInfo, segmentSuffix, files);
  VariableGapTermsIndexReader.files(segmentInfo, segmentSuffix, files);
  // hackish!
  Iterator<String> it = files.iterator();
  while (it.hasNext()) {
    final String file = it.next();
    if (!segmentInfo.dir.fileExists(file)) {
      it.remove();
    }
  }
  // System.out.println("MockRandom.files return " + files);
}
@Override
public IndexOutput createTempOutput(String prefix, String suffix, IOContext context)
    throws IOException {
  ensureOpen();

  // Make the file first...
  RAMFile file = newRAMFile();

  // ... then try to find a unique name for it:
  while (true) {
    String name =
        IndexFileNames.segmentFileName(
            prefix,
            suffix
                + "_"
                + Long.toString(nextTempFileCounter.getAndIncrement(), Character.MAX_RADIX),
            "tmp");
    if (fileMap.putIfAbsent(name, file) == null) {
      return new RAMOutputStream(name, file, true);
    }
  }
}
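For illustration, the loop above builds the temp-file name from the prefix, the suffix plus a base-36 counter, and the "tmp" extension. A hypothetical example (prefix, suffix, and counter value invented for this sketch):

// Hypothetical values: prefix "_0", suffix "bkd", counter 37.
// Long.toString(37, Character.MAX_RADIX) is "11", so the generated name is "_0_bkd_11.tmp".
String name =
    IndexFileNames.segmentFileName(
        "_0", "bkd" + "_" + Long.toString(37, Character.MAX_RADIX), "tmp");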
@Override
public void close() throws IOException {
  delegateFieldsConsumer.close();
  // Now we are done accumulating values for these fields
  List<Entry<FieldInfo, FuzzySet>> nonSaturatedBlooms =
      new ArrayList<Map.Entry<FieldInfo, FuzzySet>>();
  for (Entry<FieldInfo, FuzzySet> entry : bloomFilters.entrySet()) {
    FuzzySet bloomFilter = entry.getValue();
    if (!bloomFilterFactory.isSaturated(bloomFilter, entry.getKey())) {
      nonSaturatedBlooms.add(entry);
    }
  }
  String bloomFileName =
      IndexFileNames.segmentFileName(
          state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
  IndexOutput bloomOutput = null;
  try {
    bloomOutput = state.directory.createOutput(bloomFileName, state.context);
    CodecUtil.writeHeader(bloomOutput, BLOOM_CODEC_NAME, BLOOM_CODEC_VERSION);
    // remember the name of the postings format we will delegate to
    bloomOutput.writeString(delegatePostingsFormat.getName());
    // First field in the output file is the number of fields+blooms saved
    bloomOutput.writeInt(nonSaturatedBlooms.size());
    for (Entry<FieldInfo, FuzzySet> entry : nonSaturatedBlooms) {
      FieldInfo fieldInfo = entry.getKey();
      FuzzySet bloomFilter = entry.getValue();
      bloomOutput.writeInt(fieldInfo.number);
      saveAppropriatelySizedBloomFilter(bloomOutput, bloomFilter, fieldInfo);
    }
  } finally {
    IOUtils.close(bloomOutput);
  }
  // We are done with large bitsets so no need to keep them hanging around
  bloomFilters.clear();
}
@Override
public void finish() throws IOException {
  if (finished) {
    throw new IllegalStateException("already finished");
  }
  finished = true;

  CodecUtil.writeFooter(dataOut);

  String indexFileName =
      IndexFileNames.segmentFileName(
          writeState.segmentInfo.name,
          writeState.segmentSuffix,
          Lucene60PointsFormat.INDEX_EXTENSION);
  // Write index file
  try (IndexOutput indexOut =
      writeState.directory.createOutput(indexFileName, writeState.context)) {
    CodecUtil.writeIndexHeader(
        indexOut,
        Lucene60PointsFormat.META_CODEC_NAME,
        Lucene60PointsFormat.INDEX_VERSION_CURRENT,
        writeState.segmentInfo.getId(),
        writeState.segmentSuffix);
    int count = indexFPs.size();
    indexOut.writeVInt(count);
    for (Map.Entry<String, Long> ent : indexFPs.entrySet()) {
      FieldInfo fieldInfo = writeState.fieldInfos.fieldInfo(ent.getKey());
      if (fieldInfo == null) {
        throw new IllegalStateException(
            "wrote field=\"" + ent.getKey() + "\" but that field doesn't exist in FieldInfos");
      }
      indexOut.writeVInt(fieldInfo.number);
      indexOut.writeVLong(ent.getValue());
    }
    CodecUtil.writeFooter(indexOut);
  }
}
/** * prunes the list of index files such that only the latest del generation files are contained. */ private void pruneOldDeleteGenerations(Set<Path> files) { final TreeSet<Path> delFiles = new TreeSet<>(); for (Path file : files) { if (file.getFileName().toString().endsWith(".liv")) { delFiles.add(file); } } Path last = null; for (Path current : delFiles) { if (last != null) { final String newSegmentName = IndexFileNames.parseSegmentName(current.getFileName().toString()); final String oldSegmentName = IndexFileNames.parseSegmentName(last.getFileName().toString()); if (newSegmentName.equals(oldSegmentName)) { int oldGen = Integer.parseInt( IndexFileNames.stripExtension( IndexFileNames.stripSegmentName(last.getFileName().toString())) .replace("_", ""), Character.MAX_RADIX); int newGen = Integer.parseInt( IndexFileNames.stripExtension( IndexFileNames.stripSegmentName(current.getFileName().toString())) .replace("_", ""), Character.MAX_RADIX); if (newGen > oldGen) { files.remove(last); } else { files.remove(current); continue; } } } last = current; } }
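To make the generation comparison above concrete: the live-docs generation is the base-36 number left after the segment name and extension are stripped from the file name. A hypothetical example (file name invented for this sketch):

// Hypothetical live-docs file "_0_a.liv" for segment "_0":
// stripSegmentName("_0_a.liv") -> "_a.liv", stripExtension("_a.liv") -> "_a",
// and removing the underscore leaves "a", i.e. generation 10 in base 36.
int gen = Integer.parseInt("a", Character.MAX_RADIX); // 10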
@Override public Directory getCompoundReader(Directory dir, SegmentInfo si, IOContext context) throws IOException { String dataFile = IndexFileNames.segmentFileName(si.name, "", DATA_EXTENSION); final IndexInput in = dir.openInput(dataFile, context); BytesRefBuilder scratch = new BytesRefBuilder(); // first get to TOC: DecimalFormat df = new DecimalFormat(OFFSETPATTERN, DecimalFormatSymbols.getInstance(Locale.ROOT)); long pos = in.length() - TABLEPOS.length - OFFSETPATTERN.length() - 1; in.seek(pos); SimpleTextUtil.readLine(in, scratch); assert StringHelper.startsWith(scratch.get(), TABLEPOS); long tablePos = -1; try { tablePos = df.parse(stripPrefix(scratch, TABLEPOS)).longValue(); } catch (ParseException e) { throw new CorruptIndexException( "can't parse CFS trailer, got: " + scratch.get().utf8ToString(), in); } // seek to TOC and read it in.seek(tablePos); SimpleTextUtil.readLine(in, scratch); assert StringHelper.startsWith(scratch.get(), TABLE); int numEntries = Integer.parseInt(stripPrefix(scratch, TABLE)); final String fileNames[] = new String[numEntries]; final long startOffsets[] = new long[numEntries]; final long endOffsets[] = new long[numEntries]; for (int i = 0; i < numEntries; i++) { SimpleTextUtil.readLine(in, scratch); assert StringHelper.startsWith(scratch.get(), TABLENAME); fileNames[i] = si.name + IndexFileNames.stripSegmentName(stripPrefix(scratch, TABLENAME)); if (i > 0) { // files must be unique and in sorted order assert fileNames[i].compareTo(fileNames[i - 1]) > 0; } SimpleTextUtil.readLine(in, scratch); assert StringHelper.startsWith(scratch.get(), TABLESTART); startOffsets[i] = Long.parseLong(stripPrefix(scratch, TABLESTART)); SimpleTextUtil.readLine(in, scratch); assert StringHelper.startsWith(scratch.get(), TABLEEND); endOffsets[i] = Long.parseLong(stripPrefix(scratch, TABLEEND)); } return new Directory() { private int getIndex(String name) throws IOException { int index = Arrays.binarySearch(fileNames, name); if (index < 0) { throw new FileNotFoundException( "No sub-file found (fileName=" + name + " files: " + Arrays.toString(fileNames) + ")"); } return index; } @Override public String[] listAll() throws IOException { ensureOpen(); return fileNames.clone(); } @Override public long fileLength(String name) throws IOException { ensureOpen(); int index = getIndex(name); return endOffsets[index] - startOffsets[index]; } @Override public IndexInput openInput(String name, IOContext context) throws IOException { ensureOpen(); int index = getIndex(name); return in.slice(name, startOffsets[index], endOffsets[index] - startOffsets[index]); } @Override public void close() throws IOException { in.close(); } // write methods: disabled @Override public IndexOutput createOutput(String name, IOContext context) { throw new UnsupportedOperationException(); } @Override public void sync(Collection<String> names) { throw new UnsupportedOperationException(); } @Override public void deleteFile(String name) { throw new UnsupportedOperationException(); } @Override public void renameFile(String source, String dest) { throw new UnsupportedOperationException(); } @Override public Lock makeLock(String name) { throw new UnsupportedOperationException(); } }; }
Lucene42DocValuesProducer( SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException { maxDoc = state.segmentInfo.maxDoc(); merging = false; String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); // read in the entries from the metadata file. ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context); boolean success = false; ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass())); try { version = CodecUtil.checkHeader(in, metaCodec, VERSION_START, VERSION_CURRENT); numerics = new HashMap<>(); binaries = new HashMap<>(); fsts = new HashMap<>(); numEntries = readFields(in, state.fieldInfos); if (version >= VERSION_CHECKSUM) { CodecUtil.checkFooter(in); } else { CodecUtil.checkEOF(in); } success = true; } finally { if (success) { IOUtils.close(in); } else { IOUtils.closeWhileHandlingException(in); } } String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension); this.data = state.directory.openInput(dataName, state.context); success = false; try { final int version2 = CodecUtil.checkHeader(data, dataCodec, VERSION_START, VERSION_CURRENT); if (version != version2) { throw new CorruptIndexException( "Format versions mismatch: meta=" + version + ", data=" + version2, data); } if (version >= VERSION_CHECKSUM) { // NOTE: data file is too costly to verify checksum against all the bytes on open, // but for now we at least verify proper structure of the checksum footer: which looks // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption // such as file truncation. CodecUtil.retrieveChecksum(data); } success = true; } finally { if (!success) { IOUtils.closeWhileHandlingException(this.data); } } }
@Override public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { final String seedFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, SEED_EXT); final IndexInput in = state.dir.openInput(seedFileName, state.context); final long seed = in.readLong(); if (LuceneTestCase.VERBOSE) { System.out.println( "MockRandomCodec: reading from seg=" + state.segmentInfo.name + " formatID=" + state.segmentSuffix + " seed=" + seed); } in.close(); final Random random = new Random(seed); int readBufferSize = _TestUtil.nextInt(random, 1, 4096); if (LuceneTestCase.VERBOSE) { System.out.println("MockRandomCodec: readBufferSize=" + readBufferSize); } PostingsReaderBase postingsReader; if (random.nextBoolean()) { if (LuceneTestCase.VERBOSE) { System.out.println("MockRandomCodec: reading Sep postings"); } postingsReader = new SepPostingsReader( state.dir, state.segmentInfo, state.context, new MockIntStreamFactory(random), state.segmentSuffix); } else { if (LuceneTestCase.VERBOSE) { System.out.println("MockRandomCodec: reading Standard postings"); } postingsReader = new Lucene40PostingsReader( state.dir, state.segmentInfo, state.context, state.segmentSuffix); } if (random.nextBoolean()) { final int totTFCutoff = _TestUtil.nextInt(random, 1, 20); if (LuceneTestCase.VERBOSE) { System.out.println( "MockRandomCodec: reading pulsing postings with totTFCutoff=" + totTFCutoff); } postingsReader = new PulsingPostingsReader(postingsReader); } final FieldsProducer fields; if (random.nextBoolean()) { // Use BlockTree terms dict if (LuceneTestCase.VERBOSE) { System.out.println("MockRandomCodec: reading BlockTree terms dict"); } boolean success = false; try { fields = new BlockTreeTermsReader( state.dir, state.fieldInfos, state.segmentInfo.name, postingsReader, state.context, state.segmentSuffix, state.termsIndexDivisor); success = true; } finally { if (!success) { postingsReader.close(); } } } else { if (LuceneTestCase.VERBOSE) { System.out.println("MockRandomCodec: reading Block terms dict"); } final TermsIndexReaderBase indexReader; boolean success = false; try { final boolean doFixedGap = random.nextBoolean(); // randomness diverges from writer, here: if (state.termsIndexDivisor != -1) { state.termsIndexDivisor = _TestUtil.nextInt(random, 1, 10); } if (doFixedGap) { // if termsIndexDivisor is set to -1, we should not touch it. It means a // test explicitly instructed not to load the terms index. 
if (LuceneTestCase.VERBOSE) { System.out.println( "MockRandomCodec: fixed-gap terms index (divisor=" + state.termsIndexDivisor + ")"); } indexReader = new FixedGapTermsIndexReader( state.dir, state.fieldInfos, state.segmentInfo.name, state.termsIndexDivisor, BytesRef.getUTF8SortedAsUnicodeComparator(), state.segmentSuffix, state.context); } else { final int n2 = random.nextInt(3); if (n2 == 1) { random.nextInt(); } else if (n2 == 2) { random.nextLong(); } if (LuceneTestCase.VERBOSE) { System.out.println( "MockRandomCodec: variable-gap terms index (divisor=" + state.termsIndexDivisor + ")"); } indexReader = new VariableGapTermsIndexReader( state.dir, state.fieldInfos, state.segmentInfo.name, state.termsIndexDivisor, state.segmentSuffix, state.context); } success = true; } finally { if (!success) { postingsReader.close(); } } final int termsCacheSize = _TestUtil.nextInt(random, 1, 1024); success = false; try { fields = new BlockTermsReader( indexReader, state.dir, state.fieldInfos, state.segmentInfo.name, postingsReader, state.context, termsCacheSize, state.segmentSuffix); success = true; } finally { if (!success) { try { postingsReader.close(); } finally { indexReader.close(); } } } } return fields; }
/** * Create a new writer. The number of items (terms or sub-blocks) per block will aim to be between * minItemsPerBlock and maxItemsPerBlock, though in some cases the blocks may be smaller than the * min. */ public BlockTreeTermsWriter( SegmentWriteState state, PostingsWriterBase postingsWriter, int minItemsInBlock, int maxItemsInBlock) throws IOException { if (minItemsInBlock <= 1) { throw new IllegalArgumentException("minItemsInBlock must be >= 2; got " + minItemsInBlock); } if (maxItemsInBlock <= 0) { throw new IllegalArgumentException("maxItemsInBlock must be >= 1; got " + maxItemsInBlock); } if (minItemsInBlock > maxItemsInBlock) { throw new IllegalArgumentException( "maxItemsInBlock must be >= minItemsInBlock; got maxItemsInBlock=" + maxItemsInBlock + " minItemsInBlock=" + minItemsInBlock); } if (2 * (minItemsInBlock - 1) > maxItemsInBlock) { throw new IllegalArgumentException( "maxItemsInBlock must be at least 2*(minItemsInBlock-1); got maxItemsInBlock=" + maxItemsInBlock + " minItemsInBlock=" + minItemsInBlock); } final String termsFileName = IndexFileNames.segmentFileName( state.segmentInfo.name, state.segmentSuffix, TERMS_EXTENSION); out = state.directory.createOutput(termsFileName, state.context); boolean success = false; IndexOutput indexOut = null; try { fieldInfos = state.fieldInfos; this.minItemsInBlock = minItemsInBlock; this.maxItemsInBlock = maxItemsInBlock; writeHeader(out); // DEBUG = state.segmentName.equals("_4a"); final String termsIndexFileName = IndexFileNames.segmentFileName( state.segmentInfo.name, state.segmentSuffix, TERMS_INDEX_EXTENSION); indexOut = state.directory.createOutput(termsIndexFileName, state.context); writeIndexHeader(indexOut); currentField = null; this.postingsWriter = postingsWriter; // segment = state.segmentName; // System.out.println("BTW.init seg=" + state.segmentName); postingsWriter.start(out); // have consumer write its format/header success = true; } finally { if (!success) { IOUtils.closeWhileHandlingException(out, indexOut); } } this.indexOut = indexOut; }
@Override public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { // we pull this before the seed intentionally: because its not consumed at runtime // (the skipInterval is written into postings header) int skipInterval = _TestUtil.nextInt(seedRandom, 2, 10); if (LuceneTestCase.VERBOSE) { System.out.println("MockRandomCodec: skipInterval=" + skipInterval); } final long seed = seedRandom.nextLong(); if (LuceneTestCase.VERBOSE) { System.out.println( "MockRandomCodec: writing to seg=" + state.segmentName + " formatID=" + state.segmentSuffix + " seed=" + seed); } final String seedFileName = IndexFileNames.segmentFileName(state.segmentName, state.segmentSuffix, SEED_EXT); final IndexOutput out = state.directory.createOutput(seedFileName, state.context); try { out.writeLong(seed); } finally { out.close(); } final Random random = new Random(seed); random.nextInt(); // consume a random for buffersize PostingsWriterBase postingsWriter; if (random.nextBoolean()) { postingsWriter = new SepPostingsWriter(state, new MockIntStreamFactory(random), skipInterval); } else { if (LuceneTestCase.VERBOSE) { System.out.println("MockRandomCodec: writing Standard postings"); } postingsWriter = new Lucene40PostingsWriter(state, skipInterval); } if (random.nextBoolean()) { final int totTFCutoff = _TestUtil.nextInt(random, 1, 20); if (LuceneTestCase.VERBOSE) { System.out.println( "MockRandomCodec: writing pulsing postings with totTFCutoff=" + totTFCutoff); } postingsWriter = new PulsingPostingsWriter(totTFCutoff, postingsWriter); } final FieldsConsumer fields; if (random.nextBoolean()) { // Use BlockTree terms dict if (LuceneTestCase.VERBOSE) { System.out.println("MockRandomCodec: writing BlockTree terms dict"); } // TODO: would be nice to allow 1 but this is very // slow to write final int minTermsInBlock = _TestUtil.nextInt(random, 2, 100); final int maxTermsInBlock = Math.max(2, (minTermsInBlock - 1) * 2 + random.nextInt(100)); boolean success = false; try { fields = new BlockTreeTermsWriter(state, postingsWriter, minTermsInBlock, maxTermsInBlock); success = true; } finally { if (!success) { postingsWriter.close(); } } } else { if (LuceneTestCase.VERBOSE) { System.out.println("MockRandomCodec: writing Block terms dict"); } boolean success = false; final TermsIndexWriterBase indexWriter; try { if (random.nextBoolean()) { state.termIndexInterval = _TestUtil.nextInt(random, 1, 100); if (LuceneTestCase.VERBOSE) { System.out.println( "MockRandomCodec: fixed-gap terms index (tii=" + state.termIndexInterval + ")"); } indexWriter = new FixedGapTermsIndexWriter(state); } else { final VariableGapTermsIndexWriter.IndexTermSelector selector; final int n2 = random.nextInt(3); if (n2 == 0) { final int tii = _TestUtil.nextInt(random, 1, 100); selector = new VariableGapTermsIndexWriter.EveryNTermSelector(tii); if (LuceneTestCase.VERBOSE) { System.out.println("MockRandomCodec: variable-gap terms index (tii=" + tii + ")"); } } else if (n2 == 1) { final int docFreqThresh = _TestUtil.nextInt(random, 2, 100); final int tii = _TestUtil.nextInt(random, 1, 100); selector = new VariableGapTermsIndexWriter.EveryNOrDocFreqTermSelector(docFreqThresh, tii); } else { final long seed2 = random.nextLong(); final int gap = _TestUtil.nextInt(random, 2, 40); if (LuceneTestCase.VERBOSE) { System.out.println("MockRandomCodec: random-gap terms index (max gap=" + gap + ")"); } selector = new VariableGapTermsIndexWriter.IndexTermSelector() { final Random rand = new Random(seed2); @Override public boolean isIndexTerm(BytesRef 
term, TermStats stats) { return rand.nextInt(gap) == gap / 2; } @Override public void newField(FieldInfo fieldInfo) {} }; } indexWriter = new VariableGapTermsIndexWriter(state, selector); } success = true; } finally { if (!success) { postingsWriter.close(); } } success = false; try { fields = new BlockTermsWriter(indexWriter, state, postingsWriter); success = true; } finally { if (!success) { try { postingsWriter.close(); } finally { indexWriter.close(); } } } } return fields; }
@Override public FieldInfos read( Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext context) throws IOException { final String fileName = IndexFileNames.segmentFileName( segmentInfo.name, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION); try (ChecksumIndexInput input = directory.openChecksumInput(fileName, context)) { int codecVersion = CodecUtil.checkHeader( input, Lucene46FieldInfosFormat.CODEC_NAME, Lucene46FieldInfosFormat.FORMAT_START, Lucene46FieldInfosFormat.FORMAT_CURRENT); final int size = input.readVInt(); // read in the size FieldInfo infos[] = new FieldInfo[size]; for (int i = 0; i < size; i++) { String name = input.readString(); final int fieldNumber = input.readVInt(); if (fieldNumber < 0) { throw new CorruptIndexException( "invalid field number for field: " + name + ", fieldNumber=" + fieldNumber, input); } byte bits = input.readByte(); boolean isIndexed = (bits & Lucene46FieldInfosFormat.IS_INDEXED) != 0; boolean storeTermVector = (bits & Lucene46FieldInfosFormat.STORE_TERMVECTOR) != 0; boolean omitNorms = (bits & Lucene46FieldInfosFormat.OMIT_NORMS) != 0; boolean storePayloads = (bits & Lucene46FieldInfosFormat.STORE_PAYLOADS) != 0; final IndexOptions indexOptions; if (!isIndexed) { indexOptions = IndexOptions.NONE; } else if ((bits & Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0) { indexOptions = IndexOptions.DOCS; } else if ((bits & Lucene46FieldInfosFormat.OMIT_POSITIONS) != 0) { indexOptions = IndexOptions.DOCS_AND_FREQS; } else if ((bits & Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0) { indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; } else { indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; } // DV Types are packed in one byte byte val = input.readByte(); final DocValuesType docValuesType = getDocValuesType(input, (byte) (val & 0x0F)); final DocValuesType normsType = getDocValuesType(input, (byte) ((val >>> 4) & 0x0F)); final long dvGen = input.readLong(); final Map<String, String> attributes = input.readStringStringMap(); if (isIndexed && omitNorms == false && normsType == DocValuesType.NONE) { // Undead norms! Lucene42NormsProducer will check this and bring norms back from the // dead: UndeadNormsProducer.setUndead(attributes); } infos[i] = new FieldInfo( name, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, docValuesType, dvGen, Collections.unmodifiableMap(attributes)); infos[i].checkConsistency(); } if (codecVersion >= Lucene46FieldInfosFormat.FORMAT_CHECKSUM) { CodecUtil.checkFooter(input); } else { CodecUtil.checkEOF(input); } return new FieldInfos(infos); } }
/** Sole constructor. */ public BlockTreeTermsReader(PostingsReaderBase postingsReader, SegmentReadState state) throws IOException { boolean success = false; IndexInput indexIn = null; this.postingsReader = postingsReader; this.segment = state.segmentInfo.name; String termsName = IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_EXTENSION); try { termsIn = state.directory.openInput(termsName, state.context); version = CodecUtil.checkIndexHeader( termsIn, TERMS_CODEC_NAME, VERSION_START, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix); String indexName = IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_INDEX_EXTENSION); indexIn = state.directory.openInput(indexName, state.context); CodecUtil.checkIndexHeader( indexIn, TERMS_INDEX_CODEC_NAME, version, version, state.segmentInfo.getId(), state.segmentSuffix); CodecUtil.checksumEntireFile(indexIn); // Have PostingsReader init itself postingsReader.init(termsIn, state); // NOTE: data file is too costly to verify checksum against all the bytes on open, // but for now we at least verify proper structure of the checksum footer: which looks // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption // such as file truncation. CodecUtil.retrieveChecksum(termsIn); // Read per-field details seekDir(termsIn, dirOffset); seekDir(indexIn, indexDirOffset); final int numFields = termsIn.readVInt(); if (numFields < 0) { throw new CorruptIndexException("invalid numFields: " + numFields, termsIn); } for (int i = 0; i < numFields; ++i) { final int field = termsIn.readVInt(); final long numTerms = termsIn.readVLong(); if (numTerms <= 0) { throw new CorruptIndexException("Illegal numTerms for field number: " + field, termsIn); } final int numBytes = termsIn.readVInt(); if (numBytes < 0) { throw new CorruptIndexException( "invalid rootCode for field number: " + field + ", numBytes=" + numBytes, termsIn); } final BytesRef rootCode = new BytesRef(new byte[numBytes]); termsIn.readBytes(rootCode.bytes, 0, numBytes); rootCode.length = numBytes; final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field); if (fieldInfo == null) { throw new CorruptIndexException("invalid field number: " + field, termsIn); } final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? 
-1 : termsIn.readVLong(); final long sumDocFreq = termsIn.readVLong(); final int docCount = termsIn.readVInt(); final int longsSize = termsIn.readVInt(); if (longsSize < 0) { throw new CorruptIndexException( "invalid longsSize for field: " + fieldInfo.name + ", longsSize=" + longsSize, termsIn); } BytesRef minTerm = readBytesRef(termsIn); BytesRef maxTerm = readBytesRef(termsIn); if (docCount < 0 || docCount > state.segmentInfo.getDocCount()) { // #docs with field must be <= #docs throw new CorruptIndexException( "invalid docCount: " + docCount + " maxDoc: " + state.segmentInfo.getDocCount(), termsIn); } if (sumDocFreq < docCount) { // #postings must be >= #docs with field throw new CorruptIndexException( "invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, termsIn); } if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings throw new CorruptIndexException( "invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, termsIn); } final long indexStartFP = indexIn.readVLong(); FieldReader previous = fields.put( fieldInfo.name, new FieldReader( this, fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount, indexStartFP, longsSize, indexIn, minTerm, maxTerm)); if (previous != null) { throw new CorruptIndexException("duplicate field: " + fieldInfo.name, termsIn); } } indexIn.close(); success = true; } finally { if (!success) { // this.close() will close in: IOUtils.closeWhileHandlingException(indexIn, this); } } }
public BlockTermsReader( TermsIndexReaderBase indexReader, PostingsReaderBase postingsReader, SegmentReadState state) throws IOException { this.postingsReader = postingsReader; String filename = IndexFileNames.segmentFileName( state.segmentInfo.name, state.segmentSuffix, BlockTermsWriter.TERMS_EXTENSION); in = state.directory.openInput(filename, state.context); boolean success = false; try { CodecUtil.checkIndexHeader( in, BlockTermsWriter.CODEC_NAME, BlockTermsWriter.VERSION_START, BlockTermsWriter.VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix); // Have PostingsReader init itself postingsReader.init(in, state); // NOTE: data file is too costly to verify checksum against all the bytes on open, // but for now we at least verify proper structure of the checksum footer: which looks // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption // such as file truncation. CodecUtil.retrieveChecksum(in); // Read per-field details seekDir(in); final int numFields = in.readVInt(); if (numFields < 0) { throw new CorruptIndexException("invalid number of fields: " + numFields, in); } for (int i = 0; i < numFields; i++) { final int field = in.readVInt(); final long numTerms = in.readVLong(); assert numTerms >= 0; final long termsStartPointer = in.readVLong(); final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field); final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1 : in.readVLong(); final long sumDocFreq = in.readVLong(); final int docCount = in.readVInt(); final int longsSize = in.readVInt(); if (docCount < 0 || docCount > state.segmentInfo.maxDoc()) { // #docs with field must be <= #docs throw new CorruptIndexException( "invalid docCount: " + docCount + " maxDoc: " + state.segmentInfo.maxDoc(), in); } if (sumDocFreq < docCount) { // #postings must be >= #docs with field throw new CorruptIndexException( "invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, in); } if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings throw new CorruptIndexException( "invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, in); } FieldReader previous = fields.put( fieldInfo.name, new FieldReader( fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount, longsSize)); if (previous != null) { throw new CorruptIndexException("duplicate fields: " + fieldInfo.name, in); } } success = true; } finally { if (!success) { in.close(); } } this.indexReader = indexReader; }
Lucene70NormsProducer( SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException { maxDoc = state.segmentInfo.maxDoc(); String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); int version = -1; // read in the entries from the metadata file. try (ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context)) { Throwable priorE = null; try { version = CodecUtil.checkIndexHeader( in, metaCodec, VERSION_START, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix); readFields(in, state.fieldInfos); } catch (Throwable exception) { priorE = exception; } finally { CodecUtil.checkFooter(in, priorE); } } String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension); data = state.directory.openInput(dataName, state.context); boolean success = false; try { final int version2 = CodecUtil.checkIndexHeader( data, dataCodec, VERSION_START, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix); if (version != version2) { throw new CorruptIndexException( "Format versions mismatch: meta=" + version + ",data=" + version2, data); } // NOTE: data file is too costly to verify checksum against all the bytes on open, // but for now we at least verify proper structure of the checksum footer: which looks // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption // such as file truncation. CodecUtil.retrieveChecksum(data); success = true; } finally { if (!success) { IOUtils.closeWhileHandlingException(this.data); } } }