/**
 * Opens the completion-suggester FST file for the given segment, validates its
 * header, and loads the lookup factory (skipped during merges to save heap).
 *
 * <p>Fix: the codec-header check previously ran <em>before</em> the
 * {@code try/finally} that guards {@code input}; a corrupt or mismatched header
 * would throw and leak the open {@link IndexInput}. The check now runs inside
 * the guarded region so the input is always closed on failure.
 *
 * @param state segment read state supplying directory, segment info and context
 * @throws IOException if the suggest file cannot be opened or read
 */
public CompletionFieldsProducer(SegmentReadState state) throws IOException {
  String suggestFSTFile =
      IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
  IndexInput input = state.directory.openInput(suggestFSTFile, state.context);
  FieldsProducer delegateProducer = null;
  boolean success = false;
  try {
    if (state.segmentInfo.getVersion().onOrAfter(Version.LUCENE_6_2_0)) {
      // Lucene 6.2.0+ requires all index files to use index header, but prior to that we used an
      // ordinary codec header:
      version =
          CodecUtil.checkIndexHeader(
              input,
              CODEC_NAME,
              SUGGEST_CODEC_VERSION,
              SUGGEST_VERSION_CURRENT,
              state.segmentInfo.getId(),
              state.segmentSuffix);
    } else {
      version =
          CodecUtil.checkHeader(input, CODEC_NAME, SUGGEST_CODEC_VERSION, SUGGEST_VERSION_CURRENT);
    }
    PostingsFormat delegatePostingsFormat = PostingsFormat.forName(input.readString());
    String providerName = input.readString();
    CompletionLookupProvider completionLookupProvider = providers.get(providerName);
    if (completionLookupProvider == null) {
      throw new IllegalStateException("no provider with name [" + providerName + "] registered");
    }
    // TODO: we could clone the ReadState and make it always forward IOContext.MERGE to prevent
    // unecessary heap usage?
    delegateProducer = delegatePostingsFormat.fieldsProducer(state);
    /*
     * If we are merging we don't load the FSTs at all such that we
     * don't consume so much memory during merge
     */
    if (state.context.context != Context.MERGE) {
      // TODO: maybe we can do this in a fully lazy fashion based on some configuration
      // eventually we should have some kind of curciut breaker that prevents us from going OOM
      // here with some configuration
      this.lookupFactory = completionLookupProvider.load(input);
    } else {
      this.lookupFactory = null;
    }
    this.delegateProducer = delegateProducer;
    success = true;
  } finally {
    if (!success) {
      // Close both the delegate (if created) and the input, suppressing secondary exceptions.
      IOUtils.closeWhileHandlingException(delegateProducer, input);
    } else {
      IOUtils.close(input);
    }
  }
}
/**
 * Opens the per-segment bloom-filter file, reads the hash function and the
 * delegate postings format, then deserializes one bloom filter per field.
 *
 * <p>On any failure before the file is fully consumed, both the bloom input
 * and the (possibly created) delegate producer are closed.
 *
 * @param state segment read state supplying directory, field infos and context
 * @throws IOException if the bloom file is missing, corrupt, or unreadable
 */
public BloomFilteredFieldsProducer(SegmentReadState state) throws IOException {
  String fileName =
      IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
  IndexInput in = null;
  boolean loaded = false;
  try {
    in = state.directory.openInput(fileName, state.context);
    CodecUtil.checkHeader(in, BLOOM_CODEC_NAME, BLOOM_CODEC_VERSION, BLOOM_CODEC_VERSION);
    // Hash function used when the filters were written.
    hashFunction = HashFunction.forName(in.readString());
    // The real postings format that this bloom layer wraps.
    PostingsFormat wrapped = PostingsFormat.forName(in.readString());
    this.delegateFieldsProducer = wrapped.fieldsProducer(state);
    // One serialized FuzzySet per bloom-filtered field, keyed by field number.
    int filterCount = in.readInt();
    for (int idx = 0; idx < filterCount; idx++) {
      int fieldNumber = in.readInt();
      FuzzySet filter = FuzzySet.deserialize(in);
      FieldInfo info = state.fieldInfos.fieldInfo(fieldNumber);
      bloomsByFieldName.put(info.name, filter);
    }
    IOUtils.close(in);
    loaded = true;
  } finally {
    if (!loaded) {
      IOUtils.closeWhileHandlingException(in, delegateFieldsProducer);
    }
  }
}
/**
 * Opens the {@code segments.gen} file from a fixed local index directory and
 * prints its leading int and string values.
 *
 * <p>Fix: the {@link IndexInput} was never closed — a resource leak on every
 * call, and also on any read failure. The reads are now wrapped in
 * {@code try/finally}. The {@code directory} field is intentionally left open
 * since it is stored for use elsewhere.
 *
 * @throws Exception if the directory or file cannot be opened or read
 */
public static void IndexInputTest() throws Exception {
  String path = "D:\\Lucene Document";
  directory = FSDirectory.getDirectory(path);
  IndexInput indexInput = directory.openInput("segments.gen");
  try {
    int version = indexInput.readInt();
    System.out.println(version);
    System.out.println(indexInput.readString());
  } finally {
    indexInput.close();
  }
}
// in merge mode we don't uncompress the data of a compressed field private void addFieldForMerge( Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException { Object data; if (binary || compressed) { int toRead = fieldsStream.readVInt(); final byte[] b = new byte[toRead]; fieldsStream.readBytes(b, 0, b.length); data = b; } else { data = fieldsStream.readString(); } doc.add(new FieldForMerge(data, fi, binary, compressed, tokenize)); }
private void addField( Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException { // we have a binary stored field, and it may be compressed if (binary) { int toRead = fieldsStream.readVInt(); final byte[] b = new byte[toRead]; fieldsStream.readBytes(b, 0, b.length); if (compressed) doc.add(new Field(fi.name, uncompress(b), Field.Store.COMPRESS)); else doc.add(new Field(fi.name, b, Field.Store.YES)); } else { Field.Store store = Field.Store.YES; Field.Index index = getIndexType(fi, tokenize); Field.TermVector termVector = getTermVectorType(fi); Fieldable f; if (compressed) { store = Field.Store.COMPRESS; int toRead = fieldsStream.readVInt(); final byte[] b = new byte[toRead]; fieldsStream.readBytes(b, 0, b.length); f = new Field( fi.name, // field name new String(uncompress(b), "UTF-8"), // uncompress the value and add as string store, index, termVector); f.setOmitNorms(fi.omitNorms); } else { f = new Field( fi.name, // name fieldsStream.readString(), // read value store, index, termVector); f.setOmitNorms(fi.omitNorms); } doc.add(f); } }
public Term next() { assert hasNext(); try { int code = input.readVInt(); if ((code & 1) != 0) { // new field field = input.readString(); } int prefix = code >>> 1; int suffix = input.readVInt(); bytes.grow(prefix + suffix); input.readBytes(bytes.bytes, prefix, suffix); bytes.length = prefix + suffix; term.set(field, bytes); return term; } catch (IOException e) { throw new RuntimeException(e); } }
/**
 * Verifies VInt and modified-UTF-8 string decoding in {@code IndexInput}
 * against a hand-built byte fixture: four VInt boundary values (128, 16383,
 * 16384, 16385), then length-prefixed strings covering ASCII, 2-byte,
 * 3-byte, and 4-byte (surrogate-pair) UTF-8 sequences, and embedded NULs.
 * String length prefixes count UTF-16 code units, hence e.g. 0x0E for the
 * surrogate-pair string.
 */
public void testRead() throws IOException {
  IndexInput is =
      new MockIndexInput(
          new byte[] {
            // VInts: 128, 16383, 16384, 16385 (7 bits per byte, high bit = continuation)
            (byte) 0x80,
            0x01,
            (byte) 0xFF,
            0x7F,
            (byte) 0x80,
            (byte) 0x80,
            0x01,
            (byte) 0x81,
            (byte) 0x80,
            0x01,
            // ASCII string: length 6, "Lucene"
            0x06,
            'L',
            'u',
            'c',
            'e',
            'n',
            'e',
            // 2-byte UTF-8 (U+00BF "INVERTED QUESTION MARK")
            0x02,
            (byte) 0xC2,
            (byte) 0xBF,
            0x0A,
            'L',
            'u',
            (byte) 0xC2,
            (byte) 0xBF,
            'c',
            'e',
            (byte) 0xC2,
            (byte) 0xBF,
            'n',
            'e',
            // 3-byte UTF-8 (U+2620 "SKULL AND CROSSBONES")
            0x03,
            (byte) 0xE2,
            (byte) 0x98,
            (byte) 0xA0,
            0x0C,
            'L',
            'u',
            (byte) 0xE2,
            (byte) 0x98,
            (byte) 0xA0,
            'c',
            'e',
            (byte) 0xE2,
            (byte) 0x98,
            (byte) 0xA0,
            'n',
            'e',
            // surrogate pairs
            // (U+1D11E "MUSICAL SYMBOL G CLEF")
            // (U+1D160 "MUSICAL SYMBOL EIGHTH NOTE")
            0x04,
            (byte) 0xF0,
            (byte) 0x9D,
            (byte) 0x84,
            (byte) 0x9E,
            0x08,
            (byte) 0xF0,
            (byte) 0x9D,
            (byte) 0x84,
            (byte) 0x9E,
            (byte) 0xF0,
            (byte) 0x9D,
            (byte) 0x85,
            (byte) 0xA0,
            0x0E,
            'L',
            'u',
            (byte) 0xF0,
            (byte) 0x9D,
            (byte) 0x84,
            (byte) 0x9E,
            'c',
            'e',
            (byte) 0xF0,
            (byte) 0x9D,
            (byte) 0x85,
            (byte) 0xA0,
            'n',
            'e',
            // null bytes
            0x01,
            0x00,
            0x08,
            'L',
            'u',
            0x00,
            'c',
            'e',
            0x00,
            'n',
            'e',
          });
  // VInt boundary values around the 1-byte and 2-byte encoding limits.
  assertEquals(128, is.readVInt());
  assertEquals(16383, is.readVInt());
  assertEquals(16384, is.readVInt());
  assertEquals(16385, is.readVInt());
  assertEquals("Lucene", is.readString());
  assertEquals("\u00BF", is.readString());
  assertEquals("Lu\u00BFce\u00BFne", is.readString());
  assertEquals("\u2620", is.readString());
  assertEquals("Lu\u2620ce\u2620ne", is.readString());
  // Supplementary-plane characters decode to UTF-16 surrogate pairs.
  assertEquals("\uD834\uDD1E", is.readString());
  assertEquals("\uD834\uDD1E\uD834\uDD60", is.readString());
  assertEquals("Lu\uD834\uDD1Ece\uD834\uDD60ne", is.readString());
  // Embedded NUL characters must round-trip unchanged.
  assertEquals("\u0000", is.readString());
  assertEquals("Lu\u0000ce\u0000ne", is.readString());
}
/**
 * Reads a string by forwarding directly to the wrapped input.
 *
 * @return the string read from the delegate
 * @throws IOException propagated from the delegate
 */
@Override
public String readString() throws IOException {
  final String value = delegate.readString();
  return value;
}