SeekingTermSetTermsEnum(TermsEnum tenum, BytesRefHash terms, int[] ords) {
  super(tenum);
  this.terms = terms;
  this.ords = ords;
  comparator = BytesRef.getUTF8SortedAsUnicodeComparator();
  lastElement = terms.size() - 1;
  lastTerm = terms.get(ords[lastElement], new BytesRef());
  seekTerm = terms.get(ords[upto], spare);
}
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
  PostingsReaderBase postingsReader =
      new SepPostingsReader(
          state.dir,
          state.segmentInfo,
          state.context,
          new MockIntFactory(blockSize),
          state.segmentSuffix);

  TermsIndexReaderBase indexReader;
  boolean success = false;
  try {
    indexReader =
        new FixedGapTermsIndexReader(
            state.dir,
            state.fieldInfos,
            state.segmentInfo.name,
            state.termsIndexDivisor,
            BytesRef.getUTF8SortedAsUnicodeComparator(),
            state.segmentSuffix,
            IOContext.DEFAULT);
    success = true;
  } finally {
    if (!success) {
      postingsReader.close();
    }
  }

  success = false;
  try {
    FieldsProducer ret =
        new BlockTermsReader(
            indexReader,
            state.dir,
            state.fieldInfos,
            state.segmentInfo.name,
            postingsReader,
            state.context,
            1024,
            state.segmentSuffix);
    success = true;
    return ret;
  } finally {
    if (!success) {
      try {
        postingsReader.close();
      } finally {
        indexReader.close();
      }
    }
  }
}
@Override
public boolean seekExact(BytesRef text) {
  termUpto =
      binarySearch(
          text,
          br,
          0,
          info.terms.size() - 1,
          info.terms,
          info.sortedTerms,
          BytesRef.getUTF8SortedAsUnicodeComparator());
  return termUpto >= 0;
}
TermsIncludingScoreQuery(
    String field,
    boolean multipleValuesPerDocument,
    BytesRefHash terms,
    float[] scores,
    Query originalQuery) {
  this.field = field;
  this.multipleValuesPerDocument = multipleValuesPerDocument;
  this.terms = terms;
  this.scores = scores;
  this.originalQuery = originalQuery;
  this.ords = terms.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
  this.unwrittenOriginalQuery = originalQuery;
}
private static final class LeafSourceQueue extends PriorityQueue<LeafSource> {

  private final Comparator<BytesRef> termComp = BytesRef.getUTF8SortedAsUnicodeComparator();

  LeafSourceQueue(int size) {
    super(size);
  }

  @Override
  protected boolean lessThan(LeafSource termsA, LeafSource termsB) {
    final int cmp = termComp.compare(termsA.current, termsB.current);
    if (cmp != 0) {
      return cmp < 0;
    } else {
      return termsA.context.ord < termsB.context.ord;
    }
  }
}
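// A minimal, self-contained sketch (not from the source) of the same idea as LeafSourceQueue:
// a Lucene PriorityQueue subclass whose lessThan() uses the UTF-8-sorted-as-Unicode comparator,
// so top() always holds the smallest term. TermQueue and the sample terms are illustrative only.
import java.util.Comparator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PriorityQueue;

final class TermQueue extends PriorityQueue<BytesRef> {
  private static final Comparator<BytesRef> CMP = BytesRef.getUTF8SortedAsUnicodeComparator();

  TermQueue(int size) {
    super(size);
  }

  @Override
  protected boolean lessThan(BytesRef a, BytesRef b) {
    // lessThan == true means 'a' sorts before 'b', so top() is the smallest term
    return CMP.compare(a, b) < 0;
  }

  public static void main(String[] args) {
    TermQueue queue = new TermQueue(3);
    queue.add(new BytesRef("zebra"));
    queue.add(new BytesRef("apple"));
    queue.add(new BytesRef("mango"));
    System.out.println(queue.top().utf8ToString()); // prints: apple
  }
}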
@Override
public SeekStatus seekCeil(BytesRef text) {
  termUpto =
      binarySearch(
          text,
          br,
          0,
          info.terms.size() - 1,
          info.terms,
          info.sortedTerms,
          BytesRef.getUTF8SortedAsUnicodeComparator());
  if (termUpto < 0) { // not found; choose successor
    termUpto = -termUpto - 1;
    if (termUpto >= info.terms.size()) {
      return SeekStatus.END;
    } else {
      info.terms.get(info.sortedTerms[termUpto], br);
      return SeekStatus.NOT_FOUND;
    }
  } else {
    return SeekStatus.FOUND;
  }
}
@Override
public void build(TermFreqIterator iterator) throws IOException {
  BytesRef scratch = new BytesRef();
  TermFreqIterator iter =
      new WFSTTermFreqIteratorWrapper(iterator, BytesRef.getUTF8SortedAsUnicodeComparator());
  IntsRef scratchInts = new IntsRef();
  BytesRef previous = null;
  PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
  Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
  while ((scratch = iter.next()) != null) {
    long cost = iter.weight();
    if (previous == null) {
      previous = new BytesRef();
    } else if (scratch.equals(previous)) {
      continue; // for duplicate suggestions, the best weight is actually added
    }
    Util.toIntsRef(scratch, scratchInts);
    builder.add(scratchInts, cost);
    previous.copyBytes(scratch);
  }
  fst = builder.finish();
}
@Override
public Comparator<BytesRef> getComparator() {
  return BytesRef.getUTF8SortedAsUnicodeComparator();
}
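// A minimal sketch (not from the source) of what this comparator guarantees: raw UTF-8 bytes
// compared as unsigned values yield Unicode code point order. The class name and sample strings
// are arbitrary.
import java.util.Comparator;
import org.apache.lucene.util.BytesRef;

public class ComparatorDemo {
  public static void main(String[] args) {
    Comparator<BytesRef> cmp = BytesRef.getUTF8SortedAsUnicodeComparator();
    BytesRef a = new BytesRef("apple");
    BytesRef b = new BytesRef("äpfel"); // non-ASCII leading byte sorts after plain ASCII
    System.out.println(cmp.compare(a, b) < 0); // true: 'a' (U+0061) < 'ä' (U+00E4)
  }
}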
/**
 * Sorts the hashed terms into ascending order, reusing memory along the way. Note that sorting
 * is deferred until it is actually required (often it never is). When a sorted view is needed,
 * hashing + sort + binary search is still faster and smaller than using a TreeMap, which would
 * be an alternative and somewhat more elegant approach (apart from more sophisticated tries /
 * prefix trees).
 */
public void sortTerms() {
  if (sortedTerms == null) {
    sortedTerms = terms.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
  }
}
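// A rough, self-contained sketch of the hash + sort + binary-search pattern described above.
// It is not the MemoryIndex implementation; the class name and the lookup() helper are
// illustrative. sort() returns the hash ids in ascending UTF-8 term order, and a plain binary
// search over that view resolves a term without a TreeMap.
import java.util.Comparator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;

public class SortedHashLookup {
  public static void main(String[] args) {
    BytesRefHash terms = new BytesRefHash();
    for (String s : new String[] {"delta", "alpha", "charlie", "bravo"}) {
      terms.add(new BytesRef(s));
    }
    int[] sortedIds = terms.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
    System.out.println(lookup(terms, sortedIds, new BytesRef("charlie"))); // >= 0: found
    System.out.println(lookup(terms, sortedIds, new BytesRef("echo")));    // < 0: not found
  }

  // Binary search over the sorted view; returns the sorted position, or a negative
  // insertion point, in the style of Arrays.binarySearch.
  static int lookup(BytesRefHash terms, int[] sortedIds, BytesRef target) {
    Comparator<BytesRef> cmp = BytesRef.getUTF8SortedAsUnicodeComparator();
    BytesRef scratch = new BytesRef();
    int low = 0;
    int high = terms.size() - 1;
    while (low <= high) {
      int mid = (low + high) >>> 1;
      terms.get(sortedIds[mid], scratch);
      int c = cmp.compare(scratch, target);
      if (c < 0) {
        low = mid + 1;
      } else if (c > 0) {
        high = mid - 1;
      } else {
        return mid;
      }
    }
    return -(low + 1);
  }
}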
/**
 * @param field The field that should contain the terms given in the <code>terms</code> parameter
 * @param fromQuery The original query the terms were collected from (kept for equality checks)
 * @param terms The terms that matching documents should have; this constructor sorts them into
 *     natural (unsigned UTF-8 byte) order
 */
TermsQuery(String field, Query fromQuery, BytesRefHash terms) {
  super(field);
  this.fromQuery = fromQuery;
  this.terms = terms;
  ords = terms.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
}
@Test
public void testInMemorySorter() throws Exception {
  check(new InMemorySorter(BytesRef.getUTF8SortedAsUnicodeComparator()));
}
private IndexIterationContext createContext(
    int nDocs,
    RandomIndexWriter fromWriter,
    RandomIndexWriter toWriter,
    boolean multipleValuesPerDocument,
    boolean scoreDocsInOrder)
    throws IOException {
  IndexIterationContext context = new IndexIterationContext();
  int numRandomValues = nDocs / 2;
  context.randomUniqueValues = new String[numRandomValues];
  Set<String> trackSet = new HashSet<String>();
  context.randomFrom = new boolean[numRandomValues];
  for (int i = 0; i < numRandomValues; i++) {
    String uniqueRandomValue;
    do {
      uniqueRandomValue = _TestUtil.randomRealisticUnicodeString(random());
      // uniqueRandomValue = _TestUtil.randomSimpleString(random);
    } while ("".equals(uniqueRandomValue) || trackSet.contains(uniqueRandomValue));
    // Generate unique values; empty strings aren't allowed.
    trackSet.add(uniqueRandomValue);
    context.randomFrom[i] = random().nextBoolean();
    context.randomUniqueValues[i] = uniqueRandomValue;
  }

  RandomDoc[] docs = new RandomDoc[nDocs];
  for (int i = 0; i < nDocs; i++) {
    String id = Integer.toString(i);
    int randomI = random().nextInt(context.randomUniqueValues.length);
    String value = context.randomUniqueValues[randomI];
    Document document = new Document();
    document.add(newTextField(random(), "id", id, Field.Store.NO));
    document.add(newTextField(random(), "value", value, Field.Store.NO));

    boolean from = context.randomFrom[randomI];
    int numberOfLinkValues = multipleValuesPerDocument ? 2 + random().nextInt(10) : 1;
    docs[i] = new RandomDoc(id, numberOfLinkValues, value, from);
    for (int j = 0; j < numberOfLinkValues; j++) {
      String linkValue =
          context.randomUniqueValues[random().nextInt(context.randomUniqueValues.length)];
      docs[i].linkValues.add(linkValue);
      if (from) {
        if (!context.fromDocuments.containsKey(linkValue)) {
          context.fromDocuments.put(linkValue, new ArrayList<RandomDoc>());
        }
        if (!context.randomValueFromDocs.containsKey(value)) {
          context.randomValueFromDocs.put(value, new ArrayList<RandomDoc>());
        }
        context.fromDocuments.get(linkValue).add(docs[i]);
        context.randomValueFromDocs.get(value).add(docs[i]);
        document.add(newTextField(random(), "from", linkValue, Field.Store.NO));
      } else {
        if (!context.toDocuments.containsKey(linkValue)) {
          context.toDocuments.put(linkValue, new ArrayList<RandomDoc>());
        }
        if (!context.randomValueToDocs.containsKey(value)) {
          context.randomValueToDocs.put(value, new ArrayList<RandomDoc>());
        }
        context.toDocuments.get(linkValue).add(docs[i]);
        context.randomValueToDocs.get(value).add(docs[i]);
        document.add(newTextField(random(), "to", linkValue, Field.Store.NO));
      }
    }

    final RandomIndexWriter w;
    if (from) {
      w = fromWriter;
    } else {
      w = toWriter;
    }

    w.addDocument(document);
    if (random().nextInt(10) == 4) {
      w.commit();
    }
    if (VERBOSE) {
      System.out.println("Added document[" + docs[i].id + "]: " + document);
    }
  }

  // Pre-compute all possible hits for all unique random values. On top of this, also compute all
  // possible scores for any ScoreMode.
  IndexSearcher fromSearcher = newSearcher(fromWriter.getReader());
  IndexSearcher toSearcher = newSearcher(toWriter.getReader());
  for (int i = 0; i < context.randomUniqueValues.length; i++) {
    String uniqueRandomValue = context.randomUniqueValues[i];
    final String fromField;
    final String toField;
    final Map<String, Map<Integer, JoinScore>> queryVals;
    if (context.randomFrom[i]) {
      fromField = "from";
      toField = "to";
      queryVals = context.fromHitsToJoinScore;
    } else {
      fromField = "to";
      toField = "from";
      queryVals = context.toHitsToJoinScore;
    }
    final Map<BytesRef, JoinScore> joinValueToJoinScores = new HashMap<BytesRef, JoinScore>();
    if (multipleValuesPerDocument) {
      fromSearcher.search(
          new TermQuery(new Term("value", uniqueRandomValue)),
          new Collector() {
            private Scorer scorer;
            private SortedSetDocValues docTermOrds;
            final BytesRef joinValue = new BytesRef();

            @Override
            public void collect(int doc) throws IOException {
              docTermOrds.setDocument(doc);
              long ord;
              while ((ord = docTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
                docTermOrds.lookupOrd(ord, joinValue);
                JoinScore joinScore = joinValueToJoinScores.get(joinValue);
                if (joinScore == null) {
                  joinValueToJoinScores.put(
                      BytesRef.deepCopyOf(joinValue), joinScore = new JoinScore());
                }
                joinScore.addScore(scorer.score());
              }
            }

            @Override
            public void setNextReader(AtomicReaderContext context) throws IOException {
              docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), fromField);
            }

            @Override
            public void setScorer(Scorer scorer) {
              this.scorer = scorer;
            }

            @Override
            public boolean acceptsDocsOutOfOrder() {
              return false;
            }
          });
    } else {
      fromSearcher.search(
          new TermQuery(new Term("value", uniqueRandomValue)),
          new Collector() {
            private Scorer scorer;
            private BinaryDocValues terms;
            private Bits docsWithField;
            private final BytesRef spare = new BytesRef();

            @Override
            public void collect(int doc) throws IOException {
              terms.get(doc, spare);
              BytesRef joinValue = spare;
              if (joinValue.length == 0 && !docsWithField.get(doc)) {
                return;
              }
              JoinScore joinScore = joinValueToJoinScores.get(joinValue);
              if (joinScore == null) {
                joinValueToJoinScores.put(
                    BytesRef.deepCopyOf(joinValue), joinScore = new JoinScore());
              }
              joinScore.addScore(scorer.score());
            }

            @Override
            public void setNextReader(AtomicReaderContext context) throws IOException {
              terms = FieldCache.DEFAULT.getTerms(context.reader(), fromField, true);
              docsWithField = FieldCache.DEFAULT.getDocsWithField(context.reader(), fromField);
            }

            @Override
            public void setScorer(Scorer scorer) {
              this.scorer = scorer;
            }

            @Override
            public boolean acceptsDocsOutOfOrder() {
              return false;
            }
          });
    }

    final Map<Integer, JoinScore> docToJoinScore = new HashMap<Integer, JoinScore>();
    if (multipleValuesPerDocument) {
      if (scoreDocsInOrder) {
        AtomicReader slowCompositeReader =
            SlowCompositeReaderWrapper.wrap(toSearcher.getIndexReader());
        Terms terms = slowCompositeReader.terms(toField);
        if (terms != null) {
          DocsEnum docsEnum = null;
          TermsEnum termsEnum = null;
          SortedSet<BytesRef> joinValues =
              new TreeSet<BytesRef>(BytesRef.getUTF8SortedAsUnicodeComparator());
          joinValues.addAll(joinValueToJoinScores.keySet());
          for (BytesRef joinValue : joinValues) {
            termsEnum = terms.iterator(termsEnum);
            if (termsEnum.seekExact(joinValue)) {
              docsEnum =
                  termsEnum.docs(slowCompositeReader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
              JoinScore joinScore = joinValueToJoinScores.get(joinValue);

              for (int doc = docsEnum.nextDoc();
                  doc != DocIdSetIterator.NO_MORE_DOCS;
                  doc = docsEnum.nextDoc()) {
                // First encountered join value determines the score.
                // Something to keep in mind for many-to-many relations.
                if (!docToJoinScore.containsKey(doc)) {
                  docToJoinScore.put(doc, joinScore);
                }
              }
            }
          }
        }
      } else {
        toSearcher.search(
            new MatchAllDocsQuery(),
            new Collector() {
              private SortedSetDocValues docTermOrds;
              private final BytesRef scratch = new BytesRef();
              private int docBase;

              @Override
              public void collect(int doc) throws IOException {
                docTermOrds.setDocument(doc);
                long ord;
                while ((ord = docTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
                  docTermOrds.lookupOrd(ord, scratch);
                  JoinScore joinScore = joinValueToJoinScores.get(scratch);
                  if (joinScore == null) {
                    continue;
                  }
                  Integer basedDoc = docBase + doc;
                  // First encountered join value determines the score.
                  // Something to keep in mind for many-to-many relations.
                  if (!docToJoinScore.containsKey(basedDoc)) {
                    docToJoinScore.put(basedDoc, joinScore);
                  }
                }
              }

              @Override
              public void setNextReader(AtomicReaderContext context) throws IOException {
                docBase = context.docBase;
                docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), toField);
              }

              @Override
              public boolean acceptsDocsOutOfOrder() {
                return false;
              }

              @Override
              public void setScorer(Scorer scorer) {}
            });
      }
    } else {
      toSearcher.search(
          new MatchAllDocsQuery(),
          new Collector() {
            private BinaryDocValues terms;
            private int docBase;
            private final BytesRef spare = new BytesRef();

            @Override
            public void collect(int doc) {
              terms.get(doc, spare);
              JoinScore joinScore = joinValueToJoinScores.get(spare);
              if (joinScore == null) {
                return;
              }
              docToJoinScore.put(docBase + doc, joinScore);
            }

            @Override
            public void setNextReader(AtomicReaderContext context) throws IOException {
              terms = FieldCache.DEFAULT.getTerms(context.reader(), toField, false);
              docBase = context.docBase;
            }

            @Override
            public boolean acceptsDocsOutOfOrder() {
              return false;
            }

            @Override
            public void setScorer(Scorer scorer) {}
          });
    }
    queryVals.put(uniqueRandomValue, docToJoinScore);
  }

  fromSearcher.getIndexReader().close();
  toSearcher.getIndexReader().close();

  return context;
}
@Override
int compareTerm(Terms.Bucket other) {
  return BytesRef.getUTF8SortedAsUnicodeComparator()
      .compare(termBytes, ((Bucket) other).termBytes);
}
// TODO: we may want an alternate mode here which is
// "if you are about to return NOT_FOUND I won't use
// the terms data from that"; eg FuzzyTermsEnum will
// (usually) just immediately call seek again if we
// return NOT_FOUND so it's a waste for us to fill in
// the term that was actually NOT_FOUND
@Override
public SeekStatus seekCeil(final BytesRef target) throws IOException {

  if (indexEnum == null) {
    throw new IllegalStateException("terms index was not loaded");
  }

  // System.out.println("BTR.seek seg=" + segment + " target=" + fieldInfo.name + ":"
  // + target.utf8ToString() + " " + target + " current=" + term().utf8ToString() + " " + term()
  // + " indexIsCurrent=" + indexIsCurrent + " didIndexNext=" + didIndexNext + " seekPending="
  // + seekPending + " divisor=" + indexReader.getDivisor() + " this=" + this);
  if (didIndexNext) {
    if (nextIndexTerm == null) {
      // System.out.println("  nextIndexTerm=null");
    } else {
      // System.out.println("  nextIndexTerm=" + nextIndexTerm.utf8ToString());
    }
  }

  boolean doSeek = true;

  // See if we can avoid seeking, because target term
  // is after current term but before next index term:
  if (indexIsCurrent) {

    final int cmp = BytesRef.getUTF8SortedAsUnicodeComparator().compare(term.get(), target);

    if (cmp == 0) {
      // Already at the requested term
      return SeekStatus.FOUND;
    } else if (cmp < 0) {

      // Target term is after current term
      if (!didIndexNext) {
        if (indexEnum.next() == -1) {
          nextIndexTerm = null;
        } else {
          nextIndexTerm = indexEnum.term();
        }
        // System.out.println("  now do index next() nextIndexTerm="
        // + (nextIndexTerm == null ? "null" : nextIndexTerm.utf8ToString()));
        didIndexNext = true;
      }

      if (nextIndexTerm == null
          || BytesRef.getUTF8SortedAsUnicodeComparator().compare(target, nextIndexTerm) < 0) {
        // Optimization: requested term is within the
        // same term block we are now in; skip seeking
        // (but do scanning):
        doSeek = false;
        // System.out.println("  skip seek: nextIndexTerm="
        // + (nextIndexTerm == null ? "null" : nextIndexTerm.utf8ToString()));
      }
    }
  }

  if (doSeek) {
    // System.out.println("  seek");

    // Ask terms index to find biggest indexed term (=
    // first term in a block) that's <= our text:
    in.seek(indexEnum.seek(target));
    boolean result = nextBlock();

    // Block must exist since, at least, the indexed term
    // is in the block:
    assert result;

    indexIsCurrent = true;
    didIndexNext = false;

    if (doOrd) {
      state.ord = indexEnum.ord() - 1;
    }

    term.copyBytes(indexEnum.term());
    // System.out.println("  seek: term=" + term.utf8ToString());
  } else {
    // System.out.println("  skip seek");
    if (state.termBlockOrd == blockTermCount && !nextBlock()) {
      indexIsCurrent = false;
      return SeekStatus.END;
    }
  }

  seekPending = false;

  int common = 0;

  // Scan within block. We could do this by calling
  // _next() and testing the resulting term, but this
  // is wasteful. Instead, we first confirm the
  // target matches the common prefix of this block,
  // and then we scan the term bytes directly from the
  // termSuffixesReader's byte[], saving a copy into
  // the BytesRef term per term. Only when we return
  // do we then copy the bytes into the term.
  while (true) {

    // First, see if target term matches common prefix
    // in this block:
    if (common < termBlockPrefix) {
      final int cmp =
          (term.byteAt(common) & 0xFF) - (target.bytes[target.offset + common] & 0xFF);
      if (cmp < 0) {

        // TODO: maybe we should store common prefix
        // in block header? (instead of relying on
        // last term of previous block)

        // Target's prefix is after the common block
        // prefix, so term cannot be in this block
        // but it could be in next block. We
        // must scan to end-of-block to set common
        // prefix for next block:
        if (state.termBlockOrd < blockTermCount) {
          while (state.termBlockOrd < blockTermCount - 1) {
            state.termBlockOrd++;
            state.ord++;
            termSuffixesReader.skipBytes(termSuffixesReader.readVInt());
          }
          final int suffix = termSuffixesReader.readVInt();
          term.setLength(termBlockPrefix + suffix);
          term.grow(term.length());
          termSuffixesReader.readBytes(term.bytes(), termBlockPrefix, suffix);
        }
        state.ord++;

        if (!nextBlock()) {
          indexIsCurrent = false;
          return SeekStatus.END;
        }
        common = 0;
      } else if (cmp > 0) {
        // Target's prefix is before the common prefix
        // of this block, so we position to start of
        // block and return NOT_FOUND:
        assert state.termBlockOrd == 0;

        final int suffix = termSuffixesReader.readVInt();
        term.setLength(termBlockPrefix + suffix);
        term.grow(term.length());
        termSuffixesReader.readBytes(term.bytes(), termBlockPrefix, suffix);
        return SeekStatus.NOT_FOUND;
      } else {
        common++;
      }

      continue;
    }

    // Test every term in this block
    while (true) {
      state.termBlockOrd++;
      state.ord++;

      final int suffix = termSuffixesReader.readVInt();

      // We know the prefix matches, so just compare the new suffix:
      final int termLen = termBlockPrefix + suffix;
      int bytePos = termSuffixesReader.getPosition();

      boolean next = false;
      final int limit = target.offset + (termLen < target.length ? termLen : target.length);
      int targetPos = target.offset + termBlockPrefix;
      while (targetPos < limit) {
        final int cmp = (termSuffixes[bytePos++] & 0xFF) - (target.bytes[targetPos++] & 0xFF);
        if (cmp < 0) {
          // Current term is still before the target;
          // keep scanning
          next = true;
          break;
        } else if (cmp > 0) {
          // Done! Current term is after target. Stop
          // here, fill in real term, return NOT_FOUND.
          term.setLength(termBlockPrefix + suffix);
          term.grow(term.length());
          termSuffixesReader.readBytes(term.bytes(), termBlockPrefix, suffix);
          // System.out.println("  NOT_FOUND");
          return SeekStatus.NOT_FOUND;
        }
      }

      if (!next && target.length <= termLen) {
        term.setLength(termBlockPrefix + suffix);
        term.grow(term.length());
        termSuffixesReader.readBytes(term.bytes(), termBlockPrefix, suffix);

        if (target.length == termLen) {
          // Done! Exact match. Stop here, fill in
          // real term, return FOUND.
          // System.out.println("  FOUND");
          return SeekStatus.FOUND;
        } else {
          // System.out.println("  NOT_FOUND");
          return SeekStatus.NOT_FOUND;
        }
      }

      if (state.termBlockOrd == blockTermCount) {
        // Must pre-fill term for next block's common prefix
        term.setLength(termBlockPrefix + suffix);
        term.grow(term.length());
        termSuffixesReader.readBytes(term.bytes(), termBlockPrefix, suffix);
        break;
      } else {
        termSuffixesReader.skipBytes(suffix);
      }
    }

    // The purpose of the terms dict index is to seek
    // the enum to the closest index term before the
    // term we are looking for. So, we should never
    // cross another index term (besides the first
    // one) while we are scanning:
    assert indexIsCurrent;

    if (!nextBlock()) {
      // System.out.println("  END");
      indexIsCurrent = false;
      return SeekStatus.END;
    }
    common = 0;
  }
}
/**
 * Called once per field per document if term vectors are enabled, to write the vectors to
 * RAMOutputStream, which is then quickly flushed to the real term vectors files in the
 * Directory.
 */
@Override
void finish() throws IOException {
  assert docState.testPoint("TermVectorsTermsWriterPerField.finish start");

  final int numPostings = termsHashPerField.bytesHash.size();

  final BytesRef flushTerm = perThread.flushTerm;

  assert numPostings >= 0;

  if (!doVectors || numPostings == 0) return;

  if (numPostings > maxNumPostings) maxNumPostings = numPostings;

  final IndexOutput tvf = perThread.doc.perDocTvf;

  // This is called once, after inverting all occurrences
  // of a given field in the doc. At this point we flush
  // our hash into the DocWriter.
  assert fieldInfo.storeTermVector;
  assert perThread.vectorFieldsInOrder(fieldInfo);

  perThread.doc.addField(termsHashPerField.fieldInfo.number);
  TermVectorsPostingsArray postings = (TermVectorsPostingsArray) termsHashPerField.postingsArray;

  // TODO: we may want to make this sort in same order
  // as Codec's terms dict?
  final int[] termIDs =
      termsHashPerField.sortPostings(BytesRef.getUTF8SortedAsUnicodeComparator());

  tvf.writeVInt(numPostings);
  byte bits = 0x0;
  if (doVectorPositions) bits |= TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR;
  if (doVectorOffsets) bits |= TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR;
  tvf.writeByte(bits);

  int lastLen = 0;
  byte[] lastBytes = null;
  int lastStart = 0;

  final ByteSliceReader reader = perThread.vectorSliceReader;
  final ByteBlockPool termBytePool = perThread.termsHashPerThread.termBytePool;

  for (int j = 0; j < numPostings; j++) {
    final int termID = termIDs[j];
    final int freq = postings.freqs[termID];

    // Get BytesRef
    termBytePool.setBytesRef(flushTerm, postings.textStarts[termID]);

    // Compute common byte prefix between last term and
    // this term
    int prefix = 0;
    if (j > 0) {
      while (prefix < lastLen && prefix < flushTerm.length) {
        if (lastBytes[lastStart + prefix] != flushTerm.bytes[flushTerm.offset + prefix]) {
          break;
        }
        prefix++;
      }
    }

    lastLen = flushTerm.length;
    lastBytes = flushTerm.bytes;
    lastStart = flushTerm.offset;

    final int suffix = flushTerm.length - prefix;
    tvf.writeVInt(prefix);
    tvf.writeVInt(suffix);
    tvf.writeBytes(flushTerm.bytes, lastStart + prefix, suffix);
    tvf.writeVInt(freq);

    if (doVectorPositions) {
      termsHashPerField.initReader(reader, termID, 0);
      reader.writeTo(tvf);
    }

    if (doVectorOffsets) {
      termsHashPerField.initReader(reader, termID, 1);
      reader.writeTo(tvf);
    }
  }

  termsHashPerField.reset();

  // NOTE: we clear, per-field, at the thread level,
  // because term vectors fully write themselves on each
  // field; this saves RAM (eg if large doc has two large
  // fields w/ term vectors on) because we recycle/reuse
  // all RAM after each field:
  perThread.termsHashPerThread.reset(false);
}
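// The prefix/suffix loop above is a simple front-coding scheme over the UTF-8-sorted terms:
// each term is written as the length of the byte prefix it shares with the previous term,
// followed by the remaining suffix bytes. A minimal standalone sketch of that encoding step
// (plain Java, no Lucene types; the class name and sample terms are illustrative only):
import java.nio.charset.StandardCharsets;

public class FrontCodingDemo {
  // Length of the common byte prefix of two byte arrays
  static int commonPrefix(byte[] a, byte[] b) {
    int i = 0;
    int limit = Math.min(a.length, b.length);
    while (i < limit && a[i] == b[i]) {
      i++;
    }
    return i;
  }

  public static void main(String[] args) {
    // Terms must already be in UTF-8 byte order for the shared prefixes to be long
    String[] sortedTerms = {"search", "searcher", "searching", "seek"};
    byte[] last = new byte[0];
    for (String term : sortedTerms) {
      byte[] bytes = term.getBytes(StandardCharsets.UTF_8);
      int prefix = commonPrefix(last, bytes);
      int suffix = bytes.length - prefix;
      // A real writer would emit writeVInt(prefix), writeVInt(suffix), then the suffix bytes
      System.out.println(term + " -> prefix=" + prefix + " suffix=" + suffix);
      last = bytes;
    }
  }
}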
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {

  final String seedFileName =
      IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, SEED_EXT);
  final IndexInput in = state.dir.openInput(seedFileName, state.context);
  final long seed = in.readLong();
  if (LuceneTestCase.VERBOSE) {
    System.out.println(
        "MockRandomCodec: reading from seg="
            + state.segmentInfo.name
            + " formatID="
            + state.segmentSuffix
            + " seed="
            + seed);
  }
  in.close();

  final Random random = new Random(seed);

  int readBufferSize = _TestUtil.nextInt(random, 1, 4096);
  if (LuceneTestCase.VERBOSE) {
    System.out.println("MockRandomCodec: readBufferSize=" + readBufferSize);
  }

  PostingsReaderBase postingsReader;

  if (random.nextBoolean()) {
    if (LuceneTestCase.VERBOSE) {
      System.out.println("MockRandomCodec: reading Sep postings");
    }
    postingsReader =
        new SepPostingsReader(
            state.dir,
            state.segmentInfo,
            state.context,
            new MockIntStreamFactory(random),
            state.segmentSuffix);
  } else {
    if (LuceneTestCase.VERBOSE) {
      System.out.println("MockRandomCodec: reading Standard postings");
    }
    postingsReader =
        new Lucene40PostingsReader(
            state.dir, state.segmentInfo, state.context, state.segmentSuffix);
  }

  if (random.nextBoolean()) {
    final int totTFCutoff = _TestUtil.nextInt(random, 1, 20);
    if (LuceneTestCase.VERBOSE) {
      System.out.println(
          "MockRandomCodec: reading pulsing postings with totTFCutoff=" + totTFCutoff);
    }
    postingsReader = new PulsingPostingsReader(postingsReader);
  }

  final FieldsProducer fields;

  if (random.nextBoolean()) {
    // Use BlockTree terms dict
    if (LuceneTestCase.VERBOSE) {
      System.out.println("MockRandomCodec: reading BlockTree terms dict");
    }

    boolean success = false;
    try {
      fields =
          new BlockTreeTermsReader(
              state.dir,
              state.fieldInfos,
              state.segmentInfo.name,
              postingsReader,
              state.context,
              state.segmentSuffix,
              state.termsIndexDivisor);
      success = true;
    } finally {
      if (!success) {
        postingsReader.close();
      }
    }
  } else {

    if (LuceneTestCase.VERBOSE) {
      System.out.println("MockRandomCodec: reading Block terms dict");
    }
    final TermsIndexReaderBase indexReader;
    boolean success = false;
    try {
      final boolean doFixedGap = random.nextBoolean();

      // randomness diverges from writer, here:
      if (state.termsIndexDivisor != -1) {
        state.termsIndexDivisor = _TestUtil.nextInt(random, 1, 10);
      }

      if (doFixedGap) {
        // if termsIndexDivisor is set to -1, we should not touch it. It means a
        // test explicitly instructed not to load the terms index.
        if (LuceneTestCase.VERBOSE) {
          System.out.println(
              "MockRandomCodec: fixed-gap terms index (divisor="
                  + state.termsIndexDivisor
                  + ")");
        }
        indexReader =
            new FixedGapTermsIndexReader(
                state.dir,
                state.fieldInfos,
                state.segmentInfo.name,
                state.termsIndexDivisor,
                BytesRef.getUTF8SortedAsUnicodeComparator(),
                state.segmentSuffix,
                state.context);
      } else {
        final int n2 = random.nextInt(3);
        if (n2 == 1) {
          random.nextInt();
        } else if (n2 == 2) {
          random.nextLong();
        }
        if (LuceneTestCase.VERBOSE) {
          System.out.println(
              "MockRandomCodec: variable-gap terms index (divisor="
                  + state.termsIndexDivisor
                  + ")");
        }
        indexReader =
            new VariableGapTermsIndexReader(
                state.dir,
                state.fieldInfos,
                state.segmentInfo.name,
                state.termsIndexDivisor,
                state.segmentSuffix,
                state.context);
      }

      success = true;
    } finally {
      if (!success) {
        postingsReader.close();
      }
    }

    final int termsCacheSize = _TestUtil.nextInt(random, 1, 1024);

    success = false;
    try {
      fields =
          new BlockTermsReader(
              indexReader,
              state.dir,
              state.fieldInfos,
              state.segmentInfo.name,
              postingsReader,
              state.context,
              termsCacheSize,
              state.segmentSuffix);
      success = true;
    } finally {
      if (!success) {
        try {
          postingsReader.close();
        } finally {
          indexReader.close();
        }
      }
    }
  }

  return fields;
}
/**
 * Creates a new sorted wrapper, using {@link BytesRef#getUTF8SortedAsUnicodeComparator} for
 * sorting.
 */
public SortedInputIterator(InputIterator source) throws IOException {
  this(source, BytesRef.getUTF8SortedAsUnicodeComparator());
}
@Override
public PerDocProducer docsProducer(SegmentReadState state) throws IOException {
  return new SimpleTextPerDocProducer(
      state, BytesRef.getUTF8SortedAsUnicodeComparator(), DOC_VALUES_SEG_SUFFIX);
}