@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
  if (segmentFacetCounts != null) {
    segmentResults.add(createSegmentResult());
  }

  groupFieldTermsIndex = DocValues.getSorted(context.reader(), groupField);
  facetFieldTermsIndex = DocValues.getSorted(context.reader(), facetField);

  // 1+ to allow for the -1 "not set":
  segmentFacetCounts = new int[facetFieldTermsIndex.getValueCount() + 1];
  segmentTotalCount = 0;

  segmentGroupedFacetHits.clear();
  for (GroupedFacetHit groupedFacetHit : groupedFacetHits) {
    int facetOrd =
        groupedFacetHit.facetValue == null
            ? -1
            : facetFieldTermsIndex.lookupTerm(groupedFacetHit.facetValue);
    if (groupedFacetHit.facetValue != null && facetOrd < 0) {
      continue;
    }

    int groupOrd =
        groupedFacetHit.groupValue == null
            ? -1
            : groupFieldTermsIndex.lookupTerm(groupedFacetHit.groupValue);
    if (groupedFacetHit.groupValue != null && groupOrd < 0) {
      continue;
    }

    int segmentGroupedFacetsIndex =
        groupOrd * (facetFieldTermsIndex.getValueCount() + 1) + facetOrd;
    segmentGroupedFacetHits.put(segmentGroupedFacetsIndex);
  }

  if (facetPrefix != null) {
    startFacetOrd = facetFieldTermsIndex.lookupTerm(facetPrefix);
    if (startFacetOrd < 0) {
      // Points to the ord one higher than facetPrefix
      startFacetOrd = -startFacetOrd - 1;
    }
    BytesRefBuilder facetEndPrefix = new BytesRefBuilder();
    facetEndPrefix.append(facetPrefix);
    facetEndPrefix.append(UnicodeUtil.BIG_TERM);
    endFacetOrd = facetFieldTermsIndex.lookupTerm(facetEndPrefix.get());
    assert endFacetOrd < 0;
    endFacetOrd = -endFacetOrd - 1; // Points to the ord one higher than facetEndPrefix
  } else {
    startFacetOrd = -1;
    endFacetOrd = facetFieldTermsIndex.getValueCount();
  }
}
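// --- Added note: a worked example of the prefix-range arithmetic above, not
// part of the original source. lookupTerm follows the Arrays.binarySearch
// convention: a negative result encodes (-insertionPoint - 1). Assuming a
// facet field whose sorted terms are {"aa"=0, "ab"=1, "b"=2} and a
// facetPrefix of "a":
//   lookupTerm("a")            -> -1, so startFacetOrd = -(-1) - 1 = 0
//   lookupTerm("a" + BIG_TERM) -> -3, so endFacetOrd   = -(-3) - 1 = 2
// leaving the half-open ord range [0, 2), exactly the terms prefixed by "a".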
@Override
public void collect(int doc) throws IOException {
  if (doc > facetFieldTermsIndex.docID()) {
    facetFieldTermsIndex.advance(doc);
  }
  int facetOrd;
  if (doc == facetFieldTermsIndex.docID()) {
    facetOrd = facetFieldTermsIndex.ordValue();
  } else {
    facetOrd = -1;
  }
  if (facetOrd < startFacetOrd || facetOrd >= endFacetOrd) {
    return;
  }

  if (doc > groupFieldTermsIndex.docID()) {
    groupFieldTermsIndex.advance(doc);
  }
  int groupOrd;
  if (doc == groupFieldTermsIndex.docID()) {
    groupOrd = groupFieldTermsIndex.ordValue();
  } else {
    groupOrd = -1;
  }
  int segmentGroupedFacetsIndex =
      groupOrd * (facetFieldTermsIndex.getValueCount() + 1) + facetOrd;
  if (segmentGroupedFacetHits.exists(segmentGroupedFacetsIndex)) {
    return;
  }

  segmentTotalCount++;
  segmentFacetCounts[facetOrd + 1]++;

  segmentGroupedFacetHits.put(segmentGroupedFacetsIndex);

  BytesRef groupKey;
  if (groupOrd == -1) {
    groupKey = null;
  } else {
    groupKey = BytesRef.deepCopyOf(groupFieldTermsIndex.lookupOrd(groupOrd));
  }
  BytesRef facetKey;
  if (facetOrd == -1) {
    facetKey = null;
  } else {
    facetKey = BytesRef.deepCopyOf(facetFieldTermsIndex.lookupOrd(facetOrd));
  }

  groupedFacetHits.add(new GroupedFacetHit(groupKey, facetKey));
}
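// --- Added sketch, not part of the original collector. The collect method
// above relies on the iterator contract of SortedDocValues (Lucene 7+ style
// API, as the docID/advance/ordValue calls indicate): the cursor only moves
// forward, so a caller advances only when behind and treats "iterator landed
// past doc" as a missing value (ord -1). A minimal self-contained sketch of
// that access pattern: ---
import java.io.IOException;

import org.apache.lucene.index.SortedDocValues;

final class OrdReaderSketch {
  /**
   * Returns the ord for {@code doc}, or -1 if the doc has no value. The same
   * {@code dv} instance must be passed for non-decreasing doc ids, exactly as
   * the collector above reuses the iterator it obtained per segment.
   */
  static int ordForDoc(SortedDocValues dv, int doc) throws IOException {
    if (doc > dv.docID()) { // forward-only cursor: advance only when behind
      dv.advance(doc);
    }
    // advance may overshoot a doc that has no value; detect that here
    return doc == dv.docID() ? dv.ordValue() : -1;
  }
}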
@Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext)
    throws IOException {
  final SortedDocValues sindex = FieldCache.DEFAULT.getTermsIndex(readerContext.reader(), field);

  // figure out what ord maps to true
  int nord = sindex.getValueCount();
  BytesRef br = new BytesRef();
  // if no values in the segment, default trueOrd to something other than -1 (missing)
  int tord = -2;
  for (int i = 0; i < nord; i++) {
    sindex.lookupOrd(i, br);
    if (br.length == 1 && br.bytes[br.offset] == 'T') {
      tord = i;
      break;
    }
  }

  final int trueOrd = tord;

  return new BoolDocValues(this) {

    @Override
    public boolean boolVal(int doc) {
      return sindex.getOrd(doc) == trueOrd;
    }

    @Override
    public boolean exists(int doc) {
      return sindex.getOrd(doc) != -1;
    }

    @Override
    public ValueFiller getValueFiller() {
      return new ValueFiller() {
        private final MutableValueBool mval = new MutableValueBool();

        @Override
        public MutableValue getValue() {
          return mval;
        }

        @Override
        public void fillValue(int doc) {
          int ord = sindex.getOrd(doc);
          mval.value = (ord == trueOrd);
          mval.exists = (ord != -1);
        }
      };
    }
  };
}
@Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
  SearchContext sc = SearchContext.current();
  IndexParentChildFieldData globalIfd =
      parentChildIndexFieldData.loadGlobal(searcher.getIndexReader());
  assert rewrittenChildQuery != null;
  assert rewriteIndexReader == searcher.getIndexReader()
      : "not equal, rewriteIndexReader=" + rewriteIndexReader
          + " searcher.getIndexReader()=" + searcher.getIndexReader();

  final long valueCount;
  List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
  if (globalIfd == null || leaves.isEmpty()) {
    return Queries.newMatchNoDocsQuery().createWeight(searcher);
  } else {
    AtomicParentChildFieldData afd = globalIfd.load(leaves.get(0));
    SortedDocValues globalValues = afd.getOrdinalsValues(parentType);
    valueCount = globalValues.getValueCount();
  }

  if (valueCount == 0) {
    return Queries.newMatchNoDocsQuery().createWeight(searcher);
  }

  Query childQuery = rewrittenChildQuery;
  IndexSearcher indexSearcher = new IndexSearcher(searcher.getIndexReader());
  indexSearcher.setSimilarity(searcher.getSimilarity());
  ParentOrdCollector collector = new ParentOrdCollector(globalIfd, valueCount, parentType);
  indexSearcher.search(childQuery, collector);

  final long remaining = collector.foundParents();
  if (remaining == 0) {
    return Queries.newMatchNoDocsQuery().createWeight(searcher);
  }

  Filter shortCircuitFilter = null;
  if (remaining <= shortCircuitParentDocSet) {
    shortCircuitFilter = ParentIdsFilter.createShortCircuitFilter(
        nonNestedDocsFilter, sc, parentType, collector.values, collector.parentOrds, remaining);
  }
  return new ParentWeight(parentFilter, globalIfd, shortCircuitFilter, collector, remaining);
}
/** accumulates per-segment single-valued facet counts */
static void accumSingle(
    int counts[],
    int startTermIndex,
    SortedDocValues si,
    DocIdSetIterator disi,
    int subIndex,
    OrdinalMap map)
    throws IOException {
  if (startTermIndex == -1 && (map == null || si.getValueCount() < disi.cost() * 10)) {
    // no prefixing, not too many unique values wrt matching docs (lucene/facets heuristic):
    // collect separately per-segment, then map to global ords
    accumSingleSeg(counts, si, disi, subIndex, map);
  } else {
    // otherwise: do collect+map on the fly
    accumSingleGeneric(counts, startTermIndex, si, disi, subIndex, map);
  }
}
/** * "typical" single-valued faceting: not too many unique values, no prefixing. maps to global * ordinals as a separate step */ static void accumSingleSeg( int counts[], SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { // First count in seg-ord space: final int segCounts[]; if (map == null) { segCounts = counts; } else { segCounts = new int[1 + si.getValueCount()]; } int doc; while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { segCounts[1 + si.getOrd(doc)]++; } // migrate to global ords (if necessary) if (map != null) { migrateGlobal(counts, segCounts, subIndex, map); } }
@Override
public FunctionValues getValues(Map context, LeafReaderContext readerContext)
    throws IOException {
  final int off = readerContext.docBase;
  final LeafReader r;
  Object o = context.get("searcher");
  if (o instanceof SolrIndexSearcher) {
    SolrIndexSearcher is = (SolrIndexSearcher) o;
    SchemaField sf = is.getSchema().getFieldOrNull(field);
    if (sf != null
        && sf.hasDocValues() == false
        && sf.multiValued() == false
        && sf.getType().getNumericType() != null) {
      // it's a single-valued numeric field: we must currently create insanity :(
      List<LeafReaderContext> leaves = is.getIndexReader().leaves();
      LeafReader insaneLeaves[] = new LeafReader[leaves.size()];
      int upto = 0;
      for (LeafReaderContext raw : leaves) {
        insaneLeaves[upto++] = Insanity.wrapInsanity(raw.reader(), field);
      }
      r = SlowCompositeReaderWrapper.wrap(new MultiReader(insaneLeaves));
    } else {
      // reuse ordinalmap
      r = ((SolrIndexSearcher) o).getLeafReader();
    }
  } else {
    IndexReader topReader = ReaderUtil.getTopLevelContext(readerContext).reader();
    r = SlowCompositeReaderWrapper.wrap(topReader);
  }
  // if it's e.g. tokenized/multivalued, emulate old behavior of single-valued fc
  final SortedDocValues sindex =
      SortedSetSelector.wrap(DocValues.getSortedSet(r, field), SortedSetSelector.Type.MIN);
  final int end = sindex.getValueCount();

  return new IntDocValues(this) {
    @Override
    public int intVal(int doc) {
      return (end - sindex.getOrd(doc + off) - 1);
    }
  };
}
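// --- Added note: a worked example of the reversal in intVal above, not part
// of the original source. With end == 4 unique values:
//   ord 0  -> 4 - 0 - 1 = 3    (first term in sort order gets the largest value)
//   ord 3  -> 4 - 3 - 1 = 0    (last term gets 0)
//   ord -1 -> 4 - (-1) - 1 = 4 (docs with no value land one above every real ord)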
public CollapsingScoreCollector(
    int maxDoc, int segments, SortedDocValues values, int nullPolicy, IntIntOpenHashMap boostDocs) {
  this.maxDoc = maxDoc;
  this.contexts = new LeafReaderContext[segments];
  this.collapsedSet = new FixedBitSet(maxDoc);
  this.boostDocs = boostDocs;
  if (this.boostDocs != null) {
    // Set the elevated docs now.
    IntOpenHashSet boostG = new IntOpenHashSet();
    Iterator<IntIntCursor> it = this.boostDocs.iterator();
    while (it.hasNext()) {
      IntIntCursor cursor = it.next();
      int i = cursor.key;
      this.collapsedSet.set(i);
      int ord = values.getOrd(i);
      if (ord > -1) {
        boostG.add(ord);
      }
    }
    boostOrds = boostG.toArray();
    Arrays.sort(boostOrds);
  }
  this.values = values;
  int valueCount = values.getValueCount();
  this.ords = new int[valueCount];
  Arrays.fill(this.ords, -1);
  this.scores = new float[valueCount];
  Arrays.fill(this.scores, -Float.MAX_VALUE);
  this.nullPolicy = nullPolicy;
  if (nullPolicy == CollapsingPostFilter.NULL_POLICY_EXPAND) {
    nullScores = new FloatArrayList();
  }
}
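// --- Added sketch (hypothetical helper, not the actual Solr collect method).
// The ords/scores arrays sized to valueCount above, initialized to -1 and
// -Float.MAX_VALUE, support a per-hit collapse step that keeps only the
// highest-scoring doc per group ordinal, roughly: ---
void collapseByScore(int globalDoc, int ord, float score) {
  if (ord > -1 && score > scores[ord]) {
    ords[ord] = globalDoc; // best doc seen so far for this group
    scores[ord] = score;   // and its score
  }
}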
/**
 * Merges the sorted docvalues from <code>toMerge</code>.
 *
 * <p>The default implementation calls {@link #addSortedField}, passing an Iterable that merges
 * ordinals and values and filters deleted documents.
 */
public void mergeSortedField(
    FieldInfo fieldInfo, final MergeState mergeState, List<SortedDocValues> toMerge)
    throws IOException {
  mergeState.checkAbort.work(mergeState.segmentInfo.getDocCount());
  final AtomicReader readers[] = mergeState.readers.toArray(new AtomicReader[toMerge.size()]);
  final SortedDocValues dvs[] = toMerge.toArray(new SortedDocValues[toMerge.size()]);

  // step 1: iterate through each sub and mark terms still in use
  TermsEnum liveTerms[] = new TermsEnum[dvs.length];
  long[] weights = new long[liveTerms.length];
  for (int sub = 0; sub < liveTerms.length; sub++) {
    AtomicReader reader = readers[sub];
    SortedDocValues dv = dvs[sub];
    Bits liveDocs = reader.getLiveDocs();
    if (liveDocs == null) {
      liveTerms[sub] = dv.termsEnum();
      weights[sub] = dv.getValueCount();
    } else {
      LongBitSet bitset = new LongBitSet(dv.getValueCount());
      for (int i = 0; i < reader.maxDoc(); i++) {
        if (liveDocs.get(i)) {
          int ord = dv.getOrd(i);
          if (ord >= 0) {
            bitset.set(ord);
          }
        }
      }
      liveTerms[sub] = new BitsFilteredTermsEnum(dv.termsEnum(), bitset);
      weights[sub] = bitset.cardinality();
    }
  }

  // step 2: create ordinal map (this conceptually does the "merging")
  final OrdinalMap map = OrdinalMap.build(this, liveTerms, weights, PackedInts.COMPACT);

  // step 3: add field
  addSortedField(
      fieldInfo,
      // ord -> value
      new Iterable<BytesRef>() {
        @Override
        public Iterator<BytesRef> iterator() {
          return new Iterator<BytesRef>() {
            int currentOrd;

            @Override
            public boolean hasNext() {
              return currentOrd < map.getValueCount();
            }

            @Override
            public BytesRef next() {
              if (!hasNext()) {
                throw new NoSuchElementException();
              }
              int segmentNumber = map.getFirstSegmentNumber(currentOrd);
              int segmentOrd = (int) map.getFirstSegmentOrd(currentOrd);
              final BytesRef term = dvs[segmentNumber].lookupOrd(segmentOrd);
              currentOrd++;
              return term;
            }

            @Override
            public void remove() {
              throw new UnsupportedOperationException();
            }
          };
        }
      },
      // doc -> ord
      new Iterable<Number>() {
        @Override
        public Iterator<Number> iterator() {
          return new Iterator<Number>() {
            int readerUpto = -1;
            int docIDUpto;
            int nextValue;
            AtomicReader currentReader;
            Bits currentLiveDocs;
            LongValues currentMap;
            boolean nextIsSet;

            @Override
            public boolean hasNext() {
              return nextIsSet || setNext();
            }

            @Override
            public void remove() {
              throw new UnsupportedOperationException();
            }

            @Override
            public Number next() {
              if (!hasNext()) {
                throw new NoSuchElementException();
              }
              assert nextIsSet;
              nextIsSet = false;
              // TODO make a mutable number
              return nextValue;
            }

            private boolean setNext() {
              while (true) {
                if (readerUpto == readers.length) {
                  return false;
                }
                if (currentReader == null || docIDUpto == currentReader.maxDoc()) {
                  readerUpto++;
                  if (readerUpto < readers.length) {
                    currentReader = readers[readerUpto];
                    currentLiveDocs = currentReader.getLiveDocs();
                    currentMap = map.getGlobalOrds(readerUpto);
                  }
                  docIDUpto = 0;
                  continue;
                }
                if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
                  nextIsSet = true;
                  int segOrd = dvs[readerUpto].getOrd(docIDUpto);
                  nextValue = segOrd == -1 ? -1 : (int) currentMap.get(segOrd);
                  docIDUpto++;
                  return true;
                }
                docIDUpto++;
              }
            }
          };
        }
      });
}
public CollapsingFieldValueCollector(
    int maxDoc,
    int segments,
    SortedDocValues values,
    int nullPolicy,
    String field,
    boolean max,
    boolean needsScores,
    FieldType fieldType,
    IntIntOpenHashMap boostDocs,
    FunctionQuery funcQuery,
    IndexSearcher searcher)
    throws IOException {
  this.maxDoc = maxDoc;
  this.contexts = new LeafReaderContext[segments];
  this.values = values;
  int valueCount = values.getValueCount();
  this.nullPolicy = nullPolicy;
  this.needsScores = needsScores;
  this.boostDocs = boostDocs;
  if (funcQuery != null) {
    this.fieldValueCollapse =
        new ValueSourceCollapse(
            maxDoc, field, nullPolicy, new int[valueCount], max, this.needsScores, boostDocs,
            funcQuery, searcher, values);
  } else {
    if (fieldType instanceof TrieIntField) {
      this.fieldValueCollapse =
          new IntValueCollapse(
              maxDoc, field, nullPolicy, new int[valueCount], max, this.needsScores, boostDocs,
              values);
    } else if (fieldType instanceof TrieLongField) {
      this.fieldValueCollapse =
          new LongValueCollapse(
              maxDoc, field, nullPolicy, new int[valueCount], max, this.needsScores, boostDocs,
              values);
    } else if (fieldType instanceof TrieFloatField) {
      this.fieldValueCollapse =
          new FloatValueCollapse(
              maxDoc, field, nullPolicy, new int[valueCount], max, this.needsScores, boostDocs,
              values);
    } else {
      throw new IOException("min/max must be either TrieInt, TrieLong or TrieFloat.");
    }
  }
}
public void testDocValuesMemoryIndexVsNormalIndex() throws Exception {
  Document doc = new Document();
  long randomLong = random().nextLong();
  doc.add(new NumericDocValuesField("numeric", randomLong));
  if (random().nextBoolean()) {
    doc.add(new LegacyLongField("numeric", randomLong, Field.Store.NO));
  }

  int numValues = atLeast(5);
  for (int i = 0; i < numValues; i++) {
    randomLong = random().nextLong();
    doc.add(new SortedNumericDocValuesField("sorted_numeric", randomLong));
    if (random().nextBoolean()) {
      // randomly duplicate field/value
      doc.add(new SortedNumericDocValuesField("sorted_numeric", randomLong));
    }
    if (random().nextBoolean()) {
      doc.add(new LegacyLongField("numeric", randomLong, Field.Store.NO));
    }
  }

  BytesRef randomTerm = new BytesRef(randomTerm());
  doc.add(new BinaryDocValuesField("binary", randomTerm));
  if (random().nextBoolean()) {
    doc.add(new StringField("binary", randomTerm, Field.Store.NO));
  }

  randomTerm = new BytesRef(randomTerm());
  doc.add(new SortedDocValuesField("sorted", randomTerm));
  if (random().nextBoolean()) {
    doc.add(new StringField("sorted", randomTerm, Field.Store.NO));
  }

  numValues = atLeast(5);
  for (int i = 0; i < numValues; i++) {
    randomTerm = new BytesRef(randomTerm());
    doc.add(new SortedSetDocValuesField("sorted_set", randomTerm));
    if (random().nextBoolean()) {
      // randomly duplicate field/value
      doc.add(new SortedSetDocValuesField("sorted_set", randomTerm));
    }
    if (random().nextBoolean()) {
      // randomly just add a normal string field
      doc.add(new StringField("sorted_set", randomTerm, Field.Store.NO));
    }
  }

  MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
  MemoryIndex memoryIndex = MemoryIndex.fromDocument(doc, mockAnalyzer);
  IndexReader indexReader = memoryIndex.createSearcher().getIndexReader();
  LeafReader leafReader = indexReader.leaves().get(0).reader();

  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(random(), mockAnalyzer));
  writer.addDocument(doc);
  writer.close();
  IndexReader controlIndexReader = DirectoryReader.open(dir);
  LeafReader controlLeafReader = controlIndexReader.leaves().get(0).reader();

  NumericDocValues numericDocValues = leafReader.getNumericDocValues("numeric");
  NumericDocValues controlNumericDocValues = controlLeafReader.getNumericDocValues("numeric");
  assertEquals(controlNumericDocValues.get(0), numericDocValues.get(0));

  SortedNumericDocValues sortedNumericDocValues =
      leafReader.getSortedNumericDocValues("sorted_numeric");
  sortedNumericDocValues.setDocument(0);
  SortedNumericDocValues controlSortedNumericDocValues =
      controlLeafReader.getSortedNumericDocValues("sorted_numeric");
  controlSortedNumericDocValues.setDocument(0);
  assertEquals(controlSortedNumericDocValues.count(), sortedNumericDocValues.count());
  for (int i = 0; i < controlSortedNumericDocValues.count(); i++) {
    assertEquals(controlSortedNumericDocValues.valueAt(i), sortedNumericDocValues.valueAt(i));
  }

  BinaryDocValues binaryDocValues = leafReader.getBinaryDocValues("binary");
  BinaryDocValues controlBinaryDocValues = controlLeafReader.getBinaryDocValues("binary");
  assertEquals(controlBinaryDocValues.get(0), binaryDocValues.get(0));

  SortedDocValues sortedDocValues = leafReader.getSortedDocValues("sorted");
  SortedDocValues controlSortedDocValues = controlLeafReader.getSortedDocValues("sorted");
  assertEquals(controlSortedDocValues.getValueCount(), sortedDocValues.getValueCount());
  assertEquals(controlSortedDocValues.get(0), sortedDocValues.get(0));
  assertEquals(controlSortedDocValues.getOrd(0), sortedDocValues.getOrd(0));
  assertEquals(controlSortedDocValues.lookupOrd(0), sortedDocValues.lookupOrd(0));

  SortedSetDocValues sortedSetDocValues = leafReader.getSortedSetDocValues("sorted_set");
  sortedSetDocValues.setDocument(0);
  SortedSetDocValues controlSortedSetDocValues =
      controlLeafReader.getSortedSetDocValues("sorted_set");
  controlSortedSetDocValues.setDocument(0);
  assertEquals(controlSortedSetDocValues.getValueCount(), sortedSetDocValues.getValueCount());
  for (long controlOrd = controlSortedSetDocValues.nextOrd();
      controlOrd != SortedSetDocValues.NO_MORE_ORDS;
      controlOrd = controlSortedSetDocValues.nextOrd()) {
    assertEquals(controlOrd, sortedSetDocValues.nextOrd());
    assertEquals(
        controlSortedSetDocValues.lookupOrd(controlOrd),
        sortedSetDocValues.lookupOrd(controlOrd));
  }
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSetDocValues.nextOrd());

  indexReader.close();
  controlIndexReader.close();
  dir.close();
}
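// --- Added sketch, assuming a newer Lucene version than the test targets.
// The test above uses the older random-access DocValues API (get(0),
// setDocument(0), getOrd(0)). Under the iterator-based API (Lucene 7+), the
// same "numeric" assertion would be phrased with advanceExact/longValue: ---
NumericDocValues ndv = leafReader.getNumericDocValues("numeric");
NumericDocValues controlNdv = controlLeafReader.getNumericDocValues("numeric");
assertTrue(ndv.advanceExact(0));        // position both iterators on doc 0
assertTrue(controlNdv.advanceExact(0));
assertEquals(controlNdv.longValue(), ndv.longValue());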