/**
 * Creates the collapse state for a single field.
 *
 * <p>Stores the supplied settings, allocates a {@code maxDoc}-sized bit set for the collapsed
 * documents and, when boosted documents are supplied, pre-marks each of them in that bit set and
 * records the sorted set of ordinals they carry.
 *
 * @param maxDoc size of the collapsed-document bit set
 * @param field name of the field, stored as given
 * @param nullPolicy null policy code, stored as given
 * @param max stored as given
 * @param needsScores stored as given
 * @param boostDocs boosted documents keyed by doc id, or {@code null} when there are none
 * @param values doc values used to look up the ordinal of each boosted document
 */
public FieldValueCollapse(
    int maxDoc,
    String field,
    int nullPolicy,
    boolean max,
    boolean needsScores,
    IntIntOpenHashMap boostDocs,
    SortedDocValues values) {
  this.field = field;
  this.nullPolicy = nullPolicy;
  this.max = max;
  this.needsScores = needsScores;
  this.collapsedSet = new FixedBitSet(maxDoc);
  this.boostDocs = boostDocs;

  if (boostDocs == null) {
    // Nothing is boosted: boostOrds is left unassigned.
    return;
  }

  final IntOpenHashSet boostedOrdSet = new IntOpenHashSet();
  for (Iterator<IntIntCursor> cursors = boostDocs.iterator(); cursors.hasNext(); ) {
    final int boostedDoc = cursors.next().key;
    // Boosted docs are always part of the collapsed set.
    this.collapsedSet.set(boostedDoc);
    final int boostedOrd = values.getOrd(boostedDoc);
    if (boostedOrd >= 0) {
      boostedOrdSet.add(boostedOrd);
    }
  }

  final int[] sortedBoostOrds = boostedOrdSet.toArray();
  Arrays.sort(sortedBoostOrds);
  this.boostOrds = sortedBoostOrds;
}
@Override public void collect(int docId) throws IOException { int globalDoc = docId + this.docBase; int ord = values.getOrd(globalDoc); if (ord > -1) { float score = scorer.score(); if (score > scores[ord]) { ords[ord] = globalDoc; scores[ord] = score; } } else if (this.collapsedSet.get(globalDoc)) { // The doc is elevated so score does not matter // We just want to be sure it doesn't fall into the null policy } else if (nullPolicy == CollapsingPostFilter.NULL_POLICY_COLLAPSE) { float score = scorer.score(); if (score > nullScore) { nullScore = score; nullDoc = globalDoc; } } else if (nullPolicy == CollapsingPostFilter.NULL_POLICY_EXPAND) { collapsedSet.set(globalDoc); nullScores.add(scorer.score()); } }
/**
 * Builds single-valued ordinals for one million documents, starting a new ordinal every quarter
 * of the documents, and checks that the built ordinals report the same ordinal per document as
 * the control map.
 */
@Test
public void testSvValues() throws IOException {
  final int docCount = 1000000;
  final int docsPerOrdinal = docCount / 4;
  final Map<Integer, Long> expectedOrdByDoc = new HashMap<>();
  final OrdinalsBuilder builder = new OrdinalsBuilder(docCount);

  long currentOrd = builder.currentOrdinal();
  int docId = 0;
  while (docId < docCount) {
    if (docId % docsPerOrdinal == 0) {
      // Start a new ordinal at every quarter boundary (including doc 0).
      currentOrd = builder.nextOrdinal();
    }
    expectedOrdByDoc.put(docId, currentOrd);
    builder.addDoc(docId);
    docId++;
  }

  final Ordinals built = builder.build(ImmutableSettings.EMPTY);
  assertThat(built, instanceOf(SinglePackedOrdinals.class));

  final RandomAccessOrds multiView = built.ordinals();
  final SortedDocValues singleView = DocValues.unwrapSingleton(multiView);
  assertNotNull(singleView);

  for (Map.Entry<Integer, Long> expected : expectedOrdByDoc.entrySet()) {
    final Long expectedOrd = expected.getValue();
    final long actualOrd = (long) singleView.getOrd(expected.getKey());
    assertThat(expectedOrd, equalTo(actualOrd));
  }
}
@Override public void collect(int doc) throws IOException { if (values != null) { int globalOrdinal = values.getOrd(doc); // TODO: oversize the long bitset and remove the branch if (globalOrdinal >= 0) { parentOrds.set(globalOrdinal); } } }
/**
 * Replays every document in the collapsed set to the delegate collector.
 *
 * <p>Iterates the bit set returned by {@code fieldValueCollapse.getCollapsedSet()}, converts each
 * id to a segment-local id using the {@code docBase} of the matching entry in {@code contexts},
 * and calls {@code collect} on that segment's leaf collector. When {@code needsScores} is set, a
 * shared {@code DummyScorer} is populated with the score for the document before it is collected.
 * Finally, {@code finish()} is forwarded to the delegate if it is a {@code DelegatingCollector}.
 *
 * <p>Returns immediately when {@code contexts} is empty.
 *
 * @throws IOException if the delegate or the doc-values lookup fails
 */
public void finish() throws IOException {
  if (contexts.length == 0) {
    return;
  }

  // Cursor over the segments; nextDocBase is the first global id that belongs to the next
  // segment (or maxDoc when the current segment is the last one).
  int currentContext = 0;
  int currentDocBase = 0;
  int nextDocBase =
      currentContext + 1 < contexts.length ? contexts[currentContext + 1].docBase : maxDoc;
  leafDelegate = delegate.getLeafCollector(contexts[currentContext]);

  // A single scorer instance is reused for every segment; its fields are overwritten per doc.
  DummyScorer dummy = new DummyScorer();
  leafDelegate.setScorer(dummy);
  DocIdSetIterator it =
      new BitSetIterator(fieldValueCollapse.getCollapsedSet(), 0); // cost is not useful here
  int docId = -1;
  // Position of the next unread entry in nullScores (used under the EXPAND policy only).
  int nullScoreIndex = 0;
  float[] scores = fieldValueCollapse.getScores();
  FloatArrayList nullScores = fieldValueCollapse.getNullScores();
  float nullScore = fieldValueCollapse.getNullScore();

  while ((docId = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    if (this.needsScores) {
      int ord = values.getOrd(docId);
      if (ord > -1) {
        // Doc has an ordinal: use the score stored for that ordinal.
        dummy.score = scores[ord];
      } else if (boostDocs != null && boostDocs.containsKey(docId)) {
        // It's an elevated doc so no score is needed
        dummy.score = 0F;
      } else if (nullPolicy == CollapsingPostFilter.NULL_POLICY_COLLAPSE) {
        dummy.score = nullScore;
      } else if (nullPolicy == CollapsingPostFilter.NULL_POLICY_EXPAND) {
        // NOTE(review): relies on nullScores having been filled in the same order as the
        // bit set is iterated here -- confirm against the code that populates it.
        dummy.score = nullScores.get(nullScoreIndex++);
      }
    }

    // Advance to the segment that contains docId; this presumes ids arrive in ascending order.
    while (docId >= nextDocBase) {
      currentContext++;
      currentDocBase = contexts[currentContext].docBase;
      nextDocBase =
          currentContext + 1 < contexts.length ? contexts[currentContext + 1].docBase : maxDoc;
      leafDelegate = delegate.getLeafCollector(contexts[currentContext]);
      leafDelegate.setScorer(dummy);
    }

    // Translate the id into the current segment's id space before delegating.
    int contextDoc = docId - currentDocBase;
    dummy.docId = contextDoc;
    leafDelegate.collect(contextDoc);
  }

  if (delegate instanceof DelegatingCollector) {
    ((DelegatingCollector) delegate).finish();
  }
}
/**
 * Explains the score of {@code doc} in the given segment.
 *
 * <p>Produces a match carrying {@code queryNorm} and the document's join value when the document
 * has an ordinal for {@code joinField}; otherwise produces a "Not a match" explanation.
 */
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
  final SortedDocValues joinValues = DocValues.getSorted(context.reader(), joinField);
  if (joinValues == null) {
    return Explanation.noMatch("Not a match");
  }

  final int segmentOrd = joinValues.getOrd(doc);
  if (segmentOrd == -1) {
    return Explanation.noMatch("Not a match");
  }

  final BytesRef joinValue = joinValues.lookupOrd(segmentOrd);
  final String description = "Score based on join value " + joinValue.utf8ToString();
  return Explanation.match(queryNorm, description);
}
/**
 * Tells whether {@code doc} has an ordinal that is set in {@code parentOrds}.
 *
 * <p>Each successful match decrements {@code parentWeight.remaining}; once that counter has
 * reached zero the next call terminates collection.
 *
 * @throws CollectionTerminatedException when {@code parentWeight.remaining} is zero on entry
 */
@Override
protected boolean match(int doc) {
  if (parentWeight.remaining == 0) {
    throw new CollectionTerminatedException();
  }

  final long parentOrd = ordinals.getOrd(doc);
  if (parentOrd < 0) {
    return false;
  }
  if (!parentOrds.get(parentOrd)) {
    return false;
  }

  parentWeight.remaining--;
  return true;
}
// specialized since the single-valued case is different static void accumSingle( int counts[], int startTermIndex, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { int doc; while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { int term = si.getOrd(doc); if (map != null && term >= 0) { term = (int) map.getGlobalOrd(subIndex, term); } int arrIdx = term - startTermIndex; if (arrIdx >= 0 && arrIdx < counts.length) counts[arrIdx]++; } }
/**
 * Explains the score of {@code doc} in the given segment.
 *
 * <p>When the document has an ordinal for {@code joinField}, the score is read from the
 * collector, using the global ordinal if {@code globalOrds} is available and the segment ordinal
 * otherwise. Documents without an ordinal yield a "Not a match" explanation.
 */
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
  final SortedDocValues joinValues = DocValues.getSorted(context.reader(), joinField);
  if (joinValues == null) {
    return Explanation.noMatch("Not a match");
  }

  final int segmentOrd = joinValues.getOrd(doc);
  if (segmentOrd == -1) {
    return Explanation.noMatch("Not a match");
  }

  // Resolve which ordinal space the collector's scores are keyed by.
  final int scoreOrd;
  if (globalOrds == null) {
    scoreOrd = segmentOrd;
  } else {
    final long mapped = globalOrds.getGlobalOrds(context.ord).get(segmentOrd);
    scoreOrd = (int) mapped;
  }
  final float score = collector.score(scoreOrd);

  final BytesRef joinValue = joinValues.lookupOrd(segmentOrd);
  final String description = "Score based on join value " + joinValue.utf8ToString();
  return Explanation.match(score, description);
}
/** * "typical" single-valued faceting: not too many unique values, no prefixing. maps to global * ordinals as a separate step */ static void accumSingleSeg( int counts[], SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { // First count in seg-ord space: final int segCounts[]; if (map == null) { segCounts = counts; } else { segCounts = new int[1 + si.getValueCount()]; } int doc; while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { segCounts[1 + si.getOrd(doc)]++; } // migrate to global ords (if necessary) if (map != null) { migrateGlobal(counts, segCounts, subIndex, map); } }
public CollapsingScoreCollector( int maxDoc, int segments, SortedDocValues values, int nullPolicy, IntIntOpenHashMap boostDocs) { this.maxDoc = maxDoc; this.contexts = new LeafReaderContext[segments]; this.collapsedSet = new FixedBitSet(maxDoc); this.boostDocs = boostDocs; if (this.boostDocs != null) { // Set the elevated docs now. IntOpenHashSet boostG = new IntOpenHashSet(); Iterator<IntIntCursor> it = this.boostDocs.iterator(); while (it.hasNext()) { IntIntCursor cursor = it.next(); int i = cursor.key; this.collapsedSet.set(i); int ord = values.getOrd(i); if (ord > -1) { boostG.add(ord); } } boostOrds = boostG.toArray(); Arrays.sort(boostOrds); } this.values = values; int valueCount = values.getValueCount(); this.ords = new int[valueCount]; Arrays.fill(this.ords, -1); this.scores = new float[valueCount]; Arrays.fill(this.scores, -Float.MAX_VALUE); this.nullPolicy = nullPolicy; if (nullPolicy == CollapsingPostFilter.NULL_POLICY_EXPAND) { nullScores = new FloatArrayList(); } }
/**
 * Merges the sorted docvalues from <code>toMerge</code>.
 *
 * <p>The default implementation calls {@link #addSortedField}, passing an Iterable that merges
 * ordinals and values and filters deleted documents.
 *
 * <p>The work is done in three steps: (1) for every sub-reader, collect the terms that are still
 * referenced by a live document; (2) build an {@code OrdinalMap} over those terms; (3) hand
 * {@link #addSortedField} two lazy views, one enumerating the merged values in global-ordinal
 * order and one enumerating the global ordinal of every live document.
 *
 * @param fieldInfo the field being merged, passed through to {@link #addSortedField}
 * @param mergeState merge state supplying the readers, the abort check and the segment info
 * @param toMerge the per-reader sorted doc values to merge
 * @throws IOException if reading the doc values or writing the merged field fails
 */
public void mergeSortedField(
    FieldInfo fieldInfo, final MergeState mergeState, List<SortedDocValues> toMerge)
    throws IOException {
  mergeState.checkAbort.work(mergeState.segmentInfo.getDocCount());
  // NOTE(review): the readers array is sized from toMerge; this presumes mergeState.readers and
  // toMerge have the same length and are index-aligned -- confirm against the caller.
  final AtomicReader readers[] = mergeState.readers.toArray(new AtomicReader[toMerge.size()]);
  final SortedDocValues dvs[] = toMerge.toArray(new SortedDocValues[toMerge.size()]);

  // step 1: iterate thru each sub and mark terms still in use
  TermsEnum liveTerms[] = new TermsEnum[dvs.length];
  long[] weights = new long[liveTerms.length];
  for (int sub = 0; sub < liveTerms.length; sub++) {
    AtomicReader reader = readers[sub];
    SortedDocValues dv = dvs[sub];
    Bits liveDocs = reader.getLiveDocs();
    if (liveDocs == null) {
      // No live-docs bits for this reader: every term is used as-is.
      liveTerms[sub] = dv.termsEnum();
      weights[sub] = dv.getValueCount();
    } else {
      // Record the ordinals referenced by at least one live document.
      LongBitSet bitset = new LongBitSet(dv.getValueCount());
      for (int i = 0; i < reader.maxDoc(); i++) {
        if (liveDocs.get(i)) {
          int ord = dv.getOrd(i);
          if (ord >= 0) {
            bitset.set(ord);
          }
        }
      }
      liveTerms[sub] = new BitsFilteredTermsEnum(dv.termsEnum(), bitset);
      weights[sub] = bitset.cardinality();
    }
  }

  // step 2: create ordinal map (this conceptually does the "merging")
  final OrdinalMap map = OrdinalMap.build(this, liveTerms, weights, PackedInts.COMPACT);

  // step 3: add field
  addSortedField(
      fieldInfo,
      // ord -> value
      new Iterable<BytesRef>() {
        @Override
        public Iterator<BytesRef> iterator() {
          return new Iterator<BytesRef>() {
            // Next global ordinal to return.
            int currentOrd;

            @Override
            public boolean hasNext() {
              return currentOrd < map.getValueCount();
            }

            @Override
            public BytesRef next() {
              if (!hasNext()) {
                throw new NoSuchElementException();
              }
              // Resolve the global ordinal through the segment/ordinal pair the map reports
              // for it, then read the term from that segment's doc values.
              int segmentNumber = map.getFirstSegmentNumber(currentOrd);
              int segmentOrd = (int) map.getFirstSegmentOrd(currentOrd);
              final BytesRef term = dvs[segmentNumber].lookupOrd(segmentOrd);
              currentOrd++;
              return term;
            }

            @Override
            public void remove() {
              throw new UnsupportedOperationException();
            }
          };
        }
      },
      // doc -> ord
      new Iterable<Number>() {
        @Override
        public Iterator<Number> iterator() {
          return new Iterator<Number>() {
            // Index of the reader being walked; -1 until the first reader is entered.
            int readerUpto = -1;
            // Next doc id to examine within the current reader.
            int docIDUpto;
            // Value buffered by setNext() for the following next() call.
            int nextValue;
            AtomicReader currentReader;
            Bits currentLiveDocs;
            // Segment-ordinal to global-ordinal mapping for the current reader.
            LongValues currentMap;
            // True while nextValue holds a value that has not been returned yet.
            boolean nextIsSet;

            @Override
            public boolean hasNext() {
              return nextIsSet || setNext();
            }

            @Override
            public void remove() {
              throw new UnsupportedOperationException();
            }

            @Override
            public Number next() {
              if (!hasNext()) {
                throw new NoSuchElementException();
              }
              assert nextIsSet;
              nextIsSet = false;
              // TODO make a mutable number
              return nextValue;
            }

            // Advances to the next live document across all readers and buffers its global
            // ordinal; returns false once every reader has been exhausted.
            private boolean setNext() {
              while (true) {
                if (readerUpto == readers.length) {
                  return false;
                }

                if (currentReader == null || docIDUpto == currentReader.maxDoc()) {
                  // Current reader is finished (or none was started yet): move to the next one.
                  readerUpto++;
                  if (readerUpto < readers.length) {
                    currentReader = readers[readerUpto];
                    currentLiveDocs = currentReader.getLiveDocs();
                    currentMap = map.getGlobalOrds(readerUpto);
                  }
                  docIDUpto = 0;
                  continue;
                }

                if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
                  nextIsSet = true;
                  int segOrd = dvs[readerUpto].getOrd(docIDUpto);
                  // A segment ordinal of -1 is passed through unchanged instead of being mapped.
                  nextValue = segOrd == -1 ? -1 : (int) currentMap.get(segOrd);
                  docIDUpto++;
                  return true;
                }

                // Deleted document: skip it.
                docIDUpto++;
              }
            }
          };
        }
      });
}
/**
 * Looks up the ordinal of the document and forwards it to {@code fieldValueCollapse}.
 *
 * @param docId segment-local document id; {@code docBase} is added to obtain the global id
 */
public void collect(int docId) throws IOException {
  final int globalDoc = this.docBase + docId;
  final int ord = values.getOrd(globalDoc);
  fieldValueCollapse.collapse(ord, docId, globalDoc);
}
/**
 * Finalizes the collapsed set and replays it to the delegate collector.
 *
 * <p>First completes {@code collapsedSet}: adds the collapsed null document when its score is
 * positive, clears the per-ordinal winners of every boosted ordinal, and adds the remaining
 * per-ordinal winners. Then iterates the set, fills a shared {@code DummyScorer} with the score
 * for each document, converts the id to a segment-local id using the {@code docBase} of the
 * matching entry in {@code contexts}, and calls {@code collect} on that segment's leaf collector.
 * Finally, {@code finish()} is forwarded to the delegate if it is a {@code DelegatingCollector}.
 *
 * <p>Returns immediately when {@code contexts} is empty.
 *
 * @throws IOException if the delegate or the doc-values lookup fails
 */
@Override
public void finish() throws IOException {
  if (contexts.length == 0) {
    return;
  }

  // NOTE(review): a collapsed null document whose best score is zero or negative is not added
  // here -- confirm that this is intended and what nullScore/nullDoc are initialized to.
  if (nullScore > 0) {
    this.collapsedSet.set(nullDoc);
  }

  if (this.boostOrds != null) {
    // Drop the per-ordinal winner of every boosted ordinal so it is not added to the set below.
    for (int i = 0; i < this.boostOrds.length; i++) {
      ords[boostOrds[i]] = -1;
    }
  }

  // Add the winning document of every ordinal that has one (-1 means no winner).
  for (int i = 0; i < ords.length; i++) {
    int doc = ords[i];
    if (doc > -1) {
      collapsedSet.set(doc);
    }
  }

  // Cursor over the segments; nextDocBase is the first global id that belongs to the next
  // segment (or maxDoc when the current segment is the last one).
  int currentContext = 0;
  int currentDocBase = 0;
  int nextDocBase =
      currentContext + 1 < contexts.length ? contexts[currentContext + 1].docBase : maxDoc;
  leafDelegate = delegate.getLeafCollector(contexts[currentContext]);

  // A single scorer instance is reused for every segment; its fields are overwritten per doc.
  DummyScorer dummy = new DummyScorer();
  leafDelegate.setScorer(dummy);
  DocIdSetIterator it = new BitSetIterator(collapsedSet, 0L); // cost is not useful here
  int docId = -1;
  // Position of the next unread entry in nullScores (used under the EXPAND policy only).
  int nullScoreIndex = 0;

  while ((docId = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    int ord = values.getOrd(docId);
    if (ord > -1) {
      // Doc has an ordinal: use the score stored for that ordinal.
      dummy.score = scores[ord];
    } else if (this.boostDocs != null && boostDocs.containsKey(docId)) {
      // Elevated docs don't need a score.
      dummy.score = 0F;
    } else if (nullPolicy == CollapsingPostFilter.NULL_POLICY_COLLAPSE) {
      dummy.score = nullScore;
    } else if (nullPolicy == CollapsingPostFilter.NULL_POLICY_EXPAND) {
      // NOTE(review): relies on nullScores having been filled in the same order as the bit
      // set is iterated here -- confirm against the code that populates it.
      dummy.score = nullScores.get(nullScoreIndex++);
    }

    // Advance to the segment that contains docId; this presumes ids arrive in ascending order.
    while (docId >= nextDocBase) {
      currentContext++;
      currentDocBase = contexts[currentContext].docBase;
      nextDocBase =
          currentContext + 1 < contexts.length ? contexts[currentContext + 1].docBase : maxDoc;
      leafDelegate = delegate.getLeafCollector(contexts[currentContext]);
      leafDelegate.setScorer(dummy);
    }

    // Translate the id into the current segment's id space before delegating.
    int contextDoc = docId - currentDocBase;
    dummy.docId = contextDoc;
    leafDelegate.collect(contextDoc);
  }

  if (delegate instanceof DelegatingCollector) {
    ((DelegatingCollector) delegate).finish();
  }
}
/**
 * Indexes one randomly generated document both into a {@code MemoryIndex} and into a regular
 * index written through {@code IndexWriter}, then asserts that the numeric, sorted-numeric,
 * binary, sorted and sorted-set doc values read from the two indexes are equal.
 */
public void testDocValuesMemoryIndexVsNormalIndex() throws Exception {
  Document doc = new Document();

  // Numeric doc value, randomly accompanied by an indexed field of the same name.
  long randomLong = random().nextLong();
  doc.add(new NumericDocValuesField("numeric", randomLong));
  if (random().nextBoolean()) {
    doc.add(new LegacyLongField("numeric", randomLong, Field.Store.NO));
  }

  // Several sorted-numeric values.
  int numValues = atLeast(5);
  for (int i = 0; i < numValues; i++) {
    randomLong = random().nextLong();
    doc.add(new SortedNumericDocValuesField("sorted_numeric", randomLong));
    if (random().nextBoolean()) {
      // randomly duplicate field/value
      doc.add(new SortedNumericDocValuesField("sorted_numeric", randomLong));
    }
    if (random().nextBoolean()) {
      // NOTE(review): the indexed field added here is named "numeric", not "sorted_numeric" --
      // confirm that this is intended.
      doc.add(new LegacyLongField("numeric", randomLong, Field.Store.NO));
    }
  }

  // Binary doc value, randomly accompanied by an indexed string field of the same name.
  BytesRef randomTerm = new BytesRef(randomTerm());
  doc.add(new BinaryDocValuesField("binary", randomTerm));
  if (random().nextBoolean()) {
    doc.add(new StringField("binary", randomTerm, Field.Store.NO));
  }

  // Sorted doc value, randomly accompanied by an indexed string field of the same name.
  randomTerm = new BytesRef(randomTerm());
  doc.add(new SortedDocValuesField("sorted", randomTerm));
  if (random().nextBoolean()) {
    doc.add(new StringField("sorted", randomTerm, Field.Store.NO));
  }

  // Several sorted-set values.
  numValues = atLeast(5);
  for (int i = 0; i < numValues; i++) {
    randomTerm = new BytesRef(randomTerm());
    doc.add(new SortedSetDocValuesField("sorted_set", randomTerm));
    if (random().nextBoolean()) {
      // randomly duplicate field/value
      doc.add(new SortedSetDocValuesField("sorted_set", randomTerm));
    }
    if (random().nextBoolean()) {
      // randomly just add a normal string field
      doc.add(new StringField("sorted_set", randomTerm, Field.Store.NO));
    }
  }

  // Index under test: the in-memory index built directly from the document.
  MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
  MemoryIndex memoryIndex = MemoryIndex.fromDocument(doc, mockAnalyzer);
  IndexReader indexReader = memoryIndex.createSearcher().getIndexReader();
  LeafReader leafReader = indexReader.leaves().get(0).reader();

  // Control: the same document written through a regular IndexWriter.
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(random(), mockAnalyzer));
  writer.addDocument(doc);
  writer.close();
  IndexReader controlIndexReader = DirectoryReader.open(dir);
  LeafReader controlLeafReader = controlIndexReader.leaves().get(0).reader();

  // Numeric doc values of document 0 must agree.
  NumericDocValues numericDocValues = leafReader.getNumericDocValues("numeric");
  NumericDocValues controlNumericDocValues = controlLeafReader.getNumericDocValues("numeric");
  assertEquals(controlNumericDocValues.get(0), numericDocValues.get(0));

  // Sorted-numeric: same count and the same value at every position.
  SortedNumericDocValues sortedNumericDocValues =
      leafReader.getSortedNumericDocValues("sorted_numeric");
  sortedNumericDocValues.setDocument(0);
  SortedNumericDocValues controlSortedNumericDocValues =
      controlLeafReader.getSortedNumericDocValues("sorted_numeric");
  controlSortedNumericDocValues.setDocument(0);
  assertEquals(controlSortedNumericDocValues.count(), sortedNumericDocValues.count());
  for (int i = 0; i < controlSortedNumericDocValues.count(); i++) {
    assertEquals(controlSortedNumericDocValues.valueAt(i), sortedNumericDocValues.valueAt(i));
  }

  // Binary doc values of document 0 must agree.
  BinaryDocValues binaryDocValues = leafReader.getBinaryDocValues("binary");
  BinaryDocValues controlBinaryDocValues = controlLeafReader.getBinaryDocValues("binary");
  assertEquals(controlBinaryDocValues.get(0), binaryDocValues.get(0));

  // Sorted: same value count, value, ordinal and ordinal lookup.
  SortedDocValues sortedDocValues = leafReader.getSortedDocValues("sorted");
  SortedDocValues controlSortedDocValues = controlLeafReader.getSortedDocValues("sorted");
  assertEquals(controlSortedDocValues.getValueCount(), sortedDocValues.getValueCount());
  assertEquals(controlSortedDocValues.get(0), sortedDocValues.get(0));
  assertEquals(controlSortedDocValues.getOrd(0), sortedDocValues.getOrd(0));
  assertEquals(controlSortedDocValues.lookupOrd(0), sortedDocValues.lookupOrd(0));

  // Sorted-set: same value count, then walk both ordinal streams in lock step.
  SortedSetDocValues sortedSetDocValues = leafReader.getSortedSetDocValues("sorted_set");
  sortedSetDocValues.setDocument(0);
  SortedSetDocValues controlSortedSetDocValues =
      controlLeafReader.getSortedSetDocValues("sorted_set");
  controlSortedSetDocValues.setDocument(0);
  assertEquals(controlSortedSetDocValues.getValueCount(), sortedSetDocValues.getValueCount());
  for (long controlOrd = controlSortedSetDocValues.nextOrd();
      controlOrd != SortedSetDocValues.NO_MORE_ORDS;
      controlOrd = controlSortedSetDocValues.nextOrd()) {
    assertEquals(controlOrd, sortedSetDocValues.nextOrd());
    assertEquals(
        controlSortedSetDocValues.lookupOrd(controlOrd),
        sortedSetDocValues.lookupOrd(controlOrd));
  }
  // The stream under test must be exhausted at the same point as the control stream.
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSetDocValues.nextOrd());

  indexReader.close();
  controlIndexReader.close();
  dir.close();
}