/** accumulates per-segment multi-valued facet counts, mapping to global ordinal space */ static void accumMulti( int counts[], int startTermIndex, SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { int doc; while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { si.setDocument(doc); // strange do-while to collect the missing count (first ord is NO_MORE_ORDS) int term = (int) si.nextOrd(); if (term < 0) { if (startTermIndex == -1) { counts[0]++; // missing count } continue; } do { if (map != null) { term = (int) map.getGlobalOrd(subIndex, term); } int arrIdx = term - startTermIndex; if (arrIdx >= 0 && arrIdx < counts.length) counts[arrIdx]++; } while ((term = (int) si.nextOrd()) >= 0); } }
/** * "typical" multi-valued faceting: not too many unique values, no prefixing. maps to global * ordinals as a separate step */ static void accumMultiSeg( int counts[], SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { // First count in seg-ord space: final int segCounts[]; if (map == null) { segCounts = counts; } else { segCounts = new int[1 + (int) si.getValueCount()]; } int doc; while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { si.setDocument(doc); int term = (int) si.nextOrd(); if (term < 0) { counts[0]++; // missing } else { do { segCounts[1 + term]++; } while ((term = (int) si.nextOrd()) >= 0); } } // migrate to global ords (if necessary) if (map != null) { migrateGlobal(counts, segCounts, subIndex, map); } }
@Override public void collect(int doc) throws IOException { docTermOrds.setDocument(doc); long ord; while ((ord = docTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { final BytesRef term = docTermOrds.lookupOrd(ord); collectorTerms.add(term); } }
@Override public void collect(int doc) throws IOException { if (doc > groupFieldTermsIndex.docID()) { groupFieldTermsIndex.advance(doc); } int groupOrd; if (doc == groupFieldTermsIndex.docID()) { groupOrd = groupFieldTermsIndex.ordValue(); } else { groupOrd = -1; } if (facetFieldNumTerms == 0) { int segmentGroupedFacetsIndex = groupOrd * (facetFieldNumTerms + 1); if (facetPrefix != null || segmentGroupedFacetHits.exists(segmentGroupedFacetsIndex)) { return; } segmentTotalCount++; segmentFacetCounts[facetFieldNumTerms]++; segmentGroupedFacetHits.put(segmentGroupedFacetsIndex); BytesRef groupKey; if (groupOrd == -1) { groupKey = null; } else { groupKey = BytesRef.deepCopyOf(groupFieldTermsIndex.lookupOrd(groupOrd)); } groupedFacetHits.add(new GroupedFacetHit(groupKey, null)); return; } if (doc > facetFieldDocTermOrds.docID()) { facetFieldDocTermOrds.advance(doc); } boolean empty = true; if (doc == facetFieldDocTermOrds.docID()) { long ord; while ((ord = facetFieldDocTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { process(groupOrd, (int) ord); empty = false; } } if (empty) { process( groupOrd, facetFieldNumTerms); // this facet ord is reserved for docs not containing facet field. } }
/** * Merges the sortedset docvalues from <code>toMerge</code>. * * <p>The default implementation calls {@link #addSortedSetField}, passing an Iterable that merges * ordinals and values and filters deleted documents . */ public void mergeSortedSetField( FieldInfo fieldInfo, final MergeState mergeState, List<SortedSetDocValues> toMerge) throws IOException { mergeState.checkAbort.work(mergeState.segmentInfo.getDocCount()); final AtomicReader readers[] = mergeState.readers.toArray(new AtomicReader[toMerge.size()]); final SortedSetDocValues dvs[] = toMerge.toArray(new SortedSetDocValues[toMerge.size()]); // step 1: iterate thru each sub and mark terms still in use TermsEnum liveTerms[] = new TermsEnum[dvs.length]; long[] weights = new long[liveTerms.length]; for (int sub = 0; sub < liveTerms.length; sub++) { AtomicReader reader = readers[sub]; SortedSetDocValues dv = dvs[sub]; Bits liveDocs = reader.getLiveDocs(); if (liveDocs == null) { liveTerms[sub] = dv.termsEnum(); weights[sub] = dv.getValueCount(); } else { LongBitSet bitset = new LongBitSet(dv.getValueCount()); for (int i = 0; i < reader.maxDoc(); i++) { if (liveDocs.get(i)) { dv.setDocument(i); long ord; while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { bitset.set(ord); } } } liveTerms[sub] = new BitsFilteredTermsEnum(dv.termsEnum(), bitset); weights[sub] = bitset.cardinality(); } } // step 2: create ordinal map (this conceptually does the "merging") final OrdinalMap map = OrdinalMap.build(this, liveTerms, weights, PackedInts.COMPACT); // step 3: add field addSortedSetField( fieldInfo, // ord -> value new Iterable<BytesRef>() { @Override public Iterator<BytesRef> iterator() { return new Iterator<BytesRef>() { long currentOrd; @Override public boolean hasNext() { return currentOrd < map.getValueCount(); } @Override public BytesRef next() { if (!hasNext()) { throw new NoSuchElementException(); } int segmentNumber = map.getFirstSegmentNumber(currentOrd); long segmentOrd = map.getFirstSegmentOrd(currentOrd); final BytesRef term = dvs[segmentNumber].lookupOrd(segmentOrd); currentOrd++; return term; } @Override public void remove() { throw new UnsupportedOperationException(); } }; } }, // doc -> ord count new Iterable<Number>() { @Override public Iterator<Number> iterator() { return new Iterator<Number>() { int readerUpto = -1; int docIDUpto; int nextValue; AtomicReader currentReader; Bits currentLiveDocs; boolean nextIsSet; @Override public boolean hasNext() { return nextIsSet || setNext(); } @Override public void remove() { throw new UnsupportedOperationException(); } @Override public Number next() { if (!hasNext()) { throw new NoSuchElementException(); } assert nextIsSet; nextIsSet = false; // TODO make a mutable number return nextValue; } private boolean setNext() { while (true) { if (readerUpto == readers.length) { return false; } if (currentReader == null || docIDUpto == currentReader.maxDoc()) { readerUpto++; if (readerUpto < readers.length) { currentReader = readers[readerUpto]; currentLiveDocs = currentReader.getLiveDocs(); } docIDUpto = 0; continue; } if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) { nextIsSet = true; SortedSetDocValues dv = dvs[readerUpto]; dv.setDocument(docIDUpto); nextValue = 0; while (dv.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) { nextValue++; } docIDUpto++; return true; } docIDUpto++; } } }; } }, // ords new Iterable<Number>() { @Override public Iterator<Number> iterator() { return new Iterator<Number>() { int readerUpto = -1; int docIDUpto; long nextValue; AtomicReader currentReader; Bits currentLiveDocs; LongValues currentMap; boolean nextIsSet; long ords[] = new long[8]; int ordUpto; int ordLength; @Override public boolean hasNext() { return nextIsSet || setNext(); } @Override public void remove() { throw new UnsupportedOperationException(); } @Override public Number next() { if (!hasNext()) { throw new NoSuchElementException(); } assert nextIsSet; nextIsSet = false; // TODO make a mutable number return nextValue; } private boolean setNext() { while (true) { if (readerUpto == readers.length) { return false; } if (ordUpto < ordLength) { nextValue = ords[ordUpto]; ordUpto++; nextIsSet = true; return true; } if (currentReader == null || docIDUpto == currentReader.maxDoc()) { readerUpto++; if (readerUpto < readers.length) { currentReader = readers[readerUpto]; currentLiveDocs = currentReader.getLiveDocs(); currentMap = map.getGlobalOrds(readerUpto); } docIDUpto = 0; continue; } if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) { assert docIDUpto < currentReader.maxDoc(); SortedSetDocValues dv = dvs[readerUpto]; dv.setDocument(docIDUpto); ordUpto = ordLength = 0; long ord; while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { if (ordLength == ords.length) { ords = ArrayUtil.grow(ords, ordLength + 1); } ords[ordLength] = currentMap.get(ord); ordLength++; } docIDUpto++; continue; } docIDUpto++; } } }; } }); }
public void testDocValuesMemoryIndexVsNormalIndex() throws Exception { Document doc = new Document(); long randomLong = random().nextLong(); doc.add(new NumericDocValuesField("numeric", randomLong)); if (random().nextBoolean()) { doc.add(new LegacyLongField("numeric", randomLong, Field.Store.NO)); } int numValues = atLeast(5); for (int i = 0; i < numValues; i++) { randomLong = random().nextLong(); doc.add(new SortedNumericDocValuesField("sorted_numeric", randomLong)); if (random().nextBoolean()) { // randomly duplicate field/value doc.add(new SortedNumericDocValuesField("sorted_numeric", randomLong)); } if (random().nextBoolean()) { doc.add(new LegacyLongField("numeric", randomLong, Field.Store.NO)); } } BytesRef randomTerm = new BytesRef(randomTerm()); doc.add(new BinaryDocValuesField("binary", randomTerm)); if (random().nextBoolean()) { doc.add(new StringField("binary", randomTerm, Field.Store.NO)); } randomTerm = new BytesRef(randomTerm()); doc.add(new SortedDocValuesField("sorted", randomTerm)); if (random().nextBoolean()) { doc.add(new StringField("sorted", randomTerm, Field.Store.NO)); } numValues = atLeast(5); for (int i = 0; i < numValues; i++) { randomTerm = new BytesRef(randomTerm()); doc.add(new SortedSetDocValuesField("sorted_set", randomTerm)); if (random().nextBoolean()) { // randomly duplicate field/value doc.add(new SortedSetDocValuesField("sorted_set", randomTerm)); } if (random().nextBoolean()) { // randomily just add a normal string field doc.add(new StringField("sorted_set", randomTerm, Field.Store.NO)); } } MockAnalyzer mockAnalyzer = new MockAnalyzer(random()); MemoryIndex memoryIndex = MemoryIndex.fromDocument(doc, mockAnalyzer); IndexReader indexReader = memoryIndex.createSearcher().getIndexReader(); LeafReader leafReader = indexReader.leaves().get(0).reader(); Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(random(), mockAnalyzer)); writer.addDocument(doc); writer.close(); IndexReader controlIndexReader = DirectoryReader.open(dir); LeafReader controlLeafReader = controlIndexReader.leaves().get(0).reader(); NumericDocValues numericDocValues = leafReader.getNumericDocValues("numeric"); NumericDocValues controlNumericDocValues = controlLeafReader.getNumericDocValues("numeric"); assertEquals(controlNumericDocValues.get(0), numericDocValues.get(0)); SortedNumericDocValues sortedNumericDocValues = leafReader.getSortedNumericDocValues("sorted_numeric"); sortedNumericDocValues.setDocument(0); SortedNumericDocValues controlSortedNumericDocValues = controlLeafReader.getSortedNumericDocValues("sorted_numeric"); controlSortedNumericDocValues.setDocument(0); assertEquals(controlSortedNumericDocValues.count(), sortedNumericDocValues.count()); for (int i = 0; i < controlSortedNumericDocValues.count(); i++) { assertEquals(controlSortedNumericDocValues.valueAt(i), sortedNumericDocValues.valueAt(i)); } BinaryDocValues binaryDocValues = leafReader.getBinaryDocValues("binary"); BinaryDocValues controlBinaryDocValues = controlLeafReader.getBinaryDocValues("binary"); assertEquals(controlBinaryDocValues.get(0), binaryDocValues.get(0)); SortedDocValues sortedDocValues = leafReader.getSortedDocValues("sorted"); SortedDocValues controlSortedDocValues = controlLeafReader.getSortedDocValues("sorted"); assertEquals(controlSortedDocValues.getValueCount(), sortedDocValues.getValueCount()); assertEquals(controlSortedDocValues.get(0), sortedDocValues.get(0)); assertEquals(controlSortedDocValues.getOrd(0), sortedDocValues.getOrd(0)); assertEquals(controlSortedDocValues.lookupOrd(0), sortedDocValues.lookupOrd(0)); SortedSetDocValues sortedSetDocValues = leafReader.getSortedSetDocValues("sorted_set"); sortedSetDocValues.setDocument(0); SortedSetDocValues controlSortedSetDocValues = controlLeafReader.getSortedSetDocValues("sorted_set"); controlSortedSetDocValues.setDocument(0); assertEquals(controlSortedSetDocValues.getValueCount(), sortedSetDocValues.getValueCount()); for (long controlOrd = controlSortedSetDocValues.nextOrd(); controlOrd != SortedSetDocValues.NO_MORE_ORDS; controlOrd = controlSortedSetDocValues.nextOrd()) { assertEquals(controlOrd, sortedSetDocValues.nextOrd()); assertEquals( controlSortedSetDocValues.lookupOrd(controlOrd), sortedSetDocValues.lookupOrd(controlOrd)); } assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSetDocValues.nextOrd()); indexReader.close(); controlIndexReader.close(); dir.close(); }