/** accumulates per-segment multi-valued facet counts, mapping to global ordinal space */ static void accumMulti( int counts[], int startTermIndex, SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { int doc; while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { si.setDocument(doc); // strange do-while to collect the missing count (first ord is NO_MORE_ORDS) int term = (int) si.nextOrd(); if (term < 0) { if (startTermIndex == -1) { counts[0]++; // missing count } continue; } do { if (map != null) { term = (int) map.getGlobalOrd(subIndex, term); } int arrIdx = term - startTermIndex; if (arrIdx >= 0 && arrIdx < counts.length) counts[arrIdx]++; } while ((term = (int) si.nextOrd()) >= 0); } }
/** folds counts in segment ordinal space (segCounts) into global ordinal space (counts) */ static void migrateGlobal(int counts[], int segCounts[], int subIndex, OrdinalMap map) { final LongValues ordMap = map.getGlobalOrds(subIndex); // missing count counts[0] += segCounts[0]; // migrate actual ordinals for (int ord = 1; ord < segCounts.length; ord++) { int count = segCounts[ord]; if (count != 0) { counts[1 + (int) ordMap.get(ord - 1)] += count; } } }
// specialized since the single-valued case is different static void accumSingle( int counts[], int startTermIndex, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { int doc; while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { int term = si.getOrd(doc); if (map != null && term >= 0) { term = (int) map.getGlobalOrd(subIndex, term); } int arrIdx = term - startTermIndex; if (arrIdx >= 0 && arrIdx < counts.length) counts[arrIdx]++; } }
/** * Merges the sortedset docvalues from <code>toMerge</code>. * * <p>The default implementation calls {@link #addSortedSetField}, passing an Iterable that merges * ordinals and values and filters deleted documents . */ public void mergeSortedSetField( FieldInfo fieldInfo, final MergeState mergeState, List<SortedSetDocValues> toMerge) throws IOException { mergeState.checkAbort.work(mergeState.segmentInfo.getDocCount()); final AtomicReader readers[] = mergeState.readers.toArray(new AtomicReader[toMerge.size()]); final SortedSetDocValues dvs[] = toMerge.toArray(new SortedSetDocValues[toMerge.size()]); // step 1: iterate thru each sub and mark terms still in use TermsEnum liveTerms[] = new TermsEnum[dvs.length]; long[] weights = new long[liveTerms.length]; for (int sub = 0; sub < liveTerms.length; sub++) { AtomicReader reader = readers[sub]; SortedSetDocValues dv = dvs[sub]; Bits liveDocs = reader.getLiveDocs(); if (liveDocs == null) { liveTerms[sub] = dv.termsEnum(); weights[sub] = dv.getValueCount(); } else { LongBitSet bitset = new LongBitSet(dv.getValueCount()); for (int i = 0; i < reader.maxDoc(); i++) { if (liveDocs.get(i)) { dv.setDocument(i); long ord; while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { bitset.set(ord); } } } liveTerms[sub] = new BitsFilteredTermsEnum(dv.termsEnum(), bitset); weights[sub] = bitset.cardinality(); } } // step 2: create ordinal map (this conceptually does the "merging") final OrdinalMap map = OrdinalMap.build(this, liveTerms, weights, PackedInts.COMPACT); // step 3: add field addSortedSetField( fieldInfo, // ord -> value new Iterable<BytesRef>() { @Override public Iterator<BytesRef> iterator() { return new Iterator<BytesRef>() { long currentOrd; @Override public boolean hasNext() { return currentOrd < map.getValueCount(); } @Override public BytesRef next() { if (!hasNext()) { throw new NoSuchElementException(); } int segmentNumber = map.getFirstSegmentNumber(currentOrd); long segmentOrd = map.getFirstSegmentOrd(currentOrd); final BytesRef term = dvs[segmentNumber].lookupOrd(segmentOrd); currentOrd++; return term; } @Override public void remove() { throw new UnsupportedOperationException(); } }; } }, // doc -> ord count new Iterable<Number>() { @Override public Iterator<Number> iterator() { return new Iterator<Number>() { int readerUpto = -1; int docIDUpto; int nextValue; AtomicReader currentReader; Bits currentLiveDocs; boolean nextIsSet; @Override public boolean hasNext() { return nextIsSet || setNext(); } @Override public void remove() { throw new UnsupportedOperationException(); } @Override public Number next() { if (!hasNext()) { throw new NoSuchElementException(); } assert nextIsSet; nextIsSet = false; // TODO make a mutable number return nextValue; } private boolean setNext() { while (true) { if (readerUpto == readers.length) { return false; } if (currentReader == null || docIDUpto == currentReader.maxDoc()) { readerUpto++; if (readerUpto < readers.length) { currentReader = readers[readerUpto]; currentLiveDocs = currentReader.getLiveDocs(); } docIDUpto = 0; continue; } if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) { nextIsSet = true; SortedSetDocValues dv = dvs[readerUpto]; dv.setDocument(docIDUpto); nextValue = 0; while (dv.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) { nextValue++; } docIDUpto++; return true; } docIDUpto++; } } }; } }, // ords new Iterable<Number>() { @Override public Iterator<Number> iterator() { return new Iterator<Number>() { int readerUpto = -1; int docIDUpto; long nextValue; AtomicReader currentReader; Bits currentLiveDocs; LongValues currentMap; boolean nextIsSet; long ords[] = new long[8]; int ordUpto; int ordLength; @Override public boolean hasNext() { return nextIsSet || setNext(); } @Override public void remove() { throw new UnsupportedOperationException(); } @Override public Number next() { if (!hasNext()) { throw new NoSuchElementException(); } assert nextIsSet; nextIsSet = false; // TODO make a mutable number return nextValue; } private boolean setNext() { while (true) { if (readerUpto == readers.length) { return false; } if (ordUpto < ordLength) { nextValue = ords[ordUpto]; ordUpto++; nextIsSet = true; return true; } if (currentReader == null || docIDUpto == currentReader.maxDoc()) { readerUpto++; if (readerUpto < readers.length) { currentReader = readers[readerUpto]; currentLiveDocs = currentReader.getLiveDocs(); currentMap = map.getGlobalOrds(readerUpto); } docIDUpto = 0; continue; } if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) { assert docIDUpto < currentReader.maxDoc(); SortedSetDocValues dv = dvs[readerUpto]; dv.setDocument(docIDUpto); ordUpto = ordLength = 0; long ord; while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { if (ordLength == ords.length) { ords = ArrayUtil.grow(ords, ordLength + 1); } ords[ordLength] = currentMap.get(ord); ordLength++; } docIDUpto++; continue; } docIDUpto++; } } }; } }); }