/**
 * Returns the terms enumerator supplied by {@code values}.
 *
 * @return the result of {@code values.termsEnum()}
 * @throws IOException as declared by the overridden method
 */
@Override
public TermsEnum iterator() throws IOException {
  final TermsEnum termsEnum = values.termsEnum();
  return termsEnum;
}
/** * Merges the sortedset docvalues from <code>toMerge</code>. * * <p>The default implementation calls {@link #addSortedSetField}, passing an Iterable that merges * ordinals and values and filters deleted documents . */ public void mergeSortedSetField( FieldInfo fieldInfo, final MergeState mergeState, List<SortedSetDocValues> toMerge) throws IOException { mergeState.checkAbort.work(mergeState.segmentInfo.getDocCount()); final AtomicReader readers[] = mergeState.readers.toArray(new AtomicReader[toMerge.size()]); final SortedSetDocValues dvs[] = toMerge.toArray(new SortedSetDocValues[toMerge.size()]); // step 1: iterate thru each sub and mark terms still in use TermsEnum liveTerms[] = new TermsEnum[dvs.length]; long[] weights = new long[liveTerms.length]; for (int sub = 0; sub < liveTerms.length; sub++) { AtomicReader reader = readers[sub]; SortedSetDocValues dv = dvs[sub]; Bits liveDocs = reader.getLiveDocs(); if (liveDocs == null) { liveTerms[sub] = dv.termsEnum(); weights[sub] = dv.getValueCount(); } else { LongBitSet bitset = new LongBitSet(dv.getValueCount()); for (int i = 0; i < reader.maxDoc(); i++) { if (liveDocs.get(i)) { dv.setDocument(i); long ord; while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { bitset.set(ord); } } } liveTerms[sub] = new BitsFilteredTermsEnum(dv.termsEnum(), bitset); weights[sub] = bitset.cardinality(); } } // step 2: create ordinal map (this conceptually does the "merging") final OrdinalMap map = OrdinalMap.build(this, liveTerms, weights, PackedInts.COMPACT); // step 3: add field addSortedSetField( fieldInfo, // ord -> value new Iterable<BytesRef>() { @Override public Iterator<BytesRef> iterator() { return new Iterator<BytesRef>() { long currentOrd; @Override public boolean hasNext() { return currentOrd < map.getValueCount(); } @Override public BytesRef next() { if (!hasNext()) { throw new NoSuchElementException(); } int segmentNumber = map.getFirstSegmentNumber(currentOrd); long segmentOrd = 
map.getFirstSegmentOrd(currentOrd); final BytesRef term = dvs[segmentNumber].lookupOrd(segmentOrd); currentOrd++; return term; } @Override public void remove() { throw new UnsupportedOperationException(); } }; } }, // doc -> ord count new Iterable<Number>() { @Override public Iterator<Number> iterator() { return new Iterator<Number>() { int readerUpto = -1; int docIDUpto; int nextValue; AtomicReader currentReader; Bits currentLiveDocs; boolean nextIsSet; @Override public boolean hasNext() { return nextIsSet || setNext(); } @Override public void remove() { throw new UnsupportedOperationException(); } @Override public Number next() { if (!hasNext()) { throw new NoSuchElementException(); } assert nextIsSet; nextIsSet = false; // TODO make a mutable number return nextValue; } private boolean setNext() { while (true) { if (readerUpto == readers.length) { return false; } if (currentReader == null || docIDUpto == currentReader.maxDoc()) { readerUpto++; if (readerUpto < readers.length) { currentReader = readers[readerUpto]; currentLiveDocs = currentReader.getLiveDocs(); } docIDUpto = 0; continue; } if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) { nextIsSet = true; SortedSetDocValues dv = dvs[readerUpto]; dv.setDocument(docIDUpto); nextValue = 0; while (dv.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) { nextValue++; } docIDUpto++; return true; } docIDUpto++; } } }; } }, // ords new Iterable<Number>() { @Override public Iterator<Number> iterator() { return new Iterator<Number>() { int readerUpto = -1; int docIDUpto; long nextValue; AtomicReader currentReader; Bits currentLiveDocs; LongValues currentMap; boolean nextIsSet; long ords[] = new long[8]; int ordUpto; int ordLength; @Override public boolean hasNext() { return nextIsSet || setNext(); } @Override public void remove() { throw new UnsupportedOperationException(); } @Override public Number next() { if (!hasNext()) { throw new NoSuchElementException(); } assert nextIsSet; nextIsSet = false; // TODO make a 
mutable number return nextValue; } private boolean setNext() { while (true) { if (readerUpto == readers.length) { return false; } if (ordUpto < ordLength) { nextValue = ords[ordUpto]; ordUpto++; nextIsSet = true; return true; } if (currentReader == null || docIDUpto == currentReader.maxDoc()) { readerUpto++; if (readerUpto < readers.length) { currentReader = readers[readerUpto]; currentLiveDocs = currentReader.getLiveDocs(); currentMap = map.getGlobalOrds(readerUpto); } docIDUpto = 0; continue; } if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) { assert docIDUpto < currentReader.maxDoc(); SortedSetDocValues dv = dvs[readerUpto]; dv.setDocument(docIDUpto); ordUpto = ordLength = 0; long ord; while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { if (ordLength == ords.length) { ords = ArrayUtil.grow(ords, ordLength + 1); } ords[ordLength] = currentMap.get(ord); ordLength++; } docIDUpto++; continue; } docIDUpto++; } } }; } }); }
@Override protected void doSetNextReader(LeafReaderContext context) throws IOException { if (segmentFacetCounts != null) { segmentResults.add(createSegmentResult()); } groupFieldTermsIndex = DocValues.getSorted(context.reader(), groupField); facetFieldDocTermOrds = DocValues.getSortedSet(context.reader(), facetField); facetFieldNumTerms = (int) facetFieldDocTermOrds.getValueCount(); if (facetFieldNumTerms == 0) { facetOrdTermsEnum = null; } else { facetOrdTermsEnum = facetFieldDocTermOrds.termsEnum(); } // [facetFieldNumTerms() + 1] for all possible facet values and docs not containing facet // field segmentFacetCounts = new int[facetFieldNumTerms + 1]; segmentTotalCount = 0; segmentGroupedFacetHits.clear(); for (GroupedFacetHit groupedFacetHit : groupedFacetHits) { int groupOrd = groupedFacetHit.groupValue == null ? -1 : groupFieldTermsIndex.lookupTerm(groupedFacetHit.groupValue); if (groupedFacetHit.groupValue != null && groupOrd < 0) { continue; } int facetOrd; if (groupedFacetHit.facetValue != null) { if (facetOrdTermsEnum == null || !facetOrdTermsEnum.seekExact(groupedFacetHit.facetValue)) { continue; } facetOrd = (int) facetOrdTermsEnum.ord(); } else { facetOrd = facetFieldNumTerms; } // (facetFieldDocTermOrds.numTerms() + 1) for all possible facet values and docs not // containing facet field int segmentGroupedFacetsIndex = groupOrd * (facetFieldNumTerms + 1) + facetOrd; segmentGroupedFacetHits.put(segmentGroupedFacetsIndex); } if (facetPrefix != null) { TermsEnum.SeekStatus seekStatus; if (facetOrdTermsEnum != null) { seekStatus = facetOrdTermsEnum.seekCeil(facetPrefix); } else { seekStatus = TermsEnum.SeekStatus.END; } if (seekStatus != TermsEnum.SeekStatus.END) { startFacetOrd = (int) facetOrdTermsEnum.ord(); } else { startFacetOrd = 0; endFacetOrd = 0; return; } BytesRefBuilder facetEndPrefix = new BytesRefBuilder(); facetEndPrefix.append(facetPrefix); facetEndPrefix.append(UnicodeUtil.BIG_TERM); seekStatus = 
facetOrdTermsEnum.seekCeil(facetEndPrefix.get()); if (seekStatus != TermsEnum.SeekStatus.END) { endFacetOrd = (int) facetOrdTermsEnum.ord(); } else { endFacetOrd = facetFieldNumTerms; // Don't include null... } } else { startFacetOrd = 0; endFacetOrd = facetFieldNumTerms + 1; } }