Ejemplo n.º 1
0
 @Override
 public TermsEnum iterator() throws IOException {
   return values.termsEnum();
 }
  /**
   * Merges the sortedset docvalues from <code>toMerge</code>.
   *
   * <p>The default implementation calls {@link #addSortedSetField}, passing an Iterable that merges
   * ordinals and values and filters deleted documents .
   */
  public void mergeSortedSetField(
      FieldInfo fieldInfo, final MergeState mergeState, List<SortedSetDocValues> toMerge)
      throws IOException {

    mergeState.checkAbort.work(mergeState.segmentInfo.getDocCount());

    final AtomicReader readers[] = mergeState.readers.toArray(new AtomicReader[toMerge.size()]);
    final SortedSetDocValues dvs[] = toMerge.toArray(new SortedSetDocValues[toMerge.size()]);

    // step 1: iterate thru each sub and mark terms still in use
    TermsEnum liveTerms[] = new TermsEnum[dvs.length];
    long[] weights = new long[liveTerms.length];
    for (int sub = 0; sub < liveTerms.length; sub++) {
      AtomicReader reader = readers[sub];
      SortedSetDocValues dv = dvs[sub];
      Bits liveDocs = reader.getLiveDocs();
      if (liveDocs == null) {
        liveTerms[sub] = dv.termsEnum();
        weights[sub] = dv.getValueCount();
      } else {
        LongBitSet bitset = new LongBitSet(dv.getValueCount());
        for (int i = 0; i < reader.maxDoc(); i++) {
          if (liveDocs.get(i)) {
            dv.setDocument(i);
            long ord;
            while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
              bitset.set(ord);
            }
          }
        }
        liveTerms[sub] = new BitsFilteredTermsEnum(dv.termsEnum(), bitset);
        weights[sub] = bitset.cardinality();
      }
    }

    // step 2: create ordinal map (this conceptually does the "merging")
    final OrdinalMap map = OrdinalMap.build(this, liveTerms, weights, PackedInts.COMPACT);

    // step 3: add field
    addSortedSetField(
        fieldInfo,
        // ord -> value
        new Iterable<BytesRef>() {
          @Override
          public Iterator<BytesRef> iterator() {
            return new Iterator<BytesRef>() {
              long currentOrd;

              @Override
              public boolean hasNext() {
                return currentOrd < map.getValueCount();
              }

              @Override
              public BytesRef next() {
                if (!hasNext()) {
                  throw new NoSuchElementException();
                }
                int segmentNumber = map.getFirstSegmentNumber(currentOrd);
                long segmentOrd = map.getFirstSegmentOrd(currentOrd);
                final BytesRef term = dvs[segmentNumber].lookupOrd(segmentOrd);
                currentOrd++;
                return term;
              }

              @Override
              public void remove() {
                throw new UnsupportedOperationException();
              }
            };
          }
        },
        // doc -> ord count
        new Iterable<Number>() {
          @Override
          public Iterator<Number> iterator() {
            return new Iterator<Number>() {
              int readerUpto = -1;
              int docIDUpto;
              int nextValue;
              AtomicReader currentReader;
              Bits currentLiveDocs;
              boolean nextIsSet;

              @Override
              public boolean hasNext() {
                return nextIsSet || setNext();
              }

              @Override
              public void remove() {
                throw new UnsupportedOperationException();
              }

              @Override
              public Number next() {
                if (!hasNext()) {
                  throw new NoSuchElementException();
                }
                assert nextIsSet;
                nextIsSet = false;
                // TODO make a mutable number
                return nextValue;
              }

              private boolean setNext() {
                while (true) {
                  if (readerUpto == readers.length) {
                    return false;
                  }

                  if (currentReader == null || docIDUpto == currentReader.maxDoc()) {
                    readerUpto++;
                    if (readerUpto < readers.length) {
                      currentReader = readers[readerUpto];
                      currentLiveDocs = currentReader.getLiveDocs();
                    }
                    docIDUpto = 0;
                    continue;
                  }

                  if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
                    nextIsSet = true;
                    SortedSetDocValues dv = dvs[readerUpto];
                    dv.setDocument(docIDUpto);
                    nextValue = 0;
                    while (dv.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) {
                      nextValue++;
                    }
                    docIDUpto++;
                    return true;
                  }

                  docIDUpto++;
                }
              }
            };
          }
        },
        // ords
        new Iterable<Number>() {
          @Override
          public Iterator<Number> iterator() {
            return new Iterator<Number>() {
              int readerUpto = -1;
              int docIDUpto;
              long nextValue;
              AtomicReader currentReader;
              Bits currentLiveDocs;
              LongValues currentMap;
              boolean nextIsSet;
              long ords[] = new long[8];
              int ordUpto;
              int ordLength;

              @Override
              public boolean hasNext() {
                return nextIsSet || setNext();
              }

              @Override
              public void remove() {
                throw new UnsupportedOperationException();
              }

              @Override
              public Number next() {
                if (!hasNext()) {
                  throw new NoSuchElementException();
                }
                assert nextIsSet;
                nextIsSet = false;
                // TODO make a mutable number
                return nextValue;
              }

              private boolean setNext() {
                while (true) {
                  if (readerUpto == readers.length) {
                    return false;
                  }

                  if (ordUpto < ordLength) {
                    nextValue = ords[ordUpto];
                    ordUpto++;
                    nextIsSet = true;
                    return true;
                  }

                  if (currentReader == null || docIDUpto == currentReader.maxDoc()) {
                    readerUpto++;
                    if (readerUpto < readers.length) {
                      currentReader = readers[readerUpto];
                      currentLiveDocs = currentReader.getLiveDocs();
                      currentMap = map.getGlobalOrds(readerUpto);
                    }
                    docIDUpto = 0;
                    continue;
                  }

                  if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
                    assert docIDUpto < currentReader.maxDoc();
                    SortedSetDocValues dv = dvs[readerUpto];
                    dv.setDocument(docIDUpto);
                    ordUpto = ordLength = 0;
                    long ord;
                    while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
                      if (ordLength == ords.length) {
                        ords = ArrayUtil.grow(ords, ordLength + 1);
                      }
                      ords[ordLength] = currentMap.get(ord);
                      ordLength++;
                    }
                    docIDUpto++;
                    continue;
                  }

                  docIDUpto++;
                }
              }
            };
          }
        });
  }
Ejemplo n.º 3
0
    @Override
    protected void doSetNextReader(LeafReaderContext context) throws IOException {
      if (segmentFacetCounts != null) {
        segmentResults.add(createSegmentResult());
      }

      groupFieldTermsIndex = DocValues.getSorted(context.reader(), groupField);
      facetFieldDocTermOrds = DocValues.getSortedSet(context.reader(), facetField);
      facetFieldNumTerms = (int) facetFieldDocTermOrds.getValueCount();
      if (facetFieldNumTerms == 0) {
        facetOrdTermsEnum = null;
      } else {
        facetOrdTermsEnum = facetFieldDocTermOrds.termsEnum();
      }
      // [facetFieldNumTerms() + 1] for all possible facet values and docs not containing facet
      // field
      segmentFacetCounts = new int[facetFieldNumTerms + 1];
      segmentTotalCount = 0;

      segmentGroupedFacetHits.clear();
      for (GroupedFacetHit groupedFacetHit : groupedFacetHits) {
        int groupOrd =
            groupedFacetHit.groupValue == null
                ? -1
                : groupFieldTermsIndex.lookupTerm(groupedFacetHit.groupValue);
        if (groupedFacetHit.groupValue != null && groupOrd < 0) {
          continue;
        }

        int facetOrd;
        if (groupedFacetHit.facetValue != null) {
          if (facetOrdTermsEnum == null
              || !facetOrdTermsEnum.seekExact(groupedFacetHit.facetValue)) {
            continue;
          }
          facetOrd = (int) facetOrdTermsEnum.ord();
        } else {
          facetOrd = facetFieldNumTerms;
        }

        // (facetFieldDocTermOrds.numTerms() + 1) for all possible facet values and docs not
        // containing facet field
        int segmentGroupedFacetsIndex = groupOrd * (facetFieldNumTerms + 1) + facetOrd;
        segmentGroupedFacetHits.put(segmentGroupedFacetsIndex);
      }

      if (facetPrefix != null) {
        TermsEnum.SeekStatus seekStatus;
        if (facetOrdTermsEnum != null) {
          seekStatus = facetOrdTermsEnum.seekCeil(facetPrefix);
        } else {
          seekStatus = TermsEnum.SeekStatus.END;
        }

        if (seekStatus != TermsEnum.SeekStatus.END) {
          startFacetOrd = (int) facetOrdTermsEnum.ord();
        } else {
          startFacetOrd = 0;
          endFacetOrd = 0;
          return;
        }

        BytesRefBuilder facetEndPrefix = new BytesRefBuilder();
        facetEndPrefix.append(facetPrefix);
        facetEndPrefix.append(UnicodeUtil.BIG_TERM);
        seekStatus = facetOrdTermsEnum.seekCeil(facetEndPrefix.get());
        if (seekStatus != TermsEnum.SeekStatus.END) {
          endFacetOrd = (int) facetOrdTermsEnum.ord();
        } else {
          endFacetOrd = facetFieldNumTerms; // Don't include null...
        }
      } else {
        startFacetOrd = 0;
        endFacetOrd = facetFieldNumTerms + 1;
      }
    }