Code example #1
  @Override
  public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub)
      throws IOException {
    final SortedBinaryDocValues values = valuesSource.bytesValues(ctx);
    return new LeafBucketCollectorBase(sub, values) {
      final BytesRefBuilder previous = new BytesRefBuilder();

      @Override
      public void collect(int doc, long bucket) throws IOException {
        assert bucket == 0;
        values.setDocument(doc);
        final int valuesCount = values.count();

        // SortedBinaryDocValues don't guarantee uniqueness, but values come back in sorted
        // order, so duplicates are adjacent and comparing against the previous value suffices
        previous.clear();
        for (int i = 0; i < valuesCount; ++i) {
          final BytesRef bytes = values.valueAt(i);
          if (includeExclude != null && !includeExclude.accept(bytes)) {
            continue;
          }
          if (previous.get().equals(bytes)) {
            continue;
          }
          long bucketOrdinal = bucketOrds.add(bytes);
          if (bucketOrdinal < 0) { // already seen
            bucketOrdinal = -1 - bucketOrdinal;
            collectExistingBucket(sub, doc, bucketOrdinal);
          } else {
            collectBucket(sub, doc, bucketOrdinal);
          }
          previous.copyBytes(bytes);
        }
      }
    };
  }
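The "-1 - bucketOrdinal" step relies on the add() contract of the ordinals hash: a non-negative return value is a freshly assigned ordinal, while a negative return value encodes the ordinal of a key that was already present. A minimal sketch of that convention, using Lucene's org.apache.lucene.util.BytesRefHash (the "already seen" branch above shows that bucketOrds uses the same encoding):

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;

public class OrdinalEncodingDemo {
  public static void main(String[] args) {
    BytesRefHash ords = new BytesRefHash();
    int first = ords.add(new BytesRef("apple"));  // 0: newly assigned ordinal
    int second = ords.add(new BytesRef("pear"));  // 1: another new key
    int dup = ords.add(new BytesRef("apple"));    // -1: "apple" already present
    int recovered = -1 - dup;                     // -1 - (-1) = 0, the original ordinal
    System.out.println(first + " " + second + " " + dup + " " + recovered); // 0 1 -1 0
  }
}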
Code example #2
  private Collector createCollector(AtomicReaderContext reader) {

    // if rehash is false then the value source is either already hashed, or the user explicitly
    // requested not to hash the values (perhaps they already hashed the values themselves before
    // indexing the doc)
    // so we can just work with the original value source as is
    if (!rehash) {
      MurmurHash3Values hashValues =
          MurmurHash3Values.cast(((ValuesSource.Numeric) valuesSource).longValues());
      return new DirectCollector(counts, hashValues);
    }

    if (valuesSource instanceof ValuesSource.Numeric) {
      ValuesSource.Numeric source = (ValuesSource.Numeric) valuesSource;
      MurmurHash3Values hashValues =
          source.isFloatingPoint()
              ? MurmurHash3Values.hash(source.doubleValues())
              : MurmurHash3Values.hash(source.longValues());
      return new DirectCollector(counts, hashValues);
    }

    if (valuesSource instanceof ValuesSource.Bytes.WithOrdinals) {
      ValuesSource.Bytes.WithOrdinals source = (ValuesSource.Bytes.WithOrdinals) valuesSource;
      final RandomAccessOrds ordinalValues = source.ordinalsValues();
      final long maxOrd = ordinalValues.getValueCount();
      if (maxOrd == 0) {
        return new EmptyCollector();
      }

      final long ordinalsMemoryUsage = OrdinalsCollector.memoryOverhead(maxOrd);
      final long countsMemoryUsage = HyperLogLogPlusPlus.memoryUsage(precision);
      // only use ordinals if they don't increase memory usage by more than 25%
      if (ordinalsMemoryUsage < countsMemoryUsage / 4) {
        return new OrdinalsCollector(counts, ordinalValues, bigArrays);
      }
    }

    return new DirectCollector(counts, MurmurHash3Values.hash(valuesSource.bytesValues()));
  }
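The collector choice at the end rests on a simple memory budget, spelled out in the 25% comment above: ordinals are only materialized when their per-segment bookkeeping costs less than a quarter of what the HyperLogLog++ sketch already costs. A toy illustration of the rule; the byte figures are invented placeholders, not real outputs of OrdinalsCollector.memoryOverhead or HyperLogLogPlusPlus.memoryUsage:

public class CollectorChoiceDemo {
  public static void main(String[] args) {
    long countsMemoryUsage = 40 * 1024;   // suppose the HLL++ sketch costs 40 KiB
    long ordinalsMemoryUsage = 8 * 1024;  // suppose ordinal bookkeeping costs 8 KiB

    // 8 KiB < 10 KiB (a quarter of 40 KiB): ordinals add at most 25% on top of
    // the sketch, so the OrdinalsCollector path wins; otherwise every value is
    // hashed straight into the sketch by a DirectCollector.
    boolean useOrdinals = ordinalsMemoryUsage < countsMemoryUsage / 4;
    System.out.println(useOrdinals ? "OrdinalsCollector" : "DirectCollector");
  }
}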
Code example #3
  @Override
  public boolean needsScores() {
    return (valuesSource != null && valuesSource.needsScores()) || super.needsScores();
  }
Code example #4
  @Override
  public InternalAggregation buildAggregation(long owningBucketOrdinal) throws IOException {
    assert owningBucketOrdinal == 0;

    if (bucketCountThresholds.getMinDocCount() == 0
        && (order != InternalOrder.COUNT_DESC
            || bucketOrds.size() < bucketCountThresholds.getRequiredSize())) {
      // we need to fill-in the blanks
      for (LeafReaderContext ctx : context.searcher().getTopReaderContext().leaves()) {
        final SortedBinaryDocValues values = valuesSource.bytesValues(ctx);
        // brute force
        for (int docId = 0; docId < ctx.reader().maxDoc(); ++docId) {
          values.setDocument(docId);
          final int valueCount = values.count();
          for (int i = 0; i < valueCount; ++i) {
            final BytesRef term = values.valueAt(i);
            if (includeExclude == null || includeExclude.accept(term)) {
              bucketOrds.add(term);
            }
          }
        }
      }
    }

    final int size = (int) Math.min(bucketOrds.size(), bucketCountThresholds.getShardSize());

    long otherDocCount = 0;
    BucketPriorityQueue<StringTerms.Bucket> ordered =
        new BucketPriorityQueue<>(size, order.comparator(this));
    StringTerms.Bucket spare = null;
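    // 'spare' is recycled: insertWithOverflow hands back either the bucket it evicted
    // or the rejected spare itself, so one mutable bucket serves the whole scan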
    for (int i = 0; i < bucketOrds.size(); i++) {
      if (spare == null) {
        spare = new StringTerms.Bucket(new BytesRef(), 0, null, showTermDocCountError, 0, format);
      }
      bucketOrds.get(i, spare.termBytes);
      spare.docCount = bucketDocCount(i);
      otherDocCount += spare.docCount;
      spare.bucketOrd = i;
      if (bucketCountThresholds.getShardMinDocCount() <= spare.docCount) {
        spare = ordered.insertWithOverflow(spare);
      }
    }

    // Get the top buckets
    final StringTerms.Bucket[] list = new StringTerms.Bucket[ordered.size()];
    final long[] survivingBucketOrds = new long[ordered.size()];
    for (int i = ordered.size() - 1; i >= 0; --i) {
      final StringTerms.Bucket bucket = ordered.pop();
      survivingBucketOrds[i] = bucket.bucketOrd;
      list[i] = bucket;
      otherDocCount -= bucket.docCount;
    }
    // replay any deferred collections
    runDeferredCollections(survivingBucketOrds);

    // Now build the aggs
    for (int i = 0; i < list.length; i++) {
      final StringTerms.Bucket bucket = list[i];
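      // termBytes filled via bucketOrds.get() still points into the hash's shared
      // storage; deep-copy so the bucket owns its bytes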
      bucket.termBytes = BytesRef.deepCopyOf(bucket.termBytes);
      bucket.aggregations = bucketAggregations(bucket.bucketOrd);
      bucket.docCountError = 0;
    }

    return new StringTerms(
        name,
        order,
        bucketCountThresholds.getRequiredSize(),
        bucketCountThresholds.getMinDocCount(),
        pipelineAggregators(),
        metaData(),
        format,
        bucketCountThresholds.getShardSize(),
        showTermDocCountError,
        otherDocCount,
        Arrays.asList(list),
        0);
  }
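The top-size selection above allocates at most shardSize + 1 bucket objects no matter how many terms it scans: insertWithOverflow either keeps the spare and returns the entry it evicted, or returns the uncompetitive spare untouched, and either way the returned object is recycled on the next iteration. A minimal, self-contained sketch of that reuse pattern against Lucene's org.apache.lucene.util.PriorityQueue (the base class of BucketPriorityQueue), with a long[] standing in for StringTerms.Bucket:

import org.apache.lucene.util.PriorityQueue;

public class SpareReuseDemo {
  public static void main(String[] args) {
    // Keep the top 3 counts; the smallest sits on top so it is evicted first.
    PriorityQueue<long[]> topN = new PriorityQueue<long[]>(3) {
      @Override
      protected boolean lessThan(long[] a, long[] b) {
        return a[0] < b[0];
      }
    };

    long[] spare = null;
    for (long count : new long[] {5, 1, 7, 3, 9, 2}) {
      if (spare == null) {
        spare = new long[1]; // allocate only when the queue kept the previous spare
      }
      spare[0] = count;
      // Returns null while the queue is filling, the evicted entry once it is
      // full, or the uncompetitive spare itself; all three are safe to reuse.
      spare = topN.insertWithOverflow(spare);
    }
    // topN now holds 5, 7, 9; at most maxSize + 1 arrays were ever allocated.
    while (topN.size() > 0) {
      System.out.println(topN.pop()[0]); // prints 5, 7, 9
    }
  }
}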