@Override public InternalAggregation doReduce( List<InternalAggregation> aggregations, ReduceContext reduceContext) { long globalSubsetSize = 0; long globalSupersetSize = 0; // Compute the overall result set size and the corpus size using the // top-level Aggregations from each shard for (InternalAggregation aggregation : aggregations) { InternalSignificantTerms<A, B> terms = (InternalSignificantTerms<A, B>) aggregation; globalSubsetSize += terms.subsetSize; globalSupersetSize += terms.supersetSize; } Map<String, List<InternalSignificantTerms.Bucket>> buckets = new HashMap<>(); for (InternalAggregation aggregation : aggregations) { InternalSignificantTerms<A, B> terms = (InternalSignificantTerms<A, B>) aggregation; for (Bucket bucket : terms.buckets) { List<Bucket> existingBuckets = buckets.get(bucket.getKeyAsString()); if (existingBuckets == null) { existingBuckets = new ArrayList<>(aggregations.size()); buckets.put(bucket.getKeyAsString(), existingBuckets); } // Adjust the buckets with the global stats representing the // total size of the pots from which the stats are drawn existingBuckets.add( bucket.newBucket( bucket.getSubsetDf(), globalSubsetSize, bucket.getSupersetDf(), globalSupersetSize, bucket.aggregations)); } } significanceHeuristic.initialize(reduceContext); final int size = Math.min(requiredSize, buckets.size()); BucketSignificancePriorityQueue ordered = new BucketSignificancePriorityQueue(size); for (Map.Entry<String, List<Bucket>> entry : buckets.entrySet()) { List<Bucket> sameTermBuckets = entry.getValue(); final Bucket b = sameTermBuckets.get(0).reduce(sameTermBuckets, reduceContext); b.updateScore(significanceHeuristic); if ((b.score > 0) && (b.subsetDf >= minDocCount)) { ordered.insertWithOverflow(b); } } Bucket[] list = new Bucket[ordered.size()]; for (int i = ordered.size() - 1; i >= 0; i--) { list[i] = (Bucket) ordered.pop(); } return create(globalSubsetSize, globalSupersetSize, Arrays.asList(list), this); }
/**
 * Writes this builder's settings to {@code builder}.
 *
 * <p>Output order: the bucket count thresholds, then the execution hint, the
 * background filter and the include/exclude settings (each only when non-null),
 * and finally the significance heuristic.
 *
 * @param builder the content builder to write to
 * @param params passed through to every nested {@code toXContent} call
 * @return the same {@code builder} instance that was passed in
 * @throws IOException propagated from the underlying write calls
 */
@Override
protected XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException {
    bucketCountThresholds.toXContent(builder, params);

    // Optional settings are emitted only when they have been set.
    if (executionHint != null) {
        final String hintFieldName = TermsAggregationBuilder.EXECUTION_HINT_FIELD_NAME.getPreferredName();
        builder.field(hintFieldName, executionHint);
    }
    if (filterBuilder != null) {
        final String filterFieldName = BACKGROUND_FILTER.getPreferredName();
        builder.field(filterFieldName, filterBuilder);
    }
    if (includeExclude != null) {
        includeExclude.toXContent(builder, params);
    }

    significanceHeuristic.toXContent(builder, params);
    return builder;
}
/**
 * Recomputes {@code score} by passing this object's subset and superset document
 * frequencies and sizes to the given heuristic.
 *
 * @param significanceHeuristic the heuristic whose {@code getScore} result is stored
 */
public void updateScore(SignificanceHeuristic significanceHeuristic) {
    this.score =
            significanceHeuristic.getScore(
                    this.subsetDf,
                    this.subsetSize,
                    this.supersetDf,
                    this.supersetSize);
}