/**
 * Checks the JSON rendering of a terms aggregation on the class field that carries a
 * significant terms sub-aggregation on the text field.
 *
 * <p>The test documents are indexed through
 * {@code SharedSignificantTermsTestMethods.index01Docs} with a randomly chosen field type
 * ("string" or "long"). For every class bucket the single significant term must equal the
 * class key. The rendered JSON is then compared with a fixed expectation; for the "long" type
 * the significant terms buckets carry a numeric {@code key} plus {@code key_as_string}, while
 * for the "string" type they carry only a string {@code key}.
 *
 * @throws Exception if indexing, searching or JSON rendering fails
 */
public void testXContentResponse() throws Exception {
  String type = randomBoolean() ? "string" : "long";
  String settings = "{\"index.number_of_shards\": 1, \"index.number_of_replicas\": 0}";
  SharedSignificantTermsTestMethods.index01Docs(type, settings, this);

  SearchResponse response =
      client()
          .prepareSearch(INDEX_NAME)
          .setTypes(DOC_TYPE)
          .addAggregation(
              new TermsBuilder("class")
                  .field(CLASS_FIELD)
                  .subAggregation(new SignificantTermsBuilder("sig_terms").field(TEXT_FIELD)))
          .execute()
          .actionGet();
  assertSearchResponse(response);

  StringTerms classes = (StringTerms) response.getAggregations().get("class");
  assertThat(classes.getBuckets().size(), equalTo(2));
  for (Terms.Bucket classBucket : classes.getBuckets()) {
    Map<String, Aggregation> aggs = classBucket.getAggregations().asMap();
    assertTrue(aggs.containsKey("sig_terms"));
    SignificantTerms agg = (SignificantTerms) aggs.get("sig_terms");
    assertThat(agg.getBuckets().size(), equalTo(1));
    String term = agg.iterator().next().getKeyAsString();
    String classTerm = classBucket.getKeyAsString();
    // Use a matcher rather than assertTrue(a.equals(b)) so a failure reports both values.
    assertThat(term, equalTo(classTerm));
  }

  XContentBuilder responseBuilder = XContentFactory.jsonBuilder();
  classes.toXContent(responseBuilder, null);

  // The expected output depends on the field type chosen at the top of the test.
  final String result;
  if (type.equals("long")) {
    result =
        "\"class\"{\"doc_count_error_upper_bound\":0,\"sum_other_doc_count\":0,\"buckets\":[{\"key\":\"0\",\"doc_count\":4,\"sig_terms\":{\"doc_count\":4,\"buckets\":[{\"key\":0,\"key_as_string\":\"0\",\"doc_count\":4,\"score\":0.39999999999999997,\"bg_count\":5}]}},{\"key\":\"1\",\"doc_count\":3,\"sig_terms\":{\"doc_count\":3,\"buckets\":[{\"key\":1,\"key_as_string\":\"1\",\"doc_count\":3,\"score\":0.75,\"bg_count\":4}]}}]}";
  } else {
    result =
        "\"class\"{\"doc_count_error_upper_bound\":0,\"sum_other_doc_count\":0,\"buckets\":[{\"key\":\"0\",\"doc_count\":4,\"sig_terms\":{\"doc_count\":4,\"buckets\":[{\"key\":\"0\",\"doc_count\":4,\"score\":0.39999999999999997,\"bg_count\":5}]}},{\"key\":\"1\",\"doc_count\":3,\"sig_terms\":{\"doc_count\":3,\"buckets\":[{\"key\":\"1\",\"doc_count\":3,\"score\":0.75,\"bg_count\":4}]}}]}";
  }
  assertThat(responseBuilder.string(), equalTo(result));
}
public void testPlugin() throws Exception { String type = randomBoolean() ? "string" : "long"; String settings = "{\"index.number_of_shards\": 1, \"index.number_of_replicas\": 0}"; SharedSignificantTermsTestMethods.index01Docs(type, settings, this); SearchResponse response = client() .prepareSearch(INDEX_NAME) .setTypes(DOC_TYPE) .addAggregation( new TermsBuilder("class") .field(CLASS_FIELD) .subAggregation( (new SignificantTermsBuilder("sig_terms")) .field(TEXT_FIELD) .significanceHeuristic(new SimpleHeuristic.SimpleHeuristicBuilder()) .minDocCount(1))) .execute() .actionGet(); assertSearchResponse(response); StringTerms classes = (StringTerms) response.getAggregations().get("class"); assertThat(classes.getBuckets().size(), equalTo(2)); for (Terms.Bucket classBucket : classes.getBuckets()) { Map<String, Aggregation> aggs = classBucket.getAggregations().asMap(); assertTrue(aggs.containsKey("sig_terms")); SignificantTerms agg = (SignificantTerms) aggs.get("sig_terms"); assertThat(agg.getBuckets().size(), equalTo(2)); Iterator<SignificantTerms.Bucket> bucketIterator = agg.iterator(); SignificantTerms.Bucket sigBucket = bucketIterator.next(); String term = sigBucket.getKeyAsString(); String classTerm = classBucket.getKeyAsString(); assertTrue(term.equals(classTerm)); assertThat(sigBucket.getSignificanceScore(), closeTo(2.0, 1.e-8)); sigBucket = bucketIterator.next(); assertThat(sigBucket.getSignificanceScore(), closeTo(1.0, 1.e-8)); } // we run the same test again but this time we do not call assertSearchResponse() before the // assertions // the reason is that this would trigger toXContent and we would like to check that this has no // potential side effects response = client() .prepareSearch(INDEX_NAME) .setTypes(DOC_TYPE) .addAggregation( new TermsBuilder("class") .field(CLASS_FIELD) .subAggregation( (new SignificantTermsBuilder("sig_terms")) .field(TEXT_FIELD) .significanceHeuristic(new SimpleHeuristic.SimpleHeuristicBuilder()) .minDocCount(1))) .execute() 
.actionGet(); classes = (StringTerms) response.getAggregations().get("class"); assertThat(classes.getBuckets().size(), equalTo(2)); for (Terms.Bucket classBucket : classes.getBuckets()) { Map<String, Aggregation> aggs = classBucket.getAggregations().asMap(); assertTrue(aggs.containsKey("sig_terms")); SignificantTerms agg = (SignificantTerms) aggs.get("sig_terms"); assertThat(agg.getBuckets().size(), equalTo(2)); Iterator<SignificantTerms.Bucket> bucketIterator = agg.iterator(); SignificantTerms.Bucket sigBucket = bucketIterator.next(); String term = sigBucket.getKeyAsString(); String classTerm = classBucket.getKeyAsString(); assertTrue(term.equals(classTerm)); assertThat(sigBucket.getSignificanceScore(), closeTo(2.0, 1.e-8)); sigBucket = bucketIterator.next(); assertThat(sigBucket.getSignificanceScore(), closeTo(1.0, 1.e-8)); } }
// compute significance score by // 1. terms agg on class and significant terms // 2. filter buckets and set the background to the other class and set is_background false // both should yield exact same result public void testBackgroundVsSeparateSet( SignificanceHeuristicBuilder significanceHeuristicExpectingSuperset, SignificanceHeuristicBuilder significanceHeuristicExpectingSeparateSets) throws Exception { SearchResponse response1 = client() .prepareSearch(INDEX_NAME) .setTypes(DOC_TYPE) .addAggregation( new TermsBuilder("class") .field(CLASS_FIELD) .subAggregation( new SignificantTermsBuilder("sig_terms") .field(TEXT_FIELD) .minDocCount(1) .significanceHeuristic(significanceHeuristicExpectingSuperset))) .execute() .actionGet(); assertSearchResponse(response1); SearchResponse response2 = client() .prepareSearch(INDEX_NAME) .setTypes(DOC_TYPE) .addAggregation( (new FilterAggregationBuilder("0")) .filter(QueryBuilders.termQuery(CLASS_FIELD, "0")) .subAggregation( new SignificantTermsBuilder("sig_terms") .field(TEXT_FIELD) .minDocCount(1) .backgroundFilter(QueryBuilders.termQuery(CLASS_FIELD, "1")) .significanceHeuristic(significanceHeuristicExpectingSeparateSets))) .addAggregation( (new FilterAggregationBuilder("1")) .filter(QueryBuilders.termQuery(CLASS_FIELD, "1")) .subAggregation( new SignificantTermsBuilder("sig_terms") .field(TEXT_FIELD) .minDocCount(1) .backgroundFilter(QueryBuilders.termQuery(CLASS_FIELD, "0")) .significanceHeuristic(significanceHeuristicExpectingSeparateSets))) .execute() .actionGet(); SignificantTerms sigTerms0 = ((SignificantTerms) (((StringTerms) response1.getAggregations().get("class")) .getBucketByKey("0") .getAggregations() .asMap() .get("sig_terms"))); assertThat(sigTerms0.getBuckets().size(), equalTo(2)); double score00Background = sigTerms0.getBucketByKey("0").getSignificanceScore(); double score01Background = sigTerms0.getBucketByKey("1").getSignificanceScore(); SignificantTerms sigTerms1 = ((SignificantTerms) (((StringTerms) 
response1.getAggregations().get("class")) .getBucketByKey("1") .getAggregations() .asMap() .get("sig_terms"))); double score10Background = sigTerms1.getBucketByKey("0").getSignificanceScore(); double score11Background = sigTerms1.getBucketByKey("1").getSignificanceScore(); double score00SeparateSets = ((SignificantTerms) ((InternalFilter) response2.getAggregations().get("0")) .getAggregations() .getAsMap() .get("sig_terms")) .getBucketByKey("0") .getSignificanceScore(); double score01SeparateSets = ((SignificantTerms) ((InternalFilter) response2.getAggregations().get("0")) .getAggregations() .getAsMap() .get("sig_terms")) .getBucketByKey("1") .getSignificanceScore(); double score10SeparateSets = ((SignificantTerms) ((InternalFilter) response2.getAggregations().get("1")) .getAggregations() .getAsMap() .get("sig_terms")) .getBucketByKey("0") .getSignificanceScore(); double score11SeparateSets = ((SignificantTerms) ((InternalFilter) response2.getAggregations().get("1")) .getAggregations() .getAsMap() .get("sig_terms")) .getBucketByKey("1") .getSignificanceScore(); assertThat(score00Background, equalTo(score00SeparateSets)); assertThat(score01Background, equalTo(score01SeparateSets)); assertThat(score10Background, equalTo(score10SeparateSets)); assertThat(score11Background, equalTo(score11SeparateSets)); }