public void testScoresEqualForPositiveAndNegative(SignificanceHeuristicBuilder heuristic) throws Exception { // check that results for both classes are the same with exclude negatives = false and classes // are routing ids SearchResponse response = client() .prepareSearch("test") .addAggregation( new TermsBuilder("class") .field("class") .subAggregation( new SignificantTermsBuilder("mySignificantTerms") .field("text") .executionHint(randomExecutionHint()) .significanceHeuristic(heuristic) .minDocCount(1) .shardSize(1000) .size(1000))) .execute() .actionGet(); assertSearchResponse(response); StringTerms classes = (StringTerms) response.getAggregations().get("class"); assertThat(classes.getBuckets().size(), equalTo(2)); Iterator<Terms.Bucket> classBuckets = classes.getBuckets().iterator(); Collection<SignificantTerms.Bucket> classA = ((SignificantTerms) classBuckets.next().getAggregations().get("mySignificantTerms")) .getBuckets(); Iterator<SignificantTerms.Bucket> classBBucketIterator = ((SignificantTerms) classBuckets.next().getAggregations().get("mySignificantTerms")) .getBuckets() .iterator(); assertThat(classA.size(), greaterThan(0)); for (SignificantTerms.Bucket classABucket : classA) { SignificantTerms.Bucket classBBucket = classBBucketIterator.next(); assertThat(classABucket.getKey(), equalTo(classBBucket.getKey())); assertThat( classABucket.getSignificanceScore(), closeTo(classBBucket.getSignificanceScore(), 1.e-5)); } }
/**
 * Indexes the shared test documents with a randomly chosen field type ({@code "string"} or
 * {@code "long"}), runs a per-class significant-terms aggregation scored by the script heuristic,
 * and asserts that every returned bucket's score equals
 * {@code subsetDf + subsetSize + supersetDf + supersetSize}.
 */
public void testScriptScore() throws ExecutionException, InterruptedException, IOException {
  String fieldType = randomBoolean() ? "string" : "long";
  indexRandomFrequencies01(fieldType);
  ScriptHeuristic.ScriptHeuristicBuilder heuristic = getScriptSignificanceHeuristicBuilder();
  ensureYellow();

  SearchResponse searchResult =
      client()
          .prepareSearch(INDEX_NAME)
          .addAggregation(
              new TermsBuilder("class")
                  .field(CLASS_FIELD)
                  .subAggregation(
                      new SignificantTermsBuilder("mySignificantTerms")
                          .field(TEXT_FIELD)
                          .executionHint(randomExecutionHint())
                          .significanceHeuristic(heuristic)
                          .minDocCount(1)
                          .shardSize(2)
                          .size(2)))
          .execute()
          .actionGet();
  assertSearchResponse(searchResult);

  Terms perClassTerms = (Terms) searchResult.getAggregations().get("class");
  for (Terms.Bucket perClass : perClassTerms.getBuckets()) {
    SignificantTerms significant =
        (SignificantTerms) perClass.getAggregations().get("mySignificantTerms");
    for (SignificantTerms.Bucket sigBucket : significant.getBuckets()) {
      double actualScore = sigBucket.getSignificanceScore();
      // The cast on the first operand makes the whole sum a double addition.
      double expectedScore =
          (double) sigBucket.getSubsetDf()
              + sigBucket.getSubsetSize()
              + sigBucket.getSupersetDf()
              + sigBucket.getSupersetSize();
      assertThat(actualScore, is(expectedScore));
    }
  }
}
public void testPlugin() throws Exception { String type = randomBoolean() ? "string" : "long"; String settings = "{\"index.number_of_shards\": 1, \"index.number_of_replicas\": 0}"; SharedSignificantTermsTestMethods.index01Docs(type, settings, this); SearchResponse response = client() .prepareSearch(INDEX_NAME) .setTypes(DOC_TYPE) .addAggregation( new TermsBuilder("class") .field(CLASS_FIELD) .subAggregation( (new SignificantTermsBuilder("sig_terms")) .field(TEXT_FIELD) .significanceHeuristic(new SimpleHeuristic.SimpleHeuristicBuilder()) .minDocCount(1))) .execute() .actionGet(); assertSearchResponse(response); StringTerms classes = (StringTerms) response.getAggregations().get("class"); assertThat(classes.getBuckets().size(), equalTo(2)); for (Terms.Bucket classBucket : classes.getBuckets()) { Map<String, Aggregation> aggs = classBucket.getAggregations().asMap(); assertTrue(aggs.containsKey("sig_terms")); SignificantTerms agg = (SignificantTerms) aggs.get("sig_terms"); assertThat(agg.getBuckets().size(), equalTo(2)); Iterator<SignificantTerms.Bucket> bucketIterator = agg.iterator(); SignificantTerms.Bucket sigBucket = bucketIterator.next(); String term = sigBucket.getKeyAsString(); String classTerm = classBucket.getKeyAsString(); assertTrue(term.equals(classTerm)); assertThat(sigBucket.getSignificanceScore(), closeTo(2.0, 1.e-8)); sigBucket = bucketIterator.next(); assertThat(sigBucket.getSignificanceScore(), closeTo(1.0, 1.e-8)); } // we run the same test again but this time we do not call assertSearchResponse() before the // assertions // the reason is that this would trigger toXContent and we would like to check that this has no // potential side effects response = client() .prepareSearch(INDEX_NAME) .setTypes(DOC_TYPE) .addAggregation( new TermsBuilder("class") .field(CLASS_FIELD) .subAggregation( (new SignificantTermsBuilder("sig_terms")) .field(TEXT_FIELD) .significanceHeuristic(new SimpleHeuristic.SimpleHeuristicBuilder()) .minDocCount(1))) .execute() 
.actionGet(); classes = (StringTerms) response.getAggregations().get("class"); assertThat(classes.getBuckets().size(), equalTo(2)); for (Terms.Bucket classBucket : classes.getBuckets()) { Map<String, Aggregation> aggs = classBucket.getAggregations().asMap(); assertTrue(aggs.containsKey("sig_terms")); SignificantTerms agg = (SignificantTerms) aggs.get("sig_terms"); assertThat(agg.getBuckets().size(), equalTo(2)); Iterator<SignificantTerms.Bucket> bucketIterator = agg.iterator(); SignificantTerms.Bucket sigBucket = bucketIterator.next(); String term = sigBucket.getKeyAsString(); String classTerm = classBucket.getKeyAsString(); assertTrue(term.equals(classTerm)); assertThat(sigBucket.getSignificanceScore(), closeTo(2.0, 1.e-8)); sigBucket = bucketIterator.next(); assertThat(sigBucket.getSignificanceScore(), closeTo(1.0, 1.e-8)); } }