Ejemplo n.º 1
0
 public static LinkedHashMap<String, Long> termsToMap(Terms terms) {
   LinkedHashMap<String, Long> map = new LinkedHashMap<>();
   List<Terms.Bucket> buckets = terms.getBuckets();
   for (Terms.Bucket bucket : buckets) {
     map.put(bucket.getKeyAsString(), bucket.getDocCount());
   }
   return map;
 }
Ejemplo n.º 2
0
  @Test
  public void testMetric_topLevel() throws Exception {
    SearchResponse response =
        client()
            .prepareSearch("idx")
            .addAggregation(
                terms("terms")
                    .field("tag")
                    .subAggregation(sum("sum").field(SINGLE_VALUED_FIELD_NAME)))
            .addAggregation(maxBucket("max_bucket").setBucketsPaths("terms>sum"))
            .execute()
            .actionGet();

    assertSearchResponse(response);

    Terms terms = response.getAggregations().get("terms");
    assertThat(terms, notNullValue());
    assertThat(terms.getName(), equalTo("terms"));
    List<Terms.Bucket> buckets = terms.getBuckets();
    assertThat(buckets.size(), equalTo(interval));

    List<String> maxKeys = new ArrayList<>();
    double maxValue = Double.NEGATIVE_INFINITY;
    for (int i = 0; i < interval; ++i) {
      Terms.Bucket bucket = buckets.get(i);
      assertThat(bucket, notNullValue());
      assertThat((String) bucket.getKey(), equalTo("tag" + (i % interval)));
      assertThat(bucket.getDocCount(), greaterThan(0l));
      Sum sum = bucket.getAggregations().get("sum");
      assertThat(sum, notNullValue());
      if (sum.value() > maxValue) {
        maxValue = sum.value();
        maxKeys = new ArrayList<>();
        maxKeys.add(bucket.getKeyAsString());
      } else if (sum.value() == maxValue) {
        maxKeys.add(bucket.getKeyAsString());
      }
    }

    InternalBucketMetricValue maxBucketValue = response.getAggregations().get("max_bucket");
    assertThat(maxBucketValue, notNullValue());
    assertThat(maxBucketValue.getName(), equalTo("max_bucket"));
    assertThat(maxBucketValue.value(), equalTo(maxValue));
    assertThat(maxBucketValue.keys(), equalTo(maxKeys.toArray(new String[maxKeys.size()])));
  }
  private void assertDocCountErrorWithinBounds(
      int size, SearchResponse accurateResponse, SearchResponse testResponse) {
    Terms accurateTerms = accurateResponse.getAggregations().get("terms");
    assertThat(accurateTerms, notNullValue());
    assertThat(accurateTerms.getName(), equalTo("terms"));
    assertThat(accurateTerms.getDocCountError(), equalTo(0L));

    Terms testTerms = testResponse.getAggregations().get("terms");
    assertThat(testTerms, notNullValue());
    assertThat(testTerms.getName(), equalTo("terms"));
    assertThat(testTerms.getDocCountError(), greaterThanOrEqualTo(0L));
    Collection<Bucket> testBuckets = testTerms.getBuckets();
    assertThat(testBuckets.size(), lessThanOrEqualTo(size));
    assertThat(accurateTerms.getBuckets().size(), greaterThanOrEqualTo(testBuckets.size()));

    for (Terms.Bucket testBucket : testBuckets) {
      assertThat(testBucket, notNullValue());
      Terms.Bucket accurateBucket = accurateTerms.getBucketByKey(testBucket.getKeyAsString());
      assertThat(accurateBucket, notNullValue());
      assertThat(accurateBucket.getDocCountError(), equalTo(0L));
      assertThat(testBucket.getDocCountError(), lessThanOrEqualTo(testTerms.getDocCountError()));
      assertThat(
          testBucket.getDocCount() + testBucket.getDocCountError(),
          greaterThanOrEqualTo(accurateBucket.getDocCount()));
      assertThat(
          testBucket.getDocCount() - testBucket.getDocCountError(),
          lessThanOrEqualTo(accurateBucket.getDocCount()));
    }

    for (Terms.Bucket accurateBucket : accurateTerms.getBuckets()) {
      assertThat(accurateBucket, notNullValue());
      Terms.Bucket testBucket = accurateTerms.getBucketByKey(accurateBucket.getKeyAsString());
      if (testBucket == null) {
        assertThat(accurateBucket.getDocCount(), lessThanOrEqualTo(testTerms.getDocCountError()));
      }
    }
  }
  public void testXContentResponse() throws Exception {
    String type = randomBoolean() ? "string" : "long";
    String settings = "{\"index.number_of_shards\": 1, \"index.number_of_replicas\": 0}";
    SharedSignificantTermsTestMethods.index01Docs(type, settings, this);
    SearchResponse response =
        client()
            .prepareSearch(INDEX_NAME)
            .setTypes(DOC_TYPE)
            .addAggregation(
                new TermsBuilder("class")
                    .field(CLASS_FIELD)
                    .subAggregation(new SignificantTermsBuilder("sig_terms").field(TEXT_FIELD)))
            .execute()
            .actionGet();
    assertSearchResponse(response);
    StringTerms classes = (StringTerms) response.getAggregations().get("class");
    assertThat(classes.getBuckets().size(), equalTo(2));
    for (Terms.Bucket classBucket : classes.getBuckets()) {
      Map<String, Aggregation> aggs = classBucket.getAggregations().asMap();
      assertTrue(aggs.containsKey("sig_terms"));
      SignificantTerms agg = (SignificantTerms) aggs.get("sig_terms");
      assertThat(agg.getBuckets().size(), equalTo(1));
      String term = agg.iterator().next().getKeyAsString();
      String classTerm = classBucket.getKeyAsString();
      assertTrue(term.equals(classTerm));
    }

    XContentBuilder responseBuilder = XContentFactory.jsonBuilder();
    classes.toXContent(responseBuilder, null);
    String result = null;
    if (type.equals("long")) {
      result =
          "\"class\"{\"doc_count_error_upper_bound\":0,\"sum_other_doc_count\":0,\"buckets\":[{\"key\":\"0\",\"doc_count\":4,\"sig_terms\":{\"doc_count\":4,\"buckets\":[{\"key\":0,\"key_as_string\":\"0\",\"doc_count\":4,\"score\":0.39999999999999997,\"bg_count\":5}]}},{\"key\":\"1\",\"doc_count\":3,\"sig_terms\":{\"doc_count\":3,\"buckets\":[{\"key\":1,\"key_as_string\":\"1\",\"doc_count\":3,\"score\":0.75,\"bg_count\":4}]}}]}";
    } else {
      result =
          "\"class\"{\"doc_count_error_upper_bound\":0,\"sum_other_doc_count\":0,\"buckets\":[{\"key\":\"0\",\"doc_count\":4,\"sig_terms\":{\"doc_count\":4,\"buckets\":[{\"key\":\"0\",\"doc_count\":4,\"score\":0.39999999999999997,\"bg_count\":5}]}},{\"key\":\"1\",\"doc_count\":3,\"sig_terms\":{\"doc_count\":3,\"buckets\":[{\"key\":\"1\",\"doc_count\":3,\"score\":0.75,\"bg_count\":4}]}}]}";
    }
    assertThat(responseBuilder.string(), equalTo(result));
  }
  public void testPlugin() throws Exception {
    String type = randomBoolean() ? "string" : "long";
    String settings = "{\"index.number_of_shards\": 1, \"index.number_of_replicas\": 0}";
    SharedSignificantTermsTestMethods.index01Docs(type, settings, this);
    SearchResponse response =
        client()
            .prepareSearch(INDEX_NAME)
            .setTypes(DOC_TYPE)
            .addAggregation(
                new TermsBuilder("class")
                    .field(CLASS_FIELD)
                    .subAggregation(
                        (new SignificantTermsBuilder("sig_terms"))
                            .field(TEXT_FIELD)
                            .significanceHeuristic(new SimpleHeuristic.SimpleHeuristicBuilder())
                            .minDocCount(1)))
            .execute()
            .actionGet();
    assertSearchResponse(response);
    StringTerms classes = (StringTerms) response.getAggregations().get("class");
    assertThat(classes.getBuckets().size(), equalTo(2));
    for (Terms.Bucket classBucket : classes.getBuckets()) {
      Map<String, Aggregation> aggs = classBucket.getAggregations().asMap();
      assertTrue(aggs.containsKey("sig_terms"));
      SignificantTerms agg = (SignificantTerms) aggs.get("sig_terms");
      assertThat(agg.getBuckets().size(), equalTo(2));
      Iterator<SignificantTerms.Bucket> bucketIterator = agg.iterator();
      SignificantTerms.Bucket sigBucket = bucketIterator.next();
      String term = sigBucket.getKeyAsString();
      String classTerm = classBucket.getKeyAsString();
      assertTrue(term.equals(classTerm));
      assertThat(sigBucket.getSignificanceScore(), closeTo(2.0, 1.e-8));
      sigBucket = bucketIterator.next();
      assertThat(sigBucket.getSignificanceScore(), closeTo(1.0, 1.e-8));
    }

    // we run the same test again but this time we do not call assertSearchResponse() before the
    // assertions
    // the reason is that this would trigger toXContent and we would like to check that this has no
    // potential side effects

    response =
        client()
            .prepareSearch(INDEX_NAME)
            .setTypes(DOC_TYPE)
            .addAggregation(
                new TermsBuilder("class")
                    .field(CLASS_FIELD)
                    .subAggregation(
                        (new SignificantTermsBuilder("sig_terms"))
                            .field(TEXT_FIELD)
                            .significanceHeuristic(new SimpleHeuristic.SimpleHeuristicBuilder())
                            .minDocCount(1)))
            .execute()
            .actionGet();

    classes = (StringTerms) response.getAggregations().get("class");
    assertThat(classes.getBuckets().size(), equalTo(2));
    for (Terms.Bucket classBucket : classes.getBuckets()) {
      Map<String, Aggregation> aggs = classBucket.getAggregations().asMap();
      assertTrue(aggs.containsKey("sig_terms"));
      SignificantTerms agg = (SignificantTerms) aggs.get("sig_terms");
      assertThat(agg.getBuckets().size(), equalTo(2));
      Iterator<SignificantTerms.Bucket> bucketIterator = agg.iterator();
      SignificantTerms.Bucket sigBucket = bucketIterator.next();
      String term = sigBucket.getKeyAsString();
      String classTerm = classBucket.getKeyAsString();
      assertTrue(term.equals(classTerm));
      assertThat(sigBucket.getSignificanceScore(), closeTo(2.0, 1.e-8));
      sigBucket = bucketIterator.next();
      assertThat(sigBucket.getSignificanceScore(), closeTo(1.0, 1.e-8));
    }
  }
Ejemplo n.º 6
0
  @Test
  public void testNested() throws Exception {
    SearchResponse response =
        client()
            .prepareSearch("idx")
            .addAggregation(
                terms("terms")
                    .field("tag")
                    .order(Order.term(true))
                    .subAggregation(
                        histogram("histo")
                            .field(SINGLE_VALUED_FIELD_NAME)
                            .interval(interval)
                            .extendedBounds((long) minRandomValue, (long) maxRandomValue))
                    .subAggregation(maxBucket("max_histo_bucket").setBucketsPaths("histo>_count")))
            .addAggregation(maxBucket("max_terms_bucket").setBucketsPaths("terms>max_histo_bucket"))
            .execute()
            .actionGet();

    assertSearchResponse(response);

    Terms terms = response.getAggregations().get("terms");
    assertThat(terms, notNullValue());
    assertThat(terms.getName(), equalTo("terms"));
    List<Terms.Bucket> termsBuckets = terms.getBuckets();
    assertThat(termsBuckets.size(), equalTo(interval));

    List<String> maxTermsKeys = new ArrayList<>();
    double maxTermsValue = Double.NEGATIVE_INFINITY;
    for (int i = 0; i < interval; ++i) {
      Terms.Bucket termsBucket = termsBuckets.get(i);
      assertThat(termsBucket, notNullValue());
      assertThat((String) termsBucket.getKey(), equalTo("tag" + (i % interval)));

      Histogram histo = termsBucket.getAggregations().get("histo");
      assertThat(histo, notNullValue());
      assertThat(histo.getName(), equalTo("histo"));
      List<? extends Bucket> buckets = histo.getBuckets();

      List<String> maxHistoKeys = new ArrayList<>();
      double maxHistoValue = Double.NEGATIVE_INFINITY;
      for (int j = 0; j < numValueBuckets; ++j) {
        Histogram.Bucket bucket = buckets.get(j);
        assertThat(bucket, notNullValue());
        assertThat(((Number) bucket.getKey()).longValue(), equalTo((long) j * interval));
        if (bucket.getDocCount() > maxHistoValue) {
          maxHistoValue = bucket.getDocCount();
          maxHistoKeys = new ArrayList<>();
          maxHistoKeys.add(bucket.getKeyAsString());
        } else if (bucket.getDocCount() == maxHistoValue) {
          maxHistoKeys.add(bucket.getKeyAsString());
        }
      }

      InternalBucketMetricValue maxBucketValue =
          termsBucket.getAggregations().get("max_histo_bucket");
      assertThat(maxBucketValue, notNullValue());
      assertThat(maxBucketValue.getName(), equalTo("max_histo_bucket"));
      assertThat(maxBucketValue.value(), equalTo(maxHistoValue));
      assertThat(
          maxBucketValue.keys(), equalTo(maxHistoKeys.toArray(new String[maxHistoKeys.size()])));
      if (maxHistoValue > maxTermsValue) {
        maxTermsValue = maxHistoValue;
        maxTermsKeys = new ArrayList<>();
        maxTermsKeys.add(termsBucket.getKeyAsString());
      } else if (maxHistoValue == maxTermsValue) {
        maxTermsKeys.add(termsBucket.getKeyAsString());
      }
    }

    InternalBucketMetricValue maxBucketValue = response.getAggregations().get("max_terms_bucket");
    assertThat(maxBucketValue, notNullValue());
    assertThat(maxBucketValue.getName(), equalTo("max_terms_bucket"));
    assertThat(maxBucketValue.value(), equalTo(maxTermsValue));
    assertThat(
        maxBucketValue.keys(), equalTo(maxTermsKeys.toArray(new String[maxTermsKeys.size()])));
  }