コード例 #1
0
 /** checks collection-level statistics on Terms */
 public void assertTermsStatistics(Terms leftTerms, Terms rightTerms) throws Exception {
   if (leftTerms.getDocCount() != -1 && rightTerms.getDocCount() != -1) {
     assertEquals(leftTerms.getDocCount(), rightTerms.getDocCount());
   }
   if (leftTerms.getSumDocFreq() != -1 && rightTerms.getSumDocFreq() != -1) {
     assertEquals(leftTerms.getSumDocFreq(), rightTerms.getSumDocFreq());
   }
   if (leftTerms.getSumTotalTermFreq() != -1 && rightTerms.getSumTotalTermFreq() != -1) {
     assertEquals(leftTerms.getSumTotalTermFreq(), rightTerms.getSumTotalTermFreq());
   }
   if (leftTerms.size() != -1 && rightTerms.size() != -1) {
     assertEquals(leftTerms.size(), rightTerms.size());
   }
 }
コード例 #2
0
 private void buildFieldStatistics(XContentBuilder builder, Terms curTerms) throws IOException {
   long sumDocFreq = curTerms.getSumDocFreq();
   int docCount = curTerms.getDocCount();
   long sumTotalTermFrequencies = curTerms.getSumTotalTermFreq();
   if (docCount > 0) {
     assert ((sumDocFreq > 0)) : "docCount >= 0 but sumDocFreq ain't!";
     assert ((sumTotalTermFrequencies > 0)) : "docCount >= 0 but sumTotalTermFrequencies ain't!";
     builder.startObject(FieldStrings.FIELD_STATISTICS);
     builder.field(FieldStrings.SUM_DOC_FREQ, sumDocFreq);
     builder.field(FieldStrings.DOC_COUNT, docCount);
     builder.field(FieldStrings.SUM_TTF, sumTotalTermFrequencies);
     builder.endObject();
   } else if (docCount == -1) { // this should only be -1 if the field
     // statistics were not requested at all. In
     // this case all 3 values should be -1
     assert ((sumDocFreq == -1)) : "docCount was -1 but sumDocFreq ain't!";
     assert ((sumTotalTermFrequencies == -1))
         : "docCount was -1 but sumTotalTermFrequencies ain't!";
   } else {
     throw new ElasticsearchIllegalStateException(
         "Something is wrong with the field statistics of the term vector request: Values are "
             + "\n"
             + FieldStrings.SUM_DOC_FREQ
             + " "
             + sumDocFreq
             + "\n"
             + FieldStrings.DOC_COUNT
             + " "
             + docCount
             + "\n"
             + FieldStrings.SUM_TTF
             + " "
             + sumTotalTermFrequencies);
   }
 }
コード例 #3
0
 @Override
 public FieldStats stats(Terms terms, int maxDoc) throws IOException {
   long minValue = NumericUtils.getMinInt(terms);
   long maxValue = NumericUtils.getMaxInt(terms);
   return new FieldStats.Long(
       maxDoc,
       terms.getDocCount(),
       terms.getSumDocFreq(),
       terms.getSumTotalTermFreq(),
       minValue,
       maxValue);
 }
コード例 #4
0
 @Override
 public FieldStats stats(Terms terms, int maxDoc) throws IOException {
   float minValue = NumericUtils.sortableIntToFloat(NumericUtils.getMinInt(terms));
   float maxValue = NumericUtils.sortableIntToFloat(NumericUtils.getMaxInt(terms));
   return new FieldStats.Float(
       maxDoc,
       terms.getDocCount(),
       terms.getSumDocFreq(),
       terms.getSumTotalTermFreq(),
       minValue,
       maxValue);
 }
コード例 #5
0
 @Override
 public FieldStats.Long stats(IndexReader reader) throws IOException {
   int maxDoc = reader.maxDoc();
   Terms terms = org.apache.lucene.index.MultiFields.getTerms(reader, name());
   if (terms == null) {
     return null;
   }
   long minValue = LegacyNumericUtils.getMinInt(terms);
   long maxValue = LegacyNumericUtils.getMaxInt(terms);
   return new FieldStats.Long(
       maxDoc,
       terms.getDocCount(),
       terms.getSumDocFreq(),
       terms.getSumTotalTermFreq(),
       isSearchable(),
       isAggregatable(),
       minValue,
       maxValue);
 }
コード例 #6
0
    public static TermsEnum filter(
        TermsEnum toFilter, Terms terms, AtomicReader reader, Settings settings)
        throws IOException {
      int docCount = terms.getDocCount();
      if (docCount == -1) {
        docCount = reader.maxDoc();
      }
      final double minFrequency = settings.getAsDouble("min", 0d);
      final double maxFrequency = settings.getAsDouble("max", docCount + 1d);
      final double minSegmentSize = settings.getAsInt("min_segment_size", 0);
      if (minSegmentSize < docCount) {
        final int minFreq =
            minFrequency >= 1.0 ? (int) minFrequency : (int) (docCount * minFrequency);
        final int maxFreq =
            maxFrequency >= 1.0 ? (int) maxFrequency : (int) (docCount * maxFrequency);
        assert minFreq < maxFreq;
        return new FrequencyFilter(toFilter, minFreq, maxFreq);
      }

      return toFilter;
    }
コード例 #7
0
 @Override
 public int getDocCount() throws IOException {
   return delegateTerms.getDocCount();
 }
コード例 #8
0
  private void checkAllInfo(
      int numDocs,
      String[] values,
      int[] freq,
      int[][] pos,
      int[][] startOffset,
      int[][] endOffset,
      int i)
      throws IOException {
    TermVectorsRequestBuilder resp =
        client()
            .prepareTermVectors("test", "type1", Integer.toString(i))
            .setPayloads(true)
            .setOffsets(true)
            .setPositions(true)
            .setFieldStatistics(true)
            .setTermStatistics(true)
            .setSelectedFields();
    assertThat(resp.request().fieldStatistics(), equalTo(true));
    TermVectorsResponse response = resp.execute().actionGet();
    assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true));
    Fields fields = response.getFields();
    assertThat(fields.size(), equalTo(1));
    Terms terms = fields.terms("field");
    assertThat(terms.size(), equalTo(8l));
    assertThat(terms.getSumTotalTermFreq(), Matchers.equalTo((long) (9 * numDocs)));
    assertThat(terms.getDocCount(), Matchers.equalTo(numDocs));
    assertThat(terms.getSumDocFreq(), equalTo((long) numDocs * values.length));
    TermsEnum iterator = terms.iterator();
    for (int j = 0; j < values.length; j++) {
      String string = values[j];
      BytesRef next = iterator.next();
      assertThat(next, Matchers.notNullValue());
      assertThat("expected " + string, string, equalTo(next.utf8ToString()));
      assertThat(next, Matchers.notNullValue());
      if (string.equals("the")) {
        assertThat(
            "expected ttf of " + string, numDocs * 2, equalTo((int) iterator.totalTermFreq()));
      } else {
        assertThat("expected ttf of " + string, numDocs, equalTo((int) iterator.totalTermFreq()));
      }

      PostingsEnum docsAndPositions = iterator.postings(null, null, PostingsEnum.ALL);
      assertThat(docsAndPositions.nextDoc(), equalTo(0));
      assertThat(freq[j], equalTo(docsAndPositions.freq()));
      assertThat(iterator.docFreq(), equalTo(numDocs));
      int[] termPos = pos[j];
      int[] termStartOffset = startOffset[j];
      int[] termEndOffset = endOffset[j];
      assertThat(termPos.length, equalTo(freq[j]));
      assertThat(termStartOffset.length, equalTo(freq[j]));
      assertThat(termEndOffset.length, equalTo(freq[j]));
      for (int k = 0; k < freq[j]; k++) {
        int nextPosition = docsAndPositions.nextPosition();
        assertThat("term: " + string, nextPosition, equalTo(termPos[k]));
        assertThat("term: " + string, docsAndPositions.startOffset(), equalTo(termStartOffset[k]));
        assertThat("term: " + string, docsAndPositions.endOffset(), equalTo(termEndOffset[k]));
        assertThat("term: " + string, docsAndPositions.getPayload(), equalTo(new BytesRef("word")));
      }
    }
    assertThat(iterator.next(), Matchers.nullValue());

    XContentBuilder xBuilder = XContentFactory.jsonBuilder();
    xBuilder.startObject();
    response.toXContent(xBuilder, ToXContent.EMPTY_PARAMS);
    xBuilder.endObject();
    BytesStream bytesStream = xBuilder.bytesStream();
    String utf8 = bytesStream.bytes().toUtf8().replaceFirst("\"took\":\\d+,", "");
    ;
    String expectedString =
        "{\"_index\":\"test\",\"_type\":\"type1\",\"_id\":\""
            + i
            + "\",\"_version\":1,\"found\":true,\"term_vectors\":{\"field\":{\"field_statistics\":{\"sum_doc_freq\":120,\"doc_count\":15,\"sum_ttf\":135},\"terms\":{\"brown\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":2,\"start_offset\":10,\"end_offset\":15,\"payload\":\"d29yZA==\"}]},\"dog\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":8,\"start_offset\":40,\"end_offset\":43,\"payload\":\"d29yZA==\"}]},\"fox\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":3,\"start_offset\":16,\"end_offset\":19,\"payload\":\"d29yZA==\"}]},\"jumps\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":4,\"start_offset\":20,\"end_offset\":25,\"payload\":\"d29yZA==\"}]},\"lazy\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":7,\"start_offset\":35,\"end_offset\":39,\"payload\":\"d29yZA==\"}]},\"over\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":5,\"start_offset\":26,\"end_offset\":30,\"payload\":\"d29yZA==\"}]},\"quick\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":1,\"start_offset\":4,\"end_offset\":9,\"payload\":\"d29yZA==\"}]},\"the\":{\"doc_freq\":15,\"ttf\":30,\"term_freq\":2,\"tokens\":[{\"position\":0,\"start_offset\":0,\"end_offset\":3,\"payload\":\"d29yZA==\"},{\"position\":6,\"start_offset\":31,\"end_offset\":34,\"payload\":\"d29yZA==\"}]}}}}}";
    assertThat(utf8, equalTo(expectedString));
  }
コード例 #9
0
  private void duellReaders(CompositeReader other, LeafReader memIndexReader) throws IOException {
    Fields memFields = memIndexReader.fields();
    for (String field : MultiFields.getFields(other)) {
      Terms memTerms = memFields.terms(field);
      Terms iwTerms = memIndexReader.terms(field);
      if (iwTerms == null) {
        assertNull(memTerms);
      } else {
        NumericDocValues normValues = MultiDocValues.getNormValues(other, field);
        NumericDocValues memNormValues = memIndexReader.getNormValues(field);
        if (normValues != null) {
          // mem idx always computes norms on the fly
          assertNotNull(memNormValues);
          assertEquals(normValues.get(0), memNormValues.get(0));
        }

        assertNotNull(memTerms);
        assertEquals(iwTerms.getDocCount(), memTerms.getDocCount());
        assertEquals(iwTerms.getSumDocFreq(), memTerms.getSumDocFreq());
        assertEquals(iwTerms.getSumTotalTermFreq(), memTerms.getSumTotalTermFreq());
        TermsEnum iwTermsIter = iwTerms.iterator();
        TermsEnum memTermsIter = memTerms.iterator();
        if (iwTerms.hasPositions()) {
          final boolean offsets = iwTerms.hasOffsets() && memTerms.hasOffsets();

          while (iwTermsIter.next() != null) {
            assertNotNull(memTermsIter.next());
            assertEquals(iwTermsIter.term(), memTermsIter.term());
            PostingsEnum iwDocsAndPos = iwTermsIter.postings(null, PostingsEnum.ALL);
            PostingsEnum memDocsAndPos = memTermsIter.postings(null, PostingsEnum.ALL);
            while (iwDocsAndPos.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
              assertEquals(iwDocsAndPos.docID(), memDocsAndPos.nextDoc());
              assertEquals(iwDocsAndPos.freq(), memDocsAndPos.freq());
              for (int i = 0; i < iwDocsAndPos.freq(); i++) {
                assertEquals(
                    "term: " + iwTermsIter.term().utf8ToString(),
                    iwDocsAndPos.nextPosition(),
                    memDocsAndPos.nextPosition());
                if (offsets) {
                  assertEquals(iwDocsAndPos.startOffset(), memDocsAndPos.startOffset());
                  assertEquals(iwDocsAndPos.endOffset(), memDocsAndPos.endOffset());
                }

                if (iwTerms.hasPayloads()) {
                  assertEquals(iwDocsAndPos.getPayload(), memDocsAndPos.getPayload());
                }
              }
            }
          }
        } else {
          while (iwTermsIter.next() != null) {
            assertEquals(iwTermsIter.term(), memTermsIter.term());
            PostingsEnum iwDocsAndPos = iwTermsIter.postings(null);
            PostingsEnum memDocsAndPos = memTermsIter.postings(null);
            while (iwDocsAndPos.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
              assertEquals(iwDocsAndPos.docID(), memDocsAndPos.nextDoc());
              assertEquals(iwDocsAndPos.freq(), memDocsAndPos.freq());
            }
          }
        }
      }
    }
  }