/**
 * Walks every match of {@code spans} across all documents and checks the number of payloads
 * collected at each match.
 *
 * @param spans the spans to iterate; they are advanced until no documents remain
 * @param numSpans the total number of matches expected over all documents
 * @param numPayloads the expected payload count for each match, indexed by match order
 * @throws IOException propagated from advancing the spans or collecting from them
 */
private void checkSpans(Spans spans, int numSpans, int[] numPayloads) throws IOException {
    int cnt = 0;
    VerifyingCollector collector = new VerifyingCollector();
    while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
      while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
        if (VERBOSE) System.out.println("\nSpans Dump --");
        // Fail with a readable message rather than an ArrayIndexOutOfBoundsException when the
        // spans yield more matches than the caller supplied expectations for.
        if (cnt >= numPayloads.length) {
          throw new AssertionError(
              "more spans matched than the " + numPayloads.length + " expected payload counts");
        }
        collector.reset();
        spans.collect(collector);
        assertEquals("payload size", numPayloads[cnt], collector.payloads.size());

        cnt++;
      }
    }

    assertEquals("expected numSpans", numSpans, cnt);
  }
  /**
   * Indexes one document, runs an ordered span-near query with slop 0 for "a" followed by "k",
   * and checks that the payloads collected from the matching spans are exactly
   * {@code a:Noise:10} and {@code k:Noise:11}.
   */
  public void testShrinkToAfterShortestMatch3() throws IOException {
    Directory directory = newDirectory();
    RandomIndexWriter indexWriter =
        new RandomIndexWriter(random(), directory, newIndexWriterConfig(new TestPayloadAnalyzer()));

    Document doc = new Document();
    doc.add(new TextField("content", new StringReader("j k a l f k k p a t a k l k t a")));
    indexWriter.addDocument(doc);
    IndexReader reader = indexWriter.getReader();
    IndexSearcher searcher = newSearcher(reader);
    indexWriter.close();

    SpanQuery[] clauses = {
      new SpanTermQuery(new Term("content", "a")), new SpanTermQuery(new Term("content", "k"))
    };
    SpanNearQuery nearQuery = new SpanNearQuery(clauses, 0, true);
    Spans spans =
        MultiSpansWrapper.wrap(searcher.getIndexReader(), nearQuery, SpanWeight.Postings.PAYLOADS);

    TopDocs hits = searcher.search(nearQuery, 1);
    Set<String> seenPayloads = new HashSet<>();
    VerifyingCollector payloadCollector = new VerifyingCollector();

    // Walk the spans once for each hit returned by the search.
    int remainingHits = hits.scoreDocs.length;
    while (remainingHits-- > 0) {
      for (int doc_ = spans.nextDoc(); doc_ != Spans.NO_MORE_DOCS; doc_ = spans.nextDoc()) {
        for (int start = spans.nextStartPosition();
            start != Spans.NO_MORE_POSITIONS;
            start = spans.nextStartPosition()) {
          payloadCollector.reset();
          spans.collect(payloadCollector);
          // Record each payload in its string form so duplicates collapse in the set.
          for (final BytesRef bytes : payloadCollector.payloads) {
            seenPayloads.add(Term.toString(bytes));
          }
        }
      }
    }

    assertEquals(2, seenPayloads.size());
    if (VERBOSE) {
      for (final String seen : seenPayloads) {
        System.out.println("match:" + seen);
      }
    }
    assertTrue(seenPayloads.contains("a:Noise:10"));
    assertTrue(seenPayloads.contains("k:Noise:11"));

    reader.close();
    directory.close();
  }
 /**
  * Iterates every match of {@code spans} and, for each one, verifies the collected payloads and
  * their count; finally checks the total number of matches.
  *
  * @param spans the spans to iterate; must not be null
  * @param expectedNumSpans the total number of matches expected over all documents
  * @param expectedNumPayloads the number of payloads expected at every match
  * @param expectedPayloadLength passed to the collector's {@code verify} for every match
  * @param expectedFirstByte passed to the collector's {@code verify} for every match
  * @throws IOException propagated from advancing the spans or collecting from them
  */
 private void checkSpans(
     Spans spans,
     int expectedNumSpans,
     int expectedNumPayloads,
     int expectedPayloadLength,
     int expectedFirstByte)
     throws IOException {
   boolean spansPresent = spans != null;
   assertTrue("spans is null and it shouldn't be", spansPresent);

   // Every matching position should come with a span. Callers pass spans built from a single
   // underlying term query, so each span is expected to hold just one entry.
   VerifyingCollector payloadCollector = new VerifyingCollector();
   int matches = 0;
   for (int doc = spans.nextDoc(); doc != Spans.NO_MORE_DOCS; doc = spans.nextDoc()) {
     for (int start = spans.nextStartPosition();
         start != Spans.NO_MORE_POSITIONS;
         start = spans.nextStartPosition()) {
       payloadCollector.reset();
       spans.collect(payloadCollector);
       payloadCollector.verify(expectedPayloadLength, expectedFirstByte);
       assertEquals("expectedNumPayloads", expectedNumPayloads, payloadCollector.payloads.size());
       matches++;
     }
   }
   assertEquals("expectedNumSpans", expectedNumSpans, matches);
 }
  // Example no. 4
  // 0
  /**
   * Indexes one document, checks the frequency and positions the postings report for the term
   * "a", then runs an unordered span-near query (slop 30) over "a" and "k" twice: once collecting
   * payloads (8 payloads expected) and once without (4 matches expected). Both passes must see a
   * match starting at position 0.
   */
  public void testPayloadsPos0() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter indexWriter =
        new RandomIndexWriter(random(), dir, new MockPayloadAnalyzer());
    Document doc = new Document();
    doc.add(new TextField("content", new StringReader("a a b c d e a f g h i j a b k k")));
    indexWriter.addDocument(doc);

    final IndexReader readerFromWriter = indexWriter.getReader();
    LeafReader leafReader = SlowCompositeReaderWrapper.wrap(readerFromWriter);

    PostingsEnum postings = leafReader.postings(new Term("content", "a"), PostingsEnum.ALL);

    // There must be a first document containing "a" ...
    assertTrue(postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    // ... in which "a" is reported four times, at positions 0, 1, 3 and 6 ...
    assertEquals(4, postings.freq());
    assertEquals(0, postings.nextPosition());
    assertEquals(1, postings.nextPosition());
    assertEquals(3, postings.nextPosition());
    assertEquals(6, postings.nextPosition());
    // ... and no second document.
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());

    IndexSearcher searcher = newSearcher(readerFromWriter);
    IndexReader searchReader = searcher.getIndexReader();

    SpanQuery[] clauses = {
      new SpanTermQuery(new Term("content", "a")), new SpanTermQuery(new Term("content", "k"))
    };
    SpanNearQuery nearQuery = new SpanNearQuery(clauses, 30, false);

    // First pass: spans with payloads; count every collected payload.
    int payloadCount = 0;
    boolean payloadPassSawZero = false;
    if (VERBOSE) {
      System.out.println("\ngetPayloadSpans test");
    }
    PayloadSpanCollector payloadCollector = new PayloadSpanCollector();
    Spans pspans = MultiSpansWrapper.wrap(searchReader, nearQuery, SpanWeight.Postings.PAYLOADS);
    for (int d = pspans.nextDoc(); d != Spans.NO_MORE_DOCS; d = pspans.nextDoc()) {
      for (int s = pspans.nextStartPosition();
          s != Spans.NO_MORE_POSITIONS;
          s = pspans.nextStartPosition()) {
        if (VERBOSE) {
          System.out.println(
              "doc "
                  + pspans.docID()
                  + ": span "
                  + pspans.startPosition()
                  + " to "
                  + pspans.endPosition());
        }
        payloadCollector.reset();
        pspans.collect(payloadCollector);
        if (pspans.startPosition() == 0) {
          payloadPassSawZero = true;
        }
        for (BytesRef bytes : payloadCollector.payloads) {
          payloadCount++;
          if (VERBOSE) {
            System.out.println("  payload: " + Term.toString(bytes));
          }
        }
      }
    }
    assertTrue(payloadPassSawZero);
    assertEquals(8, payloadCount);

    // Second pass: plain spans; count the matches themselves.
    Spans spans = MultiSpansWrapper.wrap(searchReader, nearQuery);
    int matchCount = 0;
    boolean plainPassSawZero = false;
    for (int d = spans.nextDoc(); d != Spans.NO_MORE_DOCS; d = spans.nextDoc()) {
      for (int s = spans.nextStartPosition();
          s != Spans.NO_MORE_POSITIONS;
          s = spans.nextStartPosition()) {
        matchCount++;
        if (spans.startPosition() == 0) {
          plainPassSawZero = true;
        }
      }
    }
    assertEquals(4, matchCount);
    assertTrue(plainPassSawZero);

    indexWriter.close();
    searchReader.close();
    dir.close();
  }