/**
  * Folds every payload in {@code payLoads} into the running {@code payloadScore}.
  *
  * <p>Each payload is scored through {@code similarity.scorePayload} and that raw score is then
  * combined with the current total by {@code function.currentScore}; {@code payloadsSeen} is
  * bumped once per payload. Subclasses may override this to treat payloads differently.
  *
  * <p>Note that the similarity call is given the positions of the enclosing {@code spans}
  * ({@code spans.start()} / {@code spans.end()}), whereas {@code start} and {@code end} are only
  * handed to the payload function.
  *
  * @param payLoads the payloads to score
  * @param start the start position of the span being scored
  * @param end the end position of the span being scored
  * @see Spans
  */
 protected void processPayloads(Collection<byte[]> payLoads, int start, int end) {
   for (final byte[] bytes : payLoads) {
     // Score this single payload over its full length.
     final float rawScore =
         similarity.scorePayload(doc, spans.start(), spans.end(), bytes, 0, bytes.length);
     // Merge it into the aggregate kept for the current document.
     payloadScore =
         function.currentScore(doc, fieldName, start, end, payloadsSeen, payloadScore, rawScore);
     payloadsSeen++;
   }
 }
コード例 #2
0
  /**
   * Walks every match of {@code spans} and checks the number of payloads collected for each one.
   *
   * @param spans the spans to iterate; consumed by this method
   * @param numSpans the total number of matches expected across all documents
   * @param numPayloads the expected payload count per match, indexed in iteration order
   * @throws IOException if iterating or collecting from the spans fails
   */
  private void checkSpans(Spans spans, int numSpans, int[] numPayloads) throws IOException {
    final VerifyingCollector verifier = new VerifyingCollector();
    int matchIndex = 0;
    for (int docId = spans.nextDoc(); docId != Spans.NO_MORE_DOCS; docId = spans.nextDoc()) {
      for (int pos = spans.nextStartPosition();
          pos != Spans.NO_MORE_POSITIONS;
          pos = spans.nextStartPosition()) {
        if (VERBOSE) {
          System.out.println("\nSpans Dump --");
        }
        // Start from an empty collector so only this match's payloads are counted.
        verifier.reset();
        spans.collect(verifier);
        assertEquals("payload size", numPayloads[matchIndex], verifier.payloads.size());
        matchIndex++;
      }
    }

    assertEquals("expected numSpans", numSpans, matchIndex);
  }
コード例 #3
0
  /**
   * Prints every span matched by {@code query} to standard output.
   *
   * <p>For each match the text of the document's stored field {@code "f"} is re-analyzed and
   * printed token by token, with the matched span wrapped in {@code <} and {@code >}, followed by
   * the document's score in parentheses. Scores come from the top 10 hits of the same query; a
   * document outside those hits is printed with a score of {@code 0.0}. If nothing matches,
   * {@code "   No spans"} is printed.
   *
   * @param query the span query to run against {@code reader} and {@code searcher}
   * @throws IOException if reading the index or the token stream fails
   */
  private void dumpSpans(SpanQuery query) throws IOException {
    Spans spans = query.getSpans(reader);
    System.out.println(query + ":");
    int numSpans = 0;

    TopDocs hits = searcher.search(query, 10);
    // Scores are looked up by document id, both from the hits and from the spans below, so the
    // table is sized from the reader rather than hard-coded to a two-document index.
    float[] scores = new float[reader.maxDoc()];
    for (ScoreDoc sd : hits.scoreDocs) {
      scores[sd.doc] = sd.score;
    }

    while (spans.next()) { // step through each span match
      numSpans++;

      int id = spans.doc();
      Document doc = reader.document(id); // load the stored fields of the matching document

      // Re-analyze the stored text so token positions can be compared with the span boundaries.
      TokenStream stream = analyzer.tokenStream("contents", new StringReader(doc.get("f")));
      TermAttribute term = stream.addAttribute(TermAttribute.class);

      StringBuilder buffer = new StringBuilder();
      buffer.append("   ");
      int i = 0;
      while (stream.incrementToken()) { // i counts tokens from zero
        if (i == spans.start()) {
          buffer.append("<"); // opening marker before the first token of the span
        }
        buffer.append(term.term());
        if (i + 1 == spans.end()) {
          buffer.append(">"); // closing marker after the last token (span end is exclusive)
        }
        buffer.append(" ");
        i++;
      }
      buffer.append("(").append(scores[id]).append(") ");
      System.out.println(buffer);
    }

    if (numSpans == 0) {
      System.out.println("   No spans");
    }
    System.out.println();
  }
 /**
  * Accumulates the sloppy frequency and payload score for the document the spans are currently
  * positioned on, consuming every match that belongs to that document.
  *
  * @return {@code false} if there are no more matches, {@code true} once the current document's
  *     frequency and payload state have been computed
  * @throws IOException if advancing the spans fails
  */
 @Override
 protected boolean setFreqCurrentDoc() throws IOException {
   if (!more) {
     return false;
   }

   // Reset the per-document accumulators.
   doc = spans.doc();
   freq = 0.0f;
   payloadScore = 0;
   payloadsSeen = 0;

   do {
     // Shorter matches contribute according to the similarity's sloppy frequency.
     freq += similarity.sloppyFreq(spans.end() - spans.start());
     // getPayloads expects an array, so wrap the single top-level spans.
     getPayloads(new Spans[] {spans});
     more = spans.next();
   } while (more && spans.doc() == doc);

   return true;
 }
コード例 #5
0
  /**
   * Indexes one document and verifies that an ordered, zero-slop near query for "a" followed by
   * "k" yields exactly two distinct payloads: {@code "a:Noise:10"} and {@code "k:Noise:11"}.
   *
   * @throws IOException if indexing or searching fails
   */
  public void testShrinkToAfterShortestMatch3() throws IOException {
    final Directory directory = newDirectory();
    final RandomIndexWriter writer =
        new RandomIndexWriter(random(), directory, newIndexWriterConfig(new TestPayloadAnalyzer()));

    final Document doc = new Document();
    doc.add(new TextField("content", new StringReader("j k a l f k k p a t a k l k t a")));
    writer.addDocument(doc);
    final IndexReader reader = writer.getReader();
    final IndexSearcher indexSearcher = newSearcher(reader);
    writer.close();

    // Ordered near query with slop 0: "a" immediately followed by "k".
    final SpanQuery[] clauses = {
      new SpanTermQuery(new Term("content", "a")), new SpanTermQuery(new Term("content", "k"))
    };
    final SpanNearQuery nearQuery = new SpanNearQuery(clauses, 0, true);
    final Spans spans =
        MultiSpansWrapper.wrap(
            indexSearcher.getIndexReader(), nearQuery, SpanWeight.Postings.PAYLOADS);

    final TopDocs topDocs = indexSearcher.search(nearQuery, 1);
    final Set<String> payloadSet = new HashSet<>();
    final VerifyingCollector verifier = new VerifyingCollector();
    final int hitCount = topDocs.scoreDocs.length;
    for (int hit = 0; hit < hitCount; hit++) {
      for (int docId = spans.nextDoc(); docId != Spans.NO_MORE_DOCS; docId = spans.nextDoc()) {
        for (int pos = spans.nextStartPosition();
            pos != Spans.NO_MORE_POSITIONS;
            pos = spans.nextStartPosition()) {
          verifier.reset();
          spans.collect(verifier);
          for (final BytesRef bytes : verifier.payloads) {
            payloadSet.add(Term.toString(bytes));
          }
        }
      }
    }

    assertEquals(2, payloadSet.size());
    if (VERBOSE) {
      for (final String seen : payloadSet) {
        System.out.println("match:" + seen);
      }
    }
    assertTrue(payloadSet.contains("a:Noise:10"));
    assertTrue(payloadSet.contains("k:Noise:11"));
    reader.close();
    directory.close();
  }
コード例 #6
0
 /**
  * Iterates all matches of {@code spans}, verifying the payloads collected at each match and the
  * total number of matches.
  *
  * @param spans the spans to iterate; must not be null
  * @param expectedNumSpans the number of matches expected in total
  * @param expectedNumPayloads the number of payloads expected at every match
  * @param expectedPayloadLength passed to {@code VerifyingCollector.verify} for every match
  * @param expectedFirstByte passed to {@code VerifyingCollector.verify} for every match
  * @throws IOException if iterating or collecting from the spans fails
  */
 private void checkSpans(
     Spans spans,
     int expectedNumSpans,
     int expectedNumPayloads,
     int expectedPayloadLength,
     int expectedFirstByte)
     throws IOException {
   assertTrue("spans is null and it shouldn't be", spans != null);

   // Every position match is one span; the collector is cleared and re-checked per match.
   final VerifyingCollector verifier = new VerifyingCollector();
   int matches = 0;
   for (int docId = spans.nextDoc(); docId != Spans.NO_MORE_DOCS; docId = spans.nextDoc()) {
     for (int pos = spans.nextStartPosition();
         pos != Spans.NO_MORE_POSITIONS;
         pos = spans.nextStartPosition()) {
       verifier.reset();
       spans.collect(verifier);
       verifier.verify(expectedPayloadLength, expectedFirstByte);
       assertEquals("expectedNumPayloads", expectedNumPayloads, verifier.payloads.size());
       matches++;
     }
   }
   assertEquals("expectedNumSpans", expectedNumSpans, matches);
 }
コード例 #7
0
  /**
   * Indexes a single document and checks, for an unordered near query on "a" and "k", that a
   * match starting at position 0 is reported both when payloads are collected and when they are
   * not.
   *
   * <p>Also asserts the raw postings of "a" (frequency 4 at positions 0, 1, 3, 6), a total of 8
   * collected payloads, and 4 span matches.
   *
   * @throws Exception if indexing or searching fails
   */
  public void testPayloadsPos0() throws Exception {
    final Directory dir = newDirectory();
    final RandomIndexWriter writer =
        new RandomIndexWriter(random(), dir, new MockPayloadAnalyzer());
    final Document doc = new Document();
    doc.add(new TextField("content", new StringReader("a a b c d e a f g h i j a b k k")));
    writer.addDocument(doc);

    final IndexReader readerFromWriter = writer.getReader();
    final LeafReader leaf = SlowCompositeReaderWrapper.wrap(readerFromWriter);

    // Raw postings for "a": one document, four occurrences at the asserted positions.
    final PostingsEnum postings = leaf.postings(new Term("content", "a"), PostingsEnum.ALL);
    assertTrue(postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    assertEquals(4, postings.freq());
    final int[] expectedPositions = {0, 1, 3, 6};
    for (final int expected : expectedPositions) {
      assertEquals(expected, postings.nextPosition());
    }
    // No other document contains "a".
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());

    final IndexSearcher indexSearcher = newSearcher(readerFromWriter);

    // Unordered near query, slop 30, over "a" and "k".
    final SpanQuery[] clauses = {
      new SpanTermQuery(new Term("content", "a")), new SpanTermQuery(new Term("content", "k"))
    };
    final SpanNearQuery nearQuery = new SpanNearQuery(clauses, 30, false);

    // Pass 1: iterate with payload collection enabled and count the payloads.
    if (VERBOSE) {
      System.out.println("\ngetPayloadSpans test");
    }
    int payloadCount = 0;
    boolean payloadPassSawZero = false;
    final PayloadSpanCollector payloadCollector = new PayloadSpanCollector();
    final Spans pspans =
        MultiSpansWrapper.wrap(
            indexSearcher.getIndexReader(), nearQuery, SpanWeight.Postings.PAYLOADS);
    for (int docId = pspans.nextDoc(); docId != Spans.NO_MORE_DOCS; docId = pspans.nextDoc()) {
      for (int pos = pspans.nextStartPosition();
          pos != Spans.NO_MORE_POSITIONS;
          pos = pspans.nextStartPosition()) {
        if (VERBOSE) {
          System.out.println(
              "doc "
                  + pspans.docID()
                  + ": span "
                  + pspans.startPosition()
                  + " to "
                  + pspans.endPosition());
        }
        payloadCollector.reset();
        pspans.collect(payloadCollector);
        if (pspans.startPosition() == 0) {
          payloadPassSawZero = true;
        }
        for (final BytesRef bytes : payloadCollector.payloads) {
          payloadCount++;
          if (VERBOSE) {
            System.out.println("  payload: " + Term.toString(bytes));
          }
        }
      }
    }
    assertTrue(payloadPassSawZero);
    assertEquals(8, payloadCount);

    // Pass 2: iterate plain spans (no payloads requested) and count the matches.
    final Spans spans = MultiSpansWrapper.wrap(indexSearcher.getIndexReader(), nearQuery);
    int spanCount = 0;
    boolean spanPassSawZero = false;
    for (int docId = spans.nextDoc(); docId != Spans.NO_MORE_DOCS; docId = spans.nextDoc()) {
      for (int pos = spans.nextStartPosition();
          pos != Spans.NO_MORE_POSITIONS;
          pos = spans.nextStartPosition()) {
        spanCount++;
        if (spans.startPosition() == 0) {
          spanPassSawZero = true;
        }
      }
    }
    assertEquals(4, spanCount);
    assertTrue(spanPassSawZero);

    writer.close();
    indexSearcher.getIndexReader().close();
    dir.close();
  }