/**
 * Folds every payload of the span being scored into the running payload score.
 *
 * <p>By default each payload is scored through the {@link PayloadFunction}; subclasses may
 * override this method to do other things.
 *
 * <p>Note that {@code start} and {@code end} are handed to the payload function, whereas the raw
 * per-payload score is computed from {@code spans.start()} and {@code spans.end()}.
 *
 * @param payLoads The payloads
 * @param start The start position of the span being scored
 * @param end The end position of the span being scored
 * @see Spans
 */
protected void processPayloads(Collection<byte[]> payLoads, int start, int end) {
  for (final byte[] payloadBytes : payLoads) {
    // The previous score and the number of payloads seen so far are inputs to the function,
    // so the counter is only advanced after the new score has been computed.
    payloadScore =
        function.currentScore(
            doc,
            fieldName,
            start,
            end,
            payloadsSeen,
            payloadScore,
            similarity.scorePayload(
                doc, spans.start(), spans.end(), payloadBytes, 0, payloadBytes.length));
    payloadsSeen++;
  }
}
/**
 * Walks every position of every document in {@code spans} and verifies the payload counts.
 *
 * <p>For the i-th position visited, the number of payloads gathered by the collector must equal
 * {@code numPayloads[i]}; once the spans are exhausted, the number of positions visited must
 * equal {@code numSpans}.
 *
 * @param spans the spans to iterate
 * @param numSpans expected total number of positions
 * @param numPayloads expected payload count per position, in visiting order
 * @throws IOException if iterating or collecting from the spans fails
 */
private void checkSpans(Spans spans, int numSpans, int[] numPayloads) throws IOException {
  final VerifyingCollector collector = new VerifyingCollector();
  int seen = 0;
  for (int docId = spans.nextDoc(); docId != Spans.NO_MORE_DOCS; docId = spans.nextDoc()) {
    for (int position = spans.nextStartPosition();
        position != Spans.NO_MORE_POSITIONS;
        position = spans.nextStartPosition()) {
      if (VERBOSE) {
        System.out.println("\nSpans Dump --");
      }
      // The collector is reused, so clear whatever the previous position left behind.
      collector.reset();
      spans.collect(collector);
      assertEquals("payload size", numPayloads[seen], collector.payloads.size());
      seen++;
    }
  }
  assertEquals("expected numSpans", numSpans, seen);
}
private void dumpSpans(SpanQuery query) throws IOException { Spans spans = query.getSpans(reader); System.out.println(query + ":"); int numSpans = 0; TopDocs hits = searcher.search(query, 10); float[] scores = new float[2]; for (ScoreDoc sd : hits.scoreDocs) { scores[sd.doc] = sd.score; } while (spans.next()) { // A numSpans++; int id = spans.doc(); Document doc = reader.document(id); // B TokenStream stream = analyzer.tokenStream( "contents", // C new StringReader(doc.get("f"))); // C TermAttribute term = stream.addAttribute(TermAttribute.class); StringBuilder buffer = new StringBuilder(); buffer.append(" "); int i = 0; while (stream.incrementToken()) { // D if (i == spans.start()) { // E buffer.append("<"); // E } // E buffer.append(term.term()); // E if (i + 1 == spans.end()) { // E buffer.append(">"); // E } // E buffer.append(" "); i++; } buffer.append("(").append(scores[id]).append(") "); System.out.println(buffer); } if (numSpans == 0) { System.out.println(" No spans"); } System.out.println(); }
/**
 * Accumulates the frequency and payload information of all span matches in the current
 * document.
 *
 * <p>Resets {@code freq}, {@code payloadScore} and {@code payloadsSeen}, then consumes spans for
 * as long as they stay on the document the spans were positioned on when this method was
 * entered. Each match adds its sloppy frequency to {@code freq} and has its payloads handed to
 * {@code getPayloads}.
 *
 * @return {@code false} if there were no more spans on entry, {@code true} otherwise
 * @throws IOException if advancing the spans or reading payloads fails
 */
@Override
protected boolean setFreqCurrentDoc() throws IOException {
  if (!more) {
    return false;
  }

  doc = spans.doc();
  freq = 0.0f;
  payloadScore = 0;
  payloadsSeen = 0;

  boolean sameDoc = true;
  while (sameDoc) {
    freq += similarity.sloppyFreq(spans.end() - spans.start());
    // getPayloads works on an array of spans; wrap the single top-level spans in a fresh one.
    getPayloads(new Spans[] {spans});
    more = spans.next();
    sameDoc = more && (doc == spans.doc());
  }
  return true;
}
/**
 * Indexes one document, runs a {@link SpanNearQuery} over the terms {@code a} and {@code k}
 * with payload collection enabled, and checks that exactly the two payloads
 * {@code "a:Noise:10"} and {@code "k:Noise:11"} are collected across all matching positions.
 */
public void testShrinkToAfterShortestMatch3() throws IOException {
  Directory directory = newDirectory();
  RandomIndexWriter writer =
      new RandomIndexWriter(
          random(), directory, newIndexWriterConfig(new TestPayloadAnalyzer()));

  Document document = new Document();
  document.add(
      new TextField("content", new StringReader("j k a l f k k p a t a k l k t a")));
  writer.addDocument(document);

  // The reader is obtained from the writer before the writer is closed.
  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);
  writer.close();

  SpanQuery[] clauses = {
    new SpanTermQuery(new Term("content", "a")), new SpanTermQuery(new Term("content", "k"))
  };
  SpanNearQuery nearQuery = new SpanNearQuery(clauses, 0, true);
  Spans spans =
      MultiSpansWrapper.wrap(searcher.getIndexReader(), nearQuery, SpanWeight.Postings.PAYLOADS);

  TopDocs topDocs = searcher.search(nearQuery, 1);
  Set<String> seenPayloads = new HashSet<>();
  VerifyingCollector collector = new VerifyingCollector();
  for (int hit = 0; hit < topDocs.scoreDocs.length; hit++) {
    for (int docId = spans.nextDoc(); docId != Spans.NO_MORE_DOCS; docId = spans.nextDoc()) {
      for (int position = spans.nextStartPosition();
          position != Spans.NO_MORE_POSITIONS;
          position = spans.nextStartPosition()) {
        collector.reset();
        spans.collect(collector);
        for (final BytesRef payload : collector.payloads) {
          seenPayloads.add(Term.toString(payload));
        }
      }
    }
  }

  assertEquals(2, seenPayloads.size());
  if (VERBOSE) {
    for (final String payload : seenPayloads) {
      System.out.println("match:" + payload);
    }
  }
  assertTrue(seenPayloads.contains("a:Noise:10"));
  assertTrue(seenPayloads.contains("k:Noise:11"));

  reader.close();
  directory.close();
}
private void checkSpans( Spans spans, int expectedNumSpans, int expectedNumPayloads, int expectedPayloadLength, int expectedFirstByte) throws IOException { assertTrue("spans is null and it shouldn't be", spans != null); // each position match should have a span associated with it, since there is just one underlying // term query, there should // only be one entry in the span VerifyingCollector collector = new VerifyingCollector(); int seen = 0; while (spans.nextDoc() != Spans.NO_MORE_DOCS) { while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { collector.reset(); spans.collect(collector); collector.verify(expectedPayloadLength, expectedFirstByte); assertEquals("expectedNumPayloads", expectedNumPayloads, collector.payloads.size()); seen++; } } assertEquals("expectedNumSpans", expectedNumSpans, seen); }
public void testPayloadsPos0() throws Exception { Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), dir, new MockPayloadAnalyzer()); Document doc = new Document(); doc.add(new TextField("content", new StringReader("a a b c d e a f g h i j a b k k"))); writer.addDocument(doc); final IndexReader readerFromWriter = writer.getReader(); LeafReader r = SlowCompositeReaderWrapper.wrap(readerFromWriter); PostingsEnum tp = r.postings(new Term("content", "a"), PostingsEnum.ALL); int count = 0; assertTrue(tp.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); // "a" occurs 4 times assertEquals(4, tp.freq()); assertEquals(0, tp.nextPosition()); assertEquals(1, tp.nextPosition()); assertEquals(3, tp.nextPosition()); assertEquals(6, tp.nextPosition()); // only one doc has "a" assertEquals(DocIdSetIterator.NO_MORE_DOCS, tp.nextDoc()); IndexSearcher is = newSearcher(readerFromWriter); SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a")); SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k")); SpanQuery[] sqs = {stq1, stq2}; SpanNearQuery snq = new SpanNearQuery(sqs, 30, false); count = 0; boolean sawZero = false; if (VERBOSE) { System.out.println("\ngetPayloadSpans test"); } PayloadSpanCollector collector = new PayloadSpanCollector(); Spans pspans = MultiSpansWrapper.wrap(is.getIndexReader(), snq, SpanWeight.Postings.PAYLOADS); while (pspans.nextDoc() != Spans.NO_MORE_DOCS) { while (pspans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { if (VERBOSE) { System.out.println( "doc " + pspans.docID() + ": span " + pspans.startPosition() + " to " + pspans.endPosition()); } collector.reset(); pspans.collect(collector); sawZero |= pspans.startPosition() == 0; for (BytesRef payload : collector.payloads) { count++; if (VERBOSE) { System.out.println(" payload: " + Term.toString(payload)); } } } } assertTrue(sawZero); assertEquals(8, count); // System.out.println("\ngetSpans test"); Spans spans = 
MultiSpansWrapper.wrap(is.getIndexReader(), snq); count = 0; sawZero = false; while (spans.nextDoc() != Spans.NO_MORE_DOCS) { while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { count++; sawZero |= spans.startPosition() == 0; // System.out.println(spans.doc() + " - " + spans.start() + " - " + // spans.end()); } } assertEquals(4, count); assertTrue(sawZero); writer.close(); is.getIndexReader().close(); dir.close(); }