/**
 * Walks every match of {@code spans}, collects the payloads of each match, and checks both the
 * per-match payload count and the total number of matches.
 *
 * @param spans the spans to iterate; they are advanced to exhaustion by this call
 * @param numSpans the expected total number of matches over all documents
 * @param numPayloads the expected payload count for each match, indexed by the order in which the
 *     matches are encountered
 * @throws IOException propagated from advancing the spans or collecting from them
 */
private void checkSpans(Spans spans, int numSpans, int[] numPayloads) throws IOException {
  int cnt = 0;
  VerifyingCollector collector = new VerifyingCollector();
  while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
    while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
      if (VERBOSE) {
        System.out.println("\nSpans Dump --");
      }
      // Report a surplus match as a descriptive test failure instead of letting the array
      // lookup below throw ArrayIndexOutOfBoundsException.
      if (cnt >= numPayloads.length) {
        throw new AssertionError(
            "found more spans than the "
                + numPayloads.length
                + " expected payload counts (expected numSpans="
                + numSpans
                + ")");
      }
      collector.reset();
      spans.collect(collector);
      assertEquals("payload size", numPayloads[cnt], collector.payloads.size());
      cnt++;
    }
  }
  assertEquals("expected numSpans", numSpans, cnt);
}
/**
 * Indexes a single document and runs an in-order, zero-slop span-near query for "a" followed by
 * "k" with payload collection enabled. The distinct payload strings gathered over all matches
 * must be exactly {@code a:Noise:10} and {@code k:Noise:11}.
 */
public void testShrinkToAfterShortestMatch3() throws IOException {
  Directory directory = newDirectory();
  RandomIndexWriter writer =
      new RandomIndexWriter(
          random(), directory, newIndexWriterConfig(new TestPayloadAnalyzer()));

  Document doc = new Document();
  doc.add(new TextField("content", new StringReader("j k a l f k k p a t a k l k t a")));
  writer.addDocument(doc);

  // Obtain the reader and searcher from the writer before closing it.
  IndexReader reader = writer.getReader();
  IndexSearcher is = newSearcher(reader);
  writer.close();

  SpanNearQuery snq =
      new SpanNearQuery(
          new SpanQuery[] {
            new SpanTermQuery(new Term("content", "a")),
            new SpanTermQuery(new Term("content", "k"))
          },
          0,
          true);
  Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq, SpanWeight.Postings.PAYLOADS);
  TopDocs topDocs = is.search(snq, 1);

  Set<String> seenPayloads = new HashSet<>();
  VerifyingCollector payloadCollector = new VerifyingCollector();
  // The spans are only walked when the search returned at least one hit.
  int hitCount = topDocs.scoreDocs.length;
  for (int hit = 0; hit < hitCount; hit++) {
    for (int docId = spans.nextDoc(); docId != Spans.NO_MORE_DOCS; docId = spans.nextDoc()) {
      for (int start = spans.nextStartPosition();
          start != Spans.NO_MORE_POSITIONS;
          start = spans.nextStartPosition()) {
        payloadCollector.reset();
        spans.collect(payloadCollector);
        for (BytesRef payload : payloadCollector.payloads) {
          seenPayloads.add(Term.toString(payload));
        }
      }
    }
  }

  assertEquals(2, seenPayloads.size());
  if (VERBOSE) {
    for (String payload : seenPayloads) {
      System.out.println("match:" + payload);
    }
  }
  assertTrue(seenPayloads.contains("a:Noise:10"));
  assertTrue(seenPayloads.contains("k:Noise:11"));

  reader.close();
  directory.close();
}
private void checkSpans( Spans spans, int expectedNumSpans, int expectedNumPayloads, int expectedPayloadLength, int expectedFirstByte) throws IOException { assertTrue("spans is null and it shouldn't be", spans != null); // each position match should have a span associated with it, since there is just one underlying // term query, there should // only be one entry in the span VerifyingCollector collector = new VerifyingCollector(); int seen = 0; while (spans.nextDoc() != Spans.NO_MORE_DOCS) { while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { collector.reset(); spans.collect(collector); collector.verify(expectedPayloadLength, expectedFirstByte); assertEquals("expectedNumPayloads", expectedNumPayloads, collector.payloads.size()); seen++; } } assertEquals("expectedNumSpans", expectedNumSpans, seen); }
public void testPayloadsPos0() throws Exception { Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), dir, new MockPayloadAnalyzer()); Document doc = new Document(); doc.add(new TextField("content", new StringReader("a a b c d e a f g h i j a b k k"))); writer.addDocument(doc); final IndexReader readerFromWriter = writer.getReader(); LeafReader r = SlowCompositeReaderWrapper.wrap(readerFromWriter); PostingsEnum tp = r.postings(new Term("content", "a"), PostingsEnum.ALL); int count = 0; assertTrue(tp.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); // "a" occurs 4 times assertEquals(4, tp.freq()); assertEquals(0, tp.nextPosition()); assertEquals(1, tp.nextPosition()); assertEquals(3, tp.nextPosition()); assertEquals(6, tp.nextPosition()); // only one doc has "a" assertEquals(DocIdSetIterator.NO_MORE_DOCS, tp.nextDoc()); IndexSearcher is = newSearcher(readerFromWriter); SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a")); SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k")); SpanQuery[] sqs = {stq1, stq2}; SpanNearQuery snq = new SpanNearQuery(sqs, 30, false); count = 0; boolean sawZero = false; if (VERBOSE) { System.out.println("\ngetPayloadSpans test"); } PayloadSpanCollector collector = new PayloadSpanCollector(); Spans pspans = MultiSpansWrapper.wrap(is.getIndexReader(), snq, SpanWeight.Postings.PAYLOADS); while (pspans.nextDoc() != Spans.NO_MORE_DOCS) { while (pspans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { if (VERBOSE) { System.out.println( "doc " + pspans.docID() + ": span " + pspans.startPosition() + " to " + pspans.endPosition()); } collector.reset(); pspans.collect(collector); sawZero |= pspans.startPosition() == 0; for (BytesRef payload : collector.payloads) { count++; if (VERBOSE) { System.out.println(" payload: " + Term.toString(payload)); } } } } assertTrue(sawZero); assertEquals(8, count); // System.out.println("\ngetSpans test"); Spans spans = 
MultiSpansWrapper.wrap(is.getIndexReader(), snq); count = 0; sawZero = false; while (spans.nextDoc() != Spans.NO_MORE_DOCS) { while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { count++; sawZero |= spans.startPosition() == 0; // System.out.println(spans.doc() + " - " + spans.start() + " - " + // spans.end()); } } assertEquals(4, count); assertTrue(sawZero); writer.close(); is.getIndexReader().close(); dir.close(); }