/*
 * listPostings displays the first n postings for a term in a
 * field in an index (specified by reader). Set n to
 * Integer.MAX_VALUE (or pass null) to display all postings.
 *
 * Everything is written to System.out: a header line, the document
 * frequency (df) and collection term frequency (ctf) of the term,
 * and then, for each posting shown, the docid, the within-document
 * term frequency (tf) and the term positions.
 *
 * Nothing beyond df/ctf is printed when df is less than 1. If the
 * index cannot supply a positional postings list for the term, a
 * short notice is printed instead of the postings.
 *
 * Throws IOException if the underlying index access fails.
 */
static void listPostings(IndexReader reader, String termString,
    String field, Integer n) throws IOException {

  System.out.println("\nPostings: " + termString + " " + field);

  /*
   * Prepare to access the index.
   */
  BytesRef termBytes = new BytesRef(termString);
  Term term = new Term(field, termBytes);
  Bits liveDocs = MultiFields.getLiveDocs(reader);

  /*
   * Lookup the document frequency (df) and the collection term
   * frequency (ctf).
   */
  long df = reader.docFreq(term);
  System.out.println("\tdf: " + df);

  long ctf = reader.totalTermFreq(term);
  System.out.println("\tctf: " + ctf);

  if (df < 1) {
    return;
  }

  /*
   * Lookup the inverted list. Lucene documents that this lookup
   * yields null when positions were not indexed for the field, so
   * guard against that instead of failing with a
   * NullPointerException in the loop below.
   */
  DocsAndPositionsEnum postings =
      MultiFields.getTermPositionsEnum(reader, liveDocs, field, termBytes);

  if (postings == null) {
    System.out.println("\tNo positions available for this term.");
    return;
  }

  /*
   * Iterate through the first n postings. The limit is unboxed once
   * up front; a null n means "no limit".
   */
  final long limit = (n == null) ? Long.MAX_VALUE : n.longValue();
  long count = 0;

  while ((count < limit)
      && (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS)) {

    System.out.println("\tdocid: " + postings.docID());
    int tf = postings.freq();
    System.out.println("\ttf: " + tf);
    System.out.print("\tPositions: ");

    // nextPosition() is called exactly tf times for the current doc.
    for (int j = 0; j < tf; j++) {
      int pos = postings.nextPosition();
      System.out.print(pos + " ");
    }

    System.out.println("");

    count++;
  }
}
public void testSetPosition() throws Exception { Analyzer analyzer = new Analyzer() { @Override public TokenStreamComponents createComponents(String fieldName) { return new TokenStreamComponents( new Tokenizer() { // TODO: use CannedTokenStream private final String[] TOKENS = {"1", "2", "3", "4", "5"}; private final int[] INCREMENTS = {1, 2, 1, 0, 1}; private int i = 0; PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); @Override public boolean incrementToken() { if (i == TOKENS.length) return false; clearAttributes(); termAtt.append(TOKENS[i]); offsetAtt.setOffset(i, i); posIncrAtt.setPositionIncrement(INCREMENTS[i]); i++; return true; } @Override public void reset() throws IOException { super.reset(); this.i = 0; } }); } }; Directory store = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), store, analyzer); Document d = new Document(); d.add(newTextField("field", "bogus", Field.Store.YES)); writer.addDocument(d); IndexReader reader = writer.getReader(); writer.close(); IndexSearcher searcher = newSearcher(reader); PostingsEnum pos = MultiFields.getTermPositionsEnum(searcher.getIndexReader(), "field", new BytesRef("1")); pos.nextDoc(); // first token should be at position 0 assertEquals(0, pos.nextPosition()); pos = MultiFields.getTermPositionsEnum(searcher.getIndexReader(), "field", new BytesRef("2")); pos.nextDoc(); // second token should be at position 2 assertEquals(2, pos.nextPosition()); PhraseQuery q; ScoreDoc[] hits; q = new PhraseQuery("field", "1", "2"); hits = searcher.search(q, 1000).scoreDocs; assertEquals(0, hits.length); // same as previous, using the builder with implicit positions PhraseQuery.Builder builder = new PhraseQuery.Builder(); builder.add(new Term("field", "1")); builder.add(new Term("field", "2")); q = builder.build(); hits = searcher.search(q, 
1000).scoreDocs; assertEquals(0, hits.length); // same as previous, just specify positions explicitely. builder = new PhraseQuery.Builder(); builder.add(new Term("field", "1"), 0); builder.add(new Term("field", "2"), 1); q = builder.build(); hits = searcher.search(q, 1000).scoreDocs; assertEquals(0, hits.length); // specifying correct positions should find the phrase. builder = new PhraseQuery.Builder(); builder.add(new Term("field", "1"), 0); builder.add(new Term("field", "2"), 2); q = builder.build(); hits = searcher.search(q, 1000).scoreDocs; assertEquals(1, hits.length); q = new PhraseQuery("field", "2", "3"); hits = searcher.search(q, 1000).scoreDocs; assertEquals(1, hits.length); q = new PhraseQuery("field", "3", "4"); hits = searcher.search(q, 1000).scoreDocs; assertEquals(0, hits.length); // phrase query would find it when correct positions are specified. builder = new PhraseQuery.Builder(); builder.add(new Term("field", "3"), 0); builder.add(new Term("field", "4"), 0); q = builder.build(); hits = searcher.search(q, 1000).scoreDocs; assertEquals(1, hits.length); // phrase query should fail for non existing searched term // even if there exist another searched terms in the same searched position. builder = new PhraseQuery.Builder(); builder.add(new Term("field", "3"), 0); builder.add(new Term("field", "9"), 0); q = builder.build(); hits = searcher.search(q, 1000).scoreDocs; assertEquals(0, hits.length); // multi-phrase query should succed for non existing searched term // because there exist another searched terms in the same searched position. 
MultiPhraseQuery mq = new MultiPhraseQuery(); mq.add(new Term[] {new Term("field", "3"), new Term("field", "9")}, 0); hits = searcher.search(mq, 1000).scoreDocs; assertEquals(1, hits.length); q = new PhraseQuery("field", "2", "4"); hits = searcher.search(q, 1000).scoreDocs; assertEquals(1, hits.length); q = new PhraseQuery("field", "3", "5"); hits = searcher.search(q, 1000).scoreDocs; assertEquals(1, hits.length); q = new PhraseQuery("field", "4", "5"); hits = searcher.search(q, 1000).scoreDocs; assertEquals(1, hits.length); q = new PhraseQuery("field", "2", "5"); hits = searcher.search(q, 1000).scoreDocs; assertEquals(0, hits.length); reader.close(); store.close(); }