Пример #1
0
  /**
   * Prints the first {@code n} postings for a term in a field of an index to standard output.
   *
   * <p>For the term, the document frequency (df) and collection term frequency (ctf) are
   * printed first. Then, for each of the first {@code n} documents in the term's inverted
   * list, the document id, the term frequency and every term position are printed.
   *
   * @param reader the index to read from
   * @param termString the text of the term to look up
   * @param field the field whose inverted list is consulted
   * @param n the maximum number of postings to display; pass {@code Integer.MAX_VALUE} to
   *     display all postings. A {@code null} value is treated as "no limit".
   * @throws IOException if reading the index fails
   */
  static void listPostings(IndexReader reader, String termString, String field, Integer n)
      throws IOException {

    System.out.println("\nPostings:  " + termString + " " + field);

    /*
     *  Prepare to access the index.
     */
    BytesRef termBytes = new BytesRef(termString);
    Term term = new Term(field, termBytes);
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    /*
     *  Look up the document frequency (df) and the collection term
     *  frequency (ctf).
     */
    long df = reader.docFreq(term);
    System.out.println("\tdf:  " + df);

    long ctf = reader.totalTermFreq(term);
    System.out.println("\tctf:  " + ctf);

    // Nothing to list when no document contains the term.
    if (df < 1) {
      return;
    }

    /*
     *  Look up the inverted list.
     */
    DocsAndPositionsEnum postings =
        MultiFields.getTermPositionsEnum(reader, liveDocs, field, termBytes);

    // The lookup can yield null even when df >= 1 (Lucene documents this for
    // fields indexed without positions); bail out instead of dereferencing it.
    if (postings == null) {
      System.out.println("\tNo positional postings available.");
      return;
    }

    /*
     *  Iterate through the first n postings.  The limit is unboxed once so
     *  the loop condition neither re-unboxes n nor fails on a null n.
     */
    final int limit = (n == null) ? Integer.MAX_VALUE : n;
    long count = 0;

    while ((count < limit) && (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS)) {

      System.out.println("\tdocid: " + postings.docID());
      int tf = postings.freq();
      System.out.println("\ttf: " + tf);
      System.out.print("\tPositions: ");

      // freq() gives the number of positions available for the current document.
      for (int j = 0; j < tf; j++) {
        int pos = postings.nextPosition();
        System.out.print(pos + " ");
      }

      System.out.println("");

      count++;
    }
  }
Пример #2
0
  /**
   * Checks that position increments set by a tokenizer are reflected in the indexed postings
   * and in phrase / multi-phrase query matching.
   *
   * <p>A single document is indexed through an analyzer whose tokenizer ignores its input and
   * always emits the terms "1".."5" with position increments 1, 2, 1, 0, 1.
   */
  public void testSetPosition() throws Exception {
    Analyzer cannedAnalyzer =
        new Analyzer() {
          @Override
          public TokenStreamComponents createComponents(String fieldName) {
            Tokenizer cannedTokenizer =
                new Tokenizer() {
                  // TODO: use CannedTokenStream
                  private final String[] terms = {"1", "2", "3", "4", "5"};
                  private final int[] increments = {1, 2, 1, 0, 1};
                  // Index of the next token to emit.
                  private int cursor = 0;

                  PositionIncrementAttribute posIncAttr =
                      addAttribute(PositionIncrementAttribute.class);
                  CharTermAttribute termAttr = addAttribute(CharTermAttribute.class);
                  OffsetAttribute offsetAttr = addAttribute(OffsetAttribute.class);

                  @Override
                  public boolean incrementToken() {
                    if (cursor < terms.length) {
                      clearAttributes();
                      termAttr.append(terms[cursor]);
                      offsetAttr.setOffset(cursor, cursor);
                      posIncAttr.setPositionIncrement(increments[cursor]);
                      cursor++;
                      return true;
                    }
                    // All canned tokens have been emitted.
                    return false;
                  }

                  @Override
                  public void reset() throws IOException {
                    super.reset();
                    cursor = 0;
                  }
                };
            return new TokenStreamComponents(cannedTokenizer);
          }
        };

    // Index one document; its text is irrelevant because the tokenizer emits fixed tokens.
    Directory store = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), store, cannedAnalyzer);
    Document doc = new Document();
    doc.add(newTextField("field", "bogus", Field.Store.YES));
    writer.addDocument(doc);
    IndexReader reader = writer.getReader();
    writer.close();

    IndexSearcher searcher = newSearcher(reader);
    IndexReader searchReader = searcher.getIndexReader();

    // The first token should be at position 0.
    PostingsEnum positions =
        MultiFields.getTermPositionsEnum(searchReader, "field", new BytesRef("1"));
    positions.nextDoc();
    assertEquals(0, positions.nextPosition());

    // The second token should be at position 2.
    positions = MultiFields.getTermPositionsEnum(searchReader, "field", new BytesRef("2"));
    positions.nextDoc();
    assertEquals(2, positions.nextPosition());

    // "1" and "2" are not adjacent, so the plain phrase must not match.
    PhraseQuery oneTwo = new PhraseQuery("field", "1", "2");
    ScoreDoc[] matches = searcher.search(oneTwo, 1000).scoreDocs;
    assertEquals(0, matches.length);

    // Same as the previous query, using the builder with implicit positions.
    PhraseQuery.Builder phrase = new PhraseQuery.Builder();
    phrase.add(new Term("field", "1"));
    phrase.add(new Term("field", "2"));
    matches = searcher.search(phrase.build(), 1000).scoreDocs;
    assertEquals(0, matches.length);

    // Same as the previous query, with the positions specified explicitly.
    phrase = new PhraseQuery.Builder();
    phrase.add(new Term("field", "1"), 0);
    phrase.add(new Term("field", "2"), 1);
    matches = searcher.search(phrase.build(), 1000).scoreDocs;
    assertEquals(0, matches.length);

    // Specifying the correct positions should find the phrase.
    phrase = new PhraseQuery.Builder();
    phrase.add(new Term("field", "1"), 0);
    phrase.add(new Term("field", "2"), 2);
    matches = searcher.search(phrase.build(), 1000).scoreDocs;
    assertEquals(1, matches.length);

    PhraseQuery twoThree = new PhraseQuery("field", "2", "3");
    matches = searcher.search(twoThree, 1000).scoreDocs;
    assertEquals(1, matches.length);

    PhraseQuery threeFour = new PhraseQuery("field", "3", "4");
    matches = searcher.search(threeFour, 1000).scoreDocs;
    assertEquals(0, matches.length);

    // The phrase query finds "3" and "4" when both are given the same position.
    phrase = new PhraseQuery.Builder();
    phrase.add(new Term("field", "3"), 0);
    phrase.add(new Term("field", "4"), 0);
    matches = searcher.search(phrase.build(), 1000).scoreDocs;
    assertEquals(1, matches.length);

    // A phrase query should fail for a non-existent term, even if another
    // searched term exists at the same searched position.
    phrase = new PhraseQuery.Builder();
    phrase.add(new Term("field", "3"), 0);
    phrase.add(new Term("field", "9"), 0);
    matches = searcher.search(phrase.build(), 1000).scoreDocs;
    assertEquals(0, matches.length);

    // A multi-phrase query should succeed despite the non-existent term,
    // because another searched term exists at the same searched position.
    MultiPhraseQuery multiPhrase = new MultiPhraseQuery();
    multiPhrase.add(new Term[] {new Term("field", "3"), new Term("field", "9")}, 0);
    matches = searcher.search(multiPhrase, 1000).scoreDocs;
    assertEquals(1, matches.length);

    PhraseQuery twoFour = new PhraseQuery("field", "2", "4");
    matches = searcher.search(twoFour, 1000).scoreDocs;
    assertEquals(1, matches.length);

    PhraseQuery threeFive = new PhraseQuery("field", "3", "5");
    matches = searcher.search(threeFive, 1000).scoreDocs;
    assertEquals(1, matches.length);

    PhraseQuery fourFive = new PhraseQuery("field", "4", "5");
    matches = searcher.search(fourFive, 1000).scoreDocs;
    assertEquals(1, matches.length);

    PhraseQuery twoFive = new PhraseQuery("field", "2", "5");
    matches = searcher.search(twoFive, 1000).scoreDocs;
    assertEquals(0, matches.length);

    reader.close();
    store.close();
  }