Exemplo n.º 1
0
  /*
   *  Utility function to display a term vector.
   */
  static void termVectorDisplay(Terms terms) throws IOException {

    if ((terms == null) || (terms.size() == -1)) System.out.println("    The field is not stored.");
    else {
      /*
       *  The terms for this field are stored.
       */
      System.out.println("    Vocabulary size: " + terms.size() + " terms");

      TermsEnum ithTerm = terms.iterator(null);

      /*
       *  Iterate over the terms in this document.
       *  Information about a term's occurrences (tf and
       *  positions) is accessed via the indexing API, which
       *  returns inverted lists that describe (only) the
       *  current document.
       */
      while (ithTerm.next() != null) {
        System.out.format(
            "      %10d %-20s %d ",
            ithTerm.ord(), ithTerm.term().utf8ToString(), ithTerm.totalTermFreq());

        DocsAndPositionsEnum currDoc = ithTerm.docsAndPositions(null, null);
        currDoc.nextDoc();

        for (int jthPosition = 0; jthPosition < ithTerm.totalTermFreq(); jthPosition++)
          System.out.print(currDoc.nextPosition() + " ");

        System.out.println();
      }
      ;
    }
    ;
  }
Exemplo n.º 2
0
  /*
   *  listPostings displays the first n postings for a term in a
   *  field in an index (specified by reader).  Set n to MAX_VALUE
   *  to display all postings.
   */
  static void listPostings(IndexReader reader, String termString, String field, Integer n)
      throws IOException {

    System.out.println("\nPostings:  " + termString + " " + field);

    /*
     *  Prepare to access the index.
     */
    BytesRef termBytes = new BytesRef(termString);
    Term term = new Term(field, termBytes);
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    /*
     *  Lookup the collection term frequency (ctf).
     */
    long df = reader.docFreq(term);
    System.out.println("\tdf:  " + df);

    long ctf = reader.totalTermFreq(term);
    System.out.println("\tctf:  " + ctf);

    if (df < 1) return;

    /*
     *  Lookup the inverted list.
     */
    DocsAndPositionsEnum postings =
        MultiFields.getTermPositionsEnum(reader, liveDocs, field, termBytes);

    /*
     *  Iterate through the first n postings.
     */
    long count = 0;

    while ((count < n) && (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS)) {

      System.out.println("\tdocid: " + postings.docID());
      int tf = postings.freq();
      System.out.println("\ttf: " + tf);
      System.out.print("\tPositions: ");

      for (int j = 0; j < tf; j++) {
        int pos = postings.nextPosition();
        System.out.print(pos + " ");
      }

      System.out.println("");

      count++;
    }
    ;

    return;
  }