/**
   * Tests the IndexReader.getFieldNames implementation
   *
   * @throws Exception on error
   */
  public void testFilterIndexReader() throws Exception {
    Directory directory = newDirectory();
    IndexWriter writer =
        new IndexWriter(
            directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));

    Document d1 = new Document();
    d1.add(newField("default", "one two", Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(d1);

    Document d2 = new Document();
    d2.add(newField("default", "one three", Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(d2);

    Document d3 = new Document();
    d3.add(newField("default", "two four", Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(d3);

    writer.close();

    IndexReader reader = new TestReader(IndexReader.open(directory, true));
    TermEnum terms = reader.terms();
    while (terms.next()) {
      assertTrue(terms.term().text().indexOf('e') != -1);
    }
    terms.close();

    TermPositions positions = reader.termPositions(new Term("default", "one"));
    while (positions.next()) {
      assertTrue((positions.doc() % 2) == 1);
    }

    int NUM_DOCS = 3;

    TermDocs td = reader.termDocs(null);
    for (int i = 0; i < NUM_DOCS; i++) {
      assertTrue(td.next());
      assertEquals(i, td.doc());
      assertEquals(1, td.freq());
    }
    td.close();
    reader.close();
    directory.close();
  }
예제 #2
0
    @Override
    public void load() throws Exception {
      TermPositions tp = null;
      byte[] payloadBuffer = new byte[4]; // four bytes for an int
      try {
        tp = _reader.termPositions(_sizeTerm);

        if (tp == null) return;

        while (tp.next()) {
          if (tp.freq() > 0) {
            tp.nextPosition();
            tp.getPayload(payloadBuffer, 0);
            int len = bytesToInt(payloadBuffer);
            allocate(tp.doc(), Math.min(len, _maxItems), true);
          }
        }
      } finally {
        if (tp != null) tp.close();
      }
    }
예제 #3
0
    ReaderData(IndexReader reader) throws IOException {
      this.reader = reader;
      long minUID = Long.MAX_VALUE;
      long maxUID = Long.MIN_VALUE;

      uidMap = new Long2IntRBTreeMap();
      uidMap.defaultReturnValue(-1);
      int maxDoc = reader.maxDoc();
      if (maxDoc == 0) {
        _minUID = Long.MIN_VALUE;
        _maxUID = Long.MIN_VALUE;
        return;
      }
      TermPositions tp = null;
      byte[] payloadBuffer = new byte[8]; // four bytes for a long
      try {
        tp = reader.termPositions(ZoieSegmentReader.UID_TERM);
        while (tp.next()) {
          int doc = tp.doc();
          assert doc < maxDoc;

          tp.nextPosition();
          tp.getPayload(payloadBuffer, 0);
          long uid = ZoieSegmentReader.bytesToLong(payloadBuffer);
          if (uid < minUID) minUID = uid;
          if (uid > maxUID) maxUID = uid;
          uidMap.put(uid, doc);
        }
      } finally {
        if (tp != null) {
          tp.close();
        }
      }

      _minUID = minUID;
      _maxUID = maxUID;
    }
예제 #4
0
  private void dumpTerms() throws IOException {
    outputBanner("Terms (in Term.compareTo() order)");

    TermEnum terms = mIndexReader.terms();
    int order = 0;

    while (terms.next()) {
      order++;
      Term term = terms.term();
      String field = term.field();
      String text = term.text();

      if (!wantThisTerm(field, text)) {
        continue;
      }

      outputLn(order + " " + field + ": " + text);

      /*
       * for each term, print the
       * <document, frequency, <position>* > tuples for a term.
       *
       * document:  document in which the Term appears
       * frequency: number of time the Term appears in the document
       * position:  position for each appearance in the document
       *
       * e.g. doc.add(new Field("field", "one two three two four five", Field.Store.YES, Field.Index.ANALYZED));
       *      then the tuple for Term("field", "two") in this document would be like:
       *      88, 2, <2, 4>
       *      where
       *      88 is the document number
       *      2  is the frequency this term appear in the document
       *      <2, 4> are the positions for each appearance in the document
       */
      // by TermPositions
      outputLn("    document, frequency, <position>*");

      // keep track of docs that appear in all terms that are filtered in.
      Set<Integer> docNums = null;
      if (hasFilters()) {
        docNums = new HashSet<Integer>();
      }

      TermPositions termPos = mIndexReader.termPositions(term);
      while (termPos.next()) {
        int docNum = termPos.doc();
        int freq = termPos.freq();

        if (docNums != null) {
          docNums.add(docNum);
        }

        output("    " + docNum + ", " + freq + ", <");

        boolean first = true;
        for (int f = 0; f < freq; f++) {
          int positionInDoc = termPos.nextPosition();
          if (!first) {
            output(" ");
          } else {
            first = false;
          }
          output(positionInDoc + "");
        }
        outputLn(">");
      }
      termPos.close();

      if (docNums != null) {
        computeDocsIntersection(docNums);
      }

      outputLn();

      if (order % 1000 == 0) {
        mConsole.debug("Dumped " + order + " terms");
      }
    }

    terms.close();
  }
예제 #5
0
 @Override
 protected TermDocs termDocs(IndexReader reader) throws IOException {
   return reader.termPositions();
 }
 /** @see LuceneIndexReader#termPositions(Term) */
 public TermPositions termPositions(Term term) throws IOException {
   return indexReader.termPositions(term);
 }