Exemplo n.º 1
0
 public static int docId(IndexReader reader, Term term) throws IOException {
   TermDocs termDocs = reader.termDocs(term);
   try {
     if (termDocs.next()) {
       return termDocs.doc();
     }
     return NO_DOC;
   } finally {
     termDocs.close();
   }
 }
  /**
   * Tests the IndexReader.getFieldNames implementation
   *
   * @throws Exception on error
   */
  public void testFilterIndexReader() throws Exception {
    Directory directory = newDirectory();
    IndexWriter writer =
        new IndexWriter(
            directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));

    Document d1 = new Document();
    d1.add(newField("default", "one two", Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(d1);

    Document d2 = new Document();
    d2.add(newField("default", "one three", Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(d2);

    Document d3 = new Document();
    d3.add(newField("default", "two four", Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(d3);

    writer.close();

    IndexReader reader = new TestReader(IndexReader.open(directory, true));
    TermEnum terms = reader.terms();
    while (terms.next()) {
      assertTrue(terms.term().text().indexOf('e') != -1);
    }
    terms.close();

    TermPositions positions = reader.termPositions(new Term("default", "one"));
    while (positions.next()) {
      assertTrue((positions.doc() % 2) == 1);
    }

    int NUM_DOCS = 3;

    TermDocs td = reader.termDocs(null);
    for (int i = 0; i < NUM_DOCS; i++) {
      assertTrue(td.next());
      assertEquals(i, td.doc());
      assertEquals(1, td.freq());
    }
    td.close();
    reader.close();
    directory.close();
  }
  public void testSkipTo(int indexDivisor) throws IOException {
    Directory dir = new RAMDirectory();
    IndexWriter writer =
        new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    Term ta = new Term("content", "aaa");
    for (int i = 0; i < 10; i++) addDoc(writer, "aaa aaa aaa aaa");

    Term tb = new Term("content", "bbb");
    for (int i = 0; i < 16; i++) addDoc(writer, "bbb bbb bbb bbb");

    Term tc = new Term("content", "ccc");
    for (int i = 0; i < 50; i++) addDoc(writer, "ccc ccc ccc ccc");

    // assure that we deal with a single segment
    writer.optimize();
    writer.close();

    IndexReader reader = IndexReader.open(dir);
    reader.setTermInfosIndexDivisor(indexDivisor);
    assertEquals(indexDivisor, reader.getTermInfosIndexDivisor());

    TermDocs tdocs = reader.termDocs();

    // without optimization (assumption skipInterval == 16)

    // with next
    tdocs.seek(ta);
    assertTrue(tdocs.next());
    assertEquals(0, tdocs.doc());
    assertEquals(4, tdocs.freq());
    assertTrue(tdocs.next());
    assertEquals(1, tdocs.doc());
    assertEquals(4, tdocs.freq());
    assertTrue(tdocs.skipTo(0));
    assertEquals(2, tdocs.doc());
    assertTrue(tdocs.skipTo(4));
    assertEquals(4, tdocs.doc());
    assertTrue(tdocs.skipTo(9));
    assertEquals(9, tdocs.doc());
    assertFalse(tdocs.skipTo(10));

    // without next
    tdocs.seek(ta);
    assertTrue(tdocs.skipTo(0));
    assertEquals(0, tdocs.doc());
    assertTrue(tdocs.skipTo(4));
    assertEquals(4, tdocs.doc());
    assertTrue(tdocs.skipTo(9));
    assertEquals(9, tdocs.doc());
    assertFalse(tdocs.skipTo(10));

    // exactly skipInterval documents and therefore with optimization

    // with next
    tdocs.seek(tb);
    assertTrue(tdocs.next());
    assertEquals(10, tdocs.doc());
    assertEquals(4, tdocs.freq());
    assertTrue(tdocs.next());
    assertEquals(11, tdocs.doc());
    assertEquals(4, tdocs.freq());
    assertTrue(tdocs.skipTo(5));
    assertEquals(12, tdocs.doc());
    assertTrue(tdocs.skipTo(15));
    assertEquals(15, tdocs.doc());
    assertTrue(tdocs.skipTo(24));
    assertEquals(24, tdocs.doc());
    assertTrue(tdocs.skipTo(25));
    assertEquals(25, tdocs.doc());
    assertFalse(tdocs.skipTo(26));

    // without next
    tdocs.seek(tb);
    assertTrue(tdocs.skipTo(5));
    assertEquals(10, tdocs.doc());
    assertTrue(tdocs.skipTo(15));
    assertEquals(15, tdocs.doc());
    assertTrue(tdocs.skipTo(24));
    assertEquals(24, tdocs.doc());
    assertTrue(tdocs.skipTo(25));
    assertEquals(25, tdocs.doc());
    assertFalse(tdocs.skipTo(26));

    // much more than skipInterval documents and therefore with optimization

    // with next
    tdocs.seek(tc);
    assertTrue(tdocs.next());
    assertEquals(26, tdocs.doc());
    assertEquals(4, tdocs.freq());
    assertTrue(tdocs.next());
    assertEquals(27, tdocs.doc());
    assertEquals(4, tdocs.freq());
    assertTrue(tdocs.skipTo(5));
    assertEquals(28, tdocs.doc());
    assertTrue(tdocs.skipTo(40));
    assertEquals(40, tdocs.doc());
    assertTrue(tdocs.skipTo(57));
    assertEquals(57, tdocs.doc());
    assertTrue(tdocs.skipTo(74));
    assertEquals(74, tdocs.doc());
    assertTrue(tdocs.skipTo(75));
    assertEquals(75, tdocs.doc());
    assertFalse(tdocs.skipTo(76));

    // without next
    tdocs.seek(tc);
    assertTrue(tdocs.skipTo(5));
    assertEquals(26, tdocs.doc());
    assertTrue(tdocs.skipTo(40));
    assertEquals(40, tdocs.doc());
    assertTrue(tdocs.skipTo(57));
    assertEquals(57, tdocs.doc());
    assertTrue(tdocs.skipTo(74));
    assertEquals(74, tdocs.doc());
    assertTrue(tdocs.skipTo(75));
    assertEquals(75, tdocs.doc());
    assertFalse(tdocs.skipTo(76));

    tdocs.close();
    reader.close();
    dir.close();
  }
Exemplo n.º 4
0
  public static void main(String[] args) throws Exception {
    if (args.length < 2) {
      System.err.println("TermDumper [-c|-v value] field <index...>");
      System.exit(1);
    }

    boolean count = false;
    String value = null;
    boolean all = false;

    int i = 0;
    for (; i < args.length; i++) {
      String arg = args[i];

      if ("-h".equals(arg) || "--help".equals(arg)) {
        System.err.println("TermDumper [-c|-v value] field <index...>");
        System.exit(1);
      } else if ("-c".equals(arg) || "--count".equals(arg)) {
        count = true;
      } else if ("-v".equals(arg) || "--vaue".equals(arg)) {
        value = args[++i];
      } else if ("-a".equals(arg) || "--all".equals(arg)) {
        all = true;
      } else {
        break;
      }
    }

    String field = args[i++];

    java.util.ArrayList<IndexReader> readers =
        new java.util.ArrayList<IndexReader>(args.length - 1);
    for (; i < args.length; i++) {
      String arg = args[i];
      try {
        IndexReader reader = IndexReader.open(new MMapDirectory(new File(arg)), true);

        readers.add(reader);
      } catch (IOException ioe) {
        System.err.println("Error reading: " + arg);
      }
    }

    for (IndexReader reader : readers) {
      TermDocs termDocs = reader.termDocs();
      TermEnum termEnum = reader.terms(new Term(field));

      try {
        do {
          Term term = termEnum.term();

          if (term == null || !field.equals(term.field())) break;

          if (value == null) {
            if (count) {
              termDocs.seek(termEnum);

              int c = 0;
              for (; termDocs.next(); c++) ;

              System.out.print(c + " ");
            }
            System.out.println(term.text());
          } else if (value.equals(term.text())) {
            termDocs.seek(termEnum);

            while (termDocs.next()) {
              if (all) {
                Document d = reader.document(termDocs.doc());
                System.out.println(termDocs.doc());
                for (Object o : d.getFields()) {
                  Field f = (Field) o;
                  System.out.println(f.name() + " " + d.get(f.name()));
                }
              } else {
                System.out.println(
                    termDocs.doc() + " " + reader.document(termDocs.doc()).get("url"));
              }
            }
          }
        } while (termEnum.next());
      } finally {
        termDocs.close();
        termEnum.close();
      }
    }
  }