Пример #1
0
 private TermDocs termDocs(int i) throws IOException {
   TermDocs result = readerTermDocs[i];
   if (result == null) result = readerTermDocs[i] = termDocs(readers[i]);
   if (smi != null) {
     assert (smi.ord == i);
     assert (smi.termEnum.term().equals(term));
     result.seek(smi.termEnum);
   } else {
     result.seek(term);
   }
   return result;
 }
  public void testSkipTo(int indexDivisor) throws IOException {
    Directory dir = new RAMDirectory();
    IndexWriter writer =
        new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    Term ta = new Term("content", "aaa");
    for (int i = 0; i < 10; i++) addDoc(writer, "aaa aaa aaa aaa");

    Term tb = new Term("content", "bbb");
    for (int i = 0; i < 16; i++) addDoc(writer, "bbb bbb bbb bbb");

    Term tc = new Term("content", "ccc");
    for (int i = 0; i < 50; i++) addDoc(writer, "ccc ccc ccc ccc");

    // assure that we deal with a single segment
    writer.optimize();
    writer.close();

    IndexReader reader = IndexReader.open(dir);
    reader.setTermInfosIndexDivisor(indexDivisor);
    assertEquals(indexDivisor, reader.getTermInfosIndexDivisor());

    TermDocs tdocs = reader.termDocs();

    // without optimization (assumption skipInterval == 16)

    // with next
    tdocs.seek(ta);
    assertTrue(tdocs.next());
    assertEquals(0, tdocs.doc());
    assertEquals(4, tdocs.freq());
    assertTrue(tdocs.next());
    assertEquals(1, tdocs.doc());
    assertEquals(4, tdocs.freq());
    assertTrue(tdocs.skipTo(0));
    assertEquals(2, tdocs.doc());
    assertTrue(tdocs.skipTo(4));
    assertEquals(4, tdocs.doc());
    assertTrue(tdocs.skipTo(9));
    assertEquals(9, tdocs.doc());
    assertFalse(tdocs.skipTo(10));

    // without next
    tdocs.seek(ta);
    assertTrue(tdocs.skipTo(0));
    assertEquals(0, tdocs.doc());
    assertTrue(tdocs.skipTo(4));
    assertEquals(4, tdocs.doc());
    assertTrue(tdocs.skipTo(9));
    assertEquals(9, tdocs.doc());
    assertFalse(tdocs.skipTo(10));

    // exactly skipInterval documents and therefore with optimization

    // with next
    tdocs.seek(tb);
    assertTrue(tdocs.next());
    assertEquals(10, tdocs.doc());
    assertEquals(4, tdocs.freq());
    assertTrue(tdocs.next());
    assertEquals(11, tdocs.doc());
    assertEquals(4, tdocs.freq());
    assertTrue(tdocs.skipTo(5));
    assertEquals(12, tdocs.doc());
    assertTrue(tdocs.skipTo(15));
    assertEquals(15, tdocs.doc());
    assertTrue(tdocs.skipTo(24));
    assertEquals(24, tdocs.doc());
    assertTrue(tdocs.skipTo(25));
    assertEquals(25, tdocs.doc());
    assertFalse(tdocs.skipTo(26));

    // without next
    tdocs.seek(tb);
    assertTrue(tdocs.skipTo(5));
    assertEquals(10, tdocs.doc());
    assertTrue(tdocs.skipTo(15));
    assertEquals(15, tdocs.doc());
    assertTrue(tdocs.skipTo(24));
    assertEquals(24, tdocs.doc());
    assertTrue(tdocs.skipTo(25));
    assertEquals(25, tdocs.doc());
    assertFalse(tdocs.skipTo(26));

    // much more than skipInterval documents and therefore with optimization

    // with next
    tdocs.seek(tc);
    assertTrue(tdocs.next());
    assertEquals(26, tdocs.doc());
    assertEquals(4, tdocs.freq());
    assertTrue(tdocs.next());
    assertEquals(27, tdocs.doc());
    assertEquals(4, tdocs.freq());
    assertTrue(tdocs.skipTo(5));
    assertEquals(28, tdocs.doc());
    assertTrue(tdocs.skipTo(40));
    assertEquals(40, tdocs.doc());
    assertTrue(tdocs.skipTo(57));
    assertEquals(57, tdocs.doc());
    assertTrue(tdocs.skipTo(74));
    assertEquals(74, tdocs.doc());
    assertTrue(tdocs.skipTo(75));
    assertEquals(75, tdocs.doc());
    assertFalse(tdocs.skipTo(76));

    // without next
    tdocs.seek(tc);
    assertTrue(tdocs.skipTo(5));
    assertEquals(26, tdocs.doc());
    assertTrue(tdocs.skipTo(40));
    assertEquals(40, tdocs.doc());
    assertTrue(tdocs.skipTo(57));
    assertEquals(57, tdocs.doc());
    assertTrue(tdocs.skipTo(74));
    assertEquals(74, tdocs.doc());
    assertTrue(tdocs.skipTo(75));
    assertEquals(75, tdocs.doc());
    assertFalse(tdocs.skipTo(76));

    tdocs.close();
    reader.close();
    dir.close();
  }
Пример #3
0
  public static void main(String[] args) throws Exception {
    if (args.length < 2) {
      System.err.println("TermDumper [-c|-v value] field <index...>");
      System.exit(1);
    }

    boolean count = false;
    String value = null;
    boolean all = false;

    int i = 0;
    for (; i < args.length; i++) {
      String arg = args[i];

      if ("-h".equals(arg) || "--help".equals(arg)) {
        System.err.println("TermDumper [-c|-v value] field <index...>");
        System.exit(1);
      } else if ("-c".equals(arg) || "--count".equals(arg)) {
        count = true;
      } else if ("-v".equals(arg) || "--vaue".equals(arg)) {
        value = args[++i];
      } else if ("-a".equals(arg) || "--all".equals(arg)) {
        all = true;
      } else {
        break;
      }
    }

    String field = args[i++];

    java.util.ArrayList<IndexReader> readers =
        new java.util.ArrayList<IndexReader>(args.length - 1);
    for (; i < args.length; i++) {
      String arg = args[i];
      try {
        IndexReader reader = IndexReader.open(new MMapDirectory(new File(arg)), true);

        readers.add(reader);
      } catch (IOException ioe) {
        System.err.println("Error reading: " + arg);
      }
    }

    for (IndexReader reader : readers) {
      TermDocs termDocs = reader.termDocs();
      TermEnum termEnum = reader.terms(new Term(field));

      try {
        do {
          Term term = termEnum.term();

          if (term == null || !field.equals(term.field())) break;

          if (value == null) {
            if (count) {
              termDocs.seek(termEnum);

              int c = 0;
              for (; termDocs.next(); c++) ;

              System.out.print(c + " ");
            }
            System.out.println(term.text());
          } else if (value.equals(term.text())) {
            termDocs.seek(termEnum);

            while (termDocs.next()) {
              if (all) {
                Document d = reader.document(termDocs.doc());
                System.out.println(termDocs.doc());
                for (Object o : d.getFields()) {
                  Field f = (Field) o;
                  System.out.println(f.name() + " " + d.get(f.name()));
                }
              } else {
                System.out.println(
                    termDocs.doc() + " " + reader.document(termDocs.doc()).get("url"));
              }
            }
          }
        } while (termEnum.next());
      } finally {
        termDocs.close();
        termEnum.close();
      }
    }
  }