Java Document Examples, org.apache.lucene.store.Document Java Examples

Example #1

0

Show file

File: TestSearchForDuplicates.java Project: zuoyebushiwo/lucene-solr-lucene_solr_4_10_4

 private void checkHits(ScoreDoc[] hits, int expectedCount, IndexSearcher searcher)
     throws IOException {
   assertEquals("total results", expectedCount, hits.length);
   for (int i = 0; i < hits.length; i++) {
     if (i < 10 || (i > 94 && i < 105)) {
       Document d = searcher.doc(hits[i].doc);
       assertEquals("check " + i, String.valueOf(i), d.get(ID_FIELD));
     }
   }
 }

Example #2

0

Show file

File: TestSearchForDuplicates.java Project: zuoyebushiwo/lucene-solr-lucene_solr_4_10_4

 private void printHits(PrintWriter out, ScoreDoc[] hits, IndexSearcher searcher)
     throws IOException {
   out.println(hits.length + " total results\n");
   for (int i = 0; i < hits.length; i++) {
     if (i < 10 || (i > 94 && i < 105)) {
       Document d = searcher.doc(hits[i].doc);
       out.println(i + " " + d.get(ID_FIELD));
     }
   }
 }

Example #3

0

Show file

File: TestSearchForDuplicates.java Project: zuoyebushiwo/lucene-solr-lucene_solr_4_10_4

  private void doTest(Random random, PrintWriter out, boolean useCompoundFiles, int MAX_DOCS)
      throws Exception {
    Directory directory = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random);
    IndexWriterConfig conf = newIndexWriterConfig(analyzer);
    final MergePolicy mp = conf.getMergePolicy();
    mp.setNoCFSRatio(useCompoundFiles ? 1.0 : 0.0);
    IndexWriter writer = new IndexWriter(directory, conf);
    if (VERBOSE) {
      System.out.println("TEST: now build index MAX_DOCS=" + MAX_DOCS);
    }

    for (int j = 0; j < MAX_DOCS; j++) {
      Document d = new Document();
      d.add(newTextField(PRIORITY_FIELD, HIGH_PRIORITY, Field.Store.YES));
      d.add(newTextField(ID_FIELD, Integer.toString(j), Field.Store.YES));
      writer.addDocument(d);
    }
    writer.close();

    // try a search without OR
    IndexReader reader = DirectoryReader.open(directory);
    IndexSearcher searcher = newSearcher(reader);

    Query query = new TermQuery(new Term(PRIORITY_FIELD, HIGH_PRIORITY));
    out.println("Query: " + query.toString(PRIORITY_FIELD));
    if (VERBOSE) {
      System.out.println("TEST: search query=" + query);
    }

    final Sort sort = new Sort(SortField.FIELD_SCORE, new SortField(ID_FIELD, SortField.Type.INT));

    ScoreDoc[] hits = searcher.search(query, null, MAX_DOCS, sort).scoreDocs;
    printHits(out, hits, searcher);
    checkHits(hits, MAX_DOCS, searcher);

    // try a new search with OR
    searcher = newSearcher(reader);
    hits = null;

    BooleanQuery booleanQuery = new BooleanQuery();
    booleanQuery.add(
        new TermQuery(new Term(PRIORITY_FIELD, HIGH_PRIORITY)), BooleanClause.Occur.SHOULD);
    booleanQuery.add(
        new TermQuery(new Term(PRIORITY_FIELD, MED_PRIORITY)), BooleanClause.Occur.SHOULD);
    out.println("Query: " + booleanQuery.toString(PRIORITY_FIELD));

    hits = searcher.search(booleanQuery, null, MAX_DOCS, sort).scoreDocs;
    printHits(out, hits, searcher);
    checkHits(hits, MAX_DOCS, searcher);

    reader.close();
    directory.close();
  }

Example #4

0

Show file

File: TestMultiFields.java Project: joseerlang/Lucene-solr

 public void testTermDocsEnum() throws Exception {
   Directory dir = newDirectory();
   IndexWriter w =
       new IndexWriter(
           dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
   Document d = new Document();
   d.add(newStringField("f", "j", Field.Store.NO));
   w.addDocument(d);
   w.commit();
   w.addDocument(d);
   IndexReader r = w.getReader();
   w.close();
   DocsEnum de = MultiFields.getTermDocsEnum(r, null, "f", new BytesRef("j"));
   assertEquals(0, de.nextDoc());
   assertEquals(1, de.nextDoc());
   assertEquals(DocIdSetIterator.NO_MORE_DOCS, de.nextDoc());
   r.close();
   dir.close();
 }

Example #5

0

Show file

File: TestMultiFields.java Project: joseerlang/Lucene-solr

 public void testSeparateEnums() throws Exception {
   Directory dir = newDirectory();
   IndexWriter w =
       new IndexWriter(
           dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
   Document d = new Document();
   d.add(newStringField("f", "j", Field.Store.NO));
   w.addDocument(d);
   w.commit();
   w.addDocument(d);
   IndexReader r = w.getReader();
   w.close();
   DocsEnum d1 = _TestUtil.docs(random(), r, "f", new BytesRef("j"), null, null, 0);
   DocsEnum d2 = _TestUtil.docs(random(), r, "f", new BytesRef("j"), null, null, 0);
   assertEquals(0, d1.nextDoc());
   assertEquals(0, d2.nextDoc());
   r.close();
   dir.close();
 }

Example #6

0

Show file

File: TestRollingUpdates.java Project: PATRIC3/p3_solr

 @Override
 public void run() {
   try {
     DirectoryReader open = null;
     for (int i = 0; i < num; i++) {
       Document doc = new Document(); // docs.nextDoc();
       BytesRef br = new BytesRef("test");
       doc.add(newStringField("id", br, Field.Store.NO));
       writer.updateDocument(new Term("id", br), doc);
       if (random().nextInt(3) == 0) {
         if (open == null) {
           open = DirectoryReader.open(writer, true);
         }
         DirectoryReader reader = DirectoryReader.openIfChanged(open);
         if (reader != null) {
           open.close();
           open = reader;
         }
         assertEquals(
             "iter: "
                 + i
                 + " numDocs: "
                 + open.numDocs()
                 + " del: "
                 + open.numDeletedDocs()
                 + " max: "
                 + open.maxDoc(),
             1,
             open.numDocs());
       }
     }
     if (open != null) {
       open.close();
     }
   } catch (Exception e) {
     throw new RuntimeException(e);
   }
 }

Example #7

0

Show file

File: TestMultiFields.java Project: joseerlang/Lucene-solr

  public void testRandom() throws Exception {

    int num = atLeast(2);
    for (int iter = 0; iter < num; iter++) {
      if (VERBOSE) {
        System.out.println("TEST: iter=" + iter);
      }

      Directory dir = newDirectory();

      IndexWriter w =
          new IndexWriter(
              dir,
              newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                  .setMergePolicy(NoMergePolicy.COMPOUND_FILES));
      _TestUtil.keepFullyDeletedSegments(w);

      Map<BytesRef, List<Integer>> docs = new HashMap<BytesRef, List<Integer>>();
      Set<Integer> deleted = new HashSet<Integer>();
      List<BytesRef> terms = new ArrayList<BytesRef>();

      int numDocs = _TestUtil.nextInt(random(), 1, 100 * RANDOM_MULTIPLIER);
      Document doc = new Document();
      Field f = newStringField("field", "", Field.Store.NO);
      doc.add(f);
      Field id = newStringField("id", "", Field.Store.NO);
      doc.add(id);

      boolean onlyUniqueTerms = random().nextBoolean();
      if (VERBOSE) {
        System.out.println("TEST: onlyUniqueTerms=" + onlyUniqueTerms + " numDocs=" + numDocs);
      }
      Set<BytesRef> uniqueTerms = new HashSet<BytesRef>();
      for (int i = 0; i < numDocs; i++) {

        if (!onlyUniqueTerms && random().nextBoolean() && terms.size() > 0) {
          // re-use existing term
          BytesRef term = terms.get(random().nextInt(terms.size()));
          docs.get(term).add(i);
          f.setStringValue(term.utf8ToString());
        } else {
          String s = _TestUtil.randomUnicodeString(random(), 10);
          BytesRef term = new BytesRef(s);
          if (!docs.containsKey(term)) {
            docs.put(term, new ArrayList<Integer>());
          }
          docs.get(term).add(i);
          terms.add(term);
          uniqueTerms.add(term);
          f.setStringValue(s);
        }
        id.setStringValue("" + i);
        w.addDocument(doc);
        if (random().nextInt(4) == 1) {
          w.commit();
        }
        if (i > 0 && random().nextInt(20) == 1) {
          int delID = random().nextInt(i);
          deleted.add(delID);
          w.deleteDocuments(new Term("id", "" + delID));
          if (VERBOSE) {
            System.out.println("TEST: delete " + delID);
          }
        }
      }

      if (VERBOSE) {
        List<BytesRef> termsList = new ArrayList<BytesRef>(uniqueTerms);
        Collections.sort(termsList, BytesRef.getUTF8SortedAsUTF16Comparator());
        System.out.println("TEST: terms in UTF16 order:");
        for (BytesRef b : termsList) {
          System.out.println("  " + UnicodeUtil.toHexString(b.utf8ToString()) + " " + b);
          for (int docID : docs.get(b)) {
            if (deleted.contains(docID)) {
              System.out.println("    " + docID + " (deleted)");
            } else {
              System.out.println("    " + docID);
            }
          }
        }
      }

      IndexReader reader = w.getReader();
      w.close();
      if (VERBOSE) {
        System.out.println("TEST: reader=" + reader);
      }

      Bits liveDocs = MultiFields.getLiveDocs(reader);
      for (int delDoc : deleted) {
        assertFalse(liveDocs.get(delDoc));
      }

      for (int i = 0; i < 100; i++) {
        BytesRef term = terms.get(random().nextInt(terms.size()));
        if (VERBOSE) {
          System.out.println(
              "TEST: seek term=" + UnicodeUtil.toHexString(term.utf8ToString()) + " " + term);
        }

        DocsEnum docsEnum = _TestUtil.docs(random(), reader, "field", term, liveDocs, null, 0);
        assertNotNull(docsEnum);

        for (int docID : docs.get(term)) {
          if (!deleted.contains(docID)) {
            assertEquals(docID, docsEnum.nextDoc());
          }
        }
        assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsEnum.nextDoc());
      }

      reader.close();
      dir.close();
    }
  }

Example #8

0

Show file

File: TestRollingUpdates.java Project: PATRIC3/p3_solr

  @Test
  public void testRollingUpdates() throws Exception {
    Random random = new Random(random().nextLong());
    final BaseDirectoryWrapper dir = newDirectory();
    // test checks for no unref'ed files with the IW helper method, which isn't aware of "tried to
    // delete files"
    if (dir instanceof MockDirectoryWrapper) {
      ((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
    }

    final LineFileDocs docs = new LineFileDocs(random, true);

    // provider.register(new MemoryCodec());
    if (random().nextBoolean()) {
      Codec.setDefault(
          TestUtil.alwaysPostingsFormat(
              new MemoryPostingsFormat(random().nextBoolean(), random.nextFloat())));
    }

    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));

    final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(analyzer));
    final int SIZE = atLeast(20);
    int id = 0;
    IndexReader r = null;
    IndexSearcher s = null;
    final int numUpdates =
        (int)
            (SIZE * (2 + (TEST_NIGHTLY ? 200 * random().nextDouble() : 5 * random().nextDouble())));
    if (VERBOSE) {
      System.out.println("TEST: numUpdates=" + numUpdates);
    }
    int updateCount = 0;
    // TODO: sometimes update ids not in order...
    for (int docIter = 0; docIter < numUpdates; docIter++) {
      final Document doc = docs.nextDoc();
      final String myID = Integer.toString(id);
      if (id == SIZE - 1) {
        id = 0;
      } else {
        id++;
      }
      if (VERBOSE) {
        System.out.println("  docIter=" + docIter + " id=" + id);
      }
      ((Field) doc.getField("docid")).setStringValue(myID);

      Term idTerm = new Term("docid", myID);

      final boolean doUpdate;
      if (s != null && updateCount < SIZE) {
        TopDocs hits = s.search(new TermQuery(idTerm), 1);
        assertEquals(1, hits.totalHits);
        doUpdate = !w.tryDeleteDocument(r, hits.scoreDocs[0].doc);
        if (VERBOSE) {
          if (doUpdate) {
            System.out.println("  tryDeleteDocument failed");
          } else {
            System.out.println("  tryDeleteDocument succeeded");
          }
        }
      } else {
        doUpdate = true;
        if (VERBOSE) {
          System.out.println("  no searcher: doUpdate=true");
        }
      }

      updateCount++;

      if (doUpdate) {
        if (random().nextBoolean()) {
          w.updateDocument(idTerm, doc);
        } else {
          // It's OK to not be atomic for this test (no separate thread reopening readers):
          w.deleteDocuments(new TermQuery(idTerm));
          w.addDocument(doc);
        }
      } else {
        w.addDocument(doc);
      }

      if (docIter >= SIZE && random().nextInt(50) == 17) {
        if (r != null) {
          r.close();
        }

        final boolean applyDeletions = random().nextBoolean();

        if (VERBOSE) {
          System.out.println("TEST: reopen applyDeletions=" + applyDeletions);
        }

        r = w.getReader(applyDeletions);
        if (applyDeletions) {
          s = newSearcher(r);
        } else {
          s = null;
        }
        assertTrue(
            "applyDeletions=" + applyDeletions + " r.numDocs()=" + r.numDocs() + " vs SIZE=" + SIZE,
            !applyDeletions || r.numDocs() == SIZE);
        updateCount = 0;
      }
    }

    if (r != null) {
      r.close();
    }

    w.commit();
    assertEquals(SIZE, w.numDocs());

    w.close();

    TestIndexWriter.assertNoUnreferencedFiles(dir, "leftover files after rolling updates");

    docs.close();

    // LUCENE-4455:
    SegmentInfos infos = SegmentInfos.readLatestCommit(dir);
    long totalBytes = 0;
    for (SegmentCommitInfo sipc : infos) {
      totalBytes += sipc.sizeInBytes();
    }
    long totalBytes2 = 0;

    for (String fileName : dir.listAll()) {
      if (IndexFileNames.CODEC_FILE_PATTERN.matcher(fileName).matches()) {
        totalBytes2 += dir.fileLength(fileName);
      }
    }
    assertEquals(totalBytes2, totalBytes);
    dir.close();
  }

Example #9

0

Show file

File: SearchFilesDefault.java Project: udaiarora/IR

  public static void main(String[] args) throws Exception {
    // the IndexReader object is the main handle that will give you
    // all the documents, terms and inverted index
    IndexReader r = IndexReader.open(FSDirectory.open(new File("index")));

    // You can figure out the number of documents using the maxDoc() function
    System.out.println("The number of documents in this index is: " + r.maxDoc());

    int i = 0;
    // You can find out all the terms that have been indexed using the terms() function
    TermEnum t = r.terms();
    while (t.next()) {
      // Since there are so many terms, let us try printing only term #100000-#100010
      if (i > 100000) System.out.println("[" + i + "] " + t.term().text());
      if (++i > 100010) break;
    }

    // You can create your own query terms by calling the Term constructor, with the field
    // 'contents'
    // In the following example, the query term is 'brute'
    Term te = new Term("contents", "brute");

    // You can also quickly find out the number of documents that have term t
    System.out.println("Number of documents with the word 'brute' is: " + r.docFreq(te));

    // You can use the inverted index to find out all the documents that contain the term 'brute'
    //  by using the termDocs function
    TermDocs td = r.termDocs(te);
    while (td.next()) {
      System.out.println(
          "Document number ["
              + td.doc()
              + "] contains the term 'brute' "
              + td.freq()
              + " time(s).");
    }

    // You can find the URL of the a specific document number using the document() function
    // For example, the URL for document number 14191 is:
    Document d = r.document(14191);
    String url =
        d.getFieldable("path")
            .stringValue(); // the 'path' field of the Document object holds the URL
    System.out.println(url.replace("%%", "/"));

    // -------- Now let us use all of the functions above to make something useful --------
    // The following bit of code is a worked out example of how to get a bunch of documents
    // in response to a query and show them (without ranking them according to TF/IDF)
    Scanner sc = new Scanner(System.in);
    String str = "";
    System.out.print("query> ");
    while (!(str = sc.nextLine()).equals("quit")) {
      String[] terms = str.split("\\s+");
      for (String word : terms) {
        Term term = new Term("contents", word);
        TermDocs tdocs = r.termDocs(term);
        while (tdocs.next()) {
          String d_url =
              r.document(tdocs.doc()).getFieldable("path").stringValue().replace("%%", "/");
          System.out.println("[" + tdocs.doc() + "] " + d_url);
        }
      }
      System.out.print("query> ");
    }
  }

Example #10

0

Show file