Java IndexWriter.addDocument示例，org.apache.lucene.document.IndexWriter.addDocument Java示例

示例#1

0

显示文件

文件： TestMultiFields.java 项目： joseerlang/Lucene-solr

 public void testTermDocsEnum() throws Exception {
   Directory dir = newDirectory();
   IndexWriter w =
       new IndexWriter(
           dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
   Document d = new Document();
   d.add(newStringField("f", "j", Field.Store.NO));
   w.addDocument(d);
   w.commit();
   w.addDocument(d);
   IndexReader r = w.getReader();
   w.close();
   DocsEnum de = MultiFields.getTermDocsEnum(r, null, "f", new BytesRef("j"));
   assertEquals(0, de.nextDoc());
   assertEquals(1, de.nextDoc());
   assertEquals(DocIdSetIterator.NO_MORE_DOCS, de.nextDoc());
   r.close();
   dir.close();
 }

示例#2

0

显示文件

文件： TestMultiFields.java 项目： joseerlang/Lucene-solr

 public void testSeparateEnums() throws Exception {
   Directory dir = newDirectory();
   IndexWriter w =
       new IndexWriter(
           dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
   Document d = new Document();
   d.add(newStringField("f", "j", Field.Store.NO));
   w.addDocument(d);
   w.commit();
   w.addDocument(d);
   IndexReader r = w.getReader();
   w.close();
   DocsEnum d1 = _TestUtil.docs(random(), r, "f", new BytesRef("j"), null, null, 0);
   DocsEnum d2 = _TestUtil.docs(random(), r, "f", new BytesRef("j"), null, null, 0);
   assertEquals(0, d1.nextDoc());
   assertEquals(0, d2.nextDoc());
   r.close();
   dir.close();
 }

示例#3

0

显示文件

文件： TestSearchForDuplicates.java 项目： zuoyebushiwo/lucene-solr-lucene_solr_4_10_4

  private void doTest(Random random, PrintWriter out, boolean useCompoundFiles, int MAX_DOCS)
      throws Exception {
    Directory directory = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random);
    IndexWriterConfig conf = newIndexWriterConfig(analyzer);
    final MergePolicy mp = conf.getMergePolicy();
    mp.setNoCFSRatio(useCompoundFiles ? 1.0 : 0.0);
    IndexWriter writer = new IndexWriter(directory, conf);
    if (VERBOSE) {
      System.out.println("TEST: now build index MAX_DOCS=" + MAX_DOCS);
    }

    for (int j = 0; j < MAX_DOCS; j++) {
      Document d = new Document();
      d.add(newTextField(PRIORITY_FIELD, HIGH_PRIORITY, Field.Store.YES));
      d.add(newTextField(ID_FIELD, Integer.toString(j), Field.Store.YES));
      writer.addDocument(d);
    }
    writer.close();

    // try a search without OR
    IndexReader reader = DirectoryReader.open(directory);
    IndexSearcher searcher = newSearcher(reader);

    Query query = new TermQuery(new Term(PRIORITY_FIELD, HIGH_PRIORITY));
    out.println("Query: " + query.toString(PRIORITY_FIELD));
    if (VERBOSE) {
      System.out.println("TEST: search query=" + query);
    }

    final Sort sort = new Sort(SortField.FIELD_SCORE, new SortField(ID_FIELD, SortField.Type.INT));

    ScoreDoc[] hits = searcher.search(query, null, MAX_DOCS, sort).scoreDocs;
    printHits(out, hits, searcher);
    checkHits(hits, MAX_DOCS, searcher);

    // try a new search with OR
    searcher = newSearcher(reader);
    hits = null;

    BooleanQuery booleanQuery = new BooleanQuery();
    booleanQuery.add(
        new TermQuery(new Term(PRIORITY_FIELD, HIGH_PRIORITY)), BooleanClause.Occur.SHOULD);
    booleanQuery.add(
        new TermQuery(new Term(PRIORITY_FIELD, MED_PRIORITY)), BooleanClause.Occur.SHOULD);
    out.println("Query: " + booleanQuery.toString(PRIORITY_FIELD));

    hits = searcher.search(booleanQuery, null, MAX_DOCS, sort).scoreDocs;
    printHits(out, hits, searcher);
    checkHits(hits, MAX_DOCS, searcher);

    reader.close();
    directory.close();
  }

示例#4

0

显示文件

文件： FieldDataTermsFilterTests.java 项目： kurtado/elasticsearch

  @Before
  public void setup() throws Exception {
    super.setUp();

    // setup field mappers
    strMapper =
        new StringFieldMapper.Builder("str_value")
            .build(new Mapper.BuilderContext(null, new ContentPath(1)));

    lngMapper =
        new LongFieldMapper.Builder("lng_value")
            .build(new Mapper.BuilderContext(null, new ContentPath(1)));

    dblMapper =
        new DoubleFieldMapper.Builder("dbl_value")
            .build(new Mapper.BuilderContext(null, new ContentPath(1)));

    // create index and fielddata service
    ifdService = new IndexFieldDataService(new Index("test"), new DummyCircuitBreakerService());
    MapperService mapperService =
        MapperTestUtils.newMapperService(
            ifdService.index(), ImmutableSettings.Builder.EMPTY_SETTINGS);
    ifdService.setIndexService(new StubIndexService(mapperService));
    writer =
        new IndexWriter(
            new RAMDirectory(),
            new IndexWriterConfig(Lucene.VERSION, new StandardAnalyzer(Lucene.VERSION)));

    int numDocs = 10;
    for (int i = 0; i < numDocs; i++) {
      Document d = new Document();
      d.add(new StringField(strMapper.names().indexName(), "str" + i, Field.Store.NO));
      d.add(new LongField(lngMapper.names().indexName(), i, Field.Store.NO));
      d.add(new DoubleField(dblMapper.names().indexName(), Double.valueOf(i), Field.Store.NO));
      writer.addDocument(d);
    }

    reader = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(writer, true));
  }

示例#5

0

显示文件

文件： TestMultiFields.java 项目： joseerlang/Lucene-solr

  public void testRandom() throws Exception {

    int num = atLeast(2);
    for (int iter = 0; iter < num; iter++) {
      if (VERBOSE) {
        System.out.println("TEST: iter=" + iter);
      }

      Directory dir = newDirectory();

      IndexWriter w =
          new IndexWriter(
              dir,
              newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                  .setMergePolicy(NoMergePolicy.COMPOUND_FILES));
      _TestUtil.keepFullyDeletedSegments(w);

      Map<BytesRef, List<Integer>> docs = new HashMap<BytesRef, List<Integer>>();
      Set<Integer> deleted = new HashSet<Integer>();
      List<BytesRef> terms = new ArrayList<BytesRef>();

      int numDocs = _TestUtil.nextInt(random(), 1, 100 * RANDOM_MULTIPLIER);
      Document doc = new Document();
      Field f = newStringField("field", "", Field.Store.NO);
      doc.add(f);
      Field id = newStringField("id", "", Field.Store.NO);
      doc.add(id);

      boolean onlyUniqueTerms = random().nextBoolean();
      if (VERBOSE) {
        System.out.println("TEST: onlyUniqueTerms=" + onlyUniqueTerms + " numDocs=" + numDocs);
      }
      Set<BytesRef> uniqueTerms = new HashSet<BytesRef>();
      for (int i = 0; i < numDocs; i++) {

        if (!onlyUniqueTerms && random().nextBoolean() && terms.size() > 0) {
          // re-use existing term
          BytesRef term = terms.get(random().nextInt(terms.size()));
          docs.get(term).add(i);
          f.setStringValue(term.utf8ToString());
        } else {
          String s = _TestUtil.randomUnicodeString(random(), 10);
          BytesRef term = new BytesRef(s);
          if (!docs.containsKey(term)) {
            docs.put(term, new ArrayList<Integer>());
          }
          docs.get(term).add(i);
          terms.add(term);
          uniqueTerms.add(term);
          f.setStringValue(s);
        }
        id.setStringValue("" + i);
        w.addDocument(doc);
        if (random().nextInt(4) == 1) {
          w.commit();
        }
        if (i > 0 && random().nextInt(20) == 1) {
          int delID = random().nextInt(i);
          deleted.add(delID);
          w.deleteDocuments(new Term("id", "" + delID));
          if (VERBOSE) {
            System.out.println("TEST: delete " + delID);
          }
        }
      }

      if (VERBOSE) {
        List<BytesRef> termsList = new ArrayList<BytesRef>(uniqueTerms);
        Collections.sort(termsList, BytesRef.getUTF8SortedAsUTF16Comparator());
        System.out.println("TEST: terms in UTF16 order:");
        for (BytesRef b : termsList) {
          System.out.println("  " + UnicodeUtil.toHexString(b.utf8ToString()) + " " + b);
          for (int docID : docs.get(b)) {
            if (deleted.contains(docID)) {
              System.out.println("    " + docID + " (deleted)");
            } else {
              System.out.println("    " + docID);
            }
          }
        }
      }

      IndexReader reader = w.getReader();
      w.close();
      if (VERBOSE) {
        System.out.println("TEST: reader=" + reader);
      }

      Bits liveDocs = MultiFields.getLiveDocs(reader);
      for (int delDoc : deleted) {
        assertFalse(liveDocs.get(delDoc));
      }

      for (int i = 0; i < 100; i++) {
        BytesRef term = terms.get(random().nextInt(terms.size()));
        if (VERBOSE) {
          System.out.println(
              "TEST: seek term=" + UnicodeUtil.toHexString(term.utf8ToString()) + " " + term);
        }

        DocsEnum docsEnum = _TestUtil.docs(random(), reader, "field", term, liveDocs, null, 0);
        assertNotNull(docsEnum);

        for (int docID : docs.get(term)) {
          if (!deleted.contains(docID)) {
            assertEquals(docID, docsEnum.nextDoc());
          }
        }
        assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsEnum.nextDoc());
      }

      reader.close();
      dir.close();
    }
  }

示例#6

0

显示文件

文件： TestRollingUpdates.java 项目： PATRIC3/p3_solr

  @Test
  public void testRollingUpdates() throws Exception {
    Random random = new Random(random().nextLong());
    final BaseDirectoryWrapper dir = newDirectory();
    // test checks for no unref'ed files with the IW helper method, which isn't aware of "tried to
    // delete files"
    if (dir instanceof MockDirectoryWrapper) {
      ((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
    }

    final LineFileDocs docs = new LineFileDocs(random, true);

    // provider.register(new MemoryCodec());
    if (random().nextBoolean()) {
      Codec.setDefault(
          TestUtil.alwaysPostingsFormat(
              new MemoryPostingsFormat(random().nextBoolean(), random.nextFloat())));
    }

    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));

    final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(analyzer));
    final int SIZE = atLeast(20);
    int id = 0;
    IndexReader r = null;
    IndexSearcher s = null;
    final int numUpdates =
        (int)
            (SIZE * (2 + (TEST_NIGHTLY ? 200 * random().nextDouble() : 5 * random().nextDouble())));
    if (VERBOSE) {
      System.out.println("TEST: numUpdates=" + numUpdates);
    }
    int updateCount = 0;
    // TODO: sometimes update ids not in order...
    for (int docIter = 0; docIter < numUpdates; docIter++) {
      final Document doc = docs.nextDoc();
      final String myID = Integer.toString(id);
      if (id == SIZE - 1) {
        id = 0;
      } else {
        id++;
      }
      if (VERBOSE) {
        System.out.println("  docIter=" + docIter + " id=" + id);
      }
      ((Field) doc.getField("docid")).setStringValue(myID);

      Term idTerm = new Term("docid", myID);

      final boolean doUpdate;
      if (s != null && updateCount < SIZE) {
        TopDocs hits = s.search(new TermQuery(idTerm), 1);
        assertEquals(1, hits.totalHits);
        doUpdate = !w.tryDeleteDocument(r, hits.scoreDocs[0].doc);
        if (VERBOSE) {
          if (doUpdate) {
            System.out.println("  tryDeleteDocument failed");
          } else {
            System.out.println("  tryDeleteDocument succeeded");
          }
        }
      } else {
        doUpdate = true;
        if (VERBOSE) {
          System.out.println("  no searcher: doUpdate=true");
        }
      }

      updateCount++;

      if (doUpdate) {
        if (random().nextBoolean()) {
          w.updateDocument(idTerm, doc);
        } else {
          // It's OK to not be atomic for this test (no separate thread reopening readers):
          w.deleteDocuments(new TermQuery(idTerm));
          w.addDocument(doc);
        }
      } else {
        w.addDocument(doc);
      }

      if (docIter >= SIZE && random().nextInt(50) == 17) {
        if (r != null) {
          r.close();
        }

        final boolean applyDeletions = random().nextBoolean();

        if (VERBOSE) {
          System.out.println("TEST: reopen applyDeletions=" + applyDeletions);
        }

        r = w.getReader(applyDeletions);
        if (applyDeletions) {
          s = newSearcher(r);
        } else {
          s = null;
        }
        assertTrue(
            "applyDeletions=" + applyDeletions + " r.numDocs()=" + r.numDocs() + " vs SIZE=" + SIZE,
            !applyDeletions || r.numDocs() == SIZE);
        updateCount = 0;
      }
    }

    if (r != null) {
      r.close();
    }

    w.commit();
    assertEquals(SIZE, w.numDocs());

    w.close();

    TestIndexWriter.assertNoUnreferencedFiles(dir, "leftover files after rolling updates");

    docs.close();

    // LUCENE-4455:
    SegmentInfos infos = SegmentInfos.readLatestCommit(dir);
    long totalBytes = 0;
    for (SegmentCommitInfo sipc : infos) {
      totalBytes += sipc.sizeInBytes();
    }
    long totalBytes2 = 0;

    for (String fileName : dir.listAll()) {
      if (IndexFileNames.CODEC_FILE_PATTERN.matcher(fileName).matches()) {
        totalBytes2 += dir.fileLength(fileName);
      }
    }
    assertEquals(totalBytes2, totalBytes);
    dir.close();
  }

示例#7

0

显示文件