// test the simple case
  public void testSimpleCase() throws IOException {
    String[] keywords = {"1", "2"};
    String[] unindexed = {"Netherlands", "Italy"};
    String[] unstored = {"Amsterdam has lots of bridges", "Venice has lots of canals"};
    String[] text = {"Amsterdam", "Venice"};

    Directory dir = new MockRAMDirectory();
    IndexWriter modifier =
        new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
    modifier.setUseCompoundFile(true);
    modifier.setMaxBufferedDeleteTerms(1);

    for (int i = 0; i < keywords.length; i++) {
      Document doc = new Document();
      doc.add(new Field("id", keywords[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
      doc.add(new Field("country", unindexed[i], Field.Store.YES, Field.Index.NO));
      doc.add(new Field("contents", unstored[i], Field.Store.NO, Field.Index.ANALYZED));
      doc.add(new Field("city", text[i], Field.Store.YES, Field.Index.ANALYZED));
      modifier.addDocument(doc);
    }
    modifier.optimize();
    modifier.commit();

    Term term = new Term("city", "Amsterdam");
    int hitCount = getHitCount(dir, term);
    assertEquals(1, hitCount);
    modifier.deleteDocuments(term);
    modifier.commit();
    hitCount = getHitCount(dir, term);
    assertEquals(0, hitCount);

    modifier.close();
    dir.close();
  }
  // test when delete terms only apply to disk segments
  public void testNonRAMDelete() throws IOException {

    Directory dir = new MockRAMDirectory();
    IndexWriter modifier =
        new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
    modifier.setMaxBufferedDocs(2);
    modifier.setMaxBufferedDeleteTerms(2);

    int id = 0;
    int value = 100;

    for (int i = 0; i < 7; i++) {
      addDoc(modifier, ++id, value);
    }
    modifier.commit();

    assertEquals(0, modifier.getNumBufferedDocuments());
    assertTrue(0 < modifier.getSegmentCount());

    modifier.commit();

    IndexReader reader = IndexReader.open(dir, true);
    assertEquals(7, reader.numDocs());
    reader.close();

    modifier.deleteDocuments(new Term("value", String.valueOf(value)));

    modifier.commit();

    reader = IndexReader.open(dir, true);
    assertEquals(0, reader.numDocs());
    reader.close();
    modifier.close();
    dir.close();
  }
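  // The delete tests above and below rely on addDoc, updateDoc and getHitCount helpers that are
  // not part of this excerpt. The following is a minimal sketch of what they need to do, inferred
  // from how the tests use them; the exact field contents are assumptions, but "id" and "value"
  // must be indexed un-analyzed so that the delete-by-term calls match whole values.
  private void addDoc(IndexWriter modifier, int id, int value) throws IOException {
    Document doc = new Document();
    doc.add(new Field("content", "aaa", Field.Store.NO, Field.Index.ANALYZED));
    doc.add(new Field("id", String.valueOf(id), Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.add(new Field("value", String.valueOf(value), Field.Store.NO, Field.Index.NOT_ANALYZED));
    modifier.addDocument(doc);
  }

  private void updateDoc(IndexWriter modifier, int id, int value) throws IOException {
    Document doc = new Document();
    doc.add(new Field("content", "bbb", Field.Store.NO, Field.Index.ANALYZED));
    doc.add(new Field("id", String.valueOf(id), Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.add(new Field("value", String.valueOf(value), Field.Store.NO, Field.Index.NOT_ANALYZED));
    // updateDocument is an atomic delete-by-term followed by an add
    modifier.updateDocument(new Term("id", String.valueOf(id)), doc);
  }

  private int getHitCount(Directory dir, Term term) throws IOException {
    IndexSearcher searcher = new IndexSearcher(dir, true);
    int hitCount = searcher.search(new TermQuery(term), null, 1000).totalHits;
    searcher.close();
    return hitCount;
  }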
    @Override
    public void doWork() throws Throwable {

      IndexWriter writer1 =
          new IndexWriter(
              dir1,
              newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                  .setMaxBufferedDocs(3)
                  .setMergeScheduler(new ConcurrentMergeScheduler())
                  .setMergePolicy(newLogMergePolicy(2)));
      ((ConcurrentMergeScheduler) writer1.getConfig().getMergeScheduler()).setSuppressExceptions();

      // Intentionally use different params so flush/merge
      // happen @ different times
      IndexWriter writer2 =
          new IndexWriter(
              dir2,
              newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                  .setMaxBufferedDocs(2)
                  .setMergeScheduler(new ConcurrentMergeScheduler())
                  .setMergePolicy(newLogMergePolicy(3)));
      ((ConcurrentMergeScheduler) writer2.getConfig().getMergeScheduler()).setSuppressExceptions();

      update(writer1);
      update(writer2);

      TestTransactions.doFail = true;
      try {
        synchronized (lock) {
          try {
            writer1.prepareCommit();
          } catch (Throwable t) {
            writer1.rollback();
            writer2.rollback();
            return;
          }
          try {
            writer2.prepareCommit();
          } catch (Throwable t) {
            writer1.rollback();
            writer2.rollback();
            return;
          }

          writer1.commit();
          writer2.commit();
        }
      } finally {
        TestTransactions.doFail = false;
      }

      writer1.close();
      writer2.close();
    }
  // LUCENE-1274: test writer.prepareCommit()
  public void testPrepareCommitRollback() throws IOException {
    MockDirectoryWrapper dir = newDirectory();
    dir.setPreventDoubleWrite(false);

    IndexWriter writer =
        new IndexWriter(
            dir,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setMaxBufferedDocs(2)
                .setMergePolicy(newLogMergePolicy(5)));
    writer.commit();

    for (int i = 0; i < 23; i++) TestIndexWriter.addDoc(writer);

    DirectoryReader reader = DirectoryReader.open(dir);
    assertEquals(0, reader.numDocs());

    writer.prepareCommit();

    IndexReader reader2 = DirectoryReader.open(dir);
    assertEquals(0, reader2.numDocs());

    writer.rollback();

    IndexReader reader3 = DirectoryReader.openIfChanged(reader);
    assertNull(reader3);
    assertEquals(0, reader.numDocs());
    assertEquals(0, reader2.numDocs());
    reader.close();
    reader2.close();

    writer =
        new IndexWriter(
            dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    for (int i = 0; i < 17; i++) TestIndexWriter.addDoc(writer);

    reader = DirectoryReader.open(dir);
    assertEquals(0, reader.numDocs());
    reader.close();

    writer.prepareCommit();

    reader = DirectoryReader.open(dir);
    assertEquals(0, reader.numDocs());
    reader.close();

    writer.commit();
    reader = DirectoryReader.open(dir);
    assertEquals(17, reader.numDocs());
    reader.close();
    writer.close();
    dir.close();
  }
  // test rollback of deleteAll()
  public void testDeleteAllRollback() throws IOException {
    Directory dir = new MockRAMDirectory();
    IndexWriter modifier =
        new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
    modifier.setMaxBufferedDocs(2);
    modifier.setMaxBufferedDeleteTerms(2);

    int id = 0;
    int value = 100;

    for (int i = 0; i < 7; i++) {
      addDoc(modifier, ++id, value);
    }
    modifier.commit();

    addDoc(modifier, ++id, value);

    IndexReader reader = IndexReader.open(dir, true);
    assertEquals(7, reader.numDocs());
    reader.close();

    // Delete all
    modifier.deleteAll();

    // Roll it back
    modifier.rollback();
    modifier.close();

    // Validate that the docs are still there
    reader = IndexReader.open(dir, true);
    assertEquals(7, reader.numDocs());
    reader.close();

    dir.close();
  }
  /**
   * {@link PersistentSnapshotDeletionPolicy} wraps another {@link IndexDeletionPolicy} to enable
   * flexible snapshotting.
   *
   * @param primary the {@link IndexDeletionPolicy} that is used on non-snapshotted commits.
   *     Snapshotted commits, by definition, are not deleted until explicitly released via {@link
   *     #release(String)}.
   * @param dir the {@link Directory} which will be used to persist the snapshots information.
   * @param mode specifies whether a new index should be created (immediately deleting all
   *     existing snapshot information) or an existing index should be opened (initializing the
   *     class with the persisted snapshot information).
   * @param matchVersion specifies the {@link Version} that should be used when opening the
   *     IndexWriter.
   */
  public PersistentSnapshotDeletionPolicy(
      IndexDeletionPolicy primary, Directory dir, OpenMode mode, Version matchVersion)
      throws CorruptIndexException, LockObtainFailedException, IOException {
    super(primary, null);

    // Initialize the index writer over the snapshot directory.
    writer = new IndexWriter(dir, new IndexWriterConfig(matchVersion, null).setOpenMode(mode));
    if (mode != OpenMode.APPEND) {
      // IndexWriter no longer creates a first commit on an empty Directory. So
      // if we were asked to CREATE*, call commit() just to be sure. If the
      // index contains information and mode is CREATE_OR_APPEND, it's a no-op.
      writer.commit();
    }

    try {
      // Initialize the snapshots information. This code only really needs to run
      // when mode != CREATE, but if it does run in that case there is no harm, as
      // we only open the reader once and immediately close it.
      for (Entry<String, String> e : readSnapshotsInfo(dir).entrySet()) {
        registerSnapshotInfo(e.getKey(), e.getValue(), null);
      }
    } catch (RuntimeException e) {
      writer.close(); // don't leave any open file handles
      throw e;
    } catch (IOException e) {
      writer.close(); // don't leave any open file handles
      throw e;
    }
  }
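  // A minimal usage sketch for the constructor above; it is not part of the original source. The
  // directory arguments, the Version constant and the snapshot id are assumptions, and it relies
  // on the snapshot(String)/release(String) API referenced in the javadoc. On releases where
  // IndexWriter clones its config, fetch the live policy back via
  // writer.getConfig().getIndexDeletionPolicy() before snapshotting.
  public static void snapshotExample(Directory indexDir, Directory snapshotDir) throws IOException {
    PersistentSnapshotDeletionPolicy policy =
        new PersistentSnapshotDeletionPolicy(
            new KeepOnlyLastCommitDeletionPolicy(),
            snapshotDir,
            OpenMode.CREATE_OR_APPEND,
            Version.LUCENE_36);
    IndexWriter writer =
        new IndexWriter(
            indexDir,
            new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36))
                .setIndexDeletionPolicy(policy));
    writer.addDocument(new Document());
    writer.commit(); // there must be at least one commit before a snapshot can be taken

    IndexCommit commit = policy.snapshot("backup-1"); // protected from deletion until released
    try {
      // copy commit.getFileNames() to the backup location here
    } finally {
      policy.release("backup-1"); // allow the snapshotted commit's files to be deleted again
    }
    writer.close();
  }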
  // LUCENE-2996: tests that addIndexes(IndexReader) applies existing deletes correctly.
  public void testExistingDeletes() throws Exception {
    Directory[] dirs = new Directory[2];
    for (int i = 0; i < dirs.length; i++) {
      dirs[i] = newDirectory();
      IndexWriterConfig conf =
          newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
      IndexWriter writer = new IndexWriter(dirs[i], conf);
      Document doc = new Document();
      doc.add(new StringField("id", "myid", Field.Store.NO));
      writer.addDocument(doc);
      writer.close();
    }

    IndexWriterConfig conf =
        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    IndexWriter writer = new IndexWriter(dirs[0], conf);

    // Now delete the document
    writer.deleteDocuments(new Term("id", "myid"));
    IndexReader r = DirectoryReader.open(dirs[1]);
    try {
      writer.addIndexes(r);
    } finally {
      r.close();
    }
    writer.commit();
    assertEquals(
        "Documents from the incoming index should not have been deleted", 1, writer.numDocs());
    writer.close();

    for (Directory dir : dirs) {
      dir.close();
    }
  }
  public void testSimpleSkip() throws IOException {
    Directory dir = new CountingRAMDirectory(new RAMDirectory());
    IndexWriter writer =
        new IndexWriter(
            dir,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer())
                .setCodec(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat()))
                .setMergePolicy(newLogMergePolicy()));
    Term term = new Term("test", "a");
    for (int i = 0; i < 5000; i++) {
      Document d1 = new Document();
      d1.add(newTextField(term.field(), term.text(), Field.Store.NO));
      writer.addDocument(d1);
    }
    writer.commit();
    writer.forceMerge(1);
    writer.close();

    AtomicReader reader = getOnlySegmentReader(DirectoryReader.open(dir));

    for (int i = 0; i < 2; i++) {
      counter = 0;
      DocsAndPositionsEnum tp = reader.termPositionsEnum(term);
      checkSkipTo(tp, 14, 185); // no skips
      checkSkipTo(tp, 17, 190); // one skip on level 0
      checkSkipTo(tp, 287, 200); // one skip on level 1, two on level 0

      // this test would fail if we had only one skip level,
      // because then more bytes would be read from the freqStream
      checkSkipTo(tp, 4800, 250); // one skip on level 2
    }
  }
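  // A sketch of the checkSkipTo helper used above (an assumed reconstruction, not the original):
  // "counter" counts the bytes read through the CountingRAMDirectory, so the third argument is an
  // upper bound on how much postings data may be read while advancing to the target document.
  public void checkSkipTo(DocsAndPositionsEnum tp, int target, int maxCounter) throws IOException {
    tp.advance(target);
    if (maxCounter < counter) {
      fail("too many bytes read: " + counter + " vs " + maxCounter);
    }
    assertEquals("wrong document after skipTo(" + target + ")", target, tp.docID());
  }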
  /**
   * Remove a stale file (uidIter.term().text()) from the index database (and the xref file)
   *
   * @throws java.io.IOException if an error occurs
   */
  private void removeFile() throws IOException {
    String path = Util.uid2url(uidIter.term().utf8ToString());

    for (IndexChangedListener listener : listeners) {
      listener.fileRemove(path);
    }
    writer.deleteDocuments(new Term(QueryBuilder.U, uidIter.term()));
    writer.prepareCommit();
    writer.commit();

    File xrefFile;
    if (RuntimeEnvironment.getInstance().isCompressXref()) {
      xrefFile = new File(xrefDir, path + ".gz");
    } else {
      xrefFile = new File(xrefDir, path);
    }
    File parent = xrefFile.getParentFile();

    if (!xrefFile.delete() && xrefFile.exists()) {
      log.log(Level.INFO, "Failed to remove obsolete xref-file: {0}", xrefFile.getAbsolutePath());
    }

    // Remove the parent directory if it's empty
    if (parent.delete()) {
      log.log(Level.FINE, "Removed empty xref dir:{0}", parent.getAbsolutePath());
    }
    setDirty();
    for (IndexChangedListener listener : listeners) {
      listener.fileRemoved(path);
    }
  }
  // Verifies no *.nrm exists when all fields omit norms:
  public void testNoNrmFile() throws Throwable {
    Directory ram = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriter writer =
        new IndexWriter(
            ram,
            newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
                .setMaxBufferedDocs(3)
                .setMergePolicy(newLogMergePolicy()));
    LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
    lmp.setMergeFactor(2);
    lmp.setNoCFSRatio(0.0);
    Document d = new Document();

    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setOmitNorms(true);
    Field f1 = newField("f1", "This field has no norms", customType);
    d.add(f1);

    for (int i = 0; i < 30; i++) {
      writer.addDocument(d);
    }

    writer.commit();

    assertNoNrm(ram);

    // force merge
    writer.forceMerge(1);
    // flush
    writer.close();

    assertNoNrm(ram);
    ram.close();
  }
  private void runTest(EnumSet<Type> types, TestType type)
      throws CorruptIndexException, IOException {
    Directory dir = newDirectory();
    IndexWriter writer =
        new IndexWriter(
            dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    int num_1 = atLeast(200);
    int num_2 = atLeast(200);
    int num_3 = atLeast(200);
    long[] values = new long[num_1 + num_2 + num_3];
    index(writer, randomValueType(types, random()), values, 0, num_1);
    writer.commit();

    index(writer, randomValueType(types, random()), values, num_1, num_2);
    writer.commit();

    if (random().nextInt(4) == 0) {
      // once in a while use addIndexes
      writer.forceMerge(1);

      Directory dir_2 = newDirectory();
      IndexWriter writer_2 =
          new IndexWriter(
              dir_2, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
      index(writer_2, randomValueType(types, random()), values, num_1 + num_2, num_3);
      writer_2.commit();
      writer_2.close();
      if (rarely()) {
        writer.addIndexes(dir_2);
      } else {
        // do a real merge here
        IndexReader open = maybeWrapReader(IndexReader.open(dir_2));
        writer.addIndexes(open);
        open.close();
      }
      dir_2.close();
    } else {
      index(writer, randomValueType(types, random()), values, num_1 + num_2, num_3);
    }

    writer.forceMerge(1);
    writer.close();
    assertValues(type, dir, values);
    dir.close();
  }
  // test deleteAll()
  public void testDeleteAll() throws IOException {
    Directory dir = new MockRAMDirectory();
    IndexWriter modifier =
        new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
    modifier.setMaxBufferedDocs(2);
    modifier.setMaxBufferedDeleteTerms(2);

    int id = 0;
    int value = 100;

    for (int i = 0; i < 7; i++) {
      addDoc(modifier, ++id, value);
    }
    modifier.commit();

    IndexReader reader = IndexReader.open(dir, true);
    assertEquals(7, reader.numDocs());
    reader.close();

    // Add 1 doc (so we will have something buffered)
    addDoc(modifier, 99, value);

    // Delete all
    modifier.deleteAll();

    // Delete all shouldn't be on disk yet
    reader = IndexReader.open(dir, true);
    assertEquals(7, reader.numDocs());
    reader.close();

    // Add a doc and update a doc (after the deleteAll, before the commit)
    addDoc(modifier, 101, value);
    updateDoc(modifier, 102, value);

    // commit the delete all
    modifier.commit();

    // Validate there are no docs left
    reader = IndexReader.open(dir, true);
    assertEquals(2, reader.numDocs());
    reader.close();

    modifier.close();
    dir.close();
  }
  // LUCENE-2593
  public void testCorruptionAfterDiskFullDuringMerge() throws IOException {
    MockDirectoryWrapper dir = newDirectory();
    // IndexWriter w = new IndexWriter(dir,
    //     newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setReaderPooling(true));
    IndexWriter w =
        new IndexWriter(
            dir,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
                .setMergeScheduler(new SerialMergeScheduler())
                .setReaderPooling(true)
                .setMergePolicy(newLogMergePolicy(2)));

    _TestUtil.keepFullyDeletedSegments(w);

    ((LogMergePolicy) w.getMergePolicy()).setMergeFactor(2);

    Document doc = new Document();
    doc.add(newField("f", "doctor who", Field.Store.YES, Field.Index.ANALYZED));
    w.addDocument(doc);
    w.commit();

    w.deleteDocuments(new Term("f", "who"));
    w.addDocument(doc);

    // disk fills up!
    FailTwiceDuringMerge ftdm = new FailTwiceDuringMerge();
    ftdm.setDoFail();
    dir.failOn(ftdm);

    try {
      w.commit();
      fail("fake disk full IOExceptions not hit");
    } catch (IOException ioe) {
      // expected
      assertTrue(ftdm.didFail1 || ftdm.didFail2);
    }
    _TestUtil.checkIndex(dir);
    ftdm.clearDoFail();
    w.addDocument(doc);
    w.close();

    dir.close();
  }
 private SegmentCommitInfo indexDoc(IndexWriter writer, String fileName) throws Exception {
   File file = new File(workDir, fileName);
   Document doc = new Document();
   InputStreamReader is = new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8);
   doc.add(new TextField("contents", is));
   writer.addDocument(doc);
   writer.commit();
   is.close();
   return writer.newestSegment();
 }
  // test that batched delete terms are flushed together
  public void testBatchDeletes() throws IOException {
    Directory dir = new MockRAMDirectory();
    IndexWriter modifier =
        new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
    modifier.setMaxBufferedDocs(2);
    modifier.setMaxBufferedDeleteTerms(2);

    int id = 0;
    int value = 100;

    for (int i = 0; i < 7; i++) {
      addDoc(modifier, ++id, value);
    }
    modifier.commit();

    IndexReader reader = IndexReader.open(dir, true);
    assertEquals(7, reader.numDocs());
    reader.close();

    id = 0;
    modifier.deleteDocuments(new Term("id", String.valueOf(++id)));
    modifier.deleteDocuments(new Term("id", String.valueOf(++id)));

    modifier.commit();

    reader = IndexReader.open(dir, true);
    assertEquals(5, reader.numDocs());
    reader.close();

    Term[] terms = new Term[3];
    for (int i = 0; i < terms.length; i++) {
      terms[i] = new Term("id", String.valueOf(++id));
    }
    modifier.deleteDocuments(terms);
    modifier.commit();
    reader = IndexReader.open(dir, true);
    assertEquals(2, reader.numDocs());
    reader.close();

    modifier.close();
    dir.close();
  }
  public DocsAndWriter indexRandomIWReader(int nThreads, int iterations, int range, Directory dir)
      throws IOException, InterruptedException {
    Map<String, Document> docs = new HashMap<>();
    IndexWriter w =
        RandomIndexWriter.mockIndexWriter(
            dir,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setOpenMode(OpenMode.CREATE)
                .setRAMBufferSizeMB(0.1)
                .setMaxBufferedDocs(maxBufferedDocs)
                .setMergePolicy(newLogMergePolicy()),
            new YieldTestPoint());
    w.commit();
    LogMergePolicy lmp = (LogMergePolicy) w.getConfig().getMergePolicy();
    lmp.setNoCFSRatio(0.0);
    lmp.setMergeFactor(mergeFactor);
    // w.setMaxMergeDocs(Integer.MAX_VALUE);
    // w.setMaxFieldLength(10000);
    // w.setRAMBufferSizeMB(1);
    // w.setMergeFactor(10);
    threads = new IndexingThread[nThreads];
    for (int i = 0; i < threads.length; i++) {
      IndexingThread th = new IndexingThread();
      th.w = w;
      th.base = 1000000 * i;
      th.range = range;
      th.iterations = iterations;
      threads[i] = th;
    }

    for (int i = 0; i < threads.length; i++) {
      threads[i].start();
    }
    for (int i = 0; i < threads.length; i++) {
      threads[i].join();
    }

    // w.forceMerge(1);
    // w.close();

    for (int i = 0; i < threads.length; i++) {
      IndexingThread th = threads[i];
      synchronized (th) {
        docs.putAll(th.docs);
      }
    }

    TestUtil.checkIndex(dir);
    DocsAndWriter dw = new DocsAndWriter();
    dw.docs = docs;
    dw.writer = w;
    return dw;
  }
  protected DirectoryReader indexDocsWithLucene(TestDoc[] testDocs) throws IOException {

    Map<String, Analyzer> mapping = new HashMap<String, Analyzer>();
    for (TestFieldSetting field : testDocs[0].fieldSettings) {
      if (field.storedPayloads) {
        mapping.put(
            field.name,
            new Analyzer() {
              @Override
              protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
                Tokenizer tokenizer = new StandardTokenizer(Version.CURRENT.luceneVersion, reader);
                TokenFilter filter = new LowerCaseFilter(Version.CURRENT.luceneVersion, tokenizer);
                filter = new TypeAsPayloadTokenFilter(filter);
                return new TokenStreamComponents(tokenizer, filter);
              }
            });
      }
    }
    PerFieldAnalyzerWrapper wrapper =
        new PerFieldAnalyzerWrapper(
            new StandardAnalyzer(Version.CURRENT.luceneVersion, CharArraySet.EMPTY_SET), mapping);

    Directory dir = new RAMDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(Version.CURRENT.luceneVersion, wrapper);

    conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(dir, conf);

    for (TestDoc doc : testDocs) {
      Document d = new Document();
      d.add(new Field("id", doc.id, StringField.TYPE_STORED));
      for (int i = 0; i < doc.fieldContent.length; i++) {
        FieldType type = new FieldType(TextField.TYPE_STORED);
        TestFieldSetting fieldSetting = doc.fieldSettings[i];

        type.setStoreTermVectorOffsets(fieldSetting.storedOffset);
        type.setStoreTermVectorPayloads(fieldSetting.storedPayloads);
        type.setStoreTermVectorPositions(
            fieldSetting.storedPositions
                || fieldSetting.storedPayloads
                || fieldSetting.storedOffset);
        type.setStoreTermVectors(true);
        type.freeze();
        d.add(new Field(fieldSetting.name, doc.fieldContent[i], type));
      }
      writer.updateDocument(new Term("id", doc.id), d);
      writer.commit();
    }
    writer.close();

    return DirectoryReader.open(dir);
  }
  /**
   * We assume that the initial indexing has been done and a set of reference objects has been found
   * and indexed in a separate directory. However, further documents have since been added and now
   * need a ranked list of reference objects. So we (i) fetch all new documents that are missing the
   * field "ro-order" and (ii) add this field.
   *
   * @param indexPath the index to update
   * @throws IOException
   */
  public void updateIndex(String indexPath) throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    int numDocs = reader.numDocs();
    boolean hasDeletions = reader.hasDeletions();
    int countUpdated = 0;

    IndexReader readerRo = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro")));
    ImageSearcher searcher =
        new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
    Map<String, Analyzer> perField = new HashMap<String, Analyzer>(1);
    perField.put("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    PerFieldAnalyzerWrapper aWrapper =
        new PerFieldAnalyzerWrapper(new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), perField);

    IndexWriter iw =
        new IndexWriter(
            FSDirectory.open(new File(indexPath)),
            new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper)
                .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
    StringBuilder sb = new StringBuilder(256);
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    for (int i = 0; i < numDocs; i++) {
      if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it.
      Document document = reader.document(i);
      if (document.getField("ro-order") == null) { // if the field is not here we create it.
        ImageSearchHits hits = searcher.search(document, readerRo);
        sb.delete(0, sb.length());
        for (int j = 0; j < numReferenceObjectsUsed; j++) {
          sb.append(hits.doc(j).getValues("ro-id")[0]);
          sb.append(' ');
        }
        // System.out.println(sb.toString());
        document.add(new TextField("ro-order", sb.toString(), Field.Store.YES));
        iw.updateDocument(
            new Term(
                DocumentBuilder.FIELD_NAME_IDENTIFIER,
                document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]),
            document);
        countUpdated++;
      }

      // progress report
      progress.setNumDocsProcessed(progress.getNumDocsProcessed() + 1);

      // debug:
      System.out.println("countUpdated = " + countUpdated);
    }
    iw.commit();
    iw.close();
  }
  public static void main(String[] args) throws IOException {
    Version version = Version.LUCENE_43;
    // Create a document
    Document document = new Document();
    Field field =
        new TextField("fieldName", "Hello man can you see this in index!", Field.Store.YES);
    field.setBoost(2.0f);
    Field fieldStore = new StringField("fieldName2", "fieldValueOnlyStore", Field.Store.YES);
    //
    // FieldType fieldAllType = new FieldType();
    // fieldAllType.setIndexed(true);
    // fieldAllType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    // fieldAllType.setOmitNorms(false);
    // fieldAllType.setStored(true);
    // fieldAllType.setStoreTermVectorOffsets(true);
    // fieldAllType.setStoreTermVectorPayloads(true);
    // fieldAllType.setStoreTermVectorPositions(true);
    // fieldAllType.setStoreTermVectors(true);
    // fieldAllType.setTokenized(true);
    // Field fieldAll = new Field("name", "all things need to store",
    // fieldAllType);

    document.add(field);
    // document.add(new BinaryDocValuesField("name", new BytesRef("hello")));
    document.add(fieldStore);
    // document.add(fieldAll);

    Document doc2 = new Document();
    doc2.add(field);

    // Create a directory to hold the index
    Directory directory = FSDirectory.open(new File("/home/waf/tmp/index"));
    // Directory directory = new RAMDirectory();
    // Configure the index writer
    Analyzer analyzer = new StandardAnalyzer(version);
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(version, analyzer);

    // indexWriterConfig.setCodec(new Lucene40Codec());
    // Initialize the index writer and add the documents to the index
    IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig);
    indexWriter.addDocument(document);
    indexWriter.addDocument(doc2);
    indexWriter.commit();
    indexWriter.close();
    // Query the index
    IndexReader reader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(reader);
    // Note: no document was indexed with a field called "name", so this query returns no hits.
    TopDocs result = indexSearcher.search(new TermQuery(new Term("name", "value")), 10);
    System.out.println(result.totalHits);
    reader.close();
  }
  // test when delete terms only apply to ram segments
  public void testRAMDeletes() throws IOException {
    for (int t = 0; t < 2; t++) {
      Directory dir = new MockRAMDirectory();
      IndexWriter modifier =
          new IndexWriter(
              dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
      modifier.setMaxBufferedDocs(4);
      modifier.setMaxBufferedDeleteTerms(4);

      int id = 0;
      int value = 100;

      addDoc(modifier, ++id, value);
      if (0 == t) modifier.deleteDocuments(new Term("value", String.valueOf(value)));
      else modifier.deleteDocuments(new TermQuery(new Term("value", String.valueOf(value))));
      addDoc(modifier, ++id, value);
      if (0 == t) {
        modifier.deleteDocuments(new Term("value", String.valueOf(value)));
        assertEquals(2, modifier.getNumBufferedDeleteTerms());
        assertEquals(1, modifier.getBufferedDeleteTermsSize());
      } else modifier.deleteDocuments(new TermQuery(new Term("value", String.valueOf(value))));

      addDoc(modifier, ++id, value);
      assertEquals(0, modifier.getSegmentCount());
      modifier.commit();

      modifier.commit();

      IndexReader reader = IndexReader.open(dir, true);
      assertEquals(1, reader.numDocs());

      int hitCount = getHitCount(dir, new Term("id", String.valueOf(id)));
      assertEquals(1, hitCount);
      reader.close();
      modifier.close();
      dir.close();
    }
  }
  public void testNoWaitClose() throws IOException {
    Directory directory = newDirectory();
    Document doc = new Document();
    Field idField = newStringField("id", "", Field.Store.YES);
    doc.add(idField);

    IndexWriter writer =
        new IndexWriter(
            directory,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setMaxBufferedDocs(2)
                .setMergePolicy(newLogMergePolicy(100)));

    for (int iter = 0; iter < 10; iter++) {

      for (int j = 0; j < 201; j++) {
        idField.setStringValue(Integer.toString(iter * 201 + j));
        writer.addDocument(doc);
      }

      int delID = iter * 201;
      for (int j = 0; j < 20; j++) {
        writer.deleteDocuments(new Term("id", Integer.toString(delID)));
        delID += 5;
      }

      // Force a bunch of merge threads to kick off so we
      // stress out aborting them on close:
      ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(3);
      writer.addDocument(doc);
      writer.commit();

      writer.close(false);

      IndexReader reader = DirectoryReader.open(directory);
      assertEquals((1 + iter) * 182, reader.numDocs());
      reader.close();

      // Reopen
      writer =
          new IndexWriter(
              directory,
              newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                  .setOpenMode(OpenMode.APPEND)
                  .setMergePolicy(newLogMergePolicy(100)));
    }
    writer.close();

    directory.close();
  }
 /**
  * Persists all snapshot information. If the given id and segment are not null, their information
  * is persisted as well.
  */
 private void persistSnapshotInfos(String id, String segment) throws IOException {
   writer.deleteAll();
   Document d = new Document();
   FieldType ft = new FieldType();
   ft.setStored(true);
   d.add(new Field(SNAPSHOTS_ID, "", ft));
   for (Entry<String, String> e : super.getSnapshots().entrySet()) {
     d.add(new Field(e.getKey(), e.getValue(), ft));
   }
   if (id != null) {
     d.add(new Field(id, segment, ft));
   }
   writer.addDocument(d);
   writer.commit();
 }
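 // A sketch of the readSnapshotsInfo(Directory) counterpart that the constructor shown earlier
 // iterates over; this is an assumed reconstruction, not the original method. It reads back the
 // single document written by persistSnapshotInfos and rebuilds the snapshot-id -> segments-file
 // map, skipping the SNAPSHOTS_ID marker field.
 private Map<String, String> readSnapshotsInfo(Directory dir) throws IOException {
   Map<String, String> snapshots = new HashMap<String, String>();
   IndexReader r = DirectoryReader.open(dir);
   try {
     // The snapshots index holds at most one document (see persistSnapshotInfos above).
     if (r.numDocs() == 1) {
       Document doc = r.document(r.maxDoc() - 1);
       if (doc.getField(SNAPSHOTS_ID) == null) {
         throw new IllegalStateException("directory is not a valid snapshots store");
       }
       doc.removeField(SNAPSHOTS_ID);
       for (IndexableField f : doc.getFields()) {
         snapshots.put(f.name(), f.stringValue());
       }
     } else if (r.numDocs() != 0) {
       throw new IllegalStateException("should be at most one document in the snapshots directory");
     }
   } finally {
     r.close();
   }
   return snapshots;
 }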
  // LUCENE-1274
  public void testPrepareCommitNoChanges() throws IOException {
    Directory dir = newDirectory();

    IndexWriter writer =
        new IndexWriter(
            dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    writer.prepareCommit();
    writer.commit();
    writer.close();

    IndexReader reader = DirectoryReader.open(dir);
    assertEquals(0, reader.numDocs());
    reader.close();
    dir.close();
  }
 public void testTypeChangeAfterCommitAndDeleteAll() throws Exception {
   Directory dir = newDirectory();
   IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
   IndexWriter writer = new IndexWriter(dir, conf);
   Document doc = new Document();
   doc.add(new NumericDocValuesField("dv", 0L));
   writer.addDocument(doc);
   writer.commit();
   writer.deleteAll();
   doc = new Document();
   doc.add(new SortedDocValuesField("dv", new BytesRef("foo")));
   writer.addDocument(doc);
   writer.close();
   dir.close();
 }
 @Override
 public void run() {
   try {
     long ramSize = 0;
     while (pendingDocs.decrementAndGet() > -1) {
       Document doc = docs.nextDoc();
       writer.addDocument(doc);
       long newRamSize = writer.ramSizeInBytes();
       if (newRamSize != ramSize) {
         ramSize = newRamSize;
       }
       if (doRandomCommit) {
         if (rarely()) {
           writer.commit();
         }
       }
     }
     writer.commit();
   } catch (Throwable ex) {
     System.out.println("FAILED exc:");
     ex.printStackTrace(System.out);
     throw new RuntimeException(ex);
   }
 }
  // LUCENE-1044: test writer.commit() when ac=false
  public void testForceCommit() throws IOException {
    Directory dir = newDirectory();

    IndexWriter writer =
        new IndexWriter(
            dir,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setMaxBufferedDocs(2)
                .setMergePolicy(newLogMergePolicy(5)));
    writer.commit();

    for (int i = 0; i < 23; i++) TestIndexWriter.addDoc(writer);

    DirectoryReader reader = DirectoryReader.open(dir);
    assertEquals(0, reader.numDocs());
    writer.commit();
    DirectoryReader reader2 = DirectoryReader.openIfChanged(reader);
    assertNotNull(reader2);
    assertEquals(0, reader.numDocs());
    assertEquals(23, reader2.numDocs());
    reader.close();

    for (int i = 0; i < 17; i++) TestIndexWriter.addDoc(writer);
    assertEquals(23, reader2.numDocs());
    reader2.close();
    reader = DirectoryReader.open(dir);
    assertEquals(23, reader.numDocs());
    reader.close();
    writer.commit();

    reader = DirectoryReader.open(dir);
    assertEquals(40, reader.numDocs());
    reader.close();
    writer.close();
    dir.close();
  }
  // Test that deletes committed after a merge started and
  // before it finishes, are correctly merged back:
  public void testDeleteMerging() throws IOException {
    Directory directory = newDirectory();

    LogDocMergePolicy mp = new LogDocMergePolicy();
    // Force degenerate merging so we can get a mix of
    // merging of segments with and without deletes at the
    // start:
    mp.setMinMergeDocs(1000);
    IndexWriter writer =
        new IndexWriter(
            directory,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setMergePolicy(mp));

    Document doc = new Document();
    Field idField = newStringField("id", "", Field.Store.YES);
    doc.add(idField);
    for (int i = 0; i < 10; i++) {
      if (VERBOSE) {
        System.out.println("\nTEST: cycle");
      }
      for (int j = 0; j < 100; j++) {
        idField.setStringValue(Integer.toString(i * 100 + j));
        writer.addDocument(doc);
      }

      int delID = i;
      while (delID < 100 * (1 + i)) {
        if (VERBOSE) {
          System.out.println("TEST: del " + delID);
        }
        writer.deleteDocuments(new Term("id", "" + delID));
        delID += 10;
      }

      writer.commit();
    }

    writer.close();
    IndexReader reader = DirectoryReader.open(directory);
    // Verify that we did not lose any deletes...
    assertEquals(450, reader.numDocs());
    reader.close();
    directory.close();
  }
 /**
  * Writes the document to the directory using the analyzer and the similarity score; returns the
  * SegmentCommitInfo describing the new segment
  */
 public static SegmentCommitInfo writeDoc(
     Random random, Directory dir, Analyzer analyzer, Similarity similarity, Document doc)
     throws IOException {
   IndexWriter writer =
       new IndexWriter(
           dir,
           new IndexWriterConfig(
                   /* LuceneTestCase.newIndexWriterConfig(random, */
                   TEST_VERSION_CURRENT, analyzer)
               .setSimilarity(
                   similarity == null ? IndexSearcher.getDefaultSimilarity() : similarity));
   // writer.setNoCFSRatio(0.0);
   writer.addDocument(doc);
   writer.commit();
   SegmentCommitInfo info = writer.newestSegment();
   writer.close();
   return info;
 }
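 // A hypothetical caller sketch for writeDoc above; the field name and the doc-count check are
 // assumptions. Each call flushes and commits exactly one new segment, so the returned
 // SegmentCommitInfo should describe a single-document segment.
 public static void writeDocExample(Random random, Directory dir) throws IOException {
   Document doc = new Document();
   doc.add(new TextField("content", "a single buffered document", Field.Store.NO));
   SegmentCommitInfo info = writeDoc(random, dir, new MockAnalyzer(random), null, doc);
   if (info.info.getDocCount() != 1) {
     throw new AssertionError("expected a single-document segment, got " + info.info.getDocCount());
   }
 }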
  // Verifies no *.prx exists when all fields omit term positions:
  public void testNoPrxFile() throws Throwable {
    Directory ram = newDirectory();
    if (ram instanceof MockDirectoryWrapper) {
      // we verify some files get deleted
      ((MockDirectoryWrapper) ram).setEnableVirusScanner(false);
    }
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriter writer =
        new IndexWriter(
            ram,
            newIndexWriterConfig(analyzer)
                .setMaxBufferedDocs(3)
                .setMergePolicy(newLogMergePolicy()));
    LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
    lmp.setMergeFactor(2);
    lmp.setNoCFSRatio(0.0);
    Document d = new Document();

    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    Field f1 = newField("f1", "This field has term freqs", ft);
    d.add(f1);

    for (int i = 0; i < 30; i++) writer.addDocument(d);

    writer.commit();

    assertNoPrx(ram);

    // now add some documents with positions, and check there is no prox after optimization
    d = new Document();
    f1 = newTextField("f1", "This field has positions", Field.Store.NO);
    d.add(f1);

    for (int i = 0; i < 30; i++) writer.addDocument(d);

    // force merge
    writer.forceMerge(1);
    // flush
    writer.close();

    assertNoPrx(ram);
    ram.close();
  }
 public void testTermDocsEnum() throws Exception {
   Directory dir = newDirectory();
   IndexWriter w =
       new IndexWriter(
           dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
   Document d = new Document();
   d.add(newStringField("f", "j", Field.Store.NO));
   w.addDocument(d);
   w.commit();
   w.addDocument(d);
   IndexReader r = w.getReader();
   w.close();
   DocsEnum de = MultiFields.getTermDocsEnum(r, null, "f", new BytesRef("j"));
   assertEquals(0, de.nextDoc());
   assertEquals(1, de.nextDoc());
   assertEquals(DocIdSetIterator.NO_MORE_DOCS, de.nextDoc());
   r.close();
   dir.close();
 }