Example #1
  // Tests whether the DocumentWriter correctly enables the
  // omitTermFreqAndPositions bit in the FieldInfo
  public void testOmitTermFreqAndPositions() throws Exception {
    Directory ram = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
    Document d = new Document();

    // this field will have Tf
    Field f1 = newField("f1", "This field has term freqs", normalType);
    d.add(f1);

    // this field will NOT have Tf
    Field f2 = newField("f2", "This field has NO Tf in all docs", omitType);
    d.add(f2);

    writer.addDocument(d);
    writer.forceMerge(1);
    // now we add another document which has term freq for field f2 and not for f1, and
    // verify that the SegmentMerger keeps things consistent
    d = new Document();

    // Reverse
    f1 = newField("f1", "This field has term freqs", omitType);
    d.add(f1);

    f2 = newField("f2", "This field has NO Tf in all docs", normalType);
    d.add(f2);

    writer.addDocument(d);

    // force merge
    writer.forceMerge(1);
    // flush
    writer.close();

    SegmentReader reader = getOnlySegmentReader(DirectoryReader.open(ram));
    FieldInfos fi = reader.getFieldInfos();
    assertEquals(
        "OmitTermFreqAndPositions field bit should be set.",
        IndexOptions.DOCS_ONLY,
        fi.fieldInfo("f1").getIndexOptions());
    assertEquals(
        "OmitTermFreqAndPositions field bit should be set.",
        IndexOptions.DOCS_ONLY,
        fi.fieldInfo("f2").getIndexOptions());

    reader.close();
    ram.close();
  }
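  // "normalType" and "omitType" above are FieldType constants defined by the enclosing
  // test class and not shown in this example. A minimal sketch of plausible definitions,
  // assuming the Lucene 4.x FieldType/IndexOptions API (illustrative, not the actual source):
  private static final FieldType normalType = new FieldType(TextField.TYPE_NOT_STORED);
  private static final FieldType omitType = new FieldType(TextField.TYPE_NOT_STORED);
  static {
    // normalType keeps the default DOCS_AND_FREQS_AND_POSITIONS index options;
    // omitType keeps only doc IDs, dropping term frequencies and positions:
    omitType.setIndexOptions(IndexOptions.DOCS_ONLY);
    normalType.freeze();
    omitType.freeze();
  }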
  public void testSimpleSkip() throws IOException {
    Directory dir = new CountingRAMDirectory(new RAMDirectory());
    IndexWriter writer =
        new IndexWriter(
            dir,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer())
                .setCodec(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat()))
                .setMergePolicy(newLogMergePolicy()));
    Term term = new Term("test", "a");
    for (int i = 0; i < 5000; i++) {
      Document d1 = new Document();
      d1.add(newTextField(term.field(), term.text(), Field.Store.NO));
      writer.addDocument(d1);
    }
    writer.commit();
    writer.forceMerge(1);
    writer.close();

    AtomicReader reader = getOnlySegmentReader(DirectoryReader.open(dir));

    for (int i = 0; i < 2; i++) {
      counter = 0;
      DocsAndPositionsEnum tp = reader.termPositionsEnum(term);
      checkSkipTo(tp, 14, 185); // no skips
      checkSkipTo(tp, 17, 190); // one skip on level 0
      checkSkipTo(tp, 287, 200); // one skip on level 1, two on level 0

      // this test would fail if we had only one skip level,
      // because then more bytes would be read from the freqStream
      checkSkipTo(tp, 4800, 250); // one skip on level 2
    }
  }
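  // checkSkipTo(...) is a helper of the enclosing test class, and "counter" is a field that
  // CountingRAMDirectory increments for every byte read. A sketch of what the helper
  // plausibly does, assuming the 4.x DocsAndPositionsEnum API (illustrative):
  private void checkSkipTo(DocsAndPositionsEnum tp, int target, int maxCounter) throws IOException {
    tp.advance(target);
    if (maxCounter < counter) {
      fail("Too many bytes read: " + counter + " vs " + maxCounter);
    }
    assertEquals("Wrong document " + tp.docID() + " after advancing to " + target,
        target, tp.docID());
  }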
  // LUCENE-1262
  public void testExceptions() throws Throwable {
    Path indexDir = createTempDir("testfieldswriterexceptions");

    Directory fsDir = newFSDirectory(indexDir);
    FaultyFSDirectory dir = new FaultyFSDirectory(fsDir);
    IndexWriterConfig iwc =
        newIndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(dir, iwc);
    for (int i = 0; i < 2; i++) writer.addDocument(testDoc);
    writer.forceMerge(1);
    writer.close();

    IndexReader reader = DirectoryReader.open(dir);
    dir.startFailing();

    boolean exc = false;

    for (int i = 0; i < 2; i++) {
      try {
        reader.document(i);
      } catch (IOException ioe) {
        // expected
        exc = true;
      }
      try {
        reader.document(i);
      } catch (IOException ioe) {
        // expected
        exc = true;
      }
    }
    assertTrue(exc);
    reader.close();
    dir.close();
  }
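  // FaultyFSDirectory is a test-only Directory defined by the enclosing class; startFailing()
  // arms it so that subsequent reads throw. A minimal sketch of the idea, assuming Lucene's
  // FilterDirectory (illustrative; the real class fails inside the returned IndexInput):
  private static class FaultyFSDirectory extends FilterDirectory {
    volatile boolean doFail;

    FaultyFSDirectory(Directory delegate) {
      super(delegate);
    }

    void startFailing() {
      doFail = true;
    }

    @Override
    public IndexInput openInput(String name, IOContext context) throws IOException {
      if (doFail) {
        throw new IOException("simulated I/O failure opening " + name);
      }
      return super.openInput(name, context);
    }
  }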
  /**
   * Tests that index merging (specifically addIndexes(Directory...)) doesn't change the index order
   * of documents.
   */
  public void testLucene() throws IOException {
    int num = 100;

    Directory indexA = newDirectory();
    Directory indexB = newDirectory();

    fillIndex(random(), indexA, 0, num);
    boolean fail = verifyIndex(indexA, 0);
    if (fail) {
      fail("Index a is invalid");
    }

    fillIndex(random(), indexB, num, num);
    fail = verifyIndex(indexB, num);
    if (fail) {
      fail("Index b is invalid");
    }

    Directory merged = newDirectory();

    IndexWriter writer =
        new IndexWriter(
            merged,
            newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy(2)));
    writer.addIndexes(indexA, indexB);
    writer.forceMerge(1);
    writer.close();

    fail = verifyIndex(merged, 0);

    assertFalse("The merged index is invalid", fail);
    indexA.close();
    indexB.close();
    merged.close();
  }
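  // fillIndex(...) and verifyIndex(...) are helpers of the enclosing test class. Plausible
  // sketches (illustrative): fillIndex stores each doc's intended position in a "count"
  // field, and verifyIndex reports whether any doc moved out of order:
  private void fillIndex(Random random, Directory dir, int start, int numDocs) throws IOException {
    IndexWriter writer = new IndexWriter(dir,
        newIndexWriterConfig(new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE));
    for (int i = start; i < start + numDocs; i++) {
      Document temp = new Document();
      temp.add(newStringField("count", "" + i, Field.Store.YES));
      writer.addDocument(temp);
    }
    writer.close();
  }

  private boolean verifyIndex(Directory directory, int startAt) throws IOException {
    boolean fail = false;
    IndexReader reader = DirectoryReader.open(directory);
    for (int i = 0; i < reader.maxDoc(); i++) {
      if (!("" + (i + startAt)).equals(reader.document(i).get("count"))) {
        fail = true; // document order changed during merge
      }
    }
    reader.close();
    return fail;
  }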
Example #5
  // Verifies no *.nrm exists when all fields omit norms:
  public void testNoNrmFile() throws Throwable {
    Directory ram = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriter writer =
        new IndexWriter(
            ram,
            newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
                .setMaxBufferedDocs(3)
                .setMergePolicy(newLogMergePolicy()));
    LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
    lmp.setMergeFactor(2);
    lmp.setNoCFSRatio(0.0);
    Document d = new Document();

    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setOmitNorms(true);
    Field f1 = newField("f1", "This field has no norms", customType);
    d.add(f1);

    for (int i = 0; i < 30; i++) {
      writer.addDocument(d);
    }

    writer.commit();

    assertNoNrm(ram);

    // force merge
    writer.forceMerge(1);
    // flush
    writer.close();

    assertNoNrm(ram);
    ram.close();
  }
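  // assertNoNrm(...) is a helper of the enclosing test class. A minimal sketch, assuming it
  // simply scans the directory's file names for norms extensions (illustrative):
  private void assertNoNrm(Directory dir) throws Throwable {
    for (String file : dir.listAll()) {
      assertFalse("unexpected norms file: " + file,
          file.endsWith(".nrm") || file.endsWith(".len"));
    }
  }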
  // LUCENE-3870
  public void testLengthPrefixAcrossTwoPages() throws Exception {
    Directory d = newDirectory();
    IndexWriter w =
        new IndexWriter(d, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    Document doc = new Document();
    byte[] bytes = new byte[32764];
    BytesRef b = new BytesRef();
    b.bytes = bytes;
    b.length = bytes.length;
    doc.add(new SortedDocValuesField("field", b));
    w.addDocument(doc);
    bytes[0] = 1;
    w.addDocument(doc);
    w.forceMerge(1);
    DirectoryReader r = w.getReader();
    BinaryDocValues s = FieldCache.DEFAULT.getTerms(getOnlySegmentReader(r), "field");

    BytesRef bytes1 = new BytesRef();
    s.get(0, bytes1);
    assertEquals(bytes.length, bytes1.length);
    bytes[0] = 0;
    assertEquals(b, bytes1);

    s.get(1, bytes1);
    assertEquals(bytes.length, bytes1.length);
    bytes[0] = 1;
    assertEquals(b, bytes1);
    r.close();
    w.close();
    d.close();
  }
  /*
   * Test a deletion policy that keeps last N commits.
   */
  public void testKeepLastNDeletionPolicy() throws IOException {
    final int N = 5;

    for (int pass = 0; pass < 2; pass++) {

      boolean useCompoundFile = (pass % 2) != 0;

      Directory dir = newDirectory();
      if (dir instanceof MockDirectoryWrapper) {
        // test manually deletes files
        ((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
      }

      KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(N);
      for (int j = 0; j < N + 1; j++) {
        IndexWriterConfig conf =
            newIndexWriterConfig(new MockAnalyzer(random()))
                .setOpenMode(OpenMode.CREATE)
                .setIndexDeletionPolicy(policy)
                .setMaxBufferedDocs(10);
        MergePolicy mp = conf.getMergePolicy();
        mp.setNoCFSRatio(useCompoundFile ? 1.0 : 0.0);
        IndexWriter writer = new IndexWriter(dir, conf);
        policy = (KeepLastNDeletionPolicy) writer.getConfig().getIndexDeletionPolicy();
        for (int i = 0; i < 17; i++) {
          addDoc(writer);
        }
        writer.forceMerge(1);
        writer.close();
      }

      assertTrue(policy.numDelete > 0);
      assertEquals(N + 1, policy.numOnInit);
      assertEquals(N + 1, policy.numOnCommit);

      // Simplistic check: just verify only the past N segments_N's still
      // exist, and, I can open a reader on each:
      long gen = SegmentInfos.getLastCommitGeneration(dir);
      for (int i = 0; i < N + 1; i++) {
        try {
          IndexReader reader = DirectoryReader.open(dir);
          reader.close();
          if (i == N) {
            fail("should have failed on commits prior to last " + N);
          }
        } catch (IOException e) {
          if (i != N) {
            throw e;
          }
        }
        if (i < N) {
          dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
        }
        gen--;
      }

      dir.close();
    }
  }
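  // KeepLastNDeletionPolicy, with its numOnInit/numOnCommit/numDelete counters, is defined by
  // the enclosing test class. A sketch of the core idea, assuming the IndexDeletionPolicy API
  // (illustrative; the real class also verifies commit ordering):
  static class KeepLastNDeletionPolicy extends IndexDeletionPolicy {
    int numOnInit, numOnCommit, numDelete;
    final int numToKeep;

    KeepLastNDeletionPolicy(int numToKeep) {
      this.numToKeep = numToKeep;
    }

    @Override
    public void onInit(List<? extends IndexCommit> commits) {
      numOnInit++;
      doDeletes(commits);
    }

    @Override
    public void onCommit(List<? extends IndexCommit> commits) {
      numOnCommit++;
      doDeletes(commits);
    }

    private void doDeletes(List<? extends IndexCommit> commits) {
      // delete everything except the most recent numToKeep commits:
      for (int i = 0; i < commits.size() - numToKeep; i++) {
        commits.get(i).delete();
        numDelete++;
      }
    }
  }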
  public void testForceMergeNotNeeded() throws IOException {
    try (Directory dir = newDirectory()) {
      final AtomicBoolean mayMerge = new AtomicBoolean(true);
      final MergeScheduler mergeScheduler =
          new SerialMergeScheduler() {
            @Override
            public synchronized void merge(
                IndexWriter writer, MergeTrigger trigger, boolean newMergesFound)
                throws IOException {
              if (mayMerge.get() == false) {
                MergePolicy.OneMerge merge = writer.getNextMerge();
                if (merge != null) {
                  System.out.println(
                      "TEST: we should not need any merging, yet merge policy returned merge "
                          + merge);
                  throw new AssertionError();
                }
              }

              super.merge(writer, trigger, newMergesFound);
            }
          };

      MergePolicy mp = mergePolicy();
      assumeFalse(
          "this test cannot tolerate random forceMerges",
          mp.toString().contains("MockRandomMergePolicy"));
      mp.setNoCFSRatio(random().nextBoolean() ? 0 : 1);

      IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
      iwc.setMergeScheduler(mergeScheduler);
      iwc.setMergePolicy(mp);

      IndexWriter writer = new IndexWriter(dir, iwc);
      final int numSegments = TestUtil.nextInt(random(), 2, 20);
      for (int i = 0; i < numSegments; ++i) {
        final int numDocs = TestUtil.nextInt(random(), 1, 5);
        for (int j = 0; j < numDocs; ++j) {
          writer.addDocument(new Document());
        }
        writer.getReader().close();
      }
      for (int i = 5; i >= 0; --i) {
        final int segmentCount = writer.getSegmentCount();
        final int maxNumSegments = i == 0 ? 1 : TestUtil.nextInt(random(), 1, 10);
        mayMerge.set(segmentCount > maxNumSegments);
        if (VERBOSE) {
          System.out.println(
              "TEST: now forceMerge(maxNumSegments="
                  + maxNumSegments
                  + ") vs segmentCount="
                  + segmentCount);
        }
        writer.forceMerge(maxNumSegments);
      }
      writer.close();
    }
  }
  private void runTest(EnumSet<Type> types, TestType type)
      throws CorruptIndexException, IOException {
    Directory dir = newDirectory();
    IndexWriter writer =
        new IndexWriter(
            dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    int num_1 = atLeast(200);
    int num_2 = atLeast(200);
    int num_3 = atLeast(200);
    long[] values = new long[num_1 + num_2 + num_3];
    index(writer, randomValueType(types, random()), values, 0, num_1);
    writer.commit();

    index(writer, randomValueType(types, random()), values, num_1, num_2);
    writer.commit();

    if (random().nextInt(4) == 0) {
      // once in a while use addIndexes
      writer.forceMerge(1);

      Directory dir_2 = newDirectory();
      IndexWriter writer_2 =
          new IndexWriter(
              dir_2, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
      index(writer_2, randomValueType(types, random()), values, num_1 + num_2, num_3);
      writer_2.commit();
      writer_2.close();
      if (rarely()) {
        writer.addIndexes(dir_2);
      } else {
        // do a real merge here
        IndexReader open = maybeWrapReader(IndexReader.open(dir_2));
        writer.addIndexes(open);
        open.close();
      }
      dir_2.close();
    } else {
      index(writer, randomValueType(types, random()), values, num_1 + num_2, num_3);
    }

    writer.forceMerge(1);
    writer.close();
    assertValues(type, dir, values);
    dir.close();
  }
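  // randomValueType(...) and index(...) are helpers of the enclosing test class. A plausible
  // sketch of randomValueType, which just picks a random member of the EnumSet (illustrative):
  private Type randomValueType(EnumSet<Type> types, Random random) {
    Type[] array = types.toArray(new Type[types.size()]);
    return array[random.nextInt(array.length)];
  }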
  // indexes Integer.MAX_VALUE docs with a fixed binary field
  public void testFixedSorted() throws Exception {
    BaseDirectoryWrapper dir = newFSDirectory(_TestUtil.getTempDir("2BFixedSorted"));
    if (dir instanceof MockDirectoryWrapper) {
      ((MockDirectoryWrapper) dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
    }

    IndexWriter w =
        new IndexWriter(
            dir,
            new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                .setRAMBufferSizeMB(256.0)
                .setMergeScheduler(new ConcurrentMergeScheduler())
                .setMergePolicy(newLogMergePolicy(false, 10))
                .setOpenMode(IndexWriterConfig.OpenMode.CREATE));

    Document doc = new Document();
    byte bytes[] = new byte[2];
    BytesRef data = new BytesRef(bytes);
    SortedDocValuesField dvField = new SortedDocValuesField("dv", data);
    doc.add(dvField);
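
    // Note: the loop below stores bits 15..8 and 7..0 of i in bytes[0]/bytes[1], i.e. the
    // value is effectively i % 65536, so the ~2.1B docs reuse at most 65536 distinct
    // SortedDocValues terms; the verification loop later recomputes the same wrap-around.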

    for (int i = 0; i < Integer.MAX_VALUE; i++) {
      bytes[0] = (byte) (i >> 8);
      bytes[1] = (byte) i;
      w.addDocument(doc);
      if (i % 100000 == 0) {
        System.out.println("indexed: " + i);
        System.out.flush();
      }
    }

    w.forceMerge(1);
    w.close();

    System.out.println("verifying...");
    System.out.flush();

    DirectoryReader r = DirectoryReader.open(dir);
    int expectedValue = 0;
    for (AtomicReaderContext context : r.leaves()) {
      AtomicReader reader = context.reader();
      BytesRef scratch = new BytesRef();
      BinaryDocValues dv = reader.getSortedDocValues("dv");
      for (int i = 0; i < reader.maxDoc(); i++) {
        bytes[0] = (byte) (expectedValue >> 8);
        bytes[1] = (byte) expectedValue;
        dv.get(i, scratch);
        assertEquals(data, scratch);
        expectedValue++;
      }
    }

    r.close();
    dir.close();
  }
Example #11
  // Tests whether the DocumentWriter correctly enables the
  // omitNorms bit in the FieldInfo
  public void testOmitNorms() throws Exception {
    Directory ram = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
    Document d = new Document();

    // this field will have norms
    Field f1 = newTextField("f1", "This field has norms", Field.Store.NO);
    d.add(f1);

    // this field will NOT have norms
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setOmitNorms(true);
    Field f2 = newField("f2", "This field has NO norms in all docs", customType);
    d.add(f2);

    writer.addDocument(d);
    writer.forceMerge(1);
    // now we add another document which has norms for field f2 and not for f1, and verify
    // that the SegmentMerger keeps things consistent
    d = new Document();

    // Reverse
    d.add(newField("f1", "This field has norms", customType));

    d.add(newTextField("f2", "This field has NO norms in all docs", Field.Store.NO));

    writer.addDocument(d);

    // force merge
    writer.forceMerge(1);
    // flush
    writer.close();

    SegmentReader reader = getOnlySegmentReader(DirectoryReader.open(ram));
    FieldInfos fi = reader.getFieldInfos();
    assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f1").omitsNorms());
    assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f2").omitsNorms());

    reader.close();
    ram.close();
  }
  /* Test keeping NO commit points.  This is a viable and
   * useful case, e.g. where you want to build a big index and
   * you know there are no readers.
   */
  public void testKeepNoneOnInitDeletionPolicy() throws IOException {
    for (int pass = 0; pass < 2; pass++) {

      boolean useCompoundFile = (pass % 2) != 0;

      Directory dir = newDirectory();

      IndexWriterConfig conf =
          newIndexWriterConfig(new MockAnalyzer(random()))
              .setOpenMode(OpenMode.CREATE)
              .setIndexDeletionPolicy(new KeepNoneOnInitDeletionPolicy())
              .setMaxBufferedDocs(10);
      MergePolicy mp = conf.getMergePolicy();
      mp.setNoCFSRatio(useCompoundFile ? 1.0 : 0.0);
      IndexWriter writer = new IndexWriter(dir, conf);
      KeepNoneOnInitDeletionPolicy policy =
          (KeepNoneOnInitDeletionPolicy) writer.getConfig().getIndexDeletionPolicy();
      for (int i = 0; i < 107; i++) {
        addDoc(writer);
      }
      writer.close();

      conf =
          newIndexWriterConfig(new MockAnalyzer(random()))
              .setOpenMode(OpenMode.APPEND)
              .setIndexDeletionPolicy(policy);
      mp = conf.getMergePolicy();
      mp.setNoCFSRatio(1.0);
      writer = new IndexWriter(dir, conf);
      policy = (KeepNoneOnInitDeletionPolicy) writer.getConfig().getIndexDeletionPolicy();
      writer.forceMerge(1);
      writer.close();

      assertEquals(2, policy.numOnInit);
      // If we are not auto committing then there should
      // be exactly 2 commits (one per close above):
      assertEquals(2, policy.numOnCommit);

      // Simplistic check: just verify the index is in fact
      // readable:
      IndexReader reader = DirectoryReader.open(dir);
      reader.close();

      dir.close();
    }
  }
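  // KeepNoneOnInitDeletionPolicy is defined by the enclosing test class. A sketch of the idea,
  // assuming the IndexDeletionPolicy API (illustrative): delete every inherited commit on init,
  // and keep only the newest commit afterwards.
  static class KeepNoneOnInitDeletionPolicy extends IndexDeletionPolicy {
    int numOnInit, numOnCommit;

    @Override
    public void onInit(List<? extends IndexCommit> commits) {
      numOnInit++;
      for (IndexCommit commit : commits) {
        commit.delete(); // remove all pre-existing commit points
      }
    }

    @Override
    public void onCommit(List<? extends IndexCommit> commits) {
      numOnCommit++;
      for (int i = 0; i < commits.size() - 1; i++) {
        commits.get(i).delete(); // keep only the most recent commit
      }
    }
  }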
  // Verifies no *.prx exists when all fields omit term positions:
  public void testNoPrxFile() throws Throwable {
    Directory ram = newDirectory();
    if (ram instanceof MockDirectoryWrapper) {
      // we verify some files get deleted
      ((MockDirectoryWrapper) ram).setEnableVirusScanner(false);
    }
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriter writer =
        new IndexWriter(
            ram,
            newIndexWriterConfig(analyzer)
                .setMaxBufferedDocs(3)
                .setMergePolicy(newLogMergePolicy()));
    LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
    lmp.setMergeFactor(2);
    lmp.setNoCFSRatio(0.0);
    Document d = new Document();

    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    Field f1 = newField("f1", "This field has term freqs", ft);
    d.add(f1);

    for (int i = 0; i < 30; i++) writer.addDocument(d);

    writer.commit();

    assertNoPrx(ram);

    // now add some documents with positions, and check there is no prox after optimization
    d = new Document();
    f1 = newTextField("f1", "This field has positions", Field.Store.NO);
    d.add(f1);

    for (int i = 0; i < 30; i++) writer.addDocument(d);

    // force merge
    writer.forceMerge(1);
    // flush
    writer.close();

    assertNoPrx(ram);
    ram.close();
  }
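  // assertNoPrx(...) is a helper of the enclosing test class. A minimal sketch, assuming it
  // scans the directory for the codec's proximity/positions files (illustrative):
  private void assertNoPrx(Directory dir) throws Throwable {
    for (String file : dir.listAll()) {
      assertFalse("unexpected positions file: " + file,
          file.endsWith(".prx") || file.endsWith(".pos"));
    }
  }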
Example #14
  // Make sure that first adding docs that do not omitNorms for
  // field X, then adding docs that do omitNorms for that same
  // field, leaves the omitNorms bits correct after merging.
  public void testMixedRAM() throws Exception {
    Directory ram = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriter writer =
        new IndexWriter(
            ram,
            newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
                .setMaxBufferedDocs(10)
                .setMergePolicy(newLogMergePolicy(2)));
    Document d = new Document();

    // this field will have norms
    Field f1 = newTextField("f1", "This field has norms", Field.Store.NO);
    d.add(f1);

    // this field will NOT have norms

    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setOmitNorms(true);
    Field f2 = newField("f2", "This field has NO norms in all docs", customType);
    d.add(f2);

    for (int i = 0; i < 5; i++) {
      writer.addDocument(d);
    }

    for (int i = 0; i < 20; i++) {
      writer.addDocument(d);
    }

    // force merge
    writer.forceMerge(1);

    // flush
    writer.close();

    SegmentReader reader = getOnlySegmentReader(DirectoryReader.open(ram));
    FieldInfos fi = reader.getFieldInfos();
    assertTrue("OmitNorms field bit should not be set.", !fi.fieldInfo("f1").omitsNorms());
    assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f2").omitsNorms());

    reader.close();
    ram.close();
  }
Example #15
  // Make sure that first adding docs that do not omitTermFreqAndPositions for
  // field X, then adding docs that do omitTermFreqAndPositions for that same
  // field, leaves the index options correct after merging.
  public void testMixedRAM() throws Exception {
    Directory ram = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriter writer =
        new IndexWriter(
            ram,
            newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
                .setMaxBufferedDocs(10)
                .setMergePolicy(newLogMergePolicy(2)));
    Document d = new Document();

    // this field will have Tf
    Field f1 = newField("f1", "This field has term freqs", normalType);
    d.add(f1);

    // this field will NOT have Tf
    Field f2 = newField("f2", "This field has NO Tf in all docs", omitType);
    d.add(f2);

    for (int i = 0; i < 5; i++) writer.addDocument(d);

    for (int i = 0; i < 20; i++) writer.addDocument(d);

    // force merge
    writer.forceMerge(1);

    // flush
    writer.close();

    SegmentReader reader = getOnlySegmentReader(DirectoryReader.open(ram));
    FieldInfos fi = reader.getFieldInfos();
    assertEquals(
        "OmitTermFreqAndPositions field bit should not be set.",
        IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
        fi.fieldInfo("f1").getIndexOptions());
    assertEquals(
        "OmitTermFreqAndPositions field bit should be set.",
        IndexOptions.DOCS_ONLY,
        fi.fieldInfo("f2").getIndexOptions());

    reader.close();
    ram.close();
  }
Example #16
  public void testWithPendingDeletes3() throws IOException {
    // main directory
    Directory dir = newDirectory();
    // auxiliary directory
    Directory aux = newDirectory();

    setUpDirs(dir, aux);
    IndexWriter writer =
        newWriter(
            dir,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setOpenMode(OpenMode.APPEND));

    // Adds 10 docs, then replaces them with another 10
    // docs, so 10 pending deletes:
    for (int i = 0; i < 20; i++) {
      Document doc = new Document();
      doc.add(newStringField("id", "" + (i % 10), Field.Store.NO));
      doc.add(newTextField("content", "bbb " + i, Field.Store.NO));
      writer.updateDocument(new Term("id", "" + (i % 10)), doc);
    }

    // Deletes one of the 10 added docs, leaving 9:
    PhraseQuery q = new PhraseQuery();
    q.add(new Term("content", "bbb"));
    q.add(new Term("content", "14"));
    writer.deleteDocuments(q);

    writer.addIndexes(aux);

    writer.forceMerge(1);
    writer.commit();

    verifyNumDocs(dir, 1039);
    verifyTermDocs(dir, new Term("content", "aaa"), 1030);
    verifyTermDocs(dir, new Term("content", "bbb"), 9);

    writer.close();
    dir.close();
    aux.close();
  }
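  // verifyNumDocs(...) and verifyTermDocs(...) are helpers of the enclosing test class.
  // Plausible sketches (illustrative): one checks the doc counts, the other counts postings
  // for a term via MultiFields:
  private void verifyNumDocs(Directory dir, int numDocs) throws IOException {
    IndexReader reader = DirectoryReader.open(dir);
    assertEquals(numDocs, reader.maxDoc());
    assertEquals(numDocs, reader.numDocs());
    reader.close();
  }

  private void verifyTermDocs(Directory dir, Term term, int numDocs) throws IOException {
    IndexReader reader = DirectoryReader.open(dir);
    DocsEnum docsEnum = MultiFields.getTermDocsEnum(reader, null, term.field(), term.bytes());
    int count = 0;
    while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      count++;
    }
    assertEquals(numDocs, count);
    reader.close();
  }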
Example #17
  // LUCENE-1262
  public void testExceptions() throws Throwable {
    File indexDir = _TestUtil.getTempDir("testfieldswriterexceptions");

    try {
      Directory dir = new FaultyFSDirectory(indexDir);
      IndexWriter writer =
          new IndexWriter(
              dir,
              newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                  .setOpenMode(OpenMode.CREATE));
      for (int i = 0; i < 2; i++) writer.addDocument(testDoc);
      writer.forceMerge(1);
      writer.close();

      IndexReader reader = DirectoryReader.open(dir);

      FaultyIndexInput.doFail = true;

      boolean exc = false;

      for (int i = 0; i < 2; i++) {
        try {
          reader.document(i);
        } catch (IOException ioe) {
          // expected
          exc = true;
        }
        try {
          reader.document(i);
        } catch (IOException ioe) {
          // expected
          exc = true;
        }
      }
      assertTrue(exc);
      reader.close();
      dir.close();
    } finally {
      _TestUtil.rmDir(indexDir);
    }
  }
Example #18
  // Verifies no *.prx exists when all fields omit term freq:
  public void testNoPrxFile() throws Throwable {
    Directory ram = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriter writer =
        new IndexWriter(
            ram,
            newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
                .setMaxBufferedDocs(3)
                .setMergePolicy(newLogMergePolicy()));
    LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
    lmp.setMergeFactor(2);
    lmp.setUseCompoundFile(false);
    Document d = new Document();

    Field f1 = newField("f1", "This field has term freqs", omitType);
    d.add(f1);

    for (int i = 0; i < 30; i++) writer.addDocument(d);

    writer.commit();

    assertNoPrx(ram);

    // now add some documents with positions, and check
    // there is no prox after full merge
    d = new Document();
    f1 = newTextField("f1", "This field has positions", Field.Store.NO);
    d.add(f1);

    for (int i = 0; i < 30; i++) writer.addDocument(d);

    // force merge
    writer.forceMerge(1);
    // flush
    writer.close();

    assertNoPrx(ram);
    ram.close();
  }
Example #19
  /** Optimize the index database */
  public void optimize() {
    synchronized (lock) {
      if (running) {
        log.warning("Optimize terminated... Someone else is updating / optimizing it!");
        return;
      }
      running = true;
    }
    IndexWriter wrt = null;
    try {
      log.info("Optimizing the index ... ");
      Analyzer analyzer = new StandardAnalyzer(SearchEngine.LUCENE_VERSION);
      IndexWriterConfig conf = new IndexWriterConfig(SearchEngine.LUCENE_VERSION, analyzer);
      conf.setOpenMode(OpenMode.CREATE_OR_APPEND);

      wrt = new IndexWriter(indexDirectory, conf);
      wrt.forceMerge(1); // this is deprecated and not needed anymore
      log.info("done");
      synchronized (lock) {
        if (dirtyFile.exists() && !dirtyFile.delete()) {
          log.log(Level.FINE, "Failed to remove \"dirty-file\": {0}", dirtyFile.getAbsolutePath());
        }
        dirty = false;
      }
    } catch (IOException e) {
      log.log(Level.SEVERE, "ERROR: optimizing index: {0}", e);
    } finally {
      if (wrt != null) {
        try {
          wrt.close();
        } catch (IOException e) {
          log.log(Level.WARNING, "An error occurred while closing writer", e);
        }
      }
      synchronized (lock) {
        running = false;
      }
    }
  }
Example #20
  // LUCENE-1382
  public void testCommitUserData() throws IOException {
    Directory dir = newDirectory();
    IndexWriter w =
        new IndexWriter(
            dir,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setMaxBufferedDocs(2));
    for (int j = 0; j < 17; j++) TestIndexWriter.addDoc(w);
    w.close();

    DirectoryReader r = DirectoryReader.open(dir);
    // commit(Map) never called for this index
    assertEquals(0, r.getIndexCommit().getUserData().size());
    r.close();

    w =
        new IndexWriter(
            dir,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setMaxBufferedDocs(2));
    for (int j = 0; j < 17; j++) TestIndexWriter.addDoc(w);
    Map<String, String> data = new HashMap<String, String>();
    data.put("label", "test1");
    w.commit(data);
    w.close();

    r = DirectoryReader.open(dir);
    assertEquals("test1", r.getIndexCommit().getUserData().get("label"));
    r.close();

    w =
        new IndexWriter(
            dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    w.forceMerge(1);
    w.close();

    dir.close();
  }
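  // TestIndexWriter.addDoc(w), like the addDoc(writer) helper in the deletion-policy tests
  // above, just adds a tiny single-field document. A plausible sketch (illustrative):
  static void addDoc(IndexWriter writer) throws IOException {
    Document doc = new Document();
    doc.add(newTextField("content", "aaa", Field.Store.NO));
    writer.addDocument(doc);
  }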
  public void test() throws Exception {
    IndexWriterConfig defaultConfig = new IndexWriterConfig(null);
    Codec defaultCodec = defaultConfig.getCodec();
    if (defaultCodec instanceof CompressingCodec) {
      Pattern regex = Pattern.compile("maxDocsPerChunk=(\\d+), blockSize=(\\d+)");
      Matcher matcher = regex.matcher(defaultCodec.toString());
      assertTrue(
          "Unexpected CompressingCodec toString() output: " + defaultCodec.toString(),
          matcher.find());
      int maxDocsPerChunk = Integer.parseInt(matcher.group(1));
      int blockSize = Integer.parseInt(matcher.group(2));
      int product = maxDocsPerChunk * blockSize;
      assumeTrue(
          defaultCodec.getName()
              + " maxDocsPerChunk ("
              + maxDocsPerChunk
              + ") * blockSize ("
              + blockSize
              + ") < 16 - this can trigger OOM with -Dtests.heapsize=30g",
          product >= 16);
    }

    BaseDirectoryWrapper dir = newFSDirectory(createTempDir("2BPostingsBytes1"));
    if (dir instanceof MockDirectoryWrapper) {
      ((MockDirectoryWrapper) dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
    }

    IndexWriter w =
        new IndexWriter(
            dir,
            new IndexWriterConfig(new MockAnalyzer(random()))
                .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                .setRAMBufferSizeMB(256.0)
                .setMergeScheduler(new ConcurrentMergeScheduler())
                .setMergePolicy(newLogMergePolicy(false, 10))
                .setOpenMode(IndexWriterConfig.OpenMode.CREATE)
                .setCodec(TestUtil.getDefaultCodec()));

    MergePolicy mp = w.getConfig().getMergePolicy();
    if (mp instanceof LogByteSizeMergePolicy) {
      // 1 petabyte:
      ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024 * 1024 * 1024);
    }

    Document doc = new Document();
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    ft.setOmitNorms(true);
    MyTokenStream tokenStream = new MyTokenStream();
    Field field = new Field("field", tokenStream, ft);
    doc.add(field);

    final int numDocs = 1000;
    for (int i = 0; i < numDocs; i++) {
      if (i % 2 == 1) { // trick blockPF's little optimization
        tokenStream.n = 65536;
      } else {
        tokenStream.n = 65537;
      }
      w.addDocument(doc);
    }
    w.forceMerge(1);
    w.close();

    DirectoryReader oneThousand = DirectoryReader.open(dir);
    DirectoryReader subReaders[] = new DirectoryReader[1000];
    Arrays.fill(subReaders, oneThousand);
    BaseDirectoryWrapper dir2 = newFSDirectory(createTempDir("2BPostingsBytes2"));
    if (dir2 instanceof MockDirectoryWrapper) {
      ((MockDirectoryWrapper) dir2).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
    }
    IndexWriter w2 = new IndexWriter(dir2, new IndexWriterConfig(null));
    TestUtil.addIndexesSlowly(w2, subReaders);
    w2.forceMerge(1);
    w2.close();
    oneThousand.close();

    DirectoryReader oneMillion = DirectoryReader.open(dir2);
    subReaders = new DirectoryReader[2000];
    Arrays.fill(subReaders, oneMillion);
    BaseDirectoryWrapper dir3 = newFSDirectory(createTempDir("2BPostingsBytes3"));
    if (dir3 instanceof MockDirectoryWrapper) {
      ((MockDirectoryWrapper) dir3).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
    }
    IndexWriter w3 = new IndexWriter(dir3, new IndexWriterConfig(null));
    TestUtil.addIndexesSlowly(w3, subReaders);
    w3.forceMerge(1);
    w3.close();
    oneMillion.close();

    dir.close();
    dir2.close();
    dir3.close();
  }
  public void testLiveMaxMergeCount() throws Exception {
    Directory d = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    TieredMergePolicy tmp = new TieredMergePolicy();
    tmp.setSegmentsPerTier(1000);
    tmp.setMaxMergeAtOnce(1000);
    tmp.setMaxMergeAtOnceExplicit(10);
    iwc.setMergePolicy(tmp);
    iwc.setMaxBufferedDocs(2);
    iwc.setRAMBufferSizeMB(-1);

    final AtomicInteger maxRunningMergeCount = new AtomicInteger();

    ConcurrentMergeScheduler cms =
        new ConcurrentMergeScheduler() {

          final AtomicInteger runningMergeCount = new AtomicInteger();

          @Override
          public void doMerge(MergePolicy.OneMerge merge) throws IOException {
            int count = runningMergeCount.incrementAndGet();
            // evil?
            synchronized (this) {
              if (count > maxRunningMergeCount.get()) {
                maxRunningMergeCount.set(count);
              }
            }
            try {
              super.doMerge(merge);
            } finally {
              runningMergeCount.decrementAndGet();
            }
          }
        };

    cms.setMaxMergesAndThreads(5, 3);

    iwc.setMergeScheduler(cms);

    IndexWriter w = new IndexWriter(d, iwc);
    // Makes 100 segments
    for (int i = 0; i < 200; i++) {
      w.addDocument(new Document());
    }

    // No merges should have run so far, because TMP has high segmentsPerTier:
    assertEquals(0, maxRunningMergeCount.get());

    w.forceMerge(1);

    // At most 5 merge threads should have launched at once:
    assertTrue("maxRunningMergeCount=" + maxRunningMergeCount, maxRunningMergeCount.get() <= 5);
    maxRunningMergeCount.set(0);

    // Makes another 100 segments
    for (int i = 0; i < 200; i++) {
      w.addDocument(new Document());
    }

    ((ConcurrentMergeScheduler) w.getConfig().getMergeScheduler()).setMaxMergesAndThreads(1, 1);
    w.forceMerge(1);

    // At most 1 merge thread should have launched at once:
    assertEquals(1, maxRunningMergeCount.get());

    w.close();
    d.close();
  }
  // @Absurd @Ignore takes ~20GB-30GB of space and 10 minutes.
  // with some codecs needs more heap space as well.
  @Ignore("Very slow. Enable manually by removing @Ignore.")
  public void test() throws Exception {
    BaseDirectoryWrapper dir = newFSDirectory(_TestUtil.getTempDir("2BPostingsBytes1"));
    if (dir instanceof MockDirectoryWrapper) {
      ((MockDirectoryWrapper) dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
    }

    IndexWriter w =
        new IndexWriter(
            dir,
            new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                .setRAMBufferSizeMB(256.0)
                .setMergeScheduler(new ConcurrentMergeScheduler())
                .setMergePolicy(newLogMergePolicy(false, 10))
                .setOpenMode(IndexWriterConfig.OpenMode.CREATE));

    MergePolicy mp = w.getConfig().getMergePolicy();
    if (mp instanceof LogByteSizeMergePolicy) {
      // 1 petabyte:
      ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024 * 1024 * 1024);
    }

    Document doc = new Document();
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    ft.setOmitNorms(true);
    MyTokenStream tokenStream = new MyTokenStream();
    Field field = new Field("field", tokenStream, ft);
    doc.add(field);

    final int numDocs = 1000;
    for (int i = 0; i < numDocs; i++) {
      if (i % 2 == 1) { // trick blockPF's little optimization
        tokenStream.n = 65536;
      } else {
        tokenStream.n = 65537;
      }
      w.addDocument(doc);
    }
    w.forceMerge(1);
    w.close();

    DirectoryReader oneThousand = DirectoryReader.open(dir);
    IndexReader subReaders[] = new IndexReader[1000];
    Arrays.fill(subReaders, oneThousand);
    MultiReader mr = new MultiReader(subReaders);
    BaseDirectoryWrapper dir2 = newFSDirectory(_TestUtil.getTempDir("2BPostingsBytes2"));
    if (dir2 instanceof MockDirectoryWrapper) {
      ((MockDirectoryWrapper) dir2).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
    }
    IndexWriter w2 = new IndexWriter(dir2, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
    w2.addIndexes(mr);
    w2.forceMerge(1);
    w2.close();
    oneThousand.close();

    DirectoryReader oneMillion = DirectoryReader.open(dir2);
    subReaders = new IndexReader[2000];
    Arrays.fill(subReaders, oneMillion);
    mr = new MultiReader(subReaders);
    BaseDirectoryWrapper dir3 = newFSDirectory(_TestUtil.getTempDir("2BPostingsBytes3"));
    if (dir3 instanceof MockDirectoryWrapper) {
      ((MockDirectoryWrapper) dir3).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
    }
    IndexWriter w3 = new IndexWriter(dir3, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
    w3.addIndexes(mr);
    w3.forceMerge(1);
    w3.close();
    oneMillion.close();

    dir.close();
    dir2.close();
    dir3.close();
  }
Example #24
  public void testSimpleCase() throws IOException {
    // main directory
    Directory dir = newDirectory();
    // two auxiliary directories
    Directory aux = newDirectory();
    Directory aux2 = newDirectory();

    IndexWriter writer = null;

    writer =
        newWriter(
            dir,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setOpenMode(OpenMode.CREATE));
    // add 100 documents
    addDocs(writer, 100);
    assertEquals(100, writer.maxDoc());
    writer.close();
    TestUtil.checkIndex(dir);

    writer =
        newWriter(
            aux,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setOpenMode(OpenMode.CREATE)
                .setMergePolicy(newLogMergePolicy(false)));
    // add 40 documents in separate files
    addDocs(writer, 40);
    assertEquals(40, writer.maxDoc());
    writer.close();

    writer =
        newWriter(
            aux2,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setOpenMode(OpenMode.CREATE));
    // add 50 documents in compound files
    addDocs2(writer, 50);
    assertEquals(50, writer.maxDoc());
    writer.close();

    // test doc count before segments are merged
    writer =
        newWriter(
            dir,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setOpenMode(OpenMode.APPEND));
    assertEquals(100, writer.maxDoc());
    writer.addIndexes(aux, aux2);
    assertEquals(190, writer.maxDoc());
    writer.close();
    TestUtil.checkIndex(dir);

    // make sure the old index is correct
    verifyNumDocs(aux, 40);

    // make sure the new index is correct
    verifyNumDocs(dir, 190);

    // now add another set in.
    Directory aux3 = newDirectory();
    writer =
        newWriter(aux3, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    // add 40 documents
    addDocs(writer, 40);
    assertEquals(40, writer.maxDoc());
    writer.close();

    // test doc count before segments are merged
    writer =
        newWriter(
            dir,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setOpenMode(OpenMode.APPEND));
    assertEquals(190, writer.maxDoc());
    writer.addIndexes(aux3);
    assertEquals(230, writer.maxDoc());
    writer.close();

    // make sure the new index is correct
    verifyNumDocs(dir, 230);

    verifyTermDocs(dir, new Term("content", "aaa"), 180);

    verifyTermDocs(dir, new Term("content", "bbb"), 50);

    // now fully merge it.
    writer =
        newWriter(
            dir,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setOpenMode(OpenMode.APPEND));
    writer.forceMerge(1);
    writer.close();

    // make sure the new index is correct
    verifyNumDocs(dir, 230);

    verifyTermDocs(dir, new Term("content", "aaa"), 180);

    verifyTermDocs(dir, new Term("content", "bbb"), 50);

    // now add a single document
    Directory aux4 = newDirectory();
    writer =
        newWriter(aux4, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    addDocs2(writer, 1);
    writer.close();

    writer =
        newWriter(
            dir,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setOpenMode(OpenMode.APPEND));
    assertEquals(230, writer.maxDoc());
    writer.addIndexes(aux4);
    assertEquals(231, writer.maxDoc());
    writer.close();

    verifyNumDocs(dir, 231);

    verifyTermDocs(dir, new Term("content", "bbb"), 51);
    dir.close();
    aux.close();
    aux2.close();
    aux3.close();
    aux4.close();
  }
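  // addDocs(...) and addDocs2(...) are helpers of the enclosing test class; the term counts
  // verified above ("aaa" and "bbb") suggest each helper indexes one of those terms.
  // Plausible sketches (illustrative):
  private void addDocs(IndexWriter writer, int numDocs) throws IOException {
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      doc.add(newTextField("content", "aaa", Field.Store.NO));
      writer.addDocument(doc);
    }
  }

  private void addDocs2(IndexWriter writer, int numDocs) throws IOException {
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      doc.add(newTextField("content", "bbb", Field.Store.NO));
      writer.addDocument(doc);
    }
  }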
  /*
  Test: make sure when we run out of disk space or hit
  random IOExceptions in any of the addIndexes(*) calls
  that 1) index is not corrupt (searcher can open/search
  it) and 2) transactional semantics are followed:
  either all or none of the incoming documents were in
  fact added.
   */
  public void testAddIndexOnDiskFull() throws IOException {
    // MemoryCodec, since it uses FST, is not necessarily
    // "additive", ie if you add up N small FSTs, then merge
    // them, the merged result can easily be larger than the
    // sum because the merged FST may use array encoding for
    // some arcs (which uses more space):

    final String idFormat = _TestUtil.getPostingsFormat("id");
    final String contentFormat = _TestUtil.getPostingsFormat("content");
    assumeFalse(
        "This test cannot run with Memory codec",
        idFormat.equals("Memory") || contentFormat.equals("Memory"));

    int START_COUNT = 57;
    int NUM_DIR = TEST_NIGHTLY ? 50 : 5;
    int END_COUNT = START_COUNT + NUM_DIR * (TEST_NIGHTLY ? 25 : 5);

    // Build up a bunch of dirs that have indexes which we
    // will then merge together by calling addIndexes(*):
    Directory[] dirs = new Directory[NUM_DIR];
    long inputDiskUsage = 0;
    for (int i = 0; i < NUM_DIR; i++) {
      dirs[i] = newDirectory();
      IndexWriter writer =
          new IndexWriter(
              dirs[i], newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
      for (int j = 0; j < 25; j++) {
        addDocWithIndex(writer, 25 * i + j);
      }
      writer.close();
      String[] files = dirs[i].listAll();
      for (int j = 0; j < files.length; j++) {
        inputDiskUsage += dirs[i].fileLength(files[j]);
      }
    }

    // Now, build a starting index that has START_COUNT docs.  We
    // will then try to addIndexes into a copy of this:
    MockDirectoryWrapper startDir = newMockDirectory();
    IndexWriter writer =
        new IndexWriter(
            startDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    for (int j = 0; j < START_COUNT; j++) {
      addDocWithIndex(writer, j);
    }
    writer.close();

    // Make sure starting index seems to be working properly:
    Term searchTerm = new Term("content", "aaa");
    IndexReader reader = DirectoryReader.open(startDir);
    assertEquals("first docFreq", 57, reader.docFreq(searchTerm));

    IndexSearcher searcher = newSearcher(reader);
    ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs;
    assertEquals("first number of hits", 57, hits.length);
    reader.close();

    // Iterate with larger and larger amounts of free
    // disk space.  With little free disk space,
    // addIndexes will certainly run out of space &
    // fail.  Verify that when this happens, index is
    // not corrupt and index in fact has added no
    // documents.  Then, we increase disk space by 2000
    // bytes each iteration.  At some point there is
    // enough free disk space and addIndexes should
    // succeed and index should show all documents were
    // added.

    // String[] files = startDir.listAll();
    long diskUsage = startDir.sizeInBytes();

    long startDiskUsage = 0;
    String[] files = startDir.listAll();
    for (int i = 0; i < files.length; i++) {
      startDiskUsage += startDir.fileLength(files[i]);
    }

    for (int iter = 0; iter < 3; iter++) {

      if (VERBOSE) {
        System.out.println("TEST: iter=" + iter);
      }

      // Start with 100 bytes more than we are currently using:
      long diskFree = diskUsage + _TestUtil.nextInt(random(), 50, 200);

      int method = iter;

      boolean success = false;
      boolean done = false;

      String methodName;
      if (0 == method) {
        methodName = "addIndexes(Directory[]) + forceMerge(1)";
      } else if (1 == method) {
        methodName = "addIndexes(IndexReader[])";
      } else {
        methodName = "addIndexes(Directory[])";
      }

      while (!done) {
        if (VERBOSE) {
          System.out.println("TEST: cycle...");
        }

        // Make a new dir that will enforce disk usage:
        MockDirectoryWrapper dir =
            new MockDirectoryWrapper(random(), new RAMDirectory(startDir, newIOContext(random())));
        writer =
            new IndexWriter(
                dir,
                newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                    .setOpenMode(OpenMode.APPEND)
                    .setMergePolicy(newLogMergePolicy(false)));
        IOException err = null;

        MergeScheduler ms = writer.getConfig().getMergeScheduler();
        for (int x = 0; x < 2; x++) {
          if (ms instanceof ConcurrentMergeScheduler) {
            // This test intentionally produces exceptions
            // in the threads that CMS launches; we don't
            // want to pollute test output with these.
            if (0 == x) {
              ((ConcurrentMergeScheduler) ms).setSuppressExceptions();
            } else {
              ((ConcurrentMergeScheduler) ms).clearSuppressExceptions();
            }
          }

          // Two loops: first time, limit disk space &
          // throw random IOExceptions; second time, no
          // disk space limit:

          double rate = 0.05;
          double diskRatio = ((double) diskFree) / diskUsage;
          long thisDiskFree;

          String testName = null;

          if (0 == x) {
            dir.setRandomIOExceptionRateOnOpen(random().nextDouble() * 0.01);
            thisDiskFree = diskFree;
            if (diskRatio >= 2.0) {
              rate /= 2;
            }
            if (diskRatio >= 4.0) {
              rate /= 2;
            }
            if (diskRatio >= 6.0) {
              rate = 0.0;
            }
            if (VERBOSE)
              testName =
                  "disk full test " + methodName + " with disk full at " + diskFree + " bytes";
          } else {
            dir.setRandomIOExceptionRateOnOpen(0.0);
            thisDiskFree = 0;
            rate = 0.0;
            if (VERBOSE) testName = "disk full test " + methodName + " with unlimited disk space";
          }

          if (VERBOSE) System.out.println("\ncycle: " + testName);

          dir.setTrackDiskUsage(true);
          dir.setMaxSizeInBytes(thisDiskFree);
          dir.setRandomIOExceptionRate(rate);

          try {

            if (0 == method) {
              if (VERBOSE) {
                System.out.println("TEST: now addIndexes count=" + dirs.length);
              }
              writer.addIndexes(dirs);
              if (VERBOSE) {
                System.out.println("TEST: now forceMerge");
              }
              writer.forceMerge(1);
            } else if (1 == method) {
              IndexReader readers[] = new IndexReader[dirs.length];
              for (int i = 0; i < dirs.length; i++) {
                readers[i] = DirectoryReader.open(dirs[i]);
              }
              try {
                writer.addIndexes(readers);
              } finally {
                for (int i = 0; i < dirs.length; i++) {
                  readers[i].close();
                }
              }
            } else {
              writer.addIndexes(dirs);
            }

            success = true;
            if (VERBOSE) {
              System.out.println("  success!");
            }

            if (0 == x) {
              done = true;
            }

          } catch (IOException e) {
            success = false;
            err = e;
            if (VERBOSE) {
              System.out.println("  hit IOException: " + e);
              e.printStackTrace(System.out);
            }

            if (1 == x) {
              e.printStackTrace(System.out);
              fail(methodName + " hit IOException after disk space was freed up");
            }
          }

          // Make sure all threads from
          // ConcurrentMergeScheduler are done
          _TestUtil.syncConcurrentMerges(writer);

          if (VERBOSE) {
            System.out.println("  now test readers");
          }

          // Finally, verify index is not corrupt, and, if
          // we succeeded, we see all docs added, and if we
          // failed, we see either all docs or no docs added
          // (transactional semantics):
          dir.setRandomIOExceptionRateOnOpen(0.0);
          try {
            reader = DirectoryReader.open(dir);
          } catch (IOException e) {
            e.printStackTrace(System.out);
            fail(testName + ": exception when creating IndexReader: " + e);
          }
          int result = reader.docFreq(searchTerm);
          if (success) {
            if (result != START_COUNT) {
              fail(
                  testName
                      + ": method did not throw exception but docFreq('aaa') is "
                      + result
                      + " instead of expected "
                      + START_COUNT);
            }
          } else {
            // On hitting exception we still may have added
            // all docs:
            if (result != START_COUNT && result != END_COUNT) {
              err.printStackTrace(System.out);
              fail(
                  testName
                      + ": method did throw exception but docFreq('aaa') is "
                      + result
                      + " instead of expected "
                      + START_COUNT
                      + " or "
                      + END_COUNT);
            }
          }

          searcher = newSearcher(reader);
          try {
            hits = searcher.search(new TermQuery(searchTerm), null, END_COUNT).scoreDocs;
          } catch (IOException e) {
            e.printStackTrace(System.out);
            fail(testName + ": exception when searching: " + e);
          }
          int result2 = hits.length;
          if (success) {
            if (result2 != result) {
              fail(
                  testName
                      + ": method did not throw exception but hits.length for search on term 'aaa' is "
                      + result2
                      + " instead of expected "
                      + result);
            }
          } else {
            // On hitting exception we still may have added
            // all docs:
            if (result2 != result) {
              err.printStackTrace(System.out);
              fail(
                  testName
                      + ": method did throw exception but hits.length for search on term 'aaa' is "
                      + result2
                      + " instead of expected "
                      + result);
            }
          }

          reader.close();
          if (VERBOSE) {
            System.out.println("  count is " + result);
          }

          if (done || result == END_COUNT) {
            break;
          }
        }

        if (VERBOSE) {
          System.out.println(
              "  start disk = "
                  + startDiskUsage
                  + "; input disk = "
                  + inputDiskUsage
                  + "; max used = "
                  + dir.getMaxUsedSizeInBytes());
        }

        if (done) {
          // Javadocs state that temp free Directory space
          // required is at most 2X total input size of
          // indices so let's make sure:
          assertTrue(
              "max free Directory space required exceeded 1X the total input index sizes during "
                  + methodName
                  + ": max temp usage = "
                  + (dir.getMaxUsedSizeInBytes() - startDiskUsage)
                  + " bytes vs limit="
                  + (2 * (startDiskUsage + inputDiskUsage))
                  + "; starting disk usage = "
                  + startDiskUsage
                  + " bytes; "
                  + "input index disk usage = "
                  + inputDiskUsage
                  + " bytes",
              (dir.getMaxUsedSizeInBytes() - startDiskUsage)
                  < 2 * (startDiskUsage + inputDiskUsage));
        }

        // Make sure we don't hit disk full during close below:
        dir.setMaxSizeInBytes(0);
        dir.setRandomIOExceptionRate(0.0);
        dir.setRandomIOExceptionRateOnOpen(0.0);

        writer.close();

        // Wait for all BG threads to finish else
        // dir.close() will throw IOException because
        // there are still open files
        _TestUtil.syncConcurrentMerges(ms);

        dir.close();

        // Try again with more free space:
        diskFree +=
            TEST_NIGHTLY
                ? _TestUtil.nextInt(random(), 4000, 8000)
                : _TestUtil.nextInt(random(), 40000, 80000);
      }
    }

    startDir.close();
    for (Directory dir : dirs) dir.close();
  }
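  // addDocWithIndex(...) is a helper of the enclosing test class; given the docFreq checks on
  // "content":"aaa" above, a plausible sketch (illustrative):
  private void addDocWithIndex(IndexWriter writer, int index) throws IOException {
    Document doc = new Document();
    doc.add(newTextField("content", "aaa " + index, Field.Store.YES));
    doc.add(newTextField("id", "" + index, Field.Store.YES));
    writer.addDocument(doc);
  }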
  public void testMergeIncompatibleTypes() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig writerConfig =
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    writerConfig.setMergePolicy(
        NoMergePolicy.NO_COMPOUND_FILES); // no merges until we are done with adding values
    IndexWriter writer = new IndexWriter(dir, writerConfig);
    int num_1 = atLeast(200);
    int num_2 = atLeast(200);
    long[] values = new long[num_1 + num_2];
    index(writer, randomValueType(INTEGERS, random()), values, 0, num_1);
    writer.commit();

    if (random().nextInt(4) == 0) {
      // once in a while use addIndexes
      Directory dir_2 = newDirectory();
      IndexWriter writer_2 =
          new IndexWriter(
              dir_2, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
      index(
          writer_2,
          randomValueType(random().nextBoolean() ? UNSORTED_BYTES : SORTED_BYTES, random()),
          values,
          num_1,
          num_2);
      writer_2.commit();
      writer_2.close();
      if (random().nextBoolean()) {
        writer.addIndexes(dir_2);
      } else {
        // do a real merge here
        IndexReader open = IndexReader.open(dir_2);
        writer.addIndexes(open);
        open.close();
      }
      dir_2.close();
    } else {
      index(
          writer,
          randomValueType(random().nextBoolean() ? UNSORTED_BYTES : SORTED_BYTES, random()),
          values,
          num_1,
          num_2);
      writer.commit();
    }
    writer.close();
    writerConfig = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    if (writerConfig.getMergePolicy() instanceof NoMergePolicy) {
      writerConfig.setMergePolicy(
          newLogMergePolicy()); // make sure we merge to one segment (merge everything together)
    }
    writer = new IndexWriter(dir, writerConfig);
    // now merge
    writer.forceMerge(1);
    writer.close();
    DirectoryReader reader = DirectoryReader.open(dir);
    assertEquals(1, reader.getSequentialSubReaders().length);
    IndexReaderContext topReaderContext = reader.getTopReaderContext();
    AtomicReaderContext[] children = topReaderContext.leaves();
    DocValues docValues = children[0].reader().docValues("promote");
    assertNotNull(docValues);
    assertValues(TestType.Byte, dir, values);
    assertEquals(Type.BYTES_VAR_STRAIGHT, docValues.getType());
    reader.close();
    dir.close();
  }
  /* Uses KeepAllDeletionPolicy to keep all commits around,
   * then, opens a new IndexWriter on a previous commit
   * point. */
  public void testOpenPriorSnapshot() throws IOException {
    Directory dir = newDirectory();

    IndexWriter writer =
        new IndexWriter(
            dir,
            newIndexWriterConfig(new MockAnalyzer(random()))
                .setIndexDeletionPolicy(new KeepAllDeletionPolicy(dir))
                .setMaxBufferedDocs(2)
                .setMergePolicy(newLogMergePolicy(10)));
    KeepAllDeletionPolicy policy =
        (KeepAllDeletionPolicy) writer.getConfig().getIndexDeletionPolicy();
    for (int i = 0; i < 10; i++) {
      addDoc(writer);
      if ((1 + i) % 2 == 0) writer.commit();
    }
    writer.close();

    Collection<IndexCommit> commits = DirectoryReader.listCommits(dir);
    assertEquals(5, commits.size());
    IndexCommit lastCommit = null;
    for (final IndexCommit commit : commits) {
      if (lastCommit == null || commit.getGeneration() > lastCommit.getGeneration())
        lastCommit = commit;
    }
    assertNotNull(lastCommit);

    // Now add 1 doc and merge
    writer =
        new IndexWriter(
            dir, newIndexWriterConfig(new MockAnalyzer(random())).setIndexDeletionPolicy(policy));
    addDoc(writer);
    assertEquals(11, writer.numDocs());
    writer.forceMerge(1);
    writer.close();

    assertEquals(6, DirectoryReader.listCommits(dir).size());

    // Now open writer on the commit just before merge:
    writer =
        new IndexWriter(
            dir,
            newIndexWriterConfig(new MockAnalyzer(random()))
                .setIndexDeletionPolicy(policy)
                .setIndexCommit(lastCommit));
    assertEquals(10, writer.numDocs());

    // Rolling back should undo our "rollback" to the prior commit:
    writer.rollback();

    DirectoryReader r = DirectoryReader.open(dir);
    // Still merged, still 11 docs
    assertEquals(1, r.leaves().size());
    assertEquals(11, r.numDocs());
    r.close();

    writer =
        new IndexWriter(
            dir,
            newIndexWriterConfig(new MockAnalyzer(random()))
                .setIndexDeletionPolicy(policy)
                .setIndexCommit(lastCommit));
    assertEquals(10, writer.numDocs());
    // Commits the rollback:
    writer.close();

    // Now 7 because we made another commit
    assertEquals(7, DirectoryReader.listCommits(dir).size());

    r = DirectoryReader.open(dir);
    // Not fully merged because we rolled it back, and now only
    // 10 docs
    assertTrue(r.leaves().size() > 1);
    assertEquals(10, r.numDocs());
    r.close();

    // Re-merge
    writer =
        new IndexWriter(
            dir, newIndexWriterConfig(new MockAnalyzer(random())).setIndexDeletionPolicy(policy));
    writer.forceMerge(1);
    writer.close();

    r = DirectoryReader.open(dir);
    assertEquals(1, r.leaves().size());
    assertEquals(10, r.numDocs());
    r.close();

    // Now open writer on the commit just before merging,
    // but this time keeping only the last commit:
    writer =
        new IndexWriter(
            dir, newIndexWriterConfig(new MockAnalyzer(random())).setIndexCommit(lastCommit));
    assertEquals(10, writer.numDocs());

    // Reader still sees fully merged index, because writer
    // opened on the prior commit has not yet committed:
    r = DirectoryReader.open(dir);
    assertEquals(1, r.leaves().size());
    assertEquals(10, r.numDocs());
    r.close();

    writer.close();

    // Now reader sees not-fully-merged index:
    r = DirectoryReader.open(dir);
    assertTrue(r.leaves().size() > 1);
    assertEquals(10, r.numDocs());
    r.close();

    dir.close();
  }
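  // The addDoc(...) helper used by testOpenPriorSnapshot is not shown in this
  // excerpt. A minimal sketch that matches its use (each call adds exactly one
  // document) follows; the field name and contents are assumptions:
  private void addDoc(IndexWriter writer) throws IOException {
    Document doc = new Document();
    doc.add(newTextField("content", "aaa", Field.Store.NO));
    writer.addDocument(doc);
  }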
  /*
   * Test a silly deletion policy that keeps all commits around.
   */
  public void testKeepAllDeletionPolicy() throws IOException {
    for (int pass = 0; pass < 2; pass++) {

      if (VERBOSE) {
        System.out.println("TEST: cycle pass=" + pass);
      }

      boolean useCompoundFile = (pass % 2) != 0;

      Directory dir = newDirectory();

      IndexWriterConfig conf =
          newIndexWriterConfig(new MockAnalyzer(random()))
              .setIndexDeletionPolicy(new KeepAllDeletionPolicy(dir))
              .setMaxBufferedDocs(10)
              .setMergeScheduler(new SerialMergeScheduler());
      MergePolicy mp = conf.getMergePolicy();
      mp.setNoCFSRatio(useCompoundFile ? 1.0 : 0.0);
      IndexWriter writer = new IndexWriter(dir, conf);
      KeepAllDeletionPolicy policy =
          (KeepAllDeletionPolicy) writer.getConfig().getIndexDeletionPolicy();
      for (int i = 0; i < 107; i++) {
        addDoc(writer);
      }
      writer.close();

      final boolean needsMerging;
      {
        DirectoryReader r = DirectoryReader.open(dir);
        needsMerging = r.leaves().size() != 1;
        r.close();
      }
      if (needsMerging) {
        conf =
            newIndexWriterConfig(new MockAnalyzer(random()))
                .setOpenMode(OpenMode.APPEND)
                .setIndexDeletionPolicy(policy);
        mp = conf.getMergePolicy();
        mp.setNoCFSRatio(useCompoundFile ? 1.0 : 0.0);
        if (VERBOSE) {
          System.out.println("TEST: open writer for forceMerge");
        }
        writer = new IndexWriter(dir, conf);
        policy = (KeepAllDeletionPolicy) writer.getConfig().getIndexDeletionPolicy();
        writer.forceMerge(1);
        writer.close();
      }

      assertEquals(needsMerging ? 2 : 1, policy.numOnInit);

      // There should be exactly one commit per writer
      // close above:
      assertEquals(1 + (needsMerging ? 1 : 0), policy.numOnCommit);

      // Test listCommits
      Collection<IndexCommit> commits = DirectoryReader.listCommits(dir);
      // one commit per writer close above
      assertEquals(1 + (needsMerging ? 1 : 0), commits.size());

      // Make sure we can open a reader on each commit:
      for (final IndexCommit commit : commits) {
        IndexReader r = DirectoryReader.open(commit);
        r.close();
      }

      // Simplistic check: just verify all segments_N's still
      // exist, and, I can open a reader on each:
      long gen = SegmentInfos.getLastCommitGeneration(dir);
      while (gen > 0) {
        IndexReader reader = DirectoryReader.open(dir);
        reader.close();
        dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
        gen--;

        if (gen > 0) {
          // Removing a commit point should have orphaned at
          // least one index file. Open & close a writer and
          // assert that it actually removed something:
          int preCount = dir.listAll().length;
          writer =
              new IndexWriter(
                  dir,
                  newIndexWriterConfig(new MockAnalyzer(random()))
                      .setOpenMode(OpenMode.APPEND)
                      .setIndexDeletionPolicy(policy));
          writer.close();
          int postCount = dir.listAll().length;
          assertTrue(postCount < preCount);
        }
      }

      dir.close();
    }
  }
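  // KeepAllDeletionPolicy itself is defined outside this excerpt. A minimal
  // sketch consistent with how the two tests above use it (constructed with
  // the Directory, counts onInit/onCommit calls, and never deletes a commit
  // point) might be:
  static class KeepAllDeletionPolicy extends IndexDeletionPolicy {
    int numOnInit;
    int numOnCommit;
    final Directory dir;

    KeepAllDeletionPolicy(Directory dir) {
      this.dir = dir;
    }

    @Override
    public void onInit(List<? extends IndexCommit> commits) throws IOException {
      numOnInit++;
      // keep all commit points: never call commit.delete()
    }

    @Override
    public void onCommit(List<? extends IndexCommit> commits) throws IOException {
      numOnCommit++;
      // keep all commit points: never call commit.delete()
    }
  }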
  @Nightly
  public void test() throws Exception {
    MockDirectoryWrapper dir =
        new MockDirectoryWrapper(random(), new MMapDirectory(createTempDir("4GBStoredFields")));
    dir.setThrottling(MockDirectoryWrapper.Throttling.NEVER);

    IndexWriter w =
        new IndexWriter(
            dir,
            new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                .setRAMBufferSizeMB(256.0)
                .setMergeScheduler(new ConcurrentMergeScheduler())
                .setMergePolicy(newLogMergePolicy(false, 10))
                .setOpenMode(IndexWriterConfig.OpenMode.CREATE));

    MergePolicy mp = w.getConfig().getMergePolicy();
    if (mp instanceof LogByteSizeMergePolicy) {
      // 1 petabyte:
      ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024 * 1024 * 1024);
    }

    final Document doc = new Document();
    final FieldType ft = new FieldType();
    ft.setIndexed(false);
    ft.setStored(true);
    ft.freeze();
    final int valueLength = RandomInts.randomIntBetween(random(), 1 << 13, 1 << 20);
    final byte[] value = new byte[valueLength];
    for (int i = 0; i < valueLength; ++i) {
      // random so that even compressing codecs can't compress it
      value[i] = (byte) random().nextInt(256);
    }
    final Field f = new Field("fld", value, ft);
    doc.add(f);

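    // numDocs is chosen so that numDocs * valueLength comfortably exceeds
    // 2^32 bytes, guaranteeing the stored fields data alone crosses 4GB: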
    final int numDocs = (int) ((1L << 32) / valueLength + 100);
    for (int i = 0; i < numDocs; ++i) {
      w.addDocument(doc);
      if (VERBOSE && i % (numDocs / 10) == 0) {
        System.out.println(i + " of " + numDocs + "...");
      }
    }
    w.forceMerge(1);
    w.close();
    if (VERBOSE) {
      boolean found = false;
      for (String file : dir.listAll()) {
        if (file.endsWith(".fdt")) {
          final long fileLength = dir.fileLength(file);
          if (fileLength >= 1L << 32) {
            found = true;
          }
          System.out.println("File length of " + file + " : " + fileLength);
        }
      }
      if (!found) {
        System.out.println("No .fdt file larger than 4GB, test bug?");
      }
    }

    DirectoryReader rd = DirectoryReader.open(dir);
    Document sd = rd.document(numDocs - 1);
    assertNotNull(sd);
    assertEquals(1, sd.getFields().size());
    BytesRef valueRef = sd.getBinaryValue("fld");
    assertNotNull(valueRef);
    assertEquals(new BytesRef(value), valueRef);
    rd.close();

    dir.close();
  }
Example #30
0
  // Test scores with one field that has term freqs and one that omits them, with otherwise equal content
  public void testBasic() throws Exception {
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriter writer =
        new IndexWriter(
            dir,
            newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
                .setMaxBufferedDocs(2)
                .setSimilarity(new SimpleSimilarity())
                .setMergePolicy(newLogMergePolicy(2)));

    StringBuilder sb = new StringBuilder(265);
    String term = "term";
    for (int i = 0; i < 30; i++) {
      Document d = new Document();
      sb.append(term).append(" ");
      String content = sb.toString();
      Field noTf = newField("noTf", content + (i % 2 == 0 ? "" : " notf"), omitType);
      d.add(noTf);

      Field tf = newField("tf", content + (i % 2 == 0 ? " tf" : ""), normalType);
      d.add(tf);

      writer.addDocument(d);
      // System.out.println(d);
    }

    writer.forceMerge(1);
    // flush
    writer.close();

    /*
     * Verify the index
     */
    IndexReader reader = DirectoryReader.open(dir);
    IndexSearcher searcher = newSearcher(reader);
    searcher.setSimilarity(new SimpleSimilarity());

    Term a = new Term("noTf", term);
    Term b = new Term("tf", term);
    Term c = new Term("noTf", "notf");
    Term d = new Term("tf", "tf");
    TermQuery q1 = new TermQuery(a);
    TermQuery q2 = new TermQuery(b);
    TermQuery q3 = new TermQuery(c);
    TermQuery q4 = new TermQuery(d);

    PhraseQuery pq = new PhraseQuery();
    pq.add(a);
    pq.add(c);
    try {
      searcher.search(pq, 10);
      fail("did not hit expected exception");
    } catch (Exception e) {
      Throwable cause = e;
      // If the searcher uses an executor service, the ISE is wrapped in other exceptions
      while (cause.getCause() != null) {
        cause = cause.getCause();
      }
      assertTrue("Expected an IAE, got " + cause, cause instanceof IllegalStateException);
    }

    searcher.search(
        q1,
        new CountingHitCollector() {
          private Scorer scorer;

          @Override
          public final void setScorer(Scorer scorer) {
            this.scorer = scorer;
          }

          @Override
          public final void collect(int doc) throws IOException {
            // System.out.println("Q1: Doc=" + doc + " score=" + score);
            float score = scorer.score();
            assertTrue("got score=" + score, score == 1.0f);
            super.collect(doc);
          }
        });
    // System.out.println(CountingHitCollector.getCount());

    searcher.search(
        q2,
        new CountingHitCollector() {
          private Scorer scorer;

          @Override
          public final void setScorer(Scorer scorer) {
            this.scorer = scorer;
          }

          @Override
          public final void collect(int doc) throws IOException {
            // System.out.println("Q2: Doc=" + doc + " score=" + score);
            float score = scorer.score();
            assertEquals(1.0f + doc, score, 0.00001f);
            super.collect(doc);
          }
        });
    // System.out.println(CountingHitCollector.getCount());

    searcher.search(
        q3,
        new CountingHitCollector() {
          private Scorer scorer;

          @Override
          public final void setScorer(Scorer scorer) {
            this.scorer = scorer;
          }

          @Override
          public final void collect(int doc) throws IOException {
            // System.out.println("Q1: Doc=" + doc + " score=" + score);
            float score = scorer.score();
            assertTrue(score == 1.0f);
            assertFalse(doc % 2 == 0);
            super.collect(doc);
          }
        });
    // System.out.println(CountingHitCollector.getCount());

    searcher.search(
        q4,
        new CountingHitCollector() {
          private Scorer scorer;

          @Override
          public final void setScorer(Scorer scorer) {
            this.scorer = scorer;
          }

          @Override
          public final void collect(int doc) throws IOException {
            float score = scorer.score();
            // System.out.println("Q1: Doc=" + doc + " score=" + score);
            assertTrue(score == 1.0f);
            assertTrue(doc % 2 == 0);
            super.collect(doc);
          }
        });
    // System.out.println(CountingHitCollector.getCount());

    BooleanQuery bq = new BooleanQuery();
    bq.add(q1, Occur.MUST);
    bq.add(q4, Occur.MUST);

    searcher.search(
        bq,
        new CountingHitCollector() {
          @Override
          public final void collect(int doc) throws IOException {
            // System.out.println("BQ: Doc=" + doc + " score=" + score);
            super.collect(doc);
          }
        });
    assertEquals(15, CountingHitCollector.getCount());

    reader.close();
    dir.close();
  }
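  // Neither omitType/normalType nor CountingHitCollector appear in this
  // excerpt. A minimal sketch consistent with how testBasic uses them
  // (omitType indexes docs only, with no term freqs; the collector counts
  // hits through a static counter that each new instance resets) might be:
  static final FieldType omitType = new FieldType(TextField.TYPE_NOT_STORED);
  static final FieldType normalType = new FieldType(TextField.TYPE_NOT_STORED);

  static {
    omitType.setIndexOptions(IndexOptions.DOCS_ONLY);
  }

  public static class CountingHitCollector extends Collector {
    static int count = 0;
    static int sum = 0;
    private int docBase = -1;

    CountingHitCollector() {
      count = 0;
      sum = 0;
    }

    @Override
    public void setScorer(Scorer scorer) throws IOException {}

    @Override
    public void collect(int doc) throws IOException {
      count++;
      sum += docBase + doc; // touch the global doc id so nothing is optimized away
    }

    @Override
    public void setNextReader(AtomicReaderContext context) {
      docBase = context.docBase;
    }

    @Override
    public boolean acceptsDocsOutOfOrder() {
      return true;
    }

    public static int getCount() {
      return count;
    }
  }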