public void testEmptyDocs() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig iwConf =
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);

    // make sure that completely empty documents are not a problem
    final Document emptyDoc = new Document();
    final int numDocs = random().nextBoolean() ? 1 : atLeast(1000);
    for (int i = 0; i < numDocs; ++i) {
      iw.addDocument(emptyDoc);
    }
    iw.commit();
    final DirectoryReader rd = DirectoryReader.open(dir);
    for (int i = 0; i < numDocs; ++i) {
      final Document doc = rd.document(i);
      assertNotNull(doc);
      assertTrue(doc.getFields().isEmpty());
    }
    rd.close();

    iw.close();
    dir.close();
  }
  public void testTotalBytesSize() throws Exception {
    Directory d = newDirectory();
    if (d instanceof MockDirectoryWrapper) {
      ((MockDirectoryWrapper) d).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
    }
    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    iwc.setMaxBufferedDocs(5);
    iwc.setMergeScheduler(new TrackingCMS());
    if (TestUtil.getPostingsFormat("id").equals("SimpleText")) {
      // SimpleText is too slow for this test; force a real postings format instead
      iwc.setCodec(TestUtil.alwaysPostingsFormat(new Lucene41PostingsFormat()));
    }
    IndexWriter w = new IndexWriter(d, iwc);
    for (int i = 0; i < 1000; i++) {
      Document doc = new Document();
      doc.add(new StringField("id", "" + i, Field.Store.NO));
      w.addDocument(doc);

      if (random().nextBoolean()) {
        w.deleteDocuments(new Term("id", "" + random().nextInt(i + 1)));
      }
    }
    assertTrue(((TrackingCMS) w.getConfig().getMergeScheduler()).totMergedBytes != 0);
    w.close();
    d.close();
  }
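  // TrackingCMS is referenced above but not shown; a minimal sketch consistent with its use
  // (assumed, not necessarily the original helper): a ConcurrentMergeScheduler that accumulates
  // the total size of every merge it runs into totMergedBytes.
  private static class TrackingCMS extends ConcurrentMergeScheduler {
    long totMergedBytes;

    @Override
    public void doMerge(MergePolicy.OneMerge merge) throws IOException {
      totMergedBytes += merge.totalBytesSize();
      super.doMerge(merge);
    }
  }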
  public void testReadSkip() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig iwConf =
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);

    FieldType ft = new FieldType();
    ft.setStored(true);
    ft.freeze();

    final String string = _TestUtil.randomSimpleString(random(), 50);
    final byte[] bytes = string.getBytes("UTF-8");
    final long l = random().nextBoolean() ? random().nextInt(42) : random().nextLong();
    final int i = random().nextBoolean() ? random().nextInt(42) : random().nextInt();
    final float f = random().nextFloat();
    final double d = random().nextDouble();

    List<Field> fields =
        Arrays.asList(
            new Field("bytes", bytes, ft),
            new Field("string", string, ft),
            new LongField("long", l, Store.YES),
            new IntField("int", i, Store.YES),
            new FloatField("float", f, Store.YES),
            new DoubleField("double", d, Store.YES));

    for (int k = 0; k < 100; ++k) {
      Document doc = new Document();
      for (Field fld : fields) {
        doc.add(fld);
      }
      iw.w.addDocument(doc);
    }
    iw.commit();

    final DirectoryReader reader = DirectoryReader.open(dir);
    final int docID = random().nextInt(100);
    for (Field fld : fields) {
      String fldName = fld.name();
      final Document sDoc = reader.document(docID, Collections.singleton(fldName));
      final IndexableField sField = sDoc.getField(fldName);
      if (Field.class.equals(fld.getClass())) {
        assertEquals(fld.binaryValue(), sField.binaryValue());
        assertEquals(fld.stringValue(), sField.stringValue());
      } else {
        assertEquals(fld.numericValue(), sField.numericValue());
      }
    }
    reader.close();
    iw.close();
    dir.close();
  }
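  // The flush-policy tests below depend on helpers defined elsewhere in the test class
  // (presumably TestFlushByRamOrCountsPolicy): MockDefaultFlushPolicy, a FlushByRamOrCountsPolicy
  // that records the peak byte/doc counts observed without triggering a flush; IndexThread, which
  // pulls documents from lineDocFile and indexes until the shared numDocs counter hits zero; and
  // assertActiveBytesAfter, which re-checks the flush control's byte accounting once all threads
  // are done.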
  protected void runFlushByRam(int numThreads, double maxRamMB, boolean ensureNotStalled)
      throws IOException, InterruptedException {
    final int numDocumentsToIndex = 10 + atLeast(30);
    AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex);
    Directory dir = newDirectory();
    MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy();
    IndexWriterConfig iwc =
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
            .setFlushPolicy(flushPolicy);
    final int numDWPT = 1 + atLeast(2);
    DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(numDWPT);
    iwc.setIndexerThreadPool(threadPool);
    iwc.setRAMBufferSizeMB(maxRamMB);
    iwc.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    iwc.setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    IndexWriter writer = new IndexWriter(dir, iwc);
    flushPolicy = (MockDefaultFlushPolicy) writer.getConfig().getFlushPolicy();
    assertFalse(flushPolicy.flushOnDocCount());
    assertFalse(flushPolicy.flushOnDeleteTerms());
    assertTrue(flushPolicy.flushOnRAM());
    DocumentsWriter docsWriter = writer.getDocsWriter();
    assertNotNull(docsWriter);
    DocumentsWriterFlushControl flushControl = docsWriter.flushControl;
    assertEquals(" bytes must be 0 after init", 0, flushControl.flushBytes());

    IndexThread[] threads = new IndexThread[numThreads];
    for (int x = 0; x < threads.length; x++) {
      threads[x] = new IndexThread(numDocs, numThreads, writer, lineDocFile, false);
      threads[x].start();
    }

    for (int x = 0; x < threads.length; x++) {
      threads[x].join();
    }
    final long maxRAMBytes = (long) (iwc.getRAMBufferSizeMB() * 1024. * 1024.);
    assertEquals(" all flushes must be due numThreads=" + numThreads, 0, flushControl.flushBytes());
    assertEquals(numDocumentsToIndex, writer.numDocs());
    assertEquals(numDocumentsToIndex, writer.maxDoc());
    assertTrue(
        "peak bytes without flush exceeded watermark",
        flushPolicy.peakBytesWithoutFlush <= maxRAMBytes);
    assertActiveBytesAfter(flushControl);
    if (flushPolicy.hasMarkedPending) {
      assertTrue(maxRAMBytes < flushControl.peakActiveBytes);
    }
    if (ensureNotStalled) {
      assertFalse(docsWriter.flushControl.stallControl.wasStalled());
    }
    writer.close();
    assertEquals(0, flushControl.activeBytes());
    dir.close();
  }
  public void testFlushDocCount() throws IOException, InterruptedException {
    int[] numThreads = new int[] {2 + atLeast(1), 1};
    for (int i = 0; i < numThreads.length; i++) {

      final int numDocumentsToIndex = 50 + atLeast(30);
      AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex);
      Directory dir = newDirectory();
      MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy();
      IndexWriterConfig iwc =
          newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
              .setFlushPolicy(flushPolicy);

      final int numDWPT = 1 + atLeast(2);
      DocumentsWriterPerThreadPool threadPool =
          new ThreadAffinityDocumentsWriterThreadPool(numDWPT);
      iwc.setIndexerThreadPool(threadPool);
      iwc.setMaxBufferedDocs(2 + atLeast(10));
      iwc.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
      iwc.setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
      IndexWriter writer = new IndexWriter(dir, iwc);
      flushPolicy = (MockDefaultFlushPolicy) writer.getConfig().getFlushPolicy();
      assertTrue(flushPolicy.flushOnDocCount());
      assertFalse(flushPolicy.flushOnDeleteTerms());
      assertFalse(flushPolicy.flushOnRAM());
      DocumentsWriter docsWriter = writer.getDocsWriter();
      assertNotNull(docsWriter);
      DocumentsWriterFlushControl flushControl = docsWriter.flushControl;
      assertEquals(" bytes must be 0 after init", 0, flushControl.flushBytes());

      IndexThread[] threads = new IndexThread[numThreads[i]];
      for (int x = 0; x < threads.length; x++) {
        threads[x] = new IndexThread(numDocs, numThreads[i], writer, lineDocFile, false);
        threads[x].start();
      }

      for (int x = 0; x < threads.length; x++) {
        threads[x].join();
      }

      assertEquals(
          " all flushes must be due numThreads=" + numThreads[i], 0, flushControl.flushBytes());
      assertEquals(numDocumentsToIndex, writer.numDocs());
      assertEquals(numDocumentsToIndex, writer.maxDoc());
      assertTrue(
          "peak doc count without flush exceeded watermark",
          flushPolicy.peakDocCountWithoutFlush <= iwc.getMaxBufferedDocs());
      assertActiveBytesAfter(flushControl);
      writer.close();
      assertEquals(0, flushControl.activeBytes());
      dir.close();
    }
  }
  public void testStallControl() throws InterruptedException, IOException {

    int[] numThreads = new int[] {4 + random().nextInt(8), 1};
    final int numDocumentsToIndex = 50 + random().nextInt(50);
    for (int i = 0; i < numThreads.length; i++) {
      AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex);
      MockDirectoryWrapper dir = newMockDirectory();
      // sometimes mock a very slow hard disk here so that flushing is very slow
      dir.setThrottling(MockDirectoryWrapper.Throttling.SOMETIMES);
      IndexWriterConfig iwc =
          newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
      iwc.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
      iwc.setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
      FlushPolicy flushPolicy = new FlushByRamOrCountsPolicy();
      iwc.setFlushPolicy(flushPolicy);

      DocumentsWriterPerThreadPool threadPool =
          new ThreadAffinityDocumentsWriterThreadPool(numThreads[i] == 1 ? 1 : 2);
      iwc.setIndexerThreadPool(threadPool);
      // with such a small RAM buffer we should be stalled quite quickly
      iwc.setRAMBufferSizeMB(0.25);
      IndexWriter writer = new IndexWriter(dir, iwc);
      IndexThread[] threads = new IndexThread[numThreads[i]];
      for (int x = 0; x < threads.length; x++) {
        threads[x] = new IndexThread(numDocs, numThreads[i], writer, lineDocFile, false);
        threads[x].start();
      }

      for (int x = 0; x < threads.length; x++) {
        threads[x].join();
      }
      DocumentsWriter docsWriter = writer.getDocsWriter();
      assertNotNull(docsWriter);
      DocumentsWriterFlushControl flushControl = docsWriter.flushControl;
      assertEquals(" all flushes must be due", 0, flushControl.flushBytes());
      assertEquals(numDocumentsToIndex, writer.numDocs());
      assertEquals(numDocumentsToIndex, writer.maxDoc());
      if (numThreads[i] == 1) {
        assertFalse(
            "single thread must not block numThreads: " + numThreads[i],
            docsWriter.flushControl.stallControl.hasBlocked());
      }
      if (docsWriter.flushControl.peakNetBytes
          > (2.d * iwc.getRAMBufferSizeMB() * 1024.d * 1024.d)) {
        assertTrue(docsWriter.flushControl.stallControl.wasStalled());
      }
      assertActiveBytesAfter(flushControl);
      writer.close(true);
      dir.close();
    }
  }
  @Nightly
  public void testBigDocuments() throws IOException {
    // "big" as "much bigger than the chunk size"
    // for this test we force a FS dir
    // we can't just use newFSDirectory, because this test doesn't really index anything.
    // so if we get NRTCachingDir+SimpleText, we make massive stored fields and OOM (LUCENE-4484)
    Directory dir =
        new MockDirectoryWrapper(
            random(), new MMapDirectory(_TestUtil.getTempDir("testBigDocuments")));
    IndexWriterConfig iwConf =
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);

    if (dir instanceof MockDirectoryWrapper) {
      ((MockDirectoryWrapper) dir).setThrottling(Throttling.NEVER);
    }

    final Document emptyDoc = new Document(); // no fields at all
    final Document bigDoc1 = new Document(); // lots of small fields
    final Document bigDoc2 = new Document(); // one very big field

    final Field idField = new StringField("id", "", Store.NO);
    emptyDoc.add(idField);
    bigDoc1.add(idField);
    bigDoc2.add(idField);

    final FieldType onlyStored = new FieldType(StringField.TYPE_STORED);
    onlyStored.setIndexed(false);

    final Field smallField =
        new Field("fld", randomByteArray(random().nextInt(10), 256), onlyStored);
    final int numFields = RandomInts.randomIntBetween(random(), 500000, 1000000);
    for (int i = 0; i < numFields; ++i) {
      bigDoc1.add(smallField);
    }

    final Field bigField =
        new Field(
            "fld",
            randomByteArray(RandomInts.randomIntBetween(random(), 1000000, 5000000), 2),
            onlyStored);
    bigDoc2.add(bigField);

    final int numDocs = atLeast(5);
    final Document[] docs = new Document[numDocs];
    for (int i = 0; i < numDocs; ++i) {
      docs[i] = RandomPicks.randomFrom(random(), Arrays.asList(emptyDoc, bigDoc1, bigDoc2));
    }
    for (int i = 0; i < numDocs; ++i) {
      idField.setStringValue("" + i);
      iw.addDocument(docs[i]);
      if (random().nextInt(numDocs) == 0) {
        iw.commit();
      }
    }
    iw.commit();
    iw.forceMerge(1); // look at what happens when big docs are merged
    final DirectoryReader rd = DirectoryReader.open(dir);
    final IndexSearcher searcher = new IndexSearcher(rd);
    for (int i = 0; i < numDocs; ++i) {
      final Query query = new TermQuery(new Term("id", "" + i));
      final TopDocs topDocs = searcher.search(query, 1);
      assertEquals("" + i, 1, topDocs.totalHits);
      final Document doc = rd.document(topDocs.scoreDocs[0].doc);
      assertNotNull(doc);
      final IndexableField[] fieldValues = doc.getFields("fld");
      assertEquals(docs[i].getFields("fld").length, fieldValues.length);
      if (fieldValues.length > 0) {
        assertEquals(docs[i].getFields("fld")[0].binaryValue(), fieldValues[0].binaryValue());
      }
    }
    rd.close();
    iw.close();
    dir.close();
  }
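  // randomByteArray is used above and in testWriteReadMerge below but is not shown; a minimal
  // sketch of the assumed helper (values drawn from [0, max), so max=2 yields highly compressible
  // data while max=256 does not):
  private static byte[] randomByteArray(int length, int max) {
    final byte[] result = new byte[length];
    for (int i = 0; i < length; ++i) {
      result[i] = (byte) random().nextInt(max);
    }
    return result;
  }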
  public void testWriteReadMerge() throws IOException {
    // pick a codec other than the default, so that we end up merging segments written by different codecs
    final Codec otherCodec;
    if ("SimpleText".equals(Codec.getDefault().getName())) {
      otherCodec = new Lucene46Codec();
    } else {
      otherCodec = new SimpleTextCodec();
    }
    Directory dir = newDirectory();
    IndexWriterConfig iwConf =
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf.clone());

    final int docCount = atLeast(200);
    final byte[][][] data = new byte[docCount][][];
    for (int i = 0; i < docCount; ++i) {
      final int fieldCount =
          rarely()
              ? RandomInts.randomIntBetween(random(), 1, 500)
              : RandomInts.randomIntBetween(random(), 1, 5);
      data[i] = new byte[fieldCount][];
      for (int j = 0; j < fieldCount; ++j) {
        final int length = rarely() ? random().nextInt(1000) : random().nextInt(10);
        final int max = rarely() ? 256 : 2;
        data[i][j] = randomByteArray(length, max);
      }
    }

    final FieldType type = new FieldType(StringField.TYPE_STORED);
    type.setIndexed(false);
    type.freeze();
    IntField id = new IntField("id", 0, Store.YES);
    for (int i = 0; i < data.length; ++i) {
      Document doc = new Document();
      doc.add(id);
      id.setIntValue(i);
      for (int j = 0; j < data[i].length; ++j) {
        Field f = new Field("bytes" + j, data[i][j], type);
        doc.add(f);
      }
      iw.w.addDocument(doc);
      if (random().nextBoolean() && (i % (data.length / 10) == 0)) {
        iw.w.close();
        // test merging against a non-compressing codec
        if (iwConf.getCodec() == otherCodec) {
          iwConf.setCodec(Codec.getDefault());
        } else {
          iwConf.setCodec(otherCodec);
        }
        iw = new RandomIndexWriter(random(), dir, iwConf.clone());
      }
    }

    for (int i = 0; i < 10; ++i) {
      final int min = random().nextInt(data.length);
      final int max = min + random().nextInt(20);
      iw.deleteDocuments(NumericRangeQuery.newIntRange("id", min, max, true, false));
    }

    iw.forceMerge(2); // force merges with deletions

    iw.commit();

    final DirectoryReader ir = DirectoryReader.open(dir);
    assertTrue(ir.numDocs() > 0);
    int numDocs = 0;
    for (int i = 0; i < ir.maxDoc(); ++i) {
      final Document doc = ir.document(i);
      if (doc == null) {
        continue;
      }
      ++numDocs;
      final int docId = doc.getField("id").numericValue().intValue();
      assertEquals(data[docId].length + 1, doc.getFields().size());
      for (int j = 0; j < data[docId].length; ++j) {
        final byte[] arr = data[docId][j];
        final BytesRef arr2Ref = doc.getBinaryValue("bytes" + j);
        final byte[] arr2 =
            Arrays.copyOfRange(arr2Ref.bytes, arr2Ref.offset, arr2Ref.offset + arr2Ref.length);
        assertArrayEquals(arr, arr2);
      }
    }
    assertTrue(ir.numDocs() <= numDocs);
    ir.close();

    iw.deleteAll();
    iw.commit();
    iw.forceMerge(1);

    iw.close();
    dir.close();
  }
  public void testConcurrentReads() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwConf =
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);

    // make sure the readers are properly cloned
    final Document doc = new Document();
    final Field field = new StringField("fld", "", Store.YES);
    doc.add(field);
    final int numDocs = atLeast(1000);
    for (int i = 0; i < numDocs; ++i) {
      field.setStringValue("" + i);
      iw.addDocument(doc);
    }
    iw.commit();

    final DirectoryReader rd = DirectoryReader.open(dir);
    final IndexSearcher searcher = new IndexSearcher(rd);
    final int concurrentReads = atLeast(5);
    final int readsPerThread = atLeast(50);
    final List<Thread> readThreads = new ArrayList<Thread>();
    final AtomicReference<Exception> ex = new AtomicReference<Exception>();
    for (int i = 0; i < concurrentReads; ++i) {
      readThreads.add(
          new Thread() {

            int[] queries;

            {
              queries = new int[readsPerThread];
              for (int i = 0; i < queries.length; ++i) {
                queries[i] = random().nextInt(numDocs);
              }
            }

            @Override
            public void run() {
              for (int q : queries) {
                final Query query = new TermQuery(new Term("fld", "" + q));
                try {
                  final TopDocs topDocs = searcher.search(query, 1);
                  if (topDocs.totalHits != 1) {
                    throw new IllegalStateException("Expected 1 hit, got " + topDocs.totalHits);
                  }
                  final Document sdoc = rd.document(topDocs.scoreDocs[0].doc);
                  if (sdoc == null || sdoc.get("fld") == null) {
                    throw new IllegalStateException("Could not find document " + q);
                  }
                  if (!Integer.toString(q).equals(sdoc.get("fld"))) {
                    throw new IllegalStateException(
                        "Expected " + q + ", but got " + sdoc.get("fld"));
                  }
                } catch (Exception e) {
                  ex.compareAndSet(null, e);
                }
              }
            }
          });
    }
    for (Thread thread : readThreads) {
      thread.start();
    }
    for (Thread thread : readThreads) {
      thread.join();
    }
    rd.close();
    if (ex.get() != null) {
      throw ex.get();
    }

    iw.close();
    dir.close();
  }
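  // Stores enough random (hence incompressible) data that the stored-fields file (.fdt) grows
  // past 4GB, then verifies that the last document can still be retrieved, i.e. that stored-field
  // offsets beyond 2^32 still resolve correctly.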
  @Nightly
  public void test() throws Exception {
    MockDirectoryWrapper dir =
        new MockDirectoryWrapper(random(), new MMapDirectory(createTempDir("4GBStoredFields")));
    dir.setThrottling(MockDirectoryWrapper.Throttling.NEVER);

    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
    iwc.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    iwc.setRAMBufferSizeMB(256.0);
    iwc.setMergeScheduler(new ConcurrentMergeScheduler());
    iwc.setMergePolicy(newLogMergePolicy(false, 10));
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    // TODO: we disable "Compressing" since it likes to pick very extreme values which will be too
    // slow for this test.
    // maybe we should factor out crazy cases to ExtremeCompressing? then annotations can handle
    // this stuff...
    if (random().nextBoolean()) {
      iwc.setCodec(CompressingCodec.reasonableInstance(random()));
    }

    IndexWriter w = new IndexWriter(dir, iwc);

    MergePolicy mp = w.getConfig().getMergePolicy();
    if (mp instanceof LogByteSizeMergePolicy) {
      // 1 petabyte:
      ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024 * 1024 * 1024);
    }

    final Document doc = new Document();
    final FieldType ft = new FieldType();
    ft.setStored(true);
    ft.freeze();
    final int valueLength = RandomInts.randomIntBetween(random(), 1 << 13, 1 << 20);
    final byte[] value = new byte[valueLength];
    for (int i = 0; i < valueLength; ++i) {
      // random so that even compressing codecs can't compress it
      value[i] = (byte) random().nextInt(256);
    }
    final Field f = new Field("fld", value, ft);
    doc.add(f);

    final int numDocs = (int) ((1L << 32) / valueLength + 100);
    for (int i = 0; i < numDocs; ++i) {
      w.addDocument(doc);
      if (VERBOSE && i % (numDocs / 10) == 0) {
        System.out.println(i + " of " + numDocs + "...");
      }
    }
    w.forceMerge(1);
    w.close();
    if (VERBOSE) {
      boolean found = false;
      for (String file : dir.listAll()) {
        if (file.endsWith(".fdt")) {
          final long fileLength = dir.fileLength(file);
          if (fileLength >= 1L << 32) {
            found = true;
          }
          System.out.println("File length of " + file + " : " + fileLength);
        }
      }
      if (!found) {
        System.out.println("No .fdt file larger than 4GB, test bug?");
      }
    }

    DirectoryReader rd = DirectoryReader.open(dir);
    Document sd = rd.document(numDocs - 1);
    assertNotNull(sd);
    assertEquals(1, sd.getFields().size());
    BytesRef valueRef = sd.getBinaryValue("fld");
    assertNotNull(valueRef);
    assertEquals(new BytesRef(value), valueRef);
    rd.close();

    dir.close();
  }
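  // RangeMergePolicy and DocHelper.createDocument are helpers defined elsewhere. Presumably
  // RangeMergePolicy only merges when doMerge is set, and then merges exactly the segments in
  // [start, start + length); that is how the test below forces a merge of segments 0 and 1.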
  public void testDeletes1() throws Exception {
    // IndexWriter.debug2 = System.out;
    Directory dir = new MockDirectoryWrapper(new Random(random().nextLong()), new RAMDirectory());
    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
    iwc.setMergeScheduler(new SerialMergeScheduler());
    iwc.setMaxBufferedDocs(5000);
    iwc.setRAMBufferSizeMB(100);
    RangeMergePolicy fsmp = new RangeMergePolicy(false);
    iwc.setMergePolicy(fsmp);
    IndexWriter writer = new IndexWriter(dir, iwc);
    for (int x = 0; x < 5; x++) {
      writer.addDocument(DocHelper.createDocument(x, "1", 2));
      // System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
    }
    // System.out.println("commit1");
    writer.commit();
    assertEquals(1, writer.segmentInfos.size());
    for (int x = 5; x < 10; x++) {
      writer.addDocument(DocHelper.createDocument(x, "2", 2));
      // System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
    }
    // System.out.println("commit2");
    writer.commit();
    assertEquals(2, writer.segmentInfos.size());

    for (int x = 10; x < 15; x++) {
      writer.addDocument(DocHelper.createDocument(x, "3", 2));
      // System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
    }

    writer.deleteDocuments(new Term("id", "1"));

    writer.deleteDocuments(new Term("id", "11"));

    // flushing without applying deletes means
    // there will still be deletes in the segment infos
    writer.flush(false, false);
    assertTrue(writer.bufferedUpdatesStream.any());

    // getReader flushes pending deletes,
    // so there should not be any more
    IndexReader r1 = writer.getReader();
    assertFalse(writer.bufferedUpdatesStream.any());
    r1.close();

    // delete id:2 from the first segment
    // merge segments 0 and 1
    // which should apply the delete id:2
    writer.deleteDocuments(new Term("id", "2"));
    writer.flush(false, false);
    fsmp = (RangeMergePolicy) writer.getConfig().getMergePolicy();
    fsmp.doMerge = true;
    fsmp.start = 0;
    fsmp.length = 2;
    writer.maybeMerge();

    assertEquals(2, writer.segmentInfos.size());

    // id:2 shouldn't exist anymore because
    // it's been applied in the merge and now it's gone
    IndexReader r2 = writer.getReader();
    int[] id2docs = toDocsArray(new Term("id", "2"), null, r2);
    assertTrue(id2docs == null);
    r2.close();

    /*
    // added docs are in the ram buffer
    for (int x = 15; x < 20; x++) {
      writer.addDocument(TestIndexWriterReader.createDocument(x, "4", 2));
      System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
    }
    assertTrue(writer.numRamDocs() > 0);

    // delete from the ram buffer
    writer.deleteDocuments(new Term("id", Integer.toString(13)));

    Term id3 = new Term("id", Integer.toString(3));

    // delete from the 1st segment
    writer.deleteDocuments(id3);

    assertTrue(writer.numRamDocs() > 0);

    // System.out.println("segdels1:" + writer.docWriter.deletesToString());
    // assertTrue(writer.docWriter.segmentDeletes.size() > 0);

    // we cause a merge to happen
    fsmp.doMerge = true;
    fsmp.start = 0;
    fsmp.length = 2;
    System.out.println("maybeMerge " + writer.segmentInfos);

    SegmentInfo info0 = writer.segmentInfos.info(0);
    SegmentInfo info1 = writer.segmentInfos.info(1);

    writer.maybeMerge();
    System.out.println("maybeMerge after " + writer.segmentInfos);

    // there should be docs in RAM
    assertTrue(writer.numRamDocs() > 0);

    // assert we've merged the 1 and 2 segments
    // and still have a segment leftover == 2
    assertEquals(2, writer.segmentInfos.size());
    assertFalse(segThere(info0, writer.segmentInfos));
    assertFalse(segThere(info1, writer.segmentInfos));

    // System.out.println("segdels2:" + writer.docWriter.deletesToString());
    // assertTrue(writer.docWriter.segmentDeletes.size() > 0);

    IndexReader r = writer.getReader();
    IndexReader r1 = r.getSequentialSubReaders()[0];
    printDelDocs(r1.getLiveDocs());
    int[] docs = toDocsArray(id3, null, r);
    System.out.println("id3 docs:" + Arrays.toString(docs));
    // there shouldn't be any docs for id:3
    assertTrue(docs == null);
    r.close();

    part2(writer, fsmp);
    */
    // System.out.println("segdels2:"+writer.docWriter.segmentDeletes.toString());
    // System.out.println("close");
    writer.close();
    dir.close();
  }
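  // toDocsArray is used above but not shown; a sketch of the assumed behavior (not necessarily
  // the original helper): collect all docIDs matching the term, or return null if none match.
  private static int[] toDocsArray(Term term, Bits liveDocs, IndexReader reader) throws IOException {
    DocsEnum docsEnum = MultiFields.getTermDocsEnum(reader, liveDocs, term.field(), term.bytes());
    if (docsEnum == null) {
      return null; // term does not exist
    }
    final List<Integer> docs = new ArrayList<Integer>();
    for (int doc = docsEnum.nextDoc();
        doc != DocIdSetIterator.NO_MORE_DOCS;
        doc = docsEnum.nextDoc()) {
      docs.add(doc);
    }
    if (docs.isEmpty()) {
      return null;
    }
    final int[] result = new int[docs.size()];
    for (int i = 0; i < result.length; i++) {
      result[i] = docs.get(i);
    }
    return result;
  }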
  public void testLiveMaxMergeCount() throws Exception {
    Directory d = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    TieredMergePolicy tmp = new TieredMergePolicy();
    tmp.setSegmentsPerTier(1000);
    tmp.setMaxMergeAtOnce(1000);
    tmp.setMaxMergeAtOnceExplicit(10);
    iwc.setMergePolicy(tmp);
    iwc.setMaxBufferedDocs(2);
    iwc.setRAMBufferSizeMB(-1);

    final AtomicInteger maxRunningMergeCount = new AtomicInteger();

    ConcurrentMergeScheduler cms =
        new ConcurrentMergeScheduler() {

          final AtomicInteger runningMergeCount = new AtomicInteger();

          @Override
          public void doMerge(MergePolicy.OneMerge merge) throws IOException {
            int count = runningMergeCount.incrementAndGet();
            // evil?
            synchronized (this) {
              if (count > maxRunningMergeCount.get()) {
                maxRunningMergeCount.set(count);
              }
            }
            try {
              super.doMerge(merge);
            } finally {
              runningMergeCount.decrementAndGet();
            }
          }
        };

    cms.setMaxMergesAndThreads(5, 3);

    iwc.setMergeScheduler(cms);

    IndexWriter w = new IndexWriter(d, iwc);
    // Makes 100 segments
    for (int i = 0; i < 200; i++) {
      w.addDocument(new Document());
    }

    // No merges should have run so far, because TMP has high segmentsPerTier:
    assertEquals(0, maxRunningMergeCount.get());

    w.forceMerge(1);

    // At most 5 merge threads should have launched at once:
    assertTrue("maxRunningMergeCount=" + maxRunningMergeCount, maxRunningMergeCount.get() <= 5);
    maxRunningMergeCount.set(0);

    // Makes another 100 segments
    for (int i = 0; i < 200; i++) {
      w.addDocument(new Document());
    }

    ((ConcurrentMergeScheduler) w.getConfig().getMergeScheduler()).setMaxMergesAndThreads(1, 1);
    w.forceMerge(1);

    // At most 1 merge thread should have launched at once:
    assertEquals(1, maxRunningMergeCount.get());

    w.close();
    d.close();
  }
  // LUCENE-4544
  public void testMaxMergeCount() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));

    final int maxMergeCount = TestUtil.nextInt(random(), 1, 5);
    final int maxMergeThreads = TestUtil.nextInt(random(), 1, maxMergeCount);
    final CountDownLatch enoughMergesWaiting = new CountDownLatch(maxMergeCount);
    final AtomicInteger runningMergeCount = new AtomicInteger(0);
    final AtomicBoolean failed = new AtomicBoolean();

    if (VERBOSE) {
      System.out.println(
          "TEST: maxMergeCount=" + maxMergeCount + " maxMergeThreads=" + maxMergeThreads);
    }

    ConcurrentMergeScheduler cms =
        new ConcurrentMergeScheduler() {

          @Override
          protected void doMerge(MergePolicy.OneMerge merge) throws IOException {
            try {
              // Stall all incoming merges until we see
              // maxMergeCount:
              int count = runningMergeCount.incrementAndGet();
              try {
                assertTrue(
                    "count=" + count + " vs maxMergeCount=" + maxMergeCount,
                    count <= maxMergeCount);
                enoughMergesWaiting.countDown();

                // Stall this merge until we see exactly
                // maxMergeCount merges waiting
                while (true) {
                  if (enoughMergesWaiting.await(10, TimeUnit.MILLISECONDS) || failed.get()) {
                    break;
                  }
                }
                // Then sleep a bit to give a chance for the bug
                // (too many pending merges) to appear:
                Thread.sleep(20);
                super.doMerge(merge);
              } finally {
                runningMergeCount.decrementAndGet();
              }
            } catch (Throwable t) {
              failed.set(true);
              writer.mergeFinish(merge);
              throw new RuntimeException(t);
            }
          }
        };
    cms.setMaxMergesAndThreads(maxMergeCount, maxMergeThreads);
    iwc.setMergeScheduler(cms);
    iwc.setMaxBufferedDocs(2);

    TieredMergePolicy tmp = new TieredMergePolicy();
    iwc.setMergePolicy(tmp);
    tmp.setMaxMergeAtOnce(2);
    tmp.setSegmentsPerTier(2);

    IndexWriter w = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(newField("field", "field", TextField.TYPE_NOT_STORED));
    while (enoughMergesWaiting.getCount() != 0 && !failed.get()) {
      for (int i = 0; i < 10; i++) {
        w.addDocument(doc);
      }
    }
    w.close(false);
    dir.close();
  }
  public void testLongPostings() throws Exception {
    // Embed random().nextLong() in the dir name so that we own the
    // randomness (ie the same seed will point to the same dir):
    Directory dir =
        newFSDirectory(_TestUtil.getTempDir("longpostings" + "." + random().nextLong()));

    final int NUM_DOCS = atLeast(2000);

    if (VERBOSE) {
      System.out.println("TEST: NUM_DOCS=" + NUM_DOCS);
    }
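    // getRandomTerm is defined elsewhere; it is assumed to return a random indexable term and,
    // when its argument is non-null, a term different from that argument, so s1 != s2 below.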

    final String s1 = getRandomTerm(null);
    final String s2 = getRandomTerm(s1);

    if (VERBOSE) {
      System.out.println("\nTEST: s1=" + s1 + " s2=" + s2);
      /*
      for(int idx=0;idx<s1.length();idx++) {
        System.out.println("  s1 ch=0x" + Integer.toHexString(s1.charAt(idx)));
      }
      for(int idx=0;idx<s2.length();idx++) {
        System.out.println("  s2 ch=0x" + Integer.toHexString(s2.charAt(idx)));
      }
      */
    }

    final FixedBitSet isS1 = new FixedBitSet(NUM_DOCS);
    for (int idx = 0; idx < NUM_DOCS; idx++) {
      if (random().nextBoolean()) {
        isS1.set(idx);
      }
    }

    final IndexReader r;
    final IndexWriterConfig iwc =
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
            .setOpenMode(IndexWriterConfig.OpenMode.CREATE)
            .setMergePolicy(newLogMergePolicy());
    iwc.setRAMBufferSizeMB(16.0 + 16.0 * random().nextDouble());
    iwc.setMaxBufferedDocs(-1);
    final RandomIndexWriter riw = new RandomIndexWriter(random(), dir, iwc);

    for (int idx = 0; idx < NUM_DOCS; idx++) {
      final Document doc = new Document();
      String s = isS1.get(idx) ? s1 : s2;
      final Field f = newTextField("field", s, Field.Store.NO);
      final int count = _TestUtil.nextInt(random(), 1, 4);
      for (int ct = 0; ct < count; ct++) {
        doc.add(f);
      }
      riw.addDocument(doc);
    }

    r = riw.getReader();
    riw.close();

    /*
    if (VERBOSE) {
      System.out.println("TEST: terms");
      TermEnum termEnum = r.terms();
      while(termEnum.next()) {
        System.out.println("  term=" + termEnum.term() + " len=" + termEnum.term().text().length());
        assertTrue(termEnum.docFreq() > 0);
        System.out.println("    s1?=" + (termEnum.term().text().equals(s1)) + " s1len=" + s1.length());
        System.out.println("    s2?=" + (termEnum.term().text().equals(s2)) + " s2len=" + s2.length());
        final String s = termEnum.term().text();
        for(int idx=0;idx<s.length();idx++) {
          System.out.println("      ch=0x" + Integer.toHexString(s.charAt(idx)));
        }
      }
    }
    */

    assertEquals(NUM_DOCS, r.numDocs());
    assertTrue(r.docFreq(new Term("field", s1)) > 0);
    assertTrue(r.docFreq(new Term("field", s2)) > 0);

    int num = atLeast(1000);
    for (int iter = 0; iter < num; iter++) {

      final String term;
      final boolean doS1;
      if (random().nextBoolean()) {
        term = s1;
        doS1 = true;
      } else {
        term = s2;
        doS1 = false;
      }

      if (VERBOSE) {
        System.out.println("\nTEST: iter=" + iter + " doS1=" + doS1);
      }

      final DocsAndPositionsEnum postings =
          MultiFields.getTermPositionsEnum(r, null, "field", new BytesRef(term));

      int docID = -1;
      while (docID < DocIdSetIterator.NO_MORE_DOCS) {
        final int what = random().nextInt(3);
        if (what == 0) {
          if (VERBOSE) {
            System.out.println("TEST: docID=" + docID + "; do next()");
          }
          // nextDoc
          int expected = docID + 1;
          while (true) {
            if (expected == NUM_DOCS) {
              expected = Integer.MAX_VALUE;
              break;
            } else if (isS1.get(expected) == doS1) {
              break;
            } else {
              expected++;
            }
          }
          docID = postings.nextDoc();
          if (VERBOSE) {
            System.out.println("  got docID=" + docID);
          }
          assertEquals(expected, docID);
          if (docID == DocIdSetIterator.NO_MORE_DOCS) {
            break;
          }

          if (random().nextInt(6) == 3) {
            final int freq = postings.freq();
            assertTrue(freq >= 1 && freq <= 4);
            for (int pos = 0; pos < freq; pos++) {
              assertEquals(pos, postings.nextPosition());
              if (random().nextBoolean()) {
                postings.getPayload();
                if (random().nextBoolean()) {
                  postings.getPayload(); // get it again
                }
              }
            }
          }
        } else {
          // advance
          final int targetDocID;
          if (docID == -1) {
            targetDocID = random().nextInt(NUM_DOCS + 1);
          } else {
            targetDocID = docID + _TestUtil.nextInt(random(), 1, NUM_DOCS - docID);
          }
          if (VERBOSE) {
            System.out.println("TEST: docID=" + docID + "; do advance(" + targetDocID + ")");
          }
          int expected = targetDocID;
          while (true) {
            if (expected == NUM_DOCS) {
              expected = Integer.MAX_VALUE;
              break;
            } else if (isS1.get(expected) == doS1) {
              break;
            } else {
              expected++;
            }
          }

          docID = postings.advance(targetDocID);
          if (VERBOSE) {
            System.out.println("  got docID=" + docID);
          }
          assertEquals(expected, docID);
          if (docID == DocIdSetIterator.NO_MORE_DOCS) {
            break;
          }

          if (random().nextInt(6) == 3) {
            final int freq = postings.freq();
            assertTrue(freq >= 1 && freq <= 4);
            for (int pos = 0; pos < freq; pos++) {
              assertEquals(pos, postings.nextPosition());
              if (random().nextBoolean()) {
                postings.getPayload();
                if (random().nextBoolean()) {
                  postings.getPayload(); // get it again
                }
              }
            }
          }
        }
      }
    }
    r.close();
    dir.close();
  }
  // LUCENE-5644: index docs with multiple threads, but between flushes we limit how many threads
  // can index concurrently in the next iteration, and then verify that no more segments were
  // flushed than the number of threads:
  public void testSegmentCountOnFlushRandom() throws Exception {
    Directory dir = newFSDirectory(createTempDir());
    IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));

    int maxThreadStates = TestUtil.nextInt(random(), 1, 12);

    if (VERBOSE) {
      System.out.println("TEST: maxThreadStates=" + maxThreadStates);
    }

    // Never trigger flushes (so we only flush on getReader):
    iwc.setMaxBufferedDocs(100000000);
    iwc.setRAMBufferSizeMB(-1);
    iwc.setMaxThreadStates(maxThreadStates);

    // Never trigger merges (so we can simplistically count flushed segments):
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);

    final IndexWriter w = new IndexWriter(dir, iwc);

    // How many threads are indexing in the current cycle:
    final AtomicInteger indexingCount = new AtomicInteger();

    // How many threads we will use on each cycle:
    final AtomicInteger maxThreadCount = new AtomicInteger();

    CheckSegmentCount checker = new CheckSegmentCount(w, maxThreadCount, indexingCount);

    // We spin up 10 threads up front, but then in between flushes we limit how many can run on each
    // iteration
    final int ITERS = 100;
    Thread[] threads = new Thread[MAX_THREADS_AT_ONCE];

    // We use this to stop all threads once they've indexed their docs in the current iter, and pull
    // a new NRT reader, and verify the
    // segment count:
    final CyclicBarrier barrier = new CyclicBarrier(MAX_THREADS_AT_ONCE, checker);

    for (int i = 0; i < threads.length; i++) {
      threads[i] =
          new Thread() {
            @Override
            public void run() {
              try {
                for (int iter = 0; iter < ITERS; iter++) {
                  if (indexingCount.incrementAndGet() <= maxThreadCount.get()) {
                    if (VERBOSE) {
                      System.out.println(
                          "TEST: " + Thread.currentThread().getName() + ": do index");
                    }

                    // We get to index on this cycle:
                    Document doc = new Document();
                    doc.add(
                        new TextField(
                            "field",
                            "here is some text that is a bit longer than normal trivial text",
                            Field.Store.NO));
                    for (int j = 0; j < 200; j++) {
                      w.addDocument(doc);
                    }
                  } else {
                    // We lose: no indexing for us on this cycle
                    if (VERBOSE) {
                      System.out.println(
                          "TEST: " + Thread.currentThread().getName() + ": don't index");
                    }
                  }
                  barrier.await();
                }
              } catch (Exception e) {
                throw new RuntimeException(e);
              }
            }
          };
      threads[i].start();
    }

    for (Thread t : threads) {
      t.join();
    }

    IOUtils.close(checker, w, dir);
  }
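  // CheckSegmentCount and MAX_THREADS_AT_ONCE are defined elsewhere in the test class. Presumably
  // MAX_THREADS_AT_ONCE is 10 (matching the "10 threads up front" comment above), and
  // CheckSegmentCount is a Closeable Runnable that the barrier trips after every cycle: it reopens
  // an NRT reader, asserts that no more new segments were flushed than the number of threads that
  // indexed in the last cycle, resets indexingCount, and picks a new random maxThreadCount for the
  // next cycle.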