public void testEmptyDocs() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwConf =
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
  iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);

  // make sure that the fact that documents might be empty is not a problem
  final Document emptyDoc = new Document();
  final int numDocs = random().nextBoolean() ? 1 : atLeast(1000);
  for (int i = 0; i < numDocs; ++i) {
    iw.addDocument(emptyDoc);
  }
  iw.commit();
  final DirectoryReader rd = DirectoryReader.open(dir);
  for (int i = 0; i < numDocs; ++i) {
    final Document doc = rd.document(i);
    assertNotNull(doc);
    assertTrue(doc.getFields().isEmpty());
  }
  rd.close();

  iw.close();
  dir.close();
}
public void testTotalBytesSize() throws Exception {
  Directory d = newDirectory();
  if (d instanceof MockDirectoryWrapper) {
    ((MockDirectoryWrapper) d).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
  }
  IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
  iwc.setMaxBufferedDocs(5);
  iwc.setMergeScheduler(new TrackingCMS());
  if (TestUtil.getPostingsFormat("id").equals("SimpleText")) {
    // no
    iwc.setCodec(TestUtil.alwaysPostingsFormat(new Lucene41PostingsFormat()));
  }
  IndexWriter w = new IndexWriter(d, iwc);
  for (int i = 0; i < 1000; i++) {
    Document doc = new Document();
    doc.add(new StringField("id", "" + i, Field.Store.NO));
    w.addDocument(doc);

    if (random().nextBoolean()) {
      w.deleteDocuments(new Term("id", "" + random().nextInt(i + 1)));
    }
  }
  assertTrue(((TrackingCMS) w.getConfig().getMergeScheduler()).totMergedBytes != 0);
  w.close();
  d.close();
}
public void testReadSkip() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwConf =
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
  iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);

  FieldType ft = new FieldType();
  ft.setStored(true);
  ft.freeze();

  final String string = _TestUtil.randomSimpleString(random(), 50);
  final byte[] bytes = string.getBytes("UTF-8");
  final long l = random().nextBoolean() ? random().nextInt(42) : random().nextLong();
  final int i = random().nextBoolean() ? random().nextInt(42) : random().nextInt();
  final float f = random().nextFloat();
  final double d = random().nextDouble();

  List<Field> fields = Arrays.asList(
      new Field("bytes", bytes, ft),
      new Field("string", string, ft),
      new LongField("long", l, Store.YES),
      new IntField("int", i, Store.YES),
      new FloatField("float", f, Store.YES),
      new DoubleField("double", d, Store.YES));

  for (int k = 0; k < 100; ++k) {
    Document doc = new Document();
    for (Field fld : fields) {
      doc.add(fld);
    }
    iw.w.addDocument(doc);
  }
  iw.commit();

  final DirectoryReader reader = DirectoryReader.open(dir);
  final int docID = random().nextInt(100);
  for (Field fld : fields) {
    String fldName = fld.name();
    final Document sDoc = reader.document(docID, Collections.singleton(fldName));
    final IndexableField sField = sDoc.getField(fldName);
    if (Field.class.equals(fld.getClass())) {
      assertEquals(fld.binaryValue(), sField.binaryValue());
      assertEquals(fld.stringValue(), sField.stringValue());
    } else {
      assertEquals(fld.numericValue(), sField.numericValue());
    }
  }
  reader.close();
  iw.close();
  dir.close();
}
protected void runFlushByRam(int numThreads, double maxRamMB, boolean ensureNotStalled)
    throws IOException, InterruptedException {
  final int numDocumentsToIndex = 10 + atLeast(30);
  AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex);
  Directory dir = newDirectory();
  MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy();
  IndexWriterConfig iwc =
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
          .setFlushPolicy(flushPolicy);
  final int numDWPT = 1 + atLeast(2);
  DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(numDWPT);
  iwc.setIndexerThreadPool(threadPool);
  iwc.setRAMBufferSizeMB(maxRamMB);
  iwc.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
  iwc.setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
  IndexWriter writer = new IndexWriter(dir, iwc);
  flushPolicy = (MockDefaultFlushPolicy) writer.getConfig().getFlushPolicy();
  assertFalse(flushPolicy.flushOnDocCount());
  assertFalse(flushPolicy.flushOnDeleteTerms());
  assertTrue(flushPolicy.flushOnRAM());
  DocumentsWriter docsWriter = writer.getDocsWriter();
  assertNotNull(docsWriter);
  DocumentsWriterFlushControl flushControl = docsWriter.flushControl;
  assertEquals(" bytes must be 0 after init", 0, flushControl.flushBytes());

  IndexThread[] threads = new IndexThread[numThreads];
  for (int x = 0; x < threads.length; x++) {
    threads[x] = new IndexThread(numDocs, numThreads, writer, lineDocFile, false);
    threads[x].start();
  }

  for (int x = 0; x < threads.length; x++) {
    threads[x].join();
  }
  final long maxRAMBytes = (long) (iwc.getRAMBufferSizeMB() * 1024. * 1024.);
  assertEquals(" all flushes must be due numThreads=" + numThreads, 0, flushControl.flushBytes());
  assertEquals(numDocumentsToIndex, writer.numDocs());
  assertEquals(numDocumentsToIndex, writer.maxDoc());
  assertTrue("peak bytes without flush exceeded watermark",
      flushPolicy.peakBytesWithoutFlush <= maxRAMBytes);
  assertActiveBytesAfter(flushControl);
  if (flushPolicy.hasMarkedPending) {
    assertTrue(maxRAMBytes < flushControl.peakActiveBytes);
  }
  if (ensureNotStalled) {
    assertFalse(docsWriter.flushControl.stallControl.wasStalled());
  }
  writer.close();
  assertEquals(0, flushControl.activeBytes());
  dir.close();
}
public void testFlushDocCount() throws IOException, InterruptedException {
  int[] numThreads = new int[] {2 + atLeast(1), 1};
  for (int i = 0; i < numThreads.length; i++) {
    final int numDocumentsToIndex = 50 + atLeast(30);
    AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex);
    Directory dir = newDirectory();
    MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy();
    IndexWriterConfig iwc =
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
            .setFlushPolicy(flushPolicy);
    final int numDWPT = 1 + atLeast(2);
    DocumentsWriterPerThreadPool threadPool =
        new ThreadAffinityDocumentsWriterThreadPool(numDWPT);
    iwc.setIndexerThreadPool(threadPool);
    iwc.setMaxBufferedDocs(2 + atLeast(10));
    iwc.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    iwc.setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    IndexWriter writer = new IndexWriter(dir, iwc);
    flushPolicy = (MockDefaultFlushPolicy) writer.getConfig().getFlushPolicy();
    assertTrue(flushPolicy.flushOnDocCount());
    assertFalse(flushPolicy.flushOnDeleteTerms());
    assertFalse(flushPolicy.flushOnRAM());
    DocumentsWriter docsWriter = writer.getDocsWriter();
    assertNotNull(docsWriter);
    DocumentsWriterFlushControl flushControl = docsWriter.flushControl;
    assertEquals(" bytes must be 0 after init", 0, flushControl.flushBytes());

    IndexThread[] threads = new IndexThread[numThreads[i]];
    for (int x = 0; x < threads.length; x++) {
      threads[x] = new IndexThread(numDocs, numThreads[i], writer, lineDocFile, false);
      threads[x].start();
    }

    for (int x = 0; x < threads.length; x++) {
      threads[x].join();
    }

    assertEquals(" all flushes must be due numThreads=" + numThreads[i], 0,
        flushControl.flushBytes());
    assertEquals(numDocumentsToIndex, writer.numDocs());
    assertEquals(numDocumentsToIndex, writer.maxDoc());
    assertTrue("peak bytes without flush exceeded watermark",
        flushPolicy.peakDocCountWithoutFlush <= iwc.getMaxBufferedDocs());
    assertActiveBytesAfter(flushControl);
    writer.close();
    assertEquals(0, flushControl.activeBytes());
    dir.close();
  }
}
public void testStallControl() throws InterruptedException, IOException {
  int[] numThreads = new int[] {4 + random().nextInt(8), 1};
  final int numDocumentsToIndex = 50 + random().nextInt(50);
  for (int i = 0; i < numThreads.length; i++) {
    AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex);
    MockDirectoryWrapper dir = newMockDirectory();
    // mock a very slow harddisk sometimes here so that flushing is very slow
    dir.setThrottling(MockDirectoryWrapper.Throttling.SOMETIMES);
    IndexWriterConfig iwc =
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    iwc.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    iwc.setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    FlushPolicy flushPolicy = new FlushByRamOrCountsPolicy();
    iwc.setFlushPolicy(flushPolicy);

    DocumentsWriterPerThreadPool threadPool =
        new ThreadAffinityDocumentsWriterThreadPool(numThreads[i] == 1 ? 1 : 2);
    iwc.setIndexerThreadPool(threadPool);
    // with such a small ram buffer we should be stalled quite quickly
    iwc.setRAMBufferSizeMB(0.25);
    IndexWriter writer = new IndexWriter(dir, iwc);
    IndexThread[] threads = new IndexThread[numThreads[i]];
    for (int x = 0; x < threads.length; x++) {
      threads[x] = new IndexThread(numDocs, numThreads[i], writer, lineDocFile, false);
      threads[x].start();
    }

    for (int x = 0; x < threads.length; x++) {
      threads[x].join();
    }
    DocumentsWriter docsWriter = writer.getDocsWriter();
    assertNotNull(docsWriter);
    DocumentsWriterFlushControl flushControl = docsWriter.flushControl;
    assertEquals(" all flushes must be due", 0, flushControl.flushBytes());
    assertEquals(numDocumentsToIndex, writer.numDocs());
    assertEquals(numDocumentsToIndex, writer.maxDoc());
    if (numThreads[i] == 1) {
      assertFalse("single thread must not block numThreads: " + numThreads[i],
          docsWriter.flushControl.stallControl.hasBlocked());
    }
    if (docsWriter.flushControl.peakNetBytes > (2.d * iwc.getRAMBufferSizeMB() * 1024.d * 1024.d)) {
      assertTrue(docsWriter.flushControl.stallControl.wasStalled());
    }
    assertActiveBytesAfter(flushControl);
    writer.close(true);
    dir.close();
  }
}
@Nightly
public void testBigDocuments() throws IOException {
  // "big" as "much bigger than the chunk size"
  // for this test we force a FS dir
  // we can't just use newFSDirectory, because this test doesn't really index anything.
  // so if we get NRTCachingDir+SimpleText, we make massive stored fields and OOM (LUCENE-4484)
  Directory dir = new MockDirectoryWrapper(
      random(), new MMapDirectory(_TestUtil.getTempDir("testBigDocuments")));
  IndexWriterConfig iwConf =
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
  iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);

  if (dir instanceof MockDirectoryWrapper) {
    ((MockDirectoryWrapper) dir).setThrottling(Throttling.NEVER);
  }

  final Document emptyDoc = new Document(); // emptyDoc
  final Document bigDoc1 = new Document(); // lot of small fields
  final Document bigDoc2 = new Document(); // 1 very big field

  final Field idField = new StringField("id", "", Store.NO);
  emptyDoc.add(idField);
  bigDoc1.add(idField);
  bigDoc2.add(idField);

  final FieldType onlyStored = new FieldType(StringField.TYPE_STORED);
  onlyStored.setIndexed(false);

  final Field smallField =
      new Field("fld", randomByteArray(random().nextInt(10), 256), onlyStored);
  final int numFields = RandomInts.randomIntBetween(random(), 500000, 1000000);
  for (int i = 0; i < numFields; ++i) {
    bigDoc1.add(smallField);
  }

  final Field bigField = new Field(
      "fld", randomByteArray(RandomInts.randomIntBetween(random(), 1000000, 5000000), 2),
      onlyStored);
  bigDoc2.add(bigField);

  final int numDocs = atLeast(5);
  final Document[] docs = new Document[numDocs];
  for (int i = 0; i < numDocs; ++i) {
    docs[i] = RandomPicks.randomFrom(random(), Arrays.asList(emptyDoc, bigDoc1, bigDoc2));
  }
  for (int i = 0; i < numDocs; ++i) {
    idField.setStringValue("" + i);
    iw.addDocument(docs[i]);
    if (random().nextInt(numDocs) == 0) {
      iw.commit();
    }
  }
  iw.commit();
  iw.forceMerge(1); // look at what happens when big docs are merged
  final DirectoryReader rd = DirectoryReader.open(dir);
  final IndexSearcher searcher = new IndexSearcher(rd);
  for (int i = 0; i < numDocs; ++i) {
    final Query query = new TermQuery(new Term("id", "" + i));
    final TopDocs topDocs = searcher.search(query, 1);
    assertEquals("" + i, 1, topDocs.totalHits);
    final Document doc = rd.document(topDocs.scoreDocs[0].doc);
    assertNotNull(doc);
    final IndexableField[] fieldValues = doc.getFields("fld");
    assertEquals(docs[i].getFields("fld").length, fieldValues.length);
    if (fieldValues.length > 0) {
      assertEquals(docs[i].getFields("fld")[0].binaryValue(), fieldValues[0].binaryValue());
    }
  }
  rd.close();
  iw.close();
  dir.close();
}
public void testWriteReadMerge() throws IOException {
  // get another codec, other than the default: so we are merging segments across different codecs
  final Codec otherCodec;
  if ("SimpleText".equals(Codec.getDefault().getName())) {
    otherCodec = new Lucene46Codec();
  } else {
    otherCodec = new SimpleTextCodec();
  }
  Directory dir = newDirectory();
  IndexWriterConfig iwConf =
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
  iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf.clone());

  final int docCount = atLeast(200);
  final byte[][][] data = new byte[docCount][][];
  for (int i = 0; i < docCount; ++i) {
    final int fieldCount = rarely()
        ? RandomInts.randomIntBetween(random(), 1, 500)
        : RandomInts.randomIntBetween(random(), 1, 5);
    data[i] = new byte[fieldCount][];
    for (int j = 0; j < fieldCount; ++j) {
      final int length = rarely() ? random().nextInt(1000) : random().nextInt(10);
      final int max = rarely() ? 256 : 2;
      data[i][j] = randomByteArray(length, max);
    }
  }

  final FieldType type = new FieldType(StringField.TYPE_STORED);
  type.setIndexed(false);
  type.freeze();
  IntField id = new IntField("id", 0, Store.YES);
  for (int i = 0; i < data.length; ++i) {
    Document doc = new Document();
    doc.add(id);
    id.setIntValue(i);
    for (int j = 0; j < data[i].length; ++j) {
      Field f = new Field("bytes" + j, data[i][j], type);
      doc.add(f);
    }
    iw.w.addDocument(doc);
    if (random().nextBoolean() && (i % (data.length / 10) == 0)) {
      iw.w.close();
      // test merging against a non-compressing codec
      if (iwConf.getCodec() == otherCodec) {
        iwConf.setCodec(Codec.getDefault());
      } else {
        iwConf.setCodec(otherCodec);
      }
      iw = new RandomIndexWriter(random(), dir, iwConf.clone());
    }
  }

  for (int i = 0; i < 10; ++i) {
    final int min = random().nextInt(data.length);
    final int max = min + random().nextInt(20);
    iw.deleteDocuments(NumericRangeQuery.newIntRange("id", min, max, true, false));
  }

  iw.forceMerge(2); // force merges with deletions

  iw.commit();

  final DirectoryReader ir = DirectoryReader.open(dir);
  assertTrue(ir.numDocs() > 0);
  int numDocs = 0;
  for (int i = 0; i < ir.maxDoc(); ++i) {
    final Document doc = ir.document(i);
    if (doc == null) {
      continue;
    }
    ++numDocs;
    final int docId = doc.getField("id").numericValue().intValue();
    assertEquals(data[docId].length + 1, doc.getFields().size());
    for (int j = 0; j < data[docId].length; ++j) {
      final byte[] arr = data[docId][j];
      final BytesRef arr2Ref = doc.getBinaryValue("bytes" + j);
      final byte[] arr2 =
          Arrays.copyOfRange(arr2Ref.bytes, arr2Ref.offset, arr2Ref.offset + arr2Ref.length);
      assertArrayEquals(arr, arr2);
    }
  }
  assertTrue(ir.numDocs() <= numDocs);
  ir.close();

  iw.deleteAll();
  iw.commit();
  iw.forceMerge(1);

  iw.close();
  dir.close();
}
public void testConcurrentReads() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwConf =
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
  iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);

  // make sure the readers are properly cloned
  final Document doc = new Document();
  final Field field = new StringField("fld", "", Store.YES);
  doc.add(field);
  final int numDocs = atLeast(1000);
  for (int i = 0; i < numDocs; ++i) {
    field.setStringValue("" + i);
    iw.addDocument(doc);
  }
  iw.commit();

  final DirectoryReader rd = DirectoryReader.open(dir);
  final IndexSearcher searcher = new IndexSearcher(rd);
  final int concurrentReads = atLeast(5);
  final int readsPerThread = atLeast(50);
  final List<Thread> readThreads = new ArrayList<Thread>();
  final AtomicReference<Exception> ex = new AtomicReference<Exception>();
  for (int i = 0; i < concurrentReads; ++i) {
    readThreads.add(new Thread() {

      int[] queries;

      {
        queries = new int[readsPerThread];
        for (int i = 0; i < queries.length; ++i) {
          queries[i] = random().nextInt(numDocs);
        }
      }

      @Override
      public void run() {
        for (int q : queries) {
          final Query query = new TermQuery(new Term("fld", "" + q));
          try {
            final TopDocs topDocs = searcher.search(query, 1);
            if (topDocs.totalHits != 1) {
              throw new IllegalStateException("Expected 1 hit, got " + topDocs.totalHits);
            }
            final Document sdoc = rd.document(topDocs.scoreDocs[0].doc);
            if (sdoc == null || sdoc.get("fld") == null) {
              throw new IllegalStateException("Could not find document " + q);
            }
            if (!Integer.toString(q).equals(sdoc.get("fld"))) {
              throw new IllegalStateException("Expected " + q + ", but got " + sdoc.get("fld"));
            }
          } catch (Exception e) {
            ex.compareAndSet(null, e);
          }
        }
      }
    });
  }
  for (Thread thread : readThreads) {
    thread.start();
  }
  for (Thread thread : readThreads) {
    thread.join();
  }
  rd.close();
  if (ex.get() != null) {
    throw ex.get();
  }

  iw.close();
  dir.close();
}
@Nightly
public void test() throws Exception {
  MockDirectoryWrapper dir =
      new MockDirectoryWrapper(random(), new MMapDirectory(createTempDir("4GBStoredFields")));
  dir.setThrottling(MockDirectoryWrapper.Throttling.NEVER);

  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  iwc.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
  iwc.setRAMBufferSizeMB(256.0);
  iwc.setMergeScheduler(new ConcurrentMergeScheduler());
  iwc.setMergePolicy(newLogMergePolicy(false, 10));
  iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

  // TODO: we disable "Compressing" since it likes to pick very extreme values which will be too
  // slow for this test.
  // maybe we should factor out crazy cases to ExtremeCompressing? then annotations can handle
  // this stuff...
  if (random().nextBoolean()) {
    iwc.setCodec(CompressingCodec.reasonableInstance(random()));
  }

  IndexWriter w = new IndexWriter(dir, iwc);

  MergePolicy mp = w.getConfig().getMergePolicy();
  if (mp instanceof LogByteSizeMergePolicy) {
    // 1 petabyte:
    ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024 * 1024 * 1024);
  }

  final Document doc = new Document();
  final FieldType ft = new FieldType();
  ft.setStored(true);
  ft.freeze();
  final int valueLength = RandomInts.randomIntBetween(random(), 1 << 13, 1 << 20);
  final byte[] value = new byte[valueLength];
  for (int i = 0; i < valueLength; ++i) {
    // random so that even compressing codecs can't compress it
    value[i] = (byte) random().nextInt(256);
  }
  final Field f = new Field("fld", value, ft);
  doc.add(f);
  final int numDocs = (int) ((1L << 32) / valueLength + 100);
  for (int i = 0; i < numDocs; ++i) {
    w.addDocument(doc);
    if (VERBOSE && i % (numDocs / 10) == 0) {
      System.out.println(i + " of " + numDocs + "...");
    }
  }
  w.forceMerge(1);
  w.close();
  if (VERBOSE) {
    boolean found = false;
    for (String file : dir.listAll()) {
      if (file.endsWith(".fdt")) {
        final long fileLength = dir.fileLength(file);
        if (fileLength >= 1L << 32) {
          found = true;
        }
        System.out.println("File length of " + file + " : " + fileLength);
      }
    }
    if (!found) {
      System.out.println("No .fdt file larger than 4GB, test bug?");
    }
  }

  DirectoryReader rd = DirectoryReader.open(dir);
  Document sd = rd.document(numDocs - 1);
  assertNotNull(sd);
  assertEquals(1, sd.getFields().size());
  BytesRef valueRef = sd.getBinaryValue("fld");
  assertNotNull(valueRef);
  assertEquals(new BytesRef(value), valueRef);
  rd.close();
  dir.close();
}
public void testDeletes1() throws Exception {
  //IndexWriter.debug2 = System.out;
  Directory dir = new MockDirectoryWrapper(new Random(random().nextLong()), new RAMDirectory());
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  iwc.setMergeScheduler(new SerialMergeScheduler());
  iwc.setMaxBufferedDocs(5000);
  iwc.setRAMBufferSizeMB(100);
  RangeMergePolicy fsmp = new RangeMergePolicy(false);
  iwc.setMergePolicy(fsmp);
  IndexWriter writer = new IndexWriter(dir, iwc);
  for (int x = 0; x < 5; x++) {
    writer.addDocument(DocHelper.createDocument(x, "1", 2));
    //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
  }
  //System.out.println("commit1");
  writer.commit();
  assertEquals(1, writer.segmentInfos.size());
  for (int x = 5; x < 10; x++) {
    writer.addDocument(DocHelper.createDocument(x, "2", 2));
    //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
  }
  //System.out.println("commit2");
  writer.commit();
  assertEquals(2, writer.segmentInfos.size());

  for (int x = 10; x < 15; x++) {
    writer.addDocument(DocHelper.createDocument(x, "3", 2));
    //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
  }

  writer.deleteDocuments(new Term("id", "1"));

  writer.deleteDocuments(new Term("id", "11"));

  // flushing without applying deletes means
  // there will still be deletes in the segment infos
  writer.flush(false, false);
  assertTrue(writer.bufferedUpdatesStream.any());

  // get reader flushes pending deletes
  // so there should not be anymore
  IndexReader r1 = writer.getReader();
  assertFalse(writer.bufferedUpdatesStream.any());
  r1.close();

  // delete id:2 from the first segment
  // merge segments 0 and 1
  // which should apply the delete id:2
  writer.deleteDocuments(new Term("id", "2"));
  writer.flush(false, false);
  fsmp = (RangeMergePolicy) writer.getConfig().getMergePolicy();
  fsmp.doMerge = true;
  fsmp.start = 0;
  fsmp.length = 2;
  writer.maybeMerge();

  assertEquals(2, writer.segmentInfos.size());

  // id:2 shouldn't exist anymore because
  // it's been applied in the merge and now it's gone
  IndexReader r2 = writer.getReader();
  int[] id2docs = toDocsArray(new Term("id", "2"), null, r2);
  assertTrue(id2docs == null);
  r2.close();

  /**
  // added docs are in the ram buffer
  for (int x = 15; x < 20; x++) {
    writer.addDocument(TestIndexWriterReader.createDocument(x, "4", 2));
    System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
  }
  assertTrue(writer.numRamDocs() > 0);

  // delete from the ram buffer
  writer.deleteDocuments(new Term("id", Integer.toString(13)));

  Term id3 = new Term("id", Integer.toString(3));

  // delete from the 1st segment
  writer.deleteDocuments(id3);

  assertTrue(writer.numRamDocs() > 0);

  //System.out
  //    .println("segdels1:" + writer.docWriter.deletesToString());

  //assertTrue(writer.docWriter.segmentDeletes.size() > 0);

  // we cause a merge to happen
  fsmp.doMerge = true;
  fsmp.start = 0;
  fsmp.length = 2;
  System.out.println("maybeMerge "+writer.segmentInfos);

  SegmentInfo info0 = writer.segmentInfos.info(0);
  SegmentInfo info1 = writer.segmentInfos.info(1);

  writer.maybeMerge();
  System.out.println("maybeMerge after "+writer.segmentInfos);

  // there should be docs in RAM
  assertTrue(writer.numRamDocs() > 0);

  // assert we've merged the 1 and 2 segments
  // and still have a segment leftover == 2
  assertEquals(2, writer.segmentInfos.size());
  assertFalse(segThere(info0, writer.segmentInfos));
  assertFalse(segThere(info1, writer.segmentInfos));

  //System.out.println("segdels2:" + writer.docWriter.deletesToString());

  //assertTrue(writer.docWriter.segmentDeletes.size() > 0);

  IndexReader r = writer.getReader();
  IndexReader r1 = r.getSequentialSubReaders()[0];
  printDelDocs(r1.getLiveDocs());
  int[] docs = toDocsArray(id3, null, r);
  System.out.println("id3 docs:"+Arrays.toString(docs));
  // there shouldn't be any docs for id:3
  assertTrue(docs == null);
  r.close();

  part2(writer, fsmp);
  **/

  //System.out.println("segdels2:"+writer.docWriter.segmentDeletes.toString());
  //System.out.println("close");
  writer.close();
  dir.close();
}
public void testLiveMaxMergeCount() throws Exception {
  Directory d = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
  TieredMergePolicy tmp = new TieredMergePolicy();
  tmp.setSegmentsPerTier(1000);
  tmp.setMaxMergeAtOnce(1000);
  tmp.setMaxMergeAtOnceExplicit(10);
  iwc.setMergePolicy(tmp);
  iwc.setMaxBufferedDocs(2);
  iwc.setRAMBufferSizeMB(-1);

  final AtomicInteger maxRunningMergeCount = new AtomicInteger();

  ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler() {

    final AtomicInteger runningMergeCount = new AtomicInteger();

    @Override
    public void doMerge(MergePolicy.OneMerge merge) throws IOException {
      int count = runningMergeCount.incrementAndGet();
      // evil?
      synchronized (this) {
        if (count > maxRunningMergeCount.get()) {
          maxRunningMergeCount.set(count);
        }
      }
      try {
        super.doMerge(merge);
      } finally {
        runningMergeCount.decrementAndGet();
      }
    }
  };
  cms.setMaxMergesAndThreads(5, 3);

  iwc.setMergeScheduler(cms);

  IndexWriter w = new IndexWriter(d, iwc);
  // Makes 100 segments
  for (int i = 0; i < 200; i++) {
    w.addDocument(new Document());
  }

  // No merges should have run so far, because TMP has high segmentsPerTier:
  assertEquals(0, maxRunningMergeCount.get());

  w.forceMerge(1);

  // At most 5 merge threads should have launched at once:
  assertTrue("maxRunningMergeCount=" + maxRunningMergeCount, maxRunningMergeCount.get() <= 5);

  maxRunningMergeCount.set(0);

  // Makes another 100 segments
  for (int i = 0; i < 200; i++) {
    w.addDocument(new Document());
  }

  ((ConcurrentMergeScheduler) w.getConfig().getMergeScheduler()).setMaxMergesAndThreads(1, 1);
  w.forceMerge(1);

  // At most 1 merge thread should have launched at once:
  assertEquals(1, maxRunningMergeCount.get());

  w.close();
  d.close();
}
// LUCENE-4544
public void testMaxMergeCount() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));

  final int maxMergeCount = TestUtil.nextInt(random(), 1, 5);
  final int maxMergeThreads = TestUtil.nextInt(random(), 1, maxMergeCount);
  final CountDownLatch enoughMergesWaiting = new CountDownLatch(maxMergeCount);
  final AtomicInteger runningMergeCount = new AtomicInteger(0);
  final AtomicBoolean failed = new AtomicBoolean();

  if (VERBOSE) {
    System.out.println("TEST: maxMergeCount=" + maxMergeCount + " maxMergeThreads=" + maxMergeThreads);
  }

  ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler() {

    @Override
    protected void doMerge(MergePolicy.OneMerge merge) throws IOException {
      try {
        // Stall all incoming merges until we see
        // maxMergeCount:
        int count = runningMergeCount.incrementAndGet();
        try {
          assertTrue("count=" + count + " vs maxMergeCount=" + maxMergeCount,
              count <= maxMergeCount);
          enoughMergesWaiting.countDown();

          // Stall this merge until we see exactly
          // maxMergeCount merges waiting
          while (true) {
            if (enoughMergesWaiting.await(10, TimeUnit.MILLISECONDS) || failed.get()) {
              break;
            }
          }
          // Then sleep a bit to give a chance for the bug
          // (too many pending merges) to appear:
          Thread.sleep(20);
          super.doMerge(merge);
        } finally {
          runningMergeCount.decrementAndGet();
        }
      } catch (Throwable t) {
        failed.set(true);
        writer.mergeFinish(merge);
        throw new RuntimeException(t);
      }
    }
  };
  cms.setMaxMergesAndThreads(maxMergeCount, maxMergeThreads);
  iwc.setMergeScheduler(cms);
  iwc.setMaxBufferedDocs(2);

  TieredMergePolicy tmp = new TieredMergePolicy();
  iwc.setMergePolicy(tmp);
  tmp.setMaxMergeAtOnce(2);
  tmp.setSegmentsPerTier(2);

  IndexWriter w = new IndexWriter(dir, iwc);
  Document doc = new Document();
  doc.add(newField("field", "field", TextField.TYPE_NOT_STORED));
  while (enoughMergesWaiting.getCount() != 0 && !failed.get()) {
    for (int i = 0; i < 10; i++) {
      w.addDocument(doc);
    }
  }
  w.close(false);
  dir.close();
}
public void testLongPostings() throws Exception {
  // Don't use _TestUtil.getTempDir so that we own the
  // randomness (ie same seed will point to same dir):
  Directory dir = newFSDirectory(_TestUtil.getTempDir("longpostings" + "." + random().nextLong()));

  final int NUM_DOCS = atLeast(2000);

  if (VERBOSE) {
    System.out.println("TEST: NUM_DOCS=" + NUM_DOCS);
  }

  final String s1 = getRandomTerm(null);
  final String s2 = getRandomTerm(s1);

  if (VERBOSE) {
    System.out.println("\nTEST: s1=" + s1 + " s2=" + s2);
    /*
    for(int idx=0;idx<s1.length();idx++) {
      System.out.println("  s1 ch=0x" + Integer.toHexString(s1.charAt(idx)));
    }
    for(int idx=0;idx<s2.length();idx++) {
      System.out.println("  s2 ch=0x" + Integer.toHexString(s2.charAt(idx)));
    }
    */
  }

  final FixedBitSet isS1 = new FixedBitSet(NUM_DOCS);
  for (int idx = 0; idx < NUM_DOCS; idx++) {
    if (random().nextBoolean()) {
      isS1.set(idx);
    }
  }

  final IndexReader r;
  final IndexWriterConfig iwc =
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
          .setOpenMode(IndexWriterConfig.OpenMode.CREATE)
          .setMergePolicy(newLogMergePolicy());
  iwc.setRAMBufferSizeMB(16.0 + 16.0 * random().nextDouble());
  iwc.setMaxBufferedDocs(-1);
  final RandomIndexWriter riw = new RandomIndexWriter(random(), dir, iwc);

  for (int idx = 0; idx < NUM_DOCS; idx++) {
    final Document doc = new Document();
    String s = isS1.get(idx) ? s1 : s2;
    final Field f = newTextField("field", s, Field.Store.NO);
    final int count = _TestUtil.nextInt(random(), 1, 4);
    for (int ct = 0; ct < count; ct++) {
      doc.add(f);
    }
    riw.addDocument(doc);
  }

  r = riw.getReader();
  riw.close();

  /*
  if (VERBOSE) {
    System.out.println("TEST: terms");
    TermEnum termEnum = r.terms();
    while(termEnum.next()) {
      System.out.println("  term=" + termEnum.term() + " len=" + termEnum.term().text().length());
      assertTrue(termEnum.docFreq() > 0);
      System.out.println("    s1?=" + (termEnum.term().text().equals(s1)) + " s1len=" + s1.length());
      System.out.println("    s2?=" + (termEnum.term().text().equals(s2)) + " s2len=" + s2.length());
      final String s = termEnum.term().text();
      for(int idx=0;idx<s.length();idx++) {
        System.out.println("      ch=0x" + Integer.toHexString(s.charAt(idx)));
      }
    }
  }
  */

  assertEquals(NUM_DOCS, r.numDocs());
  assertTrue(r.docFreq(new Term("field", s1)) > 0);
  assertTrue(r.docFreq(new Term("field", s2)) > 0);

  int num = atLeast(1000);
  for (int iter = 0; iter < num; iter++) {

    final String term;
    final boolean doS1;
    if (random().nextBoolean()) {
      term = s1;
      doS1 = true;
    } else {
      term = s2;
      doS1 = false;
    }

    if (VERBOSE) {
      System.out.println("\nTEST: iter=" + iter + " doS1=" + doS1);
    }

    final DocsAndPositionsEnum postings =
        MultiFields.getTermPositionsEnum(r, null, "field", new BytesRef(term));

    int docID = -1;
    while (docID < DocIdSetIterator.NO_MORE_DOCS) {
      final int what = random().nextInt(3);
      if (what == 0) {
        if (VERBOSE) {
          System.out.println("TEST: docID=" + docID + "; do next()");
        }
        // nextDoc
        int expected = docID + 1;
        while (true) {
          if (expected == NUM_DOCS) {
            expected = Integer.MAX_VALUE;
            break;
          } else if (isS1.get(expected) == doS1) {
            break;
          } else {
            expected++;
          }
        }
        docID = postings.nextDoc();
        if (VERBOSE) {
          System.out.println("  got docID=" + docID);
        }
        assertEquals(expected, docID);
        if (docID == DocIdSetIterator.NO_MORE_DOCS) {
          break;
        }

        if (random().nextInt(6) == 3) {
          final int freq = postings.freq();
          assertTrue(freq >= 1 && freq <= 4);
          for (int pos = 0; pos < freq; pos++) {
            assertEquals(pos, postings.nextPosition());
            if (random().nextBoolean()) {
              postings.getPayload();
              if (random().nextBoolean()) {
                postings.getPayload(); // get it again
              }
            }
          }
        }
      } else {
        // advance
        final int targetDocID;
        if (docID == -1) {
          targetDocID = random().nextInt(NUM_DOCS + 1);
        } else {
          targetDocID = docID + _TestUtil.nextInt(random(), 1, NUM_DOCS - docID);
        }
        if (VERBOSE) {
          System.out.println("TEST: docID=" + docID + "; do advance(" + targetDocID + ")");
        }
        int expected = targetDocID;
        while (true) {
          if (expected == NUM_DOCS) {
            expected = Integer.MAX_VALUE;
            break;
          } else if (isS1.get(expected) == doS1) {
            break;
          } else {
            expected++;
          }
        }

        docID = postings.advance(targetDocID);
        if (VERBOSE) {
          System.out.println("  got docID=" + docID);
        }
        assertEquals(expected, docID);
        if (docID == DocIdSetIterator.NO_MORE_DOCS) {
          break;
        }

        if (random().nextInt(6) == 3) {
          final int freq = postings.freq();
          assertTrue(freq >= 1 && freq <= 4);
          for (int pos = 0; pos < freq; pos++) {
            assertEquals(pos, postings.nextPosition());
            if (random().nextBoolean()) {
              postings.getPayload();
              if (random().nextBoolean()) {
                postings.getPayload(); // get it again
              }
            }
          }
        }
      }
    }
  }
  r.close();
  dir.close();
}
// LUCENE-5644: index docs w/ multiple threads but in between flushes we limit how many threads
// can index concurrently in the next iteration, and then verify that no more segments were
// flushed than number of threads:
public void testSegmentCountOnFlushRandom() throws Exception {
  Directory dir = newFSDirectory(createTempDir());
  IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));

  int maxThreadStates = TestUtil.nextInt(random(), 1, 12);

  if (VERBOSE) {
    System.out.println("TEST: maxThreadStates=" + maxThreadStates);
  }

  // Never trigger flushes (so we only flush on getReader):
  iwc.setMaxBufferedDocs(100000000);
  iwc.setRAMBufferSizeMB(-1);
  iwc.setMaxThreadStates(maxThreadStates);

  // Never trigger merges (so we can simplistically count flushed segments):
  iwc.setMergePolicy(NoMergePolicy.INSTANCE);

  final IndexWriter w = new IndexWriter(dir, iwc);

  // How many threads are indexing in the current cycle:
  final AtomicInteger indexingCount = new AtomicInteger();

  // How many threads we will use on each cycle:
  final AtomicInteger maxThreadCount = new AtomicInteger();

  CheckSegmentCount checker = new CheckSegmentCount(w, maxThreadCount, indexingCount);

  // We spin up 10 threads up front, but then in between flushes we limit how many can run on
  // each iteration
  final int ITERS = 100;
  Thread[] threads = new Thread[MAX_THREADS_AT_ONCE];

  // We use this to stop all threads once they've indexed their docs in the current iter, and
  // pull a new NRT reader, and verify the segment count:
  final CyclicBarrier barrier = new CyclicBarrier(MAX_THREADS_AT_ONCE, checker);

  for (int i = 0; i < threads.length; i++) {
    threads[i] = new Thread() {
      @Override
      public void run() {
        try {
          for (int iter = 0; iter < ITERS; iter++) {
            if (indexingCount.incrementAndGet() <= maxThreadCount.get()) {
              if (VERBOSE) {
                System.out.println("TEST: " + Thread.currentThread().getName() + ": do index");
              }

              // We get to index on this cycle:
              Document doc = new Document();
              doc.add(new TextField("field",
                  "here is some text that is a bit longer than normal trivial text",
                  Field.Store.NO));
              for (int j = 0; j < 200; j++) {
                w.addDocument(doc);
              }
            } else {
              // We lose: no indexing for us on this cycle
              if (VERBOSE) {
                System.out.println("TEST: " + Thread.currentThread().getName() + ": don't index");
              }
            }
            barrier.await();
          }
        } catch (Exception e) {
          throw new RuntimeException(e);
        }
      }
    };
    threads[i].start();
  }

  for (Thread t : threads) {
    t.join();
  }

  IOUtils.close(checker, w, dir);
}