@Override public void doWork() throws Throwable { IndexWriter writer1 = new IndexWriter( dir1, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) .setMaxBufferedDocs(3) .setMergeScheduler(new ConcurrentMergeScheduler()) .setMergePolicy(newLogMergePolicy(2))); ((ConcurrentMergeScheduler) writer1.getConfig().getMergeScheduler()).setSuppressExceptions(); // Intentionally use different params so flush/merge // happen @ different times IndexWriter writer2 = new IndexWriter( dir2, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) .setMaxBufferedDocs(2) .setMergeScheduler(new ConcurrentMergeScheduler()) .setMergePolicy(newLogMergePolicy(3))); ((ConcurrentMergeScheduler) writer2.getConfig().getMergeScheduler()).setSuppressExceptions(); update(writer1); update(writer2); TestTransactions.doFail = true; try { synchronized (lock) { try { writer1.prepareCommit(); } catch (Throwable t) { writer1.rollback(); writer2.rollback(); return; } try { writer2.prepareCommit(); } catch (Throwable t) { writer1.rollback(); writer2.rollback(); return; } writer1.commit(); writer2.commit(); } } finally { TestTransactions.doFail = false; } writer1.close(); writer2.close(); }
private Directory makeIndex() throws Exception { Directory dir = newDirectory(); try { IndexWriter writer = new IndexWriter( dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new SimpleAnalyzer(TEST_VERSION_CURRENT))); LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy(); lmp.setUseCompoundFile(false); for (int d = 1; d <= NUM_DOCS; d++) { Document doc = new Document(); for (int f = 1; f <= NUM_FIELDS; f++) { doc.add( newField( "f" + f, data[f % data.length] + '#' + data[random.nextInt(data.length)], Field.Store.YES, Field.Index.ANALYZED)); } writer.addDocument(doc); } writer.close(); } catch (Exception e) { throw new RuntimeException(e); } return dir; }
public void testTotalBytesSize() throws Exception { Directory d = newDirectory(); if (d instanceof MockDirectoryWrapper) { ((MockDirectoryWrapper) d).setThrottling(MockDirectoryWrapper.Throttling.NEVER); } IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); iwc.setMaxBufferedDocs(5); iwc.setMergeScheduler(new TrackingCMS()); if (TestUtil.getPostingsFormat("id").equals("SimpleText")) { // avoid the SimpleText postings format here; force Lucene41 instead iwc.setCodec(TestUtil.alwaysPostingsFormat(new Lucene41PostingsFormat())); } IndexWriter w = new IndexWriter(d, iwc); for (int i = 0; i < 1000; i++) { Document doc = new Document(); doc.add(new StringField("id", "" + i, Field.Store.NO)); w.addDocument(doc); if (random().nextBoolean()) { w.deleteDocuments(new Term("id", "" + random().nextInt(i + 1))); } } assertTrue(((TrackingCMS) w.getConfig().getMergeScheduler()).totMergedBytes != 0); w.close(); d.close(); }
// Verifies no *.nrm exists when all fields omit norms: public void testNoNrmFile() throws Throwable { Directory ram = newDirectory(); Analyzer analyzer = new MockAnalyzer(random()); IndexWriter writer = new IndexWriter( ram, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer) .setMaxBufferedDocs(3) .setMergePolicy(newLogMergePolicy())); LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy(); lmp.setMergeFactor(2); lmp.setNoCFSRatio(0.0); Document d = new Document(); FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); customType.setOmitNorms(true); Field f1 = newField("f1", "This field has no norms", customType); d.add(f1); for (int i = 0; i < 30; i++) { writer.addDocument(d); } writer.commit(); assertNoNrm(ram); // force merge writer.forceMerge(1); // flush writer.close(); assertNoNrm(ram); ram.close(); }
@Override public void run() { try { int oldSegmentCount = r.leaves().size(); DirectoryReader r2 = DirectoryReader.openIfChanged(r); assertNotNull(r2); r.close(); r = r2; int maxThreadStates = w.getConfig().getMaxThreadStates(); int maxExpectedSegments = oldSegmentCount + Math.min(maxThreadStates, maxThreadCountPerIter.get()); if (VERBOSE) { System.out.println( "TEST: iter done; now verify oldSegCount=" + oldSegmentCount + " newSegCount=" + r2.leaves().size() + " maxExpected=" + maxExpectedSegments); } // NOTE: it won't necessarily be ==, in case some threads were strangely scheduled and never // conflicted with one another (should be uncommon...?): assertTrue(r.leaves().size() <= maxExpectedSegments); setNextIterThreadCount(); } catch (Exception e) { throw new RuntimeException(e); } }
/* * Test a deletion policy that keeps last N commits. */ public void testKeepLastNDeletionPolicy() throws IOException { final int N = 5; for (int pass = 0; pass < 2; pass++) { boolean useCompoundFile = (pass % 2) != 0; Directory dir = newDirectory(); if (dir instanceof MockDirectoryWrapper) { // test manually deletes files ((MockDirectoryWrapper) dir).setEnableVirusScanner(false); } KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(N); for (int j = 0; j < N + 1; j++) { IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random())) .setOpenMode(OpenMode.CREATE) .setIndexDeletionPolicy(policy) .setMaxBufferedDocs(10); MergePolicy mp = conf.getMergePolicy(); mp.setNoCFSRatio(useCompoundFile ? 1.0 : 0.0); IndexWriter writer = new IndexWriter(dir, conf); policy = (KeepLastNDeletionPolicy) writer.getConfig().getIndexDeletionPolicy(); for (int i = 0; i < 17; i++) { addDoc(writer); } writer.forceMerge(1); writer.close(); } assertTrue(policy.numDelete > 0); assertEquals(N + 1, policy.numOnInit); assertEquals(N + 1, policy.numOnCommit); // Simplistic check: just verify only the past N segments_N's still // exist, and, I can open a reader on each: long gen = SegmentInfos.getLastCommitGeneration(dir); for (int i = 0; i < N + 1; i++) { try { IndexReader reader = DirectoryReader.open(dir); reader.close(); if (i == N) { fail("should have failed on commits prior to last " + N); } } catch (IOException e) { if (i != N) { throw e; } } if (i < N) { dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); } gen--; } dir.close(); } }
/* Test keeping NO commit points. This is a viable and * useful case eg where you want to build a big index and * you know there are no readers. */ public void testKeepNoneOnInitDeletionPolicy() throws IOException { for (int pass = 0; pass < 2; pass++) { boolean useCompoundFile = (pass % 2) != 0; Directory dir = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random())) .setOpenMode(OpenMode.CREATE) .setIndexDeletionPolicy(new KeepNoneOnInitDeletionPolicy()) .setMaxBufferedDocs(10); MergePolicy mp = conf.getMergePolicy(); mp.setNoCFSRatio(useCompoundFile ? 1.0 : 0.0); IndexWriter writer = new IndexWriter(dir, conf); KeepNoneOnInitDeletionPolicy policy = (KeepNoneOnInitDeletionPolicy) writer.getConfig().getIndexDeletionPolicy(); for (int i = 0; i < 107; i++) { addDoc(writer); } writer.close(); conf = newIndexWriterConfig(new MockAnalyzer(random())) .setOpenMode(OpenMode.APPEND) .setIndexDeletionPolicy(policy); mp = conf.getMergePolicy(); mp.setNoCFSRatio(1.0); writer = new IndexWriter(dir, conf); policy = (KeepNoneOnInitDeletionPolicy) writer.getConfig().getIndexDeletionPolicy(); writer.forceMerge(1); writer.close(); assertEquals(2, policy.numOnInit); // If we are not auto committing then there should // be exactly 2 commits (one per close above): assertEquals(2, policy.numOnCommit); // Simplistic check: just verify the index is in fact // readable: IndexReader reader = DirectoryReader.open(dir); reader.close(); dir.close(); } }
private void crash(final IndexWriter writer) throws IOException { final MockDirectoryWrapper dir = (MockDirectoryWrapper) writer.getDirectory(); ConcurrentMergeScheduler cms = (ConcurrentMergeScheduler) writer.getConfig().getMergeScheduler(); cms.sync(); dir.crash(); cms.sync(); dir.clearCrash(); }
public Map<String, Document> indexRandom( int nThreads, int iterations, int range, Directory dir, int maxThreadStates, boolean doReaderPooling) throws IOException, InterruptedException { Map<String, Document> docs = new HashMap<>(); IndexWriter w = RandomIndexWriter.mockIndexWriter( dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) .setOpenMode(OpenMode.CREATE) .setRAMBufferSizeMB(0.1) .setMaxBufferedDocs(maxBufferedDocs) .setIndexerThreadPool(new ThreadAffinityDocumentsWriterThreadPool(maxThreadStates)) .setReaderPooling(doReaderPooling) .setMergePolicy(newLogMergePolicy()), new YieldTestPoint()); LogMergePolicy lmp = (LogMergePolicy) w.getConfig().getMergePolicy(); lmp.setNoCFSRatio(0.0); lmp.setMergeFactor(mergeFactor); threads = new IndexingThread[nThreads]; for (int i = 0; i < threads.length; i++) { IndexingThread th = new IndexingThread(); th.w = w; th.base = 1000000 * i; th.range = range; th.iterations = iterations; threads[i] = th; } for (int i = 0; i < threads.length; i++) { threads[i].start(); } for (int i = 0; i < threads.length; i++) { threads[i].join(); } // w.forceMerge(1); w.close(); for (int i = 0; i < threads.length; i++) { IndexingThread th = threads[i]; synchronized (th) { docs.putAll(th.docs); } } // System.out.println("TEST: checkindex"); TestUtil.checkIndex(dir); return docs; }
public IndexThread( AtomicInteger pendingDocs, int numThreads, IndexWriter writer, LineFileDocs docs, boolean doRandomCommit) { this.pendingDocs = pendingDocs; this.writer = writer; iwc = writer.getConfig(); this.docs = docs; this.doRandomCommit = doRandomCommit; }
public DocsAndWriter indexRandomIWReader(int nThreads, int iterations, int range, Directory dir) throws IOException, InterruptedException { Map<String, Document> docs = new HashMap<>(); IndexWriter w = RandomIndexWriter.mockIndexWriter( dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) .setOpenMode(OpenMode.CREATE) .setRAMBufferSizeMB(0.1) .setMaxBufferedDocs(maxBufferedDocs) .setMergePolicy(newLogMergePolicy()), new YieldTestPoint()); w.commit(); LogMergePolicy lmp = (LogMergePolicy) w.getConfig().getMergePolicy(); lmp.setNoCFSRatio(0.0); lmp.setMergeFactor(mergeFactor); /** * * w.setMaxMergeDocs(Integer.MAX_VALUE); w.setMaxFieldLength(10000); w.setRAMBufferSizeMB(1); * w.setMergeFactor(10); * */ threads = new IndexingThread[nThreads]; for (int i = 0; i < threads.length; i++) { IndexingThread th = new IndexingThread(); th.w = w; th.base = 1000000 * i; th.range = range; th.iterations = iterations; threads[i] = th; } for (int i = 0; i < threads.length; i++) { threads[i].start(); } for (int i = 0; i < threads.length; i++) { threads[i].join(); } // w.forceMerge(1); // w.close(); for (int i = 0; i < threads.length; i++) { IndexingThread th = threads[i]; synchronized (th) { docs.putAll(th.docs); } } TestUtil.checkIndex(dir); DocsAndWriter dw = new DocsAndWriter(); dw.docs = docs; dw.writer = w; return dw; }
protected void runFlushByRam(int numThreads, double maxRamMB, boolean ensureNotStalled) throws IOException, InterruptedException { final int numDocumentsToIndex = 10 + atLeast(30); AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex); Directory dir = newDirectory(); MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy(); IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) .setFlushPolicy(flushPolicy); final int numDWPT = 1 + atLeast(2); DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(numDWPT); iwc.setIndexerThreadPool(threadPool); iwc.setRAMBufferSizeMB(maxRamMB); iwc.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); iwc.setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH); IndexWriter writer = new IndexWriter(dir, iwc); flushPolicy = (MockDefaultFlushPolicy) writer.getConfig().getFlushPolicy(); assertFalse(flushPolicy.flushOnDocCount()); assertFalse(flushPolicy.flushOnDeleteTerms()); assertTrue(flushPolicy.flushOnRAM()); DocumentsWriter docsWriter = writer.getDocsWriter(); assertNotNull(docsWriter); DocumentsWriterFlushControl flushControl = docsWriter.flushControl; assertEquals(" bytes must be 0 after init", 0, flushControl.flushBytes()); IndexThread[] threads = new IndexThread[numThreads]; for (int x = 0; x < threads.length; x++) { threads[x] = new IndexThread(numDocs, numThreads, writer, lineDocFile, false); threads[x].start(); } for (int x = 0; x < threads.length; x++) { threads[x].join(); } final long maxRAMBytes = (long) (iwc.getRAMBufferSizeMB() * 1024. * 1024.); assertEquals(" all flushes must be due numThreads=" + numThreads, 0, flushControl.flushBytes()); assertEquals(numDocumentsToIndex, writer.numDocs()); assertEquals(numDocumentsToIndex, writer.maxDoc()); assertTrue( "peak bytes without flush exceeded watermark", flushPolicy.peakBytesWithoutFlush <= maxRAMBytes); assertActiveBytesAfter(flushControl); if (flushPolicy.hasMarkedPending) { assertTrue(maxRAMBytes < flushControl.peakActiveBytes); } if (ensureNotStalled) { assertFalse(docsWriter.flushControl.stallControl.wasStalled()); } writer.close(); assertEquals(0, flushControl.activeBytes()); dir.close(); }
public void testFlushDocCount() throws IOException, InterruptedException { int[] numThreads = new int[] {2 + atLeast(1), 1}; for (int i = 0; i < numThreads.length; i++) { final int numDocumentsToIndex = 50 + atLeast(30); AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex); Directory dir = newDirectory(); MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy(); IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) .setFlushPolicy(flushPolicy); final int numDWPT = 1 + atLeast(2); DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(numDWPT); iwc.setIndexerThreadPool(threadPool); iwc.setMaxBufferedDocs(2 + atLeast(10)); iwc.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); iwc.setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH); IndexWriter writer = new IndexWriter(dir, iwc); flushPolicy = (MockDefaultFlushPolicy) writer.getConfig().getFlushPolicy(); assertTrue(flushPolicy.flushOnDocCount()); assertFalse(flushPolicy.flushOnDeleteTerms()); assertFalse(flushPolicy.flushOnRAM()); DocumentsWriter docsWriter = writer.getDocsWriter(); assertNotNull(docsWriter); DocumentsWriterFlushControl flushControl = docsWriter.flushControl; assertEquals(" bytes must be 0 after init", 0, flushControl.flushBytes()); IndexThread[] threads = new IndexThread[numThreads[i]]; for (int x = 0; x < threads.length; x++) { threads[x] = new IndexThread(numDocs, numThreads[i], writer, lineDocFile, false); threads[x].start(); } for (int x = 0; x < threads.length; x++) { threads[x].join(); } assertEquals( " all flushes must be due numThreads=" + numThreads[i], 0, flushControl.flushBytes()); assertEquals(numDocumentsToIndex, writer.numDocs()); assertEquals(numDocumentsToIndex, writer.maxDoc()); assertTrue( "peak doc count without flush exceeded maxBufferedDocs", flushPolicy.peakDocCountWithoutFlush <= iwc.getMaxBufferedDocs()); assertActiveBytesAfter(flushControl); writer.close(); assertEquals(0, flushControl.activeBytes()); dir.close(); } }
public void testNoWaitClose() throws IOException { Directory directory = newDirectory(); Document doc = new Document(); Field idField = newStringField("id", "", Field.Store.YES); doc.add(idField); IndexWriter writer = new IndexWriter( directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) .setMaxBufferedDocs(2) .setMergePolicy(newLogMergePolicy(100))); for (int iter = 0; iter < 10; iter++) { for (int j = 0; j < 201; j++) { idField.setStringValue(Integer.toString(iter * 201 + j)); writer.addDocument(doc); } int delID = iter * 201; for (int j = 0; j < 20; j++) { writer.deleteDocuments(new Term("id", Integer.toString(delID))); delID += 5; } // Force a bunch of merge threads to kick off so we // stress out aborting them on close: ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(3); writer.addDocument(doc); writer.commit(); writer.close(false); IndexReader reader = DirectoryReader.open(directory); assertEquals((1 + iter) * 182, reader.numDocs()); reader.close(); // Reopen writer = new IndexWriter( directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) .setOpenMode(OpenMode.APPEND) .setMergePolicy(newLogMergePolicy(100))); } writer.close(); directory.close(); }
// Verifies no *.prx exists when all fields omit term positions: public void testNoPrxFile() throws Throwable { Directory ram = newDirectory(); if (ram instanceof MockDirectoryWrapper) { // we verify some files get deleted ((MockDirectoryWrapper) ram).setEnableVirusScanner(false); } Analyzer analyzer = new MockAnalyzer(random()); IndexWriter writer = new IndexWriter( ram, newIndexWriterConfig(analyzer) .setMaxBufferedDocs(3) .setMergePolicy(newLogMergePolicy())); LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy(); lmp.setMergeFactor(2); lmp.setNoCFSRatio(0.0); Document d = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS); Field f1 = newField("f1", "This field has term freqs", ft); d.add(f1); for (int i = 0; i < 30; i++) writer.addDocument(d); writer.commit(); assertNoPrx(ram); // now add some documents with positions, and check there is no prox after optimization d = new Document(); f1 = newTextField("f1", "This field has positions", Field.Store.NO); d.add(f1); for (int i = 0; i < 30; i++) writer.addDocument(d); // force merge writer.forceMerge(1); // flush writer.close(); assertNoPrx(ram); ram.close(); }
// Verifies no *.prx exists when all fields omit term freq: public void testNoPrxFile() throws Throwable { Directory ram = newDirectory(); Analyzer analyzer = new MockAnalyzer(random()); IndexWriter writer = new IndexWriter( ram, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer) .setMaxBufferedDocs(3) .setMergePolicy(newLogMergePolicy())); LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy(); lmp.setMergeFactor(2); lmp.setUseCompoundFile(false); Document d = new Document(); Field f1 = newField("f1", "This field has term freqs", omitType); d.add(f1); for (int i = 0; i < 30; i++) writer.addDocument(d); writer.commit(); assertNoPrx(ram); // now add some documents with positions, and check // there is no prox after full merge d = new Document(); f1 = newTextField("f1", "This field has positions", Field.Store.NO); d.add(f1); for (int i = 0; i < 30; i++) writer.addDocument(d); // force merge writer.forceMerge(1); // flush writer.close(); assertNoPrx(ram); ram.close(); }
private IndexWriter initIndex(Random random, MockDirectoryWrapper dir, boolean initialCommit) throws IOException { dir.setLockFactory(NoLockFactory.getNoLockFactory()); IndexWriter writer = new IndexWriter( dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)) .setMaxBufferedDocs(10) .setMergeScheduler(new ConcurrentMergeScheduler())); ((ConcurrentMergeScheduler) writer.getConfig().getMergeScheduler()).setSuppressExceptions(); if (initialCommit) { writer.commit(); } Document doc = new Document(); doc.add(newField("content", "aaa", Field.Store.YES, Field.Index.ANALYZED)); doc.add(newField("id", "0", Field.Store.YES, Field.Index.ANALYZED)); for (int i = 0; i < 157; i++) writer.addDocument(doc); return writer; }
public void test() throws Exception { IndexWriterConfig defaultConfig = new IndexWriterConfig(null); Codec defaultCodec = defaultConfig.getCodec(); if ((new IndexWriterConfig(null)).getCodec() instanceof CompressingCodec) { Pattern regex = Pattern.compile("maxDocsPerChunk=(\\d+), blockSize=(\\d+)"); Matcher matcher = regex.matcher(defaultCodec.toString()); assertTrue( "Unexpected CompressingCodec toString() output: " + defaultCodec.toString(), matcher.find()); int maxDocsPerChunk = Integer.parseInt(matcher.group(1)); int blockSize = Integer.parseInt(matcher.group(2)); int product = maxDocsPerChunk * blockSize; assumeTrue( defaultCodec.getName() + " maxDocsPerChunk (" + maxDocsPerChunk + ") * blockSize (" + blockSize + ") < 16 - this can trigger OOM with -Dtests.heapsize=30g", product >= 16); } BaseDirectoryWrapper dir = newFSDirectory(createTempDir("2BPostingsBytes1")); if (dir instanceof MockDirectoryWrapper) { ((MockDirectoryWrapper) dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER); } IndexWriter w = new IndexWriter( dir, new IndexWriterConfig(new MockAnalyzer(random())) .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) .setRAMBufferSizeMB(256.0) .setMergeScheduler(new ConcurrentMergeScheduler()) .setMergePolicy(newLogMergePolicy(false, 10)) .setOpenMode(IndexWriterConfig.OpenMode.CREATE) .setCodec(TestUtil.getDefaultCodec())); MergePolicy mp = w.getConfig().getMergePolicy(); if (mp instanceof LogByteSizeMergePolicy) { // 1 petabyte: ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024 * 1024 * 1024); } Document doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS); ft.setOmitNorms(true); MyTokenStream tokenStream = new MyTokenStream(); Field field = new Field("field", tokenStream, ft); doc.add(field); final int numDocs = 1000; for (int i = 0; i < numDocs; i++) { if (i % 2 == 1) { // trick blockPF's little optimization tokenStream.n = 65536; } else { tokenStream.n = 65537; } w.addDocument(doc); } w.forceMerge(1); w.close(); DirectoryReader oneThousand = DirectoryReader.open(dir); DirectoryReader subReaders[] = new DirectoryReader[1000]; Arrays.fill(subReaders, oneThousand); BaseDirectoryWrapper dir2 = newFSDirectory(createTempDir("2BPostingsBytes2")); if (dir2 instanceof MockDirectoryWrapper) { ((MockDirectoryWrapper) dir2).setThrottling(MockDirectoryWrapper.Throttling.NEVER); } IndexWriter w2 = new IndexWriter(dir2, new IndexWriterConfig(null)); TestUtil.addIndexesSlowly(w2, subReaders); w2.forceMerge(1); w2.close(); oneThousand.close(); DirectoryReader oneMillion = DirectoryReader.open(dir2); subReaders = new DirectoryReader[2000]; Arrays.fill(subReaders, oneMillion); BaseDirectoryWrapper dir3 = newFSDirectory(createTempDir("2BPostingsBytes3")); if (dir3 instanceof MockDirectoryWrapper) { ((MockDirectoryWrapper) dir3).setThrottling(MockDirectoryWrapper.Throttling.NEVER); } IndexWriter w3 = new IndexWriter(dir3, new IndexWriterConfig(null)); TestUtil.addIndexesSlowly(w3, subReaders); w3.forceMerge(1); w3.close(); oneMillion.close(); dir.close(); dir2.close(); dir3.close(); }
public void testRandom() throws IOException, InterruptedException { final int numThreads = 1 + random().nextInt(8); final int numDocumentsToIndex = 50 + atLeast(70); AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex); Directory dir = newDirectory(); IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy(); iwc.setFlushPolicy(flushPolicy); final int numDWPT = 1 + random().nextInt(8); DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(numDWPT); iwc.setIndexerThreadPool(threadPool); IndexWriter writer = new IndexWriter(dir, iwc); flushPolicy = (MockDefaultFlushPolicy) writer.getConfig().getFlushPolicy(); DocumentsWriter docsWriter = writer.getDocsWriter(); assertNotNull(docsWriter); DocumentsWriterFlushControl flushControl = docsWriter.flushControl; assertEquals(" bytes must be 0 after init", 0, flushControl.flushBytes()); IndexThread[] threads = new IndexThread[numThreads]; for (int x = 0; x < threads.length; x++) { threads[x] = new IndexThread(numDocs, numThreads, writer, lineDocFile, true); threads[x].start(); } for (int x = 0; x < threads.length; x++) { threads[x].join(); } assertEquals(" all flushes must be due", 0, flushControl.flushBytes()); assertEquals(numDocumentsToIndex, writer.numDocs()); assertEquals(numDocumentsToIndex, writer.maxDoc()); if (flushPolicy.flushOnRAM() && !flushPolicy.flushOnDocCount() && !flushPolicy.flushOnDeleteTerms()) { final long maxRAMBytes = (long) (iwc.getRAMBufferSizeMB() * 1024. * 1024.); assertTrue( "peak bytes without flush exceeded watermark", flushPolicy.peakBytesWithoutFlush <= maxRAMBytes); if (flushPolicy.hasMarkedPending) { assertTrue( "max: " + maxRAMBytes + " " + flushControl.peakActiveBytes, maxRAMBytes <= flushControl.peakActiveBytes); } } assertActiveBytesAfter(flushControl); writer.commit(); assertEquals(0, flushControl.activeBytes()); IndexReader r = DirectoryReader.open(dir); assertEquals(numDocumentsToIndex, r.numDocs()); assertEquals(numDocumentsToIndex, r.maxDoc()); if (!flushPolicy.flushOnRAM()) { assertFalse( "never stall if we don't flush on RAM", docsWriter.flushControl.stallControl.wasStalled()); assertFalse( "never block if we don't flush on RAM", docsWriter.flushControl.stallControl.hasBlocked()); } r.close(); writer.close(); dir.close(); }
/* * Test a deletion policy that keeps last N commits * around, through creates. */ public void testKeepLastNDeletionPolicyWithCreates() throws IOException { final int N = 10; for (int pass = 0; pass < 2; pass++) { boolean useCompoundFile = (pass % 2) != 0; Directory dir = newDirectory(); if (dir instanceof MockDirectoryWrapper) { // test manually deletes files ((MockDirectoryWrapper) dir).setEnableVirusScanner(false); } IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random())) .setOpenMode(OpenMode.CREATE) .setIndexDeletionPolicy(new KeepLastNDeletionPolicy(N)) .setMaxBufferedDocs(10); MergePolicy mp = conf.getMergePolicy(); mp.setNoCFSRatio(useCompoundFile ? 1.0 : 0.0); IndexWriter writer = new IndexWriter(dir, conf); KeepLastNDeletionPolicy policy = (KeepLastNDeletionPolicy) writer.getConfig().getIndexDeletionPolicy(); writer.close(); Term searchTerm = new Term("content", "aaa"); Query query = new TermQuery(searchTerm); for (int i = 0; i < N + 1; i++) { conf = newIndexWriterConfig(new MockAnalyzer(random())) .setOpenMode(OpenMode.APPEND) .setIndexDeletionPolicy(policy) .setMaxBufferedDocs(10); mp = conf.getMergePolicy(); mp.setNoCFSRatio(useCompoundFile ? 1.0 : 0.0); writer = new IndexWriter(dir, conf); policy = (KeepLastNDeletionPolicy) writer.getConfig().getIndexDeletionPolicy(); for (int j = 0; j < 17; j++) { addDocWithID(writer, i * (N + 1) + j); } // this is a commit writer.close(); conf = new IndexWriterConfig(new MockAnalyzer(random())) .setIndexDeletionPolicy(policy) .setMergePolicy(NoMergePolicy.INSTANCE); writer = new IndexWriter(dir, conf); policy = (KeepLastNDeletionPolicy) writer.getConfig().getIndexDeletionPolicy(); writer.deleteDocuments(new Term("id", "" + (i * (N + 1) + 3))); // this is a commit writer.close(); IndexReader reader = DirectoryReader.open(dir); IndexSearcher searcher = newSearcher(reader); ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs; assertEquals(16, hits.length); reader.close(); writer = new IndexWriter( dir, newIndexWriterConfig(new MockAnalyzer(random())) .setOpenMode(OpenMode.CREATE) .setIndexDeletionPolicy(policy)); policy = (KeepLastNDeletionPolicy) writer.getConfig().getIndexDeletionPolicy(); // This will not commit: there are no changes // pending because we opened for "create": writer.close(); } assertEquals(3 * (N + 1) + 1, policy.numOnInit); assertEquals(3 * (N + 1) + 1, policy.numOnCommit); IndexReader rwReader = DirectoryReader.open(dir); IndexSearcher searcher = newSearcher(rwReader); ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs; assertEquals(0, hits.length); // Simplistic check: just verify only the past N segments_N's still // exist, and, I can open a reader on each: long gen = SegmentInfos.getLastCommitGeneration(dir); int expectedCount = 0; rwReader.close(); for (int i = 0; i < N + 1; i++) { try { IndexReader reader = DirectoryReader.open(dir); // Work backwards in commits on what the expected // count should be. searcher = newSearcher(reader); hits = searcher.search(query, 1000).scoreDocs; assertEquals(expectedCount, hits.length); if (expectedCount == 0) { expectedCount = 16; } else if (expectedCount == 16) { expectedCount = 17; } else if (expectedCount == 17) { expectedCount = 0; } reader.close(); if (i == N) { fail("should have failed on commits before last " + N); } } catch (IOException e) { if (i != N) { throw e; } } if (i < N) { dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); } gen--; } dir.close(); } }
public void testLiveMaxMergeCount() throws Exception { Directory d = newDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); TieredMergePolicy tmp = new TieredMergePolicy(); tmp.setSegmentsPerTier(1000); tmp.setMaxMergeAtOnce(1000); tmp.setMaxMergeAtOnceExplicit(10); iwc.setMergePolicy(tmp); iwc.setMaxBufferedDocs(2); iwc.setRAMBufferSizeMB(-1); final AtomicInteger maxRunningMergeCount = new AtomicInteger(); ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler() { final AtomicInteger runningMergeCount = new AtomicInteger(); @Override public void doMerge(MergePolicy.OneMerge merge) throws IOException { int count = runningMergeCount.incrementAndGet(); // evil? synchronized (this) { if (count > maxRunningMergeCount.get()) { maxRunningMergeCount.set(count); } } try { super.doMerge(merge); } finally { runningMergeCount.decrementAndGet(); } } }; cms.setMaxMergesAndThreads(5, 3); iwc.setMergeScheduler(cms); IndexWriter w = new IndexWriter(d, iwc); // Makes 100 segments for (int i = 0; i < 200; i++) { w.addDocument(new Document()); } // No merges should have run so far, because TMP has high segmentsPerTier: assertEquals(0, maxRunningMergeCount.get()); w.forceMerge(1); // At most 5 merge threads should have launched at once: assertTrue("maxRunningMergeCount=" + maxRunningMergeCount, maxRunningMergeCount.get() <= 5); maxRunningMergeCount.set(0); // Makes another 100 segments for (int i = 0; i < 200; i++) { w.addDocument(new Document()); } ((ConcurrentMergeScheduler) w.getConfig().getMergeScheduler()).setMaxMergesAndThreads(1, 1); w.forceMerge(1); // At most 1 merge thread should have launched at once: assertEquals(1, maxRunningMergeCount.get()); w.close(); d.close(); }
public void testDeletes1() throws Exception { // IndexWriter.debug2 = System.out; Directory dir = new MockDirectoryWrapper(new Random(random().nextLong()), new RAMDirectory()); IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); iwc.setMergeScheduler(new SerialMergeScheduler()); iwc.setMaxBufferedDocs(5000); iwc.setRAMBufferSizeMB(100); RangeMergePolicy fsmp = new RangeMergePolicy(false); iwc.setMergePolicy(fsmp); IndexWriter writer = new IndexWriter(dir, iwc); for (int x = 0; x < 5; x++) { writer.addDocument(DocHelper.createDocument(x, "1", 2)); // System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs()); } // System.out.println("commit1"); writer.commit(); assertEquals(1, writer.segmentInfos.size()); for (int x = 5; x < 10; x++) { writer.addDocument(DocHelper.createDocument(x, "2", 2)); // System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs()); } // System.out.println("commit2"); writer.commit(); assertEquals(2, writer.segmentInfos.size()); for (int x = 10; x < 15; x++) { writer.addDocument(DocHelper.createDocument(x, "3", 2)); // System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs()); } writer.deleteDocuments(new Term("id", "1")); writer.deleteDocuments(new Term("id", "11")); // flushing without applying deletes means // there will still be deletes in the segment infos writer.flush(false, false); assertTrue(writer.bufferedUpdatesStream.any()); // get reader flushes pending deletes // so there should not be anymore IndexReader r1 = writer.getReader(); assertFalse(writer.bufferedUpdatesStream.any()); r1.close(); // delete id:2 from the first segment // merge segments 0 and 1 // which should apply the delete id:2 writer.deleteDocuments(new Term("id", "2")); writer.flush(false, false); fsmp = (RangeMergePolicy) writer.getConfig().getMergePolicy(); fsmp.doMerge = true; fsmp.start = 0; fsmp.length = 2; writer.maybeMerge(); assertEquals(2, writer.segmentInfos.size()); // id:2 shouldn't exist anymore because // it's been applied in the merge and now it's gone IndexReader r2 = writer.getReader(); int[] id2docs = toDocsArray(new Term("id", "2"), null, r2); assertTrue(id2docs == null); r2.close(); /** * // added docs are in the ram buffer for (int x = 15; x < 20; x++) { * writer.addDocument(TestIndexWriterReader.createDocument(x, "4", 2)); * System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs()); } * assertTrue(writer.numRamDocs() > 0); // delete from the ram buffer writer.deleteDocuments(new * Term("id", Integer.toString(13))); * * <p>Term id3 = new Term("id", Integer.toString(3)); * * <p>// delete from the 1st segment writer.deleteDocuments(id3); * * <p>assertTrue(writer.numRamDocs() > 0); * * <p>//System.out // .println("segdels1:" + writer.docWriter.deletesToString()); * * <p>//assertTrue(writer.docWriter.segmentDeletes.size() > 0); * * <p>// we cause a merge to happen fsmp.doMerge = true; fsmp.start = 0; fsmp.length = 2; * System.out.println("maybeMerge "+writer.segmentInfos); * * <p>SegmentInfo info0 = writer.segmentInfos.info(0); SegmentInfo info1 = * writer.segmentInfos.info(1); * * <p>writer.maybeMerge(); System.out.println("maybeMerge after "+writer.segmentInfos); // there * should be docs in RAM assertTrue(writer.numRamDocs() > 0); * * <p>// assert we've merged the 1 and 2 segments // and still have a segment leftover == 2 * assertEquals(2, writer.segmentInfos.size()); assertFalse(segThere(info0, * writer.segmentInfos)); assertFalse(segThere(info1, writer.segmentInfos)); * * <p>//System.out.println("segdels2:" + writer.docWriter.deletesToString()); * * <p>//assertTrue(writer.docWriter.segmentDeletes.size() > 0); * * <p>IndexReader r = writer.getReader(); IndexReader r1 = r.getSequentialSubReaders()[0]; * printDelDocs(r1.getLiveDocs()); int[] docs = toDocsArray(id3, null, r); * System.out.println("id3 docs:"+Arrays.toString(docs)); // there shouldn't be any docs for * id:3 assertTrue(docs == null); r.close(); * * <p>part2(writer, fsmp); */ // System.out.println("segdels2:"+writer.docWriter.segmentDeletes.toString()); // System.out.println("close"); writer.close(); dir.close(); }
/* Uses KeepAllDeletionPolicy to keep all commits around, * then, opens a new IndexWriter on a previous commit * point. */ public void testOpenPriorSnapshot() throws IOException { Directory dir = newDirectory(); IndexWriter writer = new IndexWriter( dir, newIndexWriterConfig(new MockAnalyzer(random())) .setIndexDeletionPolicy(new KeepAllDeletionPolicy(dir)) .setMaxBufferedDocs(2) .setMergePolicy(newLogMergePolicy(10))); KeepAllDeletionPolicy policy = (KeepAllDeletionPolicy) writer.getConfig().getIndexDeletionPolicy(); for (int i = 0; i < 10; i++) { addDoc(writer); if ((1 + i) % 2 == 0) writer.commit(); } writer.close(); Collection<IndexCommit> commits = DirectoryReader.listCommits(dir); assertEquals(5, commits.size()); IndexCommit lastCommit = null; for (final IndexCommit commit : commits) { if (lastCommit == null || commit.getGeneration() > lastCommit.getGeneration()) lastCommit = commit; } assertTrue(lastCommit != null); // Now add 1 doc and merge writer = new IndexWriter( dir, newIndexWriterConfig(new MockAnalyzer(random())).setIndexDeletionPolicy(policy)); addDoc(writer); assertEquals(11, writer.numDocs()); writer.forceMerge(1); writer.close(); assertEquals(6, DirectoryReader.listCommits(dir).size()); // Now open writer on the commit just before merge: writer = new IndexWriter( dir, newIndexWriterConfig(new MockAnalyzer(random())) .setIndexDeletionPolicy(policy) .setIndexCommit(lastCommit)); assertEquals(10, writer.numDocs()); // Should undo our rollback: writer.rollback(); DirectoryReader r = DirectoryReader.open(dir); // Still merged, still 11 docs assertEquals(1, r.leaves().size()); assertEquals(11, r.numDocs()); r.close(); writer = new IndexWriter( dir, newIndexWriterConfig(new MockAnalyzer(random())) .setIndexDeletionPolicy(policy) .setIndexCommit(lastCommit)); assertEquals(10, writer.numDocs()); // Commits the rollback: writer.close(); // Now 7 because we made another commit assertEquals(7, DirectoryReader.listCommits(dir).size()); r = DirectoryReader.open(dir); // Not fully merged because we rolled it back, and now only // 10 docs assertTrue(r.leaves().size() > 1); assertEquals(10, r.numDocs()); r.close(); // Re-merge writer = new IndexWriter( dir, newIndexWriterConfig(new MockAnalyzer(random())).setIndexDeletionPolicy(policy)); writer.forceMerge(1); writer.close(); r = DirectoryReader.open(dir); assertEquals(1, r.leaves().size()); assertEquals(10, r.numDocs()); r.close(); // Now open writer on the commit just before merging, // but this time keeping only the last commit: writer = new IndexWriter( dir, newIndexWriterConfig(new MockAnalyzer(random())).setIndexCommit(lastCommit)); assertEquals(10, writer.numDocs()); // Reader still sees fully merged index, because writer // opened on the prior commit has not yet committed: r = DirectoryReader.open(dir); assertEquals(1, r.leaves().size()); assertEquals(10, r.numDocs()); r.close(); writer.close(); // Now reader sees not-fully-merged index: r = DirectoryReader.open(dir); assertTrue(r.leaves().size() > 1); assertEquals(10, r.numDocs()); r.close(); dir.close(); }
/* * Test a silly deletion policy that keeps all commits around. */ public void testKeepAllDeletionPolicy() throws IOException { for (int pass = 0; pass < 2; pass++) { if (VERBOSE) { System.out.println("TEST: cycle pass="******"TEST: open writer for forceMerge"); } writer = new IndexWriter(dir, conf); policy = (KeepAllDeletionPolicy) writer.getConfig().getIndexDeletionPolicy(); writer.forceMerge(1); writer.close(); } assertEquals(needsMerging ? 2 : 1, policy.numOnInit); // If we are not auto committing then there should // be exactly 2 commits (one per close above): assertEquals(1 + (needsMerging ? 1 : 0), policy.numOnCommit); // Test listCommits Collection<IndexCommit> commits = DirectoryReader.listCommits(dir); // 2 from closing writer assertEquals(1 + (needsMerging ? 1 : 0), commits.size()); // Make sure we can open a reader on each commit: for (final IndexCommit commit : commits) { IndexReader r = DirectoryReader.open(commit); r.close(); } // Simplistic check: just verify all segments_N's still // exist, and, I can open a reader on each: long gen = SegmentInfos.getLastCommitGeneration(dir); while (gen > 0) { IndexReader reader = DirectoryReader.open(dir); reader.close(); dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); gen--; if (gen > 0) { // Now that we've removed a commit point, which // should have orphan'd at least one index file. // Open & close a writer and assert that it // actually removed something: int preCount = dir.listAll().length; writer = new IndexWriter( dir, newIndexWriterConfig(new MockAnalyzer(random())) .setOpenMode(OpenMode.APPEND) .setIndexDeletionPolicy(policy)); writer.close(); int postCount = dir.listAll().length; assertTrue(postCount < preCount); } } dir.close(); } }
/* * Test "by time expiration" deletion policy: */ public void testExpirationTimeDeletionPolicy() throws IOException, InterruptedException { final double SECONDS = 2.0; Directory dir = newDirectory(); if (dir instanceof MockDirectoryWrapper) { // test manually deletes files ((MockDirectoryWrapper) dir).setEnableVirusScanner(false); } IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random())) .setIndexDeletionPolicy(new ExpirationTimeDeletionPolicy(dir, SECONDS)); MergePolicy mp = conf.getMergePolicy(); mp.setNoCFSRatio(1.0); IndexWriter writer = new IndexWriter(dir, conf); ExpirationTimeDeletionPolicy policy = (ExpirationTimeDeletionPolicy) writer.getConfig().getIndexDeletionPolicy(); Map<String, String> commitData = new HashMap<>(); commitData.put("commitTime", String.valueOf(System.currentTimeMillis())); writer.setCommitData(commitData); writer.commit(); writer.close(); long lastDeleteTime = 0; final int targetNumDelete = TestUtil.nextInt(random(), 1, 5); while (policy.numDelete < targetNumDelete) { // Record last time when writer performed deletes of // past commits lastDeleteTime = System.currentTimeMillis(); conf = newIndexWriterConfig(new MockAnalyzer(random())) .setOpenMode(OpenMode.APPEND) .setIndexDeletionPolicy(policy); mp = conf.getMergePolicy(); mp.setNoCFSRatio(1.0); writer = new IndexWriter(dir, conf); policy = (ExpirationTimeDeletionPolicy) writer.getConfig().getIndexDeletionPolicy(); for (int j = 0; j < 17; j++) { addDoc(writer); } commitData = new HashMap<>(); commitData.put("commitTime", String.valueOf(System.currentTimeMillis())); writer.setCommitData(commitData); writer.commit(); writer.close(); Thread.sleep((int) (1000.0 * (SECONDS / 5.0))); } // Then simplistic check: just verify that the // segments_N's that still exist are in fact within SECONDS // seconds of the last one's mod time, and, that I can // open a reader on each: long gen = SegmentInfos.getLastCommitGeneration(dir); String fileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen); boolean oneSecondResolution = true; while (gen > 0) { try { IndexReader reader = DirectoryReader.open(dir); reader.close(); fileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen); // if we are on a filesystem that seems to have only // 1 second resolution, allow +1 second in commit // age tolerance: SegmentInfos sis = SegmentInfos.readCommit(dir, fileName); long modTime = Long.parseLong(sis.getUserData().get("commitTime")); oneSecondResolution &= (modTime % 1000) == 0; final long leeway = (long) ((SECONDS + (oneSecondResolution ? 1.0 : 0.0)) * 1000); assertTrue( "commit point was older than " + SECONDS + " seconds (" + (lastDeleteTime - modTime) + " msec) but did not get deleted ", lastDeleteTime - modTime <= leeway); } catch (IOException e) { // OK break; } dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); gen--; } dir.close(); }
/* * Make sure IndexWriter cleans up on hitting a disk * full exception in addDocument. * TODO: how to do this on windows with FSDirectory? */ public void testAddDocumentOnDiskFull() throws IOException { for (int pass = 0; pass < 2; pass++) { if (VERBOSE) { System.out.println("TEST: pass="******"TEST: cycle: diskFree=" + diskFree); } MockDirectoryWrapper dir = new MockDirectoryWrapper(random, new RAMDirectory()); dir.setMaxSizeInBytes(diskFree); IndexWriter writer = new IndexWriter( dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); writer.setInfoStream(VERBOSE ? System.out : null); MergeScheduler ms = writer.getConfig().getMergeScheduler(); if (ms instanceof ConcurrentMergeScheduler) { // This test intentionally produces exceptions // in the threads that CMS launches; we don't // want to pollute test output with these. ((ConcurrentMergeScheduler) ms).setSuppressExceptions(); } boolean hitError = false; try { for (int i = 0; i < 200; i++) { addDoc(writer); } if (VERBOSE) { System.out.println("TEST: done adding docs; now commit"); } writer.commit(); } catch (IOException e) { if (VERBOSE) { System.out.println("TEST: exception on addDoc"); e.printStackTrace(System.out); } hitError = true; } if (hitError) { if (doAbort) { if (VERBOSE) { System.out.println("TEST: now rollback"); } writer.rollback(); } else { try { if (VERBOSE) { System.out.println("TEST: now close"); } writer.close(); } catch (IOException e) { if (VERBOSE) { System.out.println("TEST: exception on close; retry w/ no disk space limit"); e.printStackTrace(System.out); } dir.setMaxSizeInBytes(0); writer.close(); } } // _TestUtil.syncConcurrentMerges(ms); if (_TestUtil.anyFilesExceptWriteLock(dir)) { assertNoUnreferencedFiles(dir, "after disk full during addDocument"); // Make sure reader can open the index: IndexReader.open(dir, true).close(); } dir.close(); // Now try again w/ more space: diskFree += TEST_NIGHTLY ? _TestUtil.nextInt(random, 400, 600) : _TestUtil.nextInt(random, 3000, 5000); } else { // _TestUtil.syncConcurrentMerges(writer); dir.setMaxSizeInBytes(0); writer.close(); dir.close(); break; } } } }
/* Test: make sure when we run out of disk space or hit random IOExceptions in any of the addIndexes(*) calls that 1) index is not corrupt (searcher can open/search it) and 2) transactional semantics are followed: either all or none of the incoming documents were in fact added. */ public void testAddIndexOnDiskFull() throws IOException { int START_COUNT = 57; int NUM_DIR = 50; int END_COUNT = START_COUNT + NUM_DIR * 25; // Build up a bunch of dirs that have indexes which we // will then merge together by calling addIndexes(*): Directory[] dirs = new Directory[NUM_DIR]; long inputDiskUsage = 0; for (int i = 0; i < NUM_DIR; i++) { dirs[i] = newDirectory(); IndexWriter writer = new IndexWriter( dirs[i], newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); for (int j = 0; j < 25; j++) { addDocWithIndex(writer, 25 * i + j); } writer.close(); String[] files = dirs[i].listAll(); for (int j = 0; j < files.length; j++) { inputDiskUsage += dirs[i].fileLength(files[j]); } } // Now, build a starting index that has START_COUNT docs. We // will then try to addIndexesNoOptimize into a copy of this: MockDirectoryWrapper startDir = newDirectory(); IndexWriter writer = new IndexWriter( startDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); for (int j = 0; j < START_COUNT; j++) { addDocWithIndex(writer, j); } writer.close(); // Make sure starting index seems to be working properly: Term searchTerm = new Term("content", "aaa"); IndexReader reader = IndexReader.open(startDir, true); assertEquals("first docFreq", 57, reader.docFreq(searchTerm)); IndexSearcher searcher = newSearcher(reader); ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; assertEquals("first number of hits", 57, hits.length); searcher.close(); reader.close(); // Iterate with larger and larger amounts of free // disk space. With little free disk space, // addIndexes will certainly run out of space & // fail. Verify that when this happens, index is // not corrupt and index in fact has added no // documents. Then, we increase disk space by a random // amount each iteration. At some point there is // enough free disk space and addIndexes should // succeed and index should show all documents were // added. // String[] files = startDir.listAll(); long diskUsage = startDir.sizeInBytes(); long startDiskUsage = 0; String[] files = startDir.listAll(); for (int i = 0; i < files.length; i++) { startDiskUsage += startDir.fileLength(files[i]); } for (int iter = 0; iter < 3; iter++) { if (VERBOSE) System.out.println("TEST: iter=" + iter); // Start with 50-200 bytes more than we are currently using: long diskFree = diskUsage + _TestUtil.nextInt(random, 50, 200); int method = iter; boolean success = false; boolean done = false; String methodName; if (0 == method) { methodName = "addIndexes(Directory[]) + optimize()"; } else if (1 == method) { methodName = "addIndexes(IndexReader[])"; } else { methodName = "addIndexes(Directory[])"; } while (!done) { if (VERBOSE) { System.out.println("TEST: cycle..."); } // Make a new dir that will enforce disk usage: MockDirectoryWrapper dir = new MockDirectoryWrapper(random, new RAMDirectory(startDir)); writer = new IndexWriter( dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setOpenMode(OpenMode.APPEND) .setMergePolicy(newLogMergePolicy())); IOException err = null; writer.setInfoStream(VERBOSE ? System.out : null); MergeScheduler ms = writer.getConfig().getMergeScheduler(); for (int x = 0; x < 2; x++) { if (ms instanceof ConcurrentMergeScheduler) // This test intentionally produces exceptions // in the threads that CMS launches; we don't // want to pollute test output with these. if (0 == x) ((ConcurrentMergeScheduler) ms).setSuppressExceptions(); else ((ConcurrentMergeScheduler) ms).clearSuppressExceptions(); // Two loops: first time, limit disk space & // throw random IOExceptions; second time, no // disk space limit: double rate = 0.05; double diskRatio = ((double) diskFree) / diskUsage; long thisDiskFree; String testName = null; if (0 == x) { thisDiskFree = diskFree; if (diskRatio >= 2.0) { rate /= 2; } if (diskRatio >= 4.0) { rate /= 2; } if (diskRatio >= 6.0) { rate = 0.0; } if (VERBOSE) testName = "disk full test " + methodName + " with disk full at " + diskFree + " bytes"; } else { thisDiskFree = 0; rate = 0.0; if (VERBOSE) testName = "disk full test " + methodName + " with unlimited disk space"; } if (VERBOSE) System.out.println("\ncycle: " + testName); dir.setTrackDiskUsage(true); dir.setMaxSizeInBytes(thisDiskFree); dir.setRandomIOExceptionRate(rate); try { if (0 == method) { writer.addIndexes(dirs); writer.optimize(); } else if (1 == method) { IndexReader readers[] = new IndexReader[dirs.length]; for (int i = 0; i < dirs.length; i++) { readers[i] = IndexReader.open(dirs[i], true); } try { writer.addIndexes(readers); } finally { for (int i = 0; i < dirs.length; i++) { readers[i].close(); } } } else { writer.addIndexes(dirs); } success = true; if (VERBOSE) { System.out.println(" success!"); } if (0 == x) { done = true; } } catch (IOException e) { success = false; err = e; if (VERBOSE) { System.out.println(" hit IOException: " + e); e.printStackTrace(System.out); } if (1 == x) { e.printStackTrace(System.out); fail(methodName + " hit IOException after disk space was freed up"); } } // Make sure all threads from // ConcurrentMergeScheduler are done _TestUtil.syncConcurrentMerges(writer); if (VERBOSE) { System.out.println(" now test readers"); } // Finally, verify index is not corrupt, and, if // we succeeded, we see all docs added, and if we // failed, we see either all docs or no docs added // (transactional semantics): try { reader = IndexReader.open(dir, true); } catch (IOException e) { e.printStackTrace(System.out); fail(testName + ": exception when creating IndexReader: " + e); } int result = reader.docFreq(searchTerm); if (success) { if (result != START_COUNT) { fail( testName + ": method did not throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT); } } else { // On hitting exception we still may have added // all docs: if (result != START_COUNT && result != END_COUNT) { err.printStackTrace(System.out); fail( testName + ": method did throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT + " or " + END_COUNT); } } searcher = newSearcher(reader); try { hits = searcher.search(new TermQuery(searchTerm), null, END_COUNT).scoreDocs; } catch (IOException e) { e.printStackTrace(System.out); fail(testName + ": exception when searching: " + e); } int result2 = hits.length; if (success) { if (result2 != result) { fail( testName + ": method did not throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + result); } } else { // On hitting exception we still may have added // all docs: if (result2 != result) { err.printStackTrace(System.out); fail( testName + ": method did throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + result); } } searcher.close(); reader.close(); if (VERBOSE) { System.out.println(" count is " + result); } if (done || result == END_COUNT) { break; } } if (VERBOSE) { System.out.println( " start disk = " + startDiskUsage + "; input disk = " + inputDiskUsage + "; max used = " + dir.getMaxUsedSizeInBytes()); } if (done) { // Javadocs state that temp free Directory space // required is at most 2X total input size of // indices so let's make sure: assertTrue( "max free Directory space required exceeded 2X the total input index sizes during " + methodName + ": max temp usage = " + (dir.getMaxUsedSizeInBytes() - startDiskUsage) + " bytes vs limit=" + (2 * (startDiskUsage + inputDiskUsage)) + "; starting disk usage = " + startDiskUsage + " bytes; " + "input index disk usage = " + inputDiskUsage + " bytes", (dir.getMaxUsedSizeInBytes() - startDiskUsage) < 2 * (startDiskUsage + inputDiskUsage)); } // Make sure we don't hit disk full during close below: dir.setMaxSizeInBytes(0); dir.setRandomIOExceptionRate(0.0); writer.close(); // Wait for all BG threads to finish else // dir.close() will throw IOException because // there are still open files _TestUtil.syncConcurrentMerges(ms); dir.close(); // Try again with more free space: diskFree += TEST_NIGHTLY ? _TestUtil.nextInt(random, 4000, 8000) : _TestUtil.nextInt(random, 40000, 80000); } } startDir.close(); for (Directory dir : dirs) dir.close(); }
SingleIndex(String directoryName, String fileName, String indexPath) { try { Directory dir = FSDirectory.open(new File(indexPath)); // write to a directory for checking Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_48); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48, analyzer); IndexWriter writer = new IndexWriter(dir, iwc); // resolve the file to index (assumes fileName is relative to directoryName) File file = new File(directoryName, fileName); FileInputStream fis; try { fis = new FileInputStream(file); } catch (FileNotFoundException fnfe) { // at least on windows, some temporary files raise this exception with an "access denied" // message // checking if the file can be read doesn't help return; } try { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.getPath(), Field.Store.YES); doc.add(pathField); // Add the last modified date of the file as a field named "modified". // Use a LongField that is indexed (i.e. efficiently filterable with // NumericRangeFilter). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongField("modified", file.lastModified(), Field.Store.NO)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add( new TextField( "contents", new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.getPath()), doc); } } finally { fis.close(); } writer.close(); } catch (Exception e) { e.printStackTrace(); } }
@Slow public void testNoWaitClose() throws Throwable { Directory directory = newDirectory(); if (directory instanceof MockDirectoryWrapper) { ((MockDirectoryWrapper) directory).setPreventDoubleWrite(false); } final Document doc = new Document(); FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); customType.setTokenized(false); Field idField = newField("id", "", customType); doc.add(idField); for (int pass = 0; pass < 2; pass++) { if (VERBOSE) { System.out.println("TEST: pass="******"TEST: iter=" + iter); } for (int j = 0; j < 199; j++) { idField.setStringValue(Integer.toString(iter * 201 + j)); writer.addDocument(doc); } int delID = iter * 199; for (int j = 0; j < 20; j++) { writer.deleteDocuments(new Term("id", Integer.toString(delID))); delID += 5; } writer.commit(); // Force a bunch of merge threads to kick off so we // stress out aborting them on close: ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(2); final IndexWriter finalWriter = writer; final AtomicReference<Throwable> failure = new AtomicReference<>(); Thread t1 = new Thread() { @Override public void run() { boolean done = false; while (!done) { for (int i = 0; i < 100; i++) { try { finalWriter.addDocument(doc); } catch (AlreadyClosedException e) { done = true; break; } catch (NullPointerException e) { done = true; break; } catch (Throwable e) { e.printStackTrace(System.out); failure.set(e); done = true; break; } } Thread.yield(); } } }; t1.start(); writer.close(); t1.join(); if (failure.get() != null) { throw failure.get(); } // Make sure reader can read IndexReader reader = DirectoryReader.open(directory); reader.close(); // Reopen writer = new IndexWriter( directory, newIndexWriterConfig(new MockAnalyzer(random())) .setOpenMode(OpenMode.APPEND) .setMergePolicy(newLogMergePolicy()) .setCommitOnClose(false)); } writer.close(); } directory.close(); }
@Nightly public void test() throws Exception { MockDirectoryWrapper dir = new MockDirectoryWrapper(random(), new MMapDirectory(createTempDir("4GBStoredFields"))); dir.setThrottling(MockDirectoryWrapper.Throttling.NEVER); IndexWriter w = new IndexWriter( dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) .setRAMBufferSizeMB(256.0) .setMergeScheduler(new ConcurrentMergeScheduler()) .setMergePolicy(newLogMergePolicy(false, 10)) .setOpenMode(IndexWriterConfig.OpenMode.CREATE)); MergePolicy mp = w.getConfig().getMergePolicy(); if (mp instanceof LogByteSizeMergePolicy) { // 1 petabyte: ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024 * 1024 * 1024); } final Document doc = new Document(); final FieldType ft = new FieldType(); ft.setIndexed(false); ft.setStored(true); ft.freeze(); final int valueLength = RandomInts.randomIntBetween(random(), 1 << 13, 1 << 20); final byte[] value = new byte[valueLength]; for (int i = 0; i < valueLength; ++i) { // random so that even compressing codecs can't compress it value[i] = (byte) random().nextInt(256); } final Field f = new Field("fld", value, ft); doc.add(f); final int numDocs = (int) ((1L << 32) / valueLength + 100); for (int i = 0; i < numDocs; ++i) { w.addDocument(doc); if (VERBOSE && i % (numDocs / 10) == 0) { System.out.println(i + " of " + numDocs + "..."); } } w.forceMerge(1); w.close(); if (VERBOSE) { boolean found = false; for (String file : dir.listAll()) { if (file.endsWith(".fdt")) { final long fileLength = dir.fileLength(file); if (fileLength >= 1L << 32) { found = true; } System.out.println("File length of " + file + " : " + fileLength); } } if (!found) { System.out.println("No .fdt file larger than 4GB, test bug?"); } } DirectoryReader rd = DirectoryReader.open(dir); Document sd = rd.document(numDocs - 1); assertNotNull(sd); assertEquals(1, sd.getFields().size()); BytesRef valueRef = sd.getBinaryValue("fld"); assertNotNull(valueRef); assertEquals(new BytesRef(value), valueRef); rd.close(); dir.close(); }