/*
 * Test: make sure when we run out of disk space or hit
 * random IOExceptions in any of the addIndexes(*) calls
 * that 1) the index is not corrupt (a searcher can open/search
 * it) and 2) transactional semantics are followed: either
 * all or none of the incoming documents were in fact added.
 */
public void testAddIndexOnDiskFull() throws IOException {
  int START_COUNT = 57;
  int NUM_DIR = 50;
  int END_COUNT = START_COUNT + NUM_DIR * 25;

  // Build up a bunch of dirs that have indexes which we
  // will then merge together by calling addIndexes(*):
  Directory[] dirs = new Directory[NUM_DIR];
  long inputDiskUsage = 0;
  for (int i = 0; i < NUM_DIR; i++) {
    dirs[i] = newDirectory();
    IndexWriter writer = new IndexWriter(
        dirs[i],
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    for (int j = 0; j < 25; j++) {
      addDocWithIndex(writer, 25 * i + j);
    }
    writer.close();
    String[] files = dirs[i].listAll();
    for (int j = 0; j < files.length; j++) {
      inputDiskUsage += dirs[i].fileLength(files[j]);
    }
  }

  // Now, build a starting index that has START_COUNT docs. We
  // will then try to addIndexes into a copy of this:
  MockDirectoryWrapper startDir = newDirectory();
  IndexWriter writer = new IndexWriter(
      startDir,
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
  for (int j = 0; j < START_COUNT; j++) {
    addDocWithIndex(writer, j);
  }
  writer.close();

  // Make sure starting index seems to be working properly:
  Term searchTerm = new Term("content", "aaa");
  IndexReader reader = IndexReader.open(startDir, true);
  assertEquals("first docFreq", START_COUNT, reader.docFreq(searchTerm));

  IndexSearcher searcher = newSearcher(reader);
  ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs;
  assertEquals("first number of hits", START_COUNT, hits.length);
  searcher.close();
  reader.close();

  // Iterate with larger and larger amounts of free disk
  // space. With little free disk space, addIndexes will
  // certainly run out of space & fail. Verify that when
  // this happens, the index is not corrupt and in fact has
  // added no documents. Then, we increase free disk space
  // each iteration. At some point there is enough free disk
  // space that addIndexes should succeed and the index
  // should show all documents were added.

  long diskUsage = startDir.sizeInBytes();

  long startDiskUsage = 0;
  String[] files = startDir.listAll();
  for (int i = 0; i < files.length; i++) {
    startDiskUsage += startDir.fileLength(files[i]);
  }

  for (int iter = 0; iter < 3; iter++) {

    if (VERBOSE)
      System.out.println("TEST: iter=" + iter);

    // Start with 50-200 bytes more than we are currently using:
    long diskFree = diskUsage + _TestUtil.nextInt(random, 50, 200);

    int method = iter;

    boolean success = false;
    boolean done = false;

    String methodName;
    if (0 == method) {
      methodName = "addIndexes(Directory[]) + optimize()";
    } else if (1 == method) {
      methodName = "addIndexes(IndexReader[])";
    } else {
      methodName = "addIndexes(Directory[])";
    }

    while (!done) {
      if (VERBOSE) {
        System.out.println("TEST: cycle...");
      }

      // Make a new dir that will enforce disk usage:
      MockDirectoryWrapper dir = new MockDirectoryWrapper(random, new RAMDirectory(startDir));
      writer = new IndexWriter(
          dir,
          newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
              .setOpenMode(OpenMode.APPEND)
              .setMergePolicy(newLogMergePolicy()));
      IOException err = null;
      writer.setInfoStream(VERBOSE ? System.out : null);

      MergeScheduler ms = writer.getConfig().getMergeScheduler();
      for (int x = 0; x < 2; x++) {
        if (ms instanceof ConcurrentMergeScheduler) {
          // This test intentionally produces exceptions
          // in the threads that CMS launches; we don't
          // want to pollute test output with these.
          if (0 == x) {
            ((ConcurrentMergeScheduler) ms).setSuppressExceptions();
          } else {
            ((ConcurrentMergeScheduler) ms).clearSuppressExceptions();
          }
        }

        // Two loops: first time, limit disk space &
        // throw random IOExceptions; second time, no
        // disk space limit:
        double rate = 0.05;
        double diskRatio = ((double) diskFree) / diskUsage;
        long thisDiskFree;

        String testName;

        if (0 == x) {
          thisDiskFree = diskFree;
          if (diskRatio >= 2.0) {
            rate /= 2;
          }
          if (diskRatio >= 4.0) {
            rate /= 2;
          }
          if (diskRatio >= 6.0) {
            rate = 0.0;
          }
          testName = "disk full test " + methodName + " with disk full at " + diskFree + " bytes";
        } else {
          thisDiskFree = 0;
          rate = 0.0;
          testName = "disk full test " + methodName + " with unlimited disk space";
        }

        if (VERBOSE)
          System.out.println("\ncycle: " + testName);

        dir.setTrackDiskUsage(true);
        dir.setMaxSizeInBytes(thisDiskFree);
        dir.setRandomIOExceptionRate(rate);

        try {
          if (0 == method) {
            writer.addIndexes(dirs);
            writer.optimize();
          } else if (1 == method) {
            IndexReader[] readers = new IndexReader[dirs.length];
            for (int i = 0; i < dirs.length; i++) {
              readers[i] = IndexReader.open(dirs[i], true);
            }
            try {
              writer.addIndexes(readers);
            } finally {
              for (int i = 0; i < dirs.length; i++) {
                readers[i].close();
              }
            }
          } else {
            writer.addIndexes(dirs);
          }

          success = true;
          if (VERBOSE) {
            System.out.println("  success!");
          }

          if (0 == x) {
            done = true;
          }

        } catch (IOException e) {
          success = false;
          err = e;
          if (VERBOSE) {
            System.out.println("  hit IOException: " + e);
            e.printStackTrace(System.out);
          }

          if (1 == x) {
            e.printStackTrace(System.out);
            fail(methodName + " hit IOException after disk space was freed up");
          }
        }

        // Make sure all threads from
        // ConcurrentMergeScheduler are done:
        _TestUtil.syncConcurrentMerges(writer);

        if (VERBOSE) {
          System.out.println("  now test readers");
        }

        // Finally, verify the index is not corrupt, and, if
        // we succeeded, we see all docs added, and if we
        // failed, we see either all docs or no docs added
        // (transactional semantics):
        try {
          reader = IndexReader.open(dir, true);
        } catch (IOException e) {
          e.printStackTrace(System.out);
          fail(testName + ": exception when creating IndexReader: " + e);
        }
        int result = reader.docFreq(searchTerm);
        if (success) {
          if (result != START_COUNT) {
            fail(testName + ": method did not throw exception but docFreq('aaa') is " + result
                + " instead of expected " + START_COUNT);
          }
        } else {
          // On hitting exception we still may have added
          // all docs:
          if (result != START_COUNT && result != END_COUNT) {
            err.printStackTrace(System.out);
            fail(testName + ": method did throw exception but docFreq('aaa') is " + result
                + " instead of expected " + START_COUNT + " or " + END_COUNT);
          }
        }

        searcher = newSearcher(reader);
        try {
          hits = searcher.search(new TermQuery(searchTerm), null, END_COUNT).scoreDocs;
        } catch (IOException e) {
          e.printStackTrace(System.out);
          fail(testName + ": exception when searching: " + e);
        }
        int result2 = hits.length;
        if (success) {
          if (result2 != result) {
            fail(testName + ": method did not throw exception but hits.length for search on term 'aaa' is "
                + result2 + " instead of expected " + result);
          }
        } else {
          // On hitting exception we still may have added
          // all docs:
          if (result2 != result) {
            err.printStackTrace(System.out);
            fail(testName + ": method did throw exception but hits.length for search on term 'aaa' is "
                + result2 + " instead of expected " + result);
          }
        }

        searcher.close();
        reader.close();
        if (VERBOSE) {
          System.out.println("  count is " + result);
        }

        if (done || result == END_COUNT) {
          break;
        }
      }

      if (VERBOSE) {
        System.out.println("  start disk = " + startDiskUsage + "; input disk = " + inputDiskUsage
            + "; max used = " + dir.getMaxUsedSizeInBytes());
      }

      if (done) {
        // Javadocs state that temp free Directory space
        // required is at most 2X total input size of
        // indices, so let's make sure:
        assertTrue(
            "max free Directory space required exceeded 2X the total input index sizes during " + methodName
                + ": max temp usage = " + (dir.getMaxUsedSizeInBytes() - startDiskUsage) + " bytes vs limit="
                + (2 * (startDiskUsage + inputDiskUsage)) + "; starting disk usage = " + startDiskUsage
                + " bytes; input index disk usage = " + inputDiskUsage + " bytes",
            (dir.getMaxUsedSizeInBytes() - startDiskUsage) < 2 * (startDiskUsage + inputDiskUsage));
      }

      // Make sure we don't hit disk full during close below:
      dir.setMaxSizeInBytes(0);
      dir.setRandomIOExceptionRate(0.0);

      writer.close();

      // Wait for all BG threads to finish, else
      // dir.close() will throw IOException because
      // there are still open files:
      _TestUtil.syncConcurrentMerges(ms);

      dir.close();

      // Try again with more free space:
      diskFree += TEST_NIGHTLY ? _TestUtil.nextInt(random, 4000, 8000)
                               : _TestUtil.nextInt(random, 40000, 80000);
    }
  }

  startDir.close();
  for (Directory dir : dirs)
    dir.close();
}
/*
 * Verify that a writer with "commit on close" indeed
 * cleans up the temp segments created after opening
 * that are not referenced by the starting segments
 * file. We check this by using MockDirectoryWrapper to
 * measure max temp disk space used.
 */
public void testCommitOnCloseDiskUsage() throws IOException {
  // MemoryCodec, since it uses FST, is not necessarily
  // "additive", ie if you add up N small FSTs, then merge
  // them, the merged result can easily be larger than the
  // sum because the merged FST may use array encoding for
  // some arcs (which uses more space):
  final String idFormat = _TestUtil.getPostingsFormat("id");
  final String contentFormat = _TestUtil.getPostingsFormat("content");
  assumeFalse(
      "This test cannot run with Memory codec",
      idFormat.equals("Memory") || contentFormat.equals("Memory"));

  MockDirectoryWrapper dir = newDirectory();
  Analyzer analyzer;
  if (random().nextBoolean()) {
    // no payloads
    analyzer = new Analyzer() {
      @Override
      public TokenStreamComponents createComponents(String fieldName, Reader reader) {
        return new TokenStreamComponents(
            new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
      }
    };
  } else {
    // fixed length payloads
    final int length = random().nextInt(200);
    analyzer = new Analyzer() {
      @Override
      public TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
        return new TokenStreamComponents(
            tokenizer, new MockFixedLengthPayloadFilter(random(), tokenizer, length));
      }
    };
  }

  IndexWriter writer = new IndexWriter(
      dir,
      newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
          .setMaxBufferedDocs(10)
          .setReaderPooling(false)
          .setMergePolicy(newLogMergePolicy(10)));
  for (int j = 0; j < 30; j++) {
    TestIndexWriter.addDocWithIndex(writer, j);
  }
  writer.close();
  dir.resetMaxUsedSizeInBytes();
  dir.setTrackDiskUsage(true);

  long startDiskUsage = dir.getMaxUsedSizeInBytes();
  writer = new IndexWriter(
      dir,
      newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
          .setOpenMode(OpenMode.APPEND)
          .setMaxBufferedDocs(10)
          .setMergeScheduler(new SerialMergeScheduler())
          .setReaderPooling(false)
          .setMergePolicy(newLogMergePolicy(10)));
  for (int j = 0; j < 1470; j++) {
    TestIndexWriter.addDocWithIndex(writer, j);
  }
  long midDiskUsage = dir.getMaxUsedSizeInBytes();
  dir.resetMaxUsedSizeInBytes();
  writer.forceMerge(1);
  writer.close();

  DirectoryReader.open(dir).close();

  long endDiskUsage = dir.getMaxUsedSizeInBytes();

  // Ending index is 50X as large as starting index; due
  // to 3X disk usage normally we allow 150X max
  // transient usage. If something is wrong w/ deleter
  // and it doesn't delete intermediate segments then it
  // will exceed this 150X:
  // System.out.println("start " + startDiskUsage + "; mid " + midDiskUsage + "; end " + endDiskUsage);
  assertTrue(
      "writer used too much space while adding documents: mid=" + midDiskUsage + " start=" + startDiskUsage
          + " end=" + endDiskUsage + " max=" + (startDiskUsage * 150),
      midDiskUsage < 150 * startDiskUsage);
  assertTrue(
      "writer used too much space after close: endDiskUsage=" + endDiskUsage + " startDiskUsage=" + startDiskUsage
          + " max=" + (startDiskUsage * 150),
      endDiskUsage < 150 * startDiskUsage);
  dir.close();
}
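
/*
 * Both tests above rely on a small helper, addDocWithIndex, that is not shown in this
 * excerpt. A minimal sketch is given below, assuming (as the docFreq assertions on
 * Term("content", "aaa") imply) that every document's "content" field contains the
 * token "aaa" plus the doc's index, and that an "id" field stores the index. The exact
 * field flags and the 3.x-style Field constructor are assumptions for illustration,
 * not the canonical helper.
 */
static void addDocWithIndex(IndexWriter writer, int index) throws IOException {
  Document doc = new Document();
  // "aaa" appears in every document, so docFreq(new Term("content", "aaa")) counts all docs
  doc.add(new Field("content", "aaa " + index, Field.Store.YES, Field.Index.ANALYZED));
  doc.add(new Field("id", Integer.toString(index), Field.Store.YES, Field.Index.ANALYZED));
  writer.addDocument(doc);
}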