// test the simple case
public void testSimpleCase() throws IOException {
  String[] keywords = {"1", "2"};
  String[] unindexed = {"Netherlands", "Italy"};
  String[] unstored = {"Amsterdam has lots of bridges", "Venice has lots of canals"};
  String[] text = {"Amsterdam", "Venice"};

  Directory dir = new MockRAMDirectory();
  IndexWriter modifier = new IndexWriter(dir, new WhitespaceAnalyzer(), true,
                                         IndexWriter.MaxFieldLength.UNLIMITED);
  modifier.setUseCompoundFile(true);
  modifier.setMaxBufferedDeleteTerms(1);

  for (int i = 0; i < keywords.length; i++) {
    Document doc = new Document();
    doc.add(new Field("id", keywords[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.add(new Field("country", unindexed[i], Field.Store.YES, Field.Index.NO));
    doc.add(new Field("contents", unstored[i], Field.Store.NO, Field.Index.ANALYZED));
    doc.add(new Field("city", text[i], Field.Store.YES, Field.Index.ANALYZED));
    modifier.addDocument(doc);
  }
  modifier.optimize();
  modifier.commit();

  Term term = new Term("city", "Amsterdam");
  int hitCount = getHitCount(dir, term);
  assertEquals(1, hitCount);

  modifier.deleteDocuments(term);
  modifier.commit();

  hitCount = getHitCount(dir, term);
  assertEquals(0, hitCount);

  modifier.close();
  dir.close();
}
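// The getHitCount helper used above (and again in testErrorAfterApplyDeletes
// below) is not defined in this section. A plausible minimal sketch, assuming
// the same Lucene 3.x search API; the top-1000 cutoff is an assumption:
private int getHitCount(Directory dir, Term term) throws IOException {
  // Open a read-only searcher, count hits for the single term, clean up.
  IndexSearcher searcher = new IndexSearcher(dir, true);
  int hitCount = searcher.search(new TermQuery(term), null, 1000).totalHits;
  searcher.close();
  return hitCount;
}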
/*
 * Test: make sure when we run out of disk space or hit
 * random IOExceptions in any of the addIndexes(*) calls
 * that 1) index is not corrupt (searcher can open/search
 * it) and 2) transactional semantics are followed: either
 * all or none of the incoming documents were in fact
 * added.
 */
public void testAddIndexOnDiskFull() throws IOException {
  int START_COUNT = 57;
  int NUM_DIR = 50;
  int END_COUNT = START_COUNT + NUM_DIR * 25;

  // Build up a bunch of dirs that have indexes which we
  // will then merge together by calling addIndexes(*):
  Directory[] dirs = new Directory[NUM_DIR];
  long inputDiskUsage = 0;
  for (int i = 0; i < NUM_DIR; i++) {
    dirs[i] = newDirectory();
    IndexWriter writer = new IndexWriter(
        dirs[i],
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    for (int j = 0; j < 25; j++) {
      addDocWithIndex(writer, 25 * i + j);
    }
    writer.close();
    String[] files = dirs[i].listAll();
    for (int j = 0; j < files.length; j++) {
      inputDiskUsage += dirs[i].fileLength(files[j]);
    }
  }

  // Now, build a starting index that has START_COUNT docs. We
  // will then try to addIndexes into a copy of this:
  MockDirectoryWrapper startDir = newDirectory();
  IndexWriter writer = new IndexWriter(
      startDir,
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
  for (int j = 0; j < START_COUNT; j++) {
    addDocWithIndex(writer, j);
  }
  writer.close();

  // Make sure starting index seems to be working properly:
  Term searchTerm = new Term("content", "aaa");
  IndexReader reader = IndexReader.open(startDir, true);
  assertEquals("first docFreq", 57, reader.docFreq(searchTerm));

  IndexSearcher searcher = newSearcher(reader);
  ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs;
  assertEquals("first number of hits", 57, hits.length);
  searcher.close();
  reader.close();

  // Iterate with larger and larger amounts of free
  // disk space. With little free disk space,
  // addIndexes will certainly run out of space &
  // fail. Verify that when this happens, index is
  // not corrupt and index in fact has added no
  // documents. Then, we increase the allotted disk
  // space each iteration. At some point there is
  // enough free disk space and addIndexes should
  // succeed and index should show all documents were
  // added.

  long diskUsage = startDir.sizeInBytes();
  long startDiskUsage = 0;
  String[] files = startDir.listAll();
  for (int i = 0; i < files.length; i++) {
    startDiskUsage += startDir.fileLength(files[i]);
  }

  for (int iter = 0; iter < 3; iter++) {

    if (VERBOSE)
      System.out.println("TEST: iter=" + iter);

    // Start with 50-200 bytes more than we are currently using:
    long diskFree = diskUsage + _TestUtil.nextInt(random, 50, 200);

    int method = iter;

    boolean success = false;
    boolean done = false;

    String methodName;
    if (0 == method) {
      methodName = "addIndexes(Directory[]) + optimize()";
    } else if (1 == method) {
      methodName = "addIndexes(IndexReader[])";
    } else {
      methodName = "addIndexes(Directory[])";
    }

    while (!done) {
      if (VERBOSE) {
        System.out.println("TEST: cycle...");
      }

      // Make a new dir that will enforce disk usage:
      MockDirectoryWrapper dir = new MockDirectoryWrapper(random, new RAMDirectory(startDir));
      writer = new IndexWriter(
          dir,
          newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
              .setOpenMode(OpenMode.APPEND)
              .setMergePolicy(newLogMergePolicy()));
      IOException err = null;
      writer.setInfoStream(VERBOSE ? System.out : null);

      MergeScheduler ms = writer.getConfig().getMergeScheduler();
      for (int x = 0; x < 2; x++) {
        if (ms instanceof ConcurrentMergeScheduler) {
          // This test intentionally produces exceptions
          // in the threads that CMS launches; we don't
          // want to pollute test output with these.
          if (0 == x)
            ((ConcurrentMergeScheduler) ms).setSuppressExceptions();
          else
            ((ConcurrentMergeScheduler) ms).clearSuppressExceptions();
        }

        // Two loops: first time, limit disk space &
        // throw random IOExceptions; second time, no
        // disk space limit:

        double rate = 0.05;
        double diskRatio = ((double) diskFree) / diskUsage;
        long thisDiskFree;

        String testName = null;

        if (0 == x) {
          thisDiskFree = diskFree;
          if (diskRatio >= 2.0) {
            rate /= 2;
          }
          if (diskRatio >= 4.0) {
            rate /= 2;
          }
          if (diskRatio >= 6.0) {
            rate = 0.0;
          }
          if (VERBOSE)
            testName = "disk full test " + methodName + " with disk full at " + diskFree + " bytes";
        } else {
          thisDiskFree = 0;
          rate = 0.0;
          if (VERBOSE)
            testName = "disk full test " + methodName + " with unlimited disk space";
        }

        if (VERBOSE)
          System.out.println("\ncycle: " + testName);

        dir.setTrackDiskUsage(true);
        dir.setMaxSizeInBytes(thisDiskFree);
        dir.setRandomIOExceptionRate(rate);

        try {
          if (0 == method) {
            writer.addIndexes(dirs);
            writer.optimize();
          } else if (1 == method) {
            IndexReader readers[] = new IndexReader[dirs.length];
            for (int i = 0; i < dirs.length; i++) {
              readers[i] = IndexReader.open(dirs[i], true);
            }
            try {
              writer.addIndexes(readers);
            } finally {
              for (int i = 0; i < dirs.length; i++) {
                readers[i].close();
              }
            }
          } else {
            writer.addIndexes(dirs);
          }

          success = true;
          if (VERBOSE) {
            System.out.println("  success!");
          }

          if (0 == x) {
            done = true;
          }

        } catch (IOException e) {
          success = false;
          err = e;
          if (VERBOSE) {
            System.out.println("  hit IOException: " + e);
            e.printStackTrace(System.out);
          }

          if (1 == x) {
            e.printStackTrace(System.out);
            fail(methodName + " hit IOException after disk space was freed up");
          }
        }

        // Make sure all threads from
        // ConcurrentMergeScheduler are done
        _TestUtil.syncConcurrentMerges(writer);

        if (VERBOSE) {
          System.out.println("  now test readers");
        }

        // Finally, verify index is not corrupt, and, if
        // we succeeded, we see all docs added, and if we
        // failed, we see either all docs or no docs added
        // (transactional semantics):
        try {
          reader = IndexReader.open(dir, true);
        } catch (IOException e) {
          e.printStackTrace(System.out);
          fail(testName + ": exception when creating IndexReader: " + e);
        }
        int result = reader.docFreq(searchTerm);
        if (success) {
          if (result != START_COUNT) {
            fail(testName + ": method did not throw exception but docFreq('aaa') is "
                + result + " instead of expected " + START_COUNT);
          }
        } else {
          // On hitting exception we still may have added
          // all docs:
          if (result != START_COUNT && result != END_COUNT) {
            err.printStackTrace(System.out);
            fail(testName + ": method did throw exception but docFreq('aaa') is "
                + result + " instead of expected " + START_COUNT + " or " + END_COUNT);
          }
        }

        searcher = newSearcher(reader);
        try {
          hits = searcher.search(new TermQuery(searchTerm), null, END_COUNT).scoreDocs;
        } catch (IOException e) {
          e.printStackTrace(System.out);
          fail(testName + ": exception when searching: " + e);
        }
        int result2 = hits.length;
        if (success) {
          if (result2 != result) {
            fail(testName
                + ": method did not throw exception but hits.length for search on term 'aaa' is "
                + result2 + " instead of expected " + result);
          }
        } else {
          // On hitting exception we still may have added
          // all docs:
          if (result2 != result) {
            err.printStackTrace(System.out);
            fail(testName
                + ": method did throw exception but hits.length for search on term 'aaa' is "
                + result2 + " instead of expected " + result);
          }
        }

        searcher.close();
        reader.close();
        if (VERBOSE) {
          System.out.println("  count is " + result);
        }

        if (done || result == END_COUNT) {
          break;
        }
      }

      if (VERBOSE) {
        System.out.println("  start disk = " + startDiskUsage + "; input disk = " + inputDiskUsage
            + "; max used = " + dir.getMaxUsedSizeInBytes());
      }

      if (done) {
        // Javadocs state that temp free Directory space
        // required is at most 2X total input size of
        // indices so let's make sure:
        assertTrue(
            "max temp disk space required exceeded 2X the total input index sizes during "
                + methodName
                + ": max temp usage = " + (dir.getMaxUsedSizeInBytes() - startDiskUsage)
                + " bytes vs limit=" + (2 * (startDiskUsage + inputDiskUsage))
                + "; starting disk usage = " + startDiskUsage + " bytes; "
                + "input index disk usage = " + inputDiskUsage + " bytes",
            (dir.getMaxUsedSizeInBytes() - startDiskUsage) < 2 * (startDiskUsage + inputDiskUsage));
      }

      // Make sure we don't hit disk full during close below:
      dir.setMaxSizeInBytes(0);
      dir.setRandomIOExceptionRate(0.0);

      writer.close();

      // Wait for all BG threads to finish else
      // dir.close() will throw IOException because
      // there are still open files
      _TestUtil.syncConcurrentMerges(ms);

      dir.close();

      // Try again with more free space:
      diskFree += TEST_NIGHTLY ? _TestUtil.nextInt(random, 4000, 8000)
                               : _TestUtil.nextInt(random, 40000, 80000);
    }
  }

  startDir.close();
  for (Directory dir : dirs)
    dir.close();
}
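// addDocWithIndex is not defined in this section. A minimal sketch, assuming
// only what the assertions above require: every document adds one "aaa"
// occurrence to the "content" field (so docFreq("content", "aaa") equals the
// doc count); the stored "id" field is a hypothetical extra:
private void addDocWithIndex(IndexWriter writer, int index) throws IOException {
  Document doc = new Document();
  doc.add(newField("content", "aaa " + index, Field.Store.YES, Field.Index.ANALYZED));
  doc.add(newField("id", "" + index, Field.Store.YES, Field.Index.ANALYZED));
  writer.addDocument(doc);
}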
public void testDeletedDocs() throws IOException {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(
      dir,
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxBufferedDocs(2));
  Document doc = new Document();
  doc.add(newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED,
                   Field.TermVector.WITH_POSITIONS_OFFSETS));
  for (int i = 0; i < 19; i++) {
    writer.addDocument(doc);
  }
  writer.optimize();
  writer.close();

  IndexReader reader = IndexReader.open(dir, false);
  reader.deleteDocument(5);
  reader.close();

  ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
  CheckIndex checker = new CheckIndex(dir);
  checker.setInfoStream(new PrintStream(bos));
  if (VERBOSE) checker.setInfoStream(System.out);
  CheckIndex.Status indexStatus = checker.checkIndex();
  if (indexStatus.clean == false) {
    System.out.println("CheckIndex failed");
    System.out.println(bos.toString());
    fail();
  }

  final CheckIndex.Status.SegmentInfoStatus seg = indexStatus.segmentInfos.get(0);
  assertTrue(seg.openReaderPassed);

  assertNotNull(seg.diagnostics);

  assertNotNull(seg.fieldNormStatus);
  assertNull(seg.fieldNormStatus.error);
  assertEquals(1, seg.fieldNormStatus.totFields);

  assertNotNull(seg.termIndexStatus);
  assertNull(seg.termIndexStatus.error);
  assertEquals(1, seg.termIndexStatus.termCount);
  assertEquals(19, seg.termIndexStatus.totFreq);
  assertEquals(18, seg.termIndexStatus.totPos);

  assertNotNull(seg.storedFieldStatus);
  assertNull(seg.storedFieldStatus.error);
  assertEquals(18, seg.storedFieldStatus.docCount);
  assertEquals(18, seg.storedFieldStatus.totFields);

  assertNotNull(seg.termVectorStatus);
  assertNull(seg.termVectorStatus.error);
  assertEquals(18, seg.termVectorStatus.docCount);
  assertEquals(18, seg.termVectorStatus.totVectors);

  assertTrue(seg.diagnostics.size() > 0);
  final List<String> onlySegments = new ArrayList<String>();
  onlySegments.add("_0");

  assertTrue(checker.checkIndex(onlySegments).clean == true);
  dir.close();
}
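// For context, the CheckIndex API exercised above can also be driven outside
// a test. A minimal sketch, assuming a Lucene 3.x classpath plus java.io.File
// and org.apache.lucene.store.FSDirectory on the import list; the index path
// is a placeholder, not taken from these tests:
static void checkIndexExample() throws IOException {
  Directory dir = FSDirectory.open(new File("/path/to/index"));
  CheckIndex checker = new CheckIndex(dir);
  checker.setInfoStream(new PrintStream(System.out, true));
  CheckIndex.Status status = checker.checkIndex();
  // clean is true only if every segment passed all checks; fixIndex(status)
  // would drop broken segments (losing their documents), so use with care.
  System.out.println(status.clean ? "Index is clean" : "Index has problems");
  dir.close();
}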
public void testSkipTo(int indexDivisor) throws IOException {
  Directory dir = new RAMDirectory();

  IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true,
                                       IndexWriter.MaxFieldLength.LIMITED);

  Term ta = new Term("content", "aaa");
  for (int i = 0; i < 10; i++)
    addDoc(writer, "aaa aaa aaa aaa");

  Term tb = new Term("content", "bbb");
  for (int i = 0; i < 16; i++)
    addDoc(writer, "bbb bbb bbb bbb");

  Term tc = new Term("content", "ccc");
  for (int i = 0; i < 50; i++)
    addDoc(writer, "ccc ccc ccc ccc");

  // assure that we deal with a single segment
  writer.optimize();
  writer.close();

  IndexReader reader = IndexReader.open(dir);
  reader.setTermInfosIndexDivisor(indexDivisor);
  assertEquals(indexDivisor, reader.getTermInfosIndexDivisor());

  TermDocs tdocs = reader.termDocs();

  // without optimization (assumption skipInterval == 16)

  // with next
  tdocs.seek(ta);
  assertTrue(tdocs.next());
  assertEquals(0, tdocs.doc());
  assertEquals(4, tdocs.freq());
  assertTrue(tdocs.next());
  assertEquals(1, tdocs.doc());
  assertEquals(4, tdocs.freq());
  assertTrue(tdocs.skipTo(0));
  assertEquals(2, tdocs.doc());
  assertTrue(tdocs.skipTo(4));
  assertEquals(4, tdocs.doc());
  assertTrue(tdocs.skipTo(9));
  assertEquals(9, tdocs.doc());
  assertFalse(tdocs.skipTo(10));

  // without next
  tdocs.seek(ta);
  assertTrue(tdocs.skipTo(0));
  assertEquals(0, tdocs.doc());
  assertTrue(tdocs.skipTo(4));
  assertEquals(4, tdocs.doc());
  assertTrue(tdocs.skipTo(9));
  assertEquals(9, tdocs.doc());
  assertFalse(tdocs.skipTo(10));

  // exactly skipInterval documents and therefore with optimization

  // with next
  tdocs.seek(tb);
  assertTrue(tdocs.next());
  assertEquals(10, tdocs.doc());
  assertEquals(4, tdocs.freq());
  assertTrue(tdocs.next());
  assertEquals(11, tdocs.doc());
  assertEquals(4, tdocs.freq());
  assertTrue(tdocs.skipTo(5));
  assertEquals(12, tdocs.doc());
  assertTrue(tdocs.skipTo(15));
  assertEquals(15, tdocs.doc());
  assertTrue(tdocs.skipTo(24));
  assertEquals(24, tdocs.doc());
  assertTrue(tdocs.skipTo(25));
  assertEquals(25, tdocs.doc());
  assertFalse(tdocs.skipTo(26));

  // without next
  tdocs.seek(tb);
  assertTrue(tdocs.skipTo(5));
  assertEquals(10, tdocs.doc());
  assertTrue(tdocs.skipTo(15));
  assertEquals(15, tdocs.doc());
  assertTrue(tdocs.skipTo(24));
  assertEquals(24, tdocs.doc());
  assertTrue(tdocs.skipTo(25));
  assertEquals(25, tdocs.doc());
  assertFalse(tdocs.skipTo(26));

  // much more than skipInterval documents and therefore with optimization

  // with next
  tdocs.seek(tc);
  assertTrue(tdocs.next());
  assertEquals(26, tdocs.doc());
  assertEquals(4, tdocs.freq());
  assertTrue(tdocs.next());
  assertEquals(27, tdocs.doc());
  assertEquals(4, tdocs.freq());
  assertTrue(tdocs.skipTo(5));
  assertEquals(28, tdocs.doc());
  assertTrue(tdocs.skipTo(40));
  assertEquals(40, tdocs.doc());
  assertTrue(tdocs.skipTo(57));
  assertEquals(57, tdocs.doc());
  assertTrue(tdocs.skipTo(74));
  assertEquals(74, tdocs.doc());
  assertTrue(tdocs.skipTo(75));
  assertEquals(75, tdocs.doc());
  assertFalse(tdocs.skipTo(76));

  // without next
  tdocs.seek(tc);
  assertTrue(tdocs.skipTo(5));
  assertEquals(26, tdocs.doc());
  assertTrue(tdocs.skipTo(40));
  assertEquals(40, tdocs.doc());
  assertTrue(tdocs.skipTo(57));
  assertEquals(57, tdocs.doc());
  assertTrue(tdocs.skipTo(74));
  assertEquals(74, tdocs.doc());
  assertTrue(tdocs.skipTo(75));
  assertEquals(75, tdocs.doc());
  assertFalse(tdocs.skipTo(76));

  tdocs.close();
  reader.close();
  dir.close();
}
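// addDoc is another helper that is not shown here. A minimal sketch, assuming
// it only needs to index the given text into the "content" field that the
// ta/tb/tc terms above query (unstored, whitespace-analyzed):
private void addDoc(IndexWriter writer, String value) throws IOException {
  Document doc = new Document();
  doc.add(new Field("content", value, Field.Store.NO, Field.Index.ANALYZED));
  writer.addDocument(doc);
}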
// This test tests that buffered deletes are cleared when
// an Exception is hit during flush.
public void testErrorAfterApplyDeletes() throws IOException {
  MockRAMDirectory.Failure failure = new MockRAMDirectory.Failure() {
    boolean sawMaybe = false;
    boolean failed = false;

    @Override
    public MockRAMDirectory.Failure reset() {
      sawMaybe = false;
      failed = false;
      return this;
    }

    @Override
    public void eval(MockRAMDirectory dir) throws IOException {
      if (sawMaybe && !failed) {
        boolean seen = false;
        StackTraceElement[] trace = new Exception().getStackTrace();
        for (int i = 0; i < trace.length; i++) {
          if ("applyDeletes".equals(trace[i].getMethodName())) {
            seen = true;
            break;
          }
        }
        if (!seen) {
          // Only fail once we are no longer in applyDeletes
          failed = true;
          throw new IOException("fail after applyDeletes");
        }
      }
      if (!failed) {
        StackTraceElement[] trace = new Exception().getStackTrace();
        for (int i = 0; i < trace.length; i++) {
          if ("applyDeletes".equals(trace[i].getMethodName())) {
            sawMaybe = true;
            break;
          }
        }
      }
    }
  };

  // create a couple of files
  String[] keywords = {"1", "2"};
  String[] unindexed = {"Netherlands", "Italy"};
  String[] unstored = {"Amsterdam has lots of bridges", "Venice has lots of canals"};
  String[] text = {"Amsterdam", "Venice"};

  MockRAMDirectory dir = new MockRAMDirectory();
  IndexWriter modifier = new IndexWriter(dir, new WhitespaceAnalyzer(), true,
                                         IndexWriter.MaxFieldLength.UNLIMITED);
  modifier.setUseCompoundFile(true);
  modifier.setMaxBufferedDeleteTerms(2);

  dir.failOn(failure.reset());

  for (int i = 0; i < keywords.length; i++) {
    Document doc = new Document();
    doc.add(new Field("id", keywords[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.add(new Field("country", unindexed[i], Field.Store.YES, Field.Index.NO));
    doc.add(new Field("contents", unstored[i], Field.Store.NO, Field.Index.ANALYZED));
    doc.add(new Field("city", text[i], Field.Store.YES, Field.Index.ANALYZED));
    modifier.addDocument(doc);
  }
  // flush (and commit if ac)
  modifier.optimize();
  modifier.commit();

  // one of the two files hits
  Term term = new Term("city", "Amsterdam");
  int hitCount = getHitCount(dir, term);
  assertEquals(1, hitCount);

  // delete the doc
  // max buf del terms is two, so this is buffered
  modifier.deleteDocuments(term);

  // add a doc (needed for the !ac case; see below)
  // doc remains buffered
  Document doc = new Document();
  modifier.addDocument(doc);

  // commit the changes, the buffered deletes, and the new doc

  // The failure object will fail on the first write after the del
  // file gets created when processing the buffered delete

  // in the ac case, this will be when writing the new segments
  // files so we really don't need the new doc, but it's harmless

  // in the !ac case, a new segments file won't be created but in
  // this case, creation of the cfs file happens next so we need
  // the doc (to test that it's okay that we don't lose deletes if
  // failing while creating the cfs file)
  boolean failed = false;
  try {
    modifier.commit();
  } catch (IOException ioe) {
    failed = true;
  }
  assertTrue(failed);

  // The commit above failed, so we need to retry it (which will
  // succeed, because the failure is a one-shot)
  modifier.commit();

  hitCount = getHitCount(dir, term);
  // Make sure the delete was successfully flushed:
  assertEquals(0, hitCount);

  modifier.close();
  dir.close();
}
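// For readers unfamiliar with the fault-injection hook above: the mock
// directory invokes Failure.eval() during its file operations, and throwing
// from eval() injects the fault. A minimal one-shot variant (a sketch, not
// part of the original test) that fails the first operation it observes:
MockRAMDirectory.Failure failOnce = new MockRAMDirectory.Failure() {
  boolean failed = false;

  @Override
  public void eval(MockRAMDirectory dir) throws IOException {
    if (!failed) {
      failed = true;
      throw new IOException("injected one-shot failure");
    }
  }
};
// Installing it with dir.failOn(failOnce) makes the next operation fail once.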