// test the simple case public void testSimpleCase() throws IOException { String[] keywords = {"1", "2"}; String[] unindexed = {"Netherlands", "Italy"}; String[] unstored = {"Amsterdam has lots of bridges", "Venice has lots of canals"}; String[] text = {"Amsterdam", "Venice"}; Directory dir = new MockRAMDirectory(); IndexWriter modifier = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED); modifier.setUseCompoundFile(true); modifier.setMaxBufferedDeleteTerms(1); for (int i = 0; i < keywords.length; i++) { Document doc = new Document(); doc.add(new Field("id", keywords[i], Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("country", unindexed[i], Field.Store.YES, Field.Index.NO)); doc.add(new Field("contents", unstored[i], Field.Store.NO, Field.Index.ANALYZED)); doc.add(new Field("city", text[i], Field.Store.YES, Field.Index.ANALYZED)); modifier.addDocument(doc); } modifier.optimize(); modifier.commit(); Term term = new Term("city", "Amsterdam"); int hitCount = getHitCount(dir, term); assertEquals(1, hitCount); modifier.deleteDocuments(term); modifier.commit(); hitCount = getHitCount(dir, term); assertEquals(0, hitCount); modifier.close(); dir.close(); }
public void sort(File directory) throws IOException { LOG.info("IndexSorter: starting."); Date start = new Date(); int termIndexInterval = getConf().getInt("indexer.termIndexInterval", 128); IndexReader reader = IndexReader.open(new File(directory, "index")); Searcher searcher = new IndexSearcher(new File(directory, "index").getAbsolutePath()); // TODO MC SortingReader sorter = new SortingReader(reader, newToOld(reader, searcher)); // TODO MC IndexWriter writer = new IndexWriter(new File(directory, "index-sorted"), null, true); writer.setTermIndexInterval(termIndexInterval); writer.setUseCompoundFile(false); writer.addIndexes(new IndexReader[] {sorter}); writer.close(); Date end = new Date(); LOG.info("IndexSorter: done, " + (end.getTime() - start.getTime()) + " total milliseconds"); }
// This test tests that buffered deletes are cleared when
// an Exception is hit during flush.
public void testErrorAfterApplyDeletes() throws IOException {
  // One-shot failure: it arms itself (sawMaybe) when it first observes a
  // directory write with applyDeletes on the stack, then throws on the FIRST
  // write that happens after applyDeletes has finished — never while still
  // inside it. The relative order of the two stack scans below is what
  // guarantees "after", so the statement order must not be changed.
  MockRAMDirectory.Failure failure = new MockRAMDirectory.Failure() {
    boolean sawMaybe = false; // a write inside applyDeletes has been seen
    boolean failed = false;   // the single injected IOException was thrown
    @Override
    public MockRAMDirectory.Failure reset() {
      sawMaybe = false;
      failed = false;
      return this;
    }
    @Override
    public void eval(MockRAMDirectory dir) throws IOException {
      if (sawMaybe && !failed) {
        // Armed: throw only if applyDeletes is no longer on the call stack.
        boolean seen = false;
        StackTraceElement[] trace = new Exception().getStackTrace();
        for (int i = 0; i < trace.length; i++) {
          if ("applyDeletes".equals(trace[i].getMethodName())) {
            seen = true;
            break;
          }
        }
        if (!seen) {
          // Only fail once we are no longer in applyDeletes
          failed = true;
          throw new IOException("fail after applyDeletes");
        }
      }
      if (!failed) {
        // Not yet armed (or armed but still inside applyDeletes): arm when
        // applyDeletes appears on the stack of this write.
        StackTraceElement[] trace = new Exception().getStackTrace();
        for (int i = 0; i < trace.length; i++) {
          if ("applyDeletes".equals(trace[i].getMethodName())) {
            sawMaybe = true;
            break;
          }
        }
      }
    }
  };

  // create a couple of files
  String[] keywords = {"1", "2"};
  String[] unindexed = {"Netherlands", "Italy"};
  String[] unstored = {"Amsterdam has lots of bridges", "Venice has lots of canals"};
  String[] text = {"Amsterdam", "Venice"};

  MockRAMDirectory dir = new MockRAMDirectory();
  IndexWriter modifier = new IndexWriter(dir, new WhitespaceAnalyzer(), true,
      IndexWriter.MaxFieldLength.UNLIMITED);
  modifier.setUseCompoundFile(true);
  // Two buffered delete terms, so the single delete below stays buffered
  // until commit.
  modifier.setMaxBufferedDeleteTerms(2);

  dir.failOn(failure.reset());

  for (int i = 0; i < keywords.length; i++) {
    Document doc = new Document();
    doc.add(new Field("id", keywords[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.add(new Field("country", unindexed[i], Field.Store.YES, Field.Index.NO));
    doc.add(new Field("contents", unstored[i], Field.Store.NO, Field.Index.ANALYZED));
    doc.add(new Field("city", text[i], Field.Store.YES, Field.Index.ANALYZED));
    modifier.addDocument(doc);
  }
  // flush (and commit if ac)
  modifier.optimize();
  modifier.commit();

  // one of the two files hits
  Term term = new Term("city", "Amsterdam");
  int hitCount = getHitCount(dir, term);
  assertEquals(1, hitCount);

  // open the writer again (closed above)

  // delete the doc
  // max buf del terms is two, so this is buffered
  modifier.deleteDocuments(term);

  // add a doc (needed for the !ac case; see below)
  // doc remains buffered
  Document doc = new Document();
  modifier.addDocument(doc);

  // commit the changes, the buffered deletes, and the new doc

  // The failure object will fail on the first write after the del
  // file gets created when processing the buffered delete

  // in the ac case, this will be when writing the new segments
  // files so we really don't need the new doc, but it's harmless

  // in the !ac case, a new segments file won't be created but in
  // this case, creation of the cfs file happens next so we need
  // the doc (to test that it's okay that we don't lose deletes if
  // failing while creating the cfs file)
  boolean failed = false;
  try {
    modifier.commit();
  } catch (IOException ioe) {
    failed = true;
  }

  assertTrue(failed);

  // The commit above failed, so we need to retry it (which will
  // succeed, because the failure is a one-shot)
  modifier.commit();

  hitCount = getHitCount(dir, term);

  // Make sure the delete was successfully flushed:
  assertEquals(0, hitCount);

  modifier.close();
  dir.close();
}