/** * Test that if separate reader and writer objects are opened, new categories written into the * writer are available to a reader only after a commit(). Note that this test obviously doesn't * cover all the different concurrency scenarios, all different methods, and so on. We may want to * write more tests of this sort. * * <p>This test simulates what would happen when there are two separate processes, one doing * indexing, and the other searching, and each opens its own object (with obviously no connection * between the objects) using the same disk files. Note, though, that this test does not test what * happens when the two processes do their actual work at exactly the same time. It also doesn't * test multi-threading. */ @Test public void testSeparateReaderAndWriter() throws Exception { Directory indexDir = newDirectory(); TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir); tw.commit(); TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir); assertEquals(1, tr.getSize()); // the empty taxonomy has size 1 (the root) tw.addCategory(new FacetLabel("Author")); assertEquals(1, tr.getSize()); // still root only... assertNull( TaxonomyReader.openIfChanged( tr)); // this is not enough, because tw.commit() hasn't been done yet assertEquals(1, tr.getSize()); // still root only... tw.commit(); assertEquals(1, tr.getSize()); // still root only... 
TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(tr); assertNotNull(newTaxoReader); tr.close(); tr = newTaxoReader; int author = 1; try { assertEquals(TaxonomyReader.ROOT_ORDINAL, tr.getParallelTaxonomyArrays().parents()[author]); // ok } catch (ArrayIndexOutOfBoundsException e) { fail( "After category addition, commit() and refresh(), getParent for " + author + " should NOT throw exception"); } assertEquals(2, tr.getSize()); // finally, see there are two categories // now, add another category, and verify that after commit and refresh // the parent of this category is correct (this requires the reader // to correctly update its prefetched parent vector), and that the // old information also wasn't ruined: tw.addCategory(new FacetLabel("Author", "Richard Dawkins")); int dawkins = 2; tw.commit(); newTaxoReader = TaxonomyReader.openIfChanged(tr); assertNotNull(newTaxoReader); tr.close(); tr = newTaxoReader; int[] parents = tr.getParallelTaxonomyArrays().parents(); assertEquals(author, parents[dawkins]); assertEquals(TaxonomyReader.ROOT_ORDINAL, parents[author]); assertEquals(TaxonomyReader.INVALID_ORDINAL, parents[TaxonomyReader.ROOT_ORDINAL]); assertEquals(3, tr.getSize()); tw.close(); tr.close(); indexDir.close(); }
@Test public void testSeparateReaderAndWriter2() throws Exception { Directory indexDir = newDirectory(); TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir); tw.commit(); TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir); // Test getOrdinal(): FacetLabel author = new FacetLabel("Author"); assertEquals(1, tr.getSize()); // the empty taxonomy has size 1 (the root) assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author)); tw.addCategory(author); // before commit and refresh, no change: assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author)); assertEquals(1, tr.getSize()); // still root only... assertNull( TaxonomyReader.openIfChanged( tr)); // this is not enough, because tw.commit() hasn't been done yet assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author)); assertEquals(1, tr.getSize()); // still root only... tw.commit(); // still not enough before refresh: assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author)); assertEquals(1, tr.getSize()); // still root only... TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(tr); assertNotNull(newTaxoReader); tr.close(); tr = newTaxoReader; assertEquals(1, tr.getOrdinal(author)); assertEquals(2, tr.getSize()); tw.close(); tr.close(); indexDir.close(); }
/** * testWriterTwice3 is yet another test which tests creating a taxonomy in two separate writing * sessions. This test used to fail because of a bug involving commit(), explained below, and now * should succeed. */ @Test public void testWriterTwice3() throws Exception { Directory indexDir = newDirectory(); // First, create and fill the taxonomy TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir); fillTaxonomy(tw); tw.close(); // Now, open the same taxonomy and add the same categories again. // After a few categories, the LuceneTaxonomyWriter implementation // will stop looking for each category on disk, and rather read them // all into memory and close it's reader. The bug was that it closed // the reader, but forgot that it did (because it didn't set the reader // reference to null). tw = new DirectoryTaxonomyWriter(indexDir); fillTaxonomy(tw); // Add one new category, just to make commit() do something: tw.addCategory(new FacetLabel("hi")); // Do a commit(). Here was a bug - if tw had a reader open, it should // be reopened after the commit. However, in our case the reader should // not be open (as explained above) but because it was not set to null, // we forgot that, tried to reopen it, and got an AlreadyClosedException. tw.commit(); assertEquals(expectedCategories.length + 1, tw.getSize()); tw.close(); indexDir.close(); }
/**
 * Adds every entry of categories[] to the given writer and fails unless each addition yields
 * exactly the path listed in expectedPaths[]: the returned ordinal must equal the last element
 * of the expected path, and repeatedly calling getParent() must walk through the remaining
 * elements in reverse order.
 *
 * <p>This is the same as fillTaxonomy(), except that it also checks getParent() and not only
 * addCategory(). The expected ordinals assume the taxonomy was empty when this method was
 * called; if anything was added beforehand the checks will fail.
 *
 * @param tw the writer to add the categories to
 * @throws IOException if the writer throws it while adding a category or looking up a parent
 */
public static void fillTaxonomyCheckPaths(TaxonomyWriter tw) throws IOException {
  for (int i = 0; i < categories.length; i++) {
    int actual = tw.addCategory(new FacetLabel(categories[i]));
    int depth = expectedPaths[i].length;
    int expected = expectedPaths[i][depth - 1];
    if (actual != expected) {
      fail(
          "For category "
              + showcat(categories[i])
              + " expected ordinal "
              + expected
              + ", but got "
              + actual);
    }
    // Climb from the new category towards the root; level 1 is its direct parent.
    for (int level = 1; level < depth; level++) {
      actual = tw.getParent(actual);
      expected = expectedPaths[i][depth - 1 - level];
      if (actual != expected) {
        fail(
            "For category "
                + showcat(categories[i])
                + " expected ancestor level "
                + level
                + " was "
                + expected
                + ", but got "
                + actual);
      }
    }
  }
}
// Test that getParentArrays is valid when retrieved during refresh @Test public void testTaxonomyReaderRefreshRaces() throws Exception { // compute base child arrays - after first chunk, and after the other Directory indexDirBase = newDirectory(); TaxonomyWriter twBase = new DirectoryTaxonomyWriter(indexDirBase); twBase.addCategory(new FacetLabel("a", "0")); final FacetLabel abPath = new FacetLabel("a", "b"); twBase.addCategory(abPath); twBase.commit(); TaxonomyReader trBase = new DirectoryTaxonomyReader(indexDirBase); final ParallelTaxonomyArrays ca1 = trBase.getParallelTaxonomyArrays(); final int abOrd = trBase.getOrdinal(abPath); final int abYoungChildBase1 = ca1.children()[abOrd]; final int numCategories = atLeast(800); for (int i = 0; i < numCategories; i++) { twBase.addCategory(new FacetLabel("a", "b", Integer.toString(i))); } twBase.close(); TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(trBase); assertNotNull(newTaxoReader); trBase.close(); trBase = newTaxoReader; final ParallelTaxonomyArrays ca2 = trBase.getParallelTaxonomyArrays(); final int abYoungChildBase2 = ca2.children()[abOrd]; int numRetries = atLeast(50); for (int retry = 0; retry < numRetries; retry++) { assertConsistentYoungestChild( abPath, abOrd, abYoungChildBase1, abYoungChildBase2, retry, numCategories); } trBase.close(); indexDirBase.close(); }
/** Test how getChildrenArrays() deals with the taxonomy's growth: */ @Test public void testChildrenArraysGrowth() throws Exception { Directory indexDir = newDirectory(); TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir); tw.addCategory(new FacetLabel("hi", "there")); tw.commit(); TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir); ParallelTaxonomyArrays ca = tr.getParallelTaxonomyArrays(); assertEquals(3, tr.getSize()); assertEquals(3, ca.siblings().length); assertEquals(3, ca.children().length); assertTrue(Arrays.equals(new int[] {1, 2, -1}, ca.children())); assertTrue(Arrays.equals(new int[] {-1, -1, -1}, ca.siblings())); tw.addCategory(new FacetLabel("hi", "ho")); tw.addCategory(new FacetLabel("hello")); tw.commit(); // Before refresh, nothing changed.. ParallelTaxonomyArrays newca = tr.getParallelTaxonomyArrays(); assertSame(newca, ca); // we got exactly the same object assertEquals(3, tr.getSize()); assertEquals(3, ca.siblings().length); assertEquals(3, ca.children().length); // After the refresh, things change: TaxonomyReader newtr = TaxonomyReader.openIfChanged(tr); assertNotNull(newtr); tr.close(); tr = newtr; ca = tr.getParallelTaxonomyArrays(); assertEquals(5, tr.getSize()); assertEquals(5, ca.siblings().length); assertEquals(5, ca.children().length); assertTrue(Arrays.equals(new int[] {4, 3, -1, -1, -1}, ca.children())); assertTrue(Arrays.equals(new int[] {-1, -1, -1, 2, 1}, ca.siblings())); tw.close(); tr.close(); indexDir.close(); }
/** * Test what happens if we try to write to a locked taxonomy writer, and see that we can unlock it * and continue. */ @Test public void testWriterLock() throws Exception { // native fslock impl gets angry if we use it, so use RAMDirectory explicitly. Directory indexDir = new RAMDirectory(); TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir); tw.addCategory(new FacetLabel("hi", "there")); tw.commit(); // we deliberately not close the write now, and keep it open and // locked. // Verify that the writer worked: TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir); assertEquals(2, tr.getOrdinal(new FacetLabel("hi", "there"))); // Try to open a second writer, with the first one locking the directory. // We expect to get a LockObtainFailedException. try { assertNull(new DirectoryTaxonomyWriter(indexDir)); fail("should have failed to write in locked directory"); } catch (LockObtainFailedException e) { // this is what we expect to happen. } // Remove the lock, and now the open should succeed, and we can // write to the new writer. DirectoryTaxonomyWriter.unlock(indexDir); TaxonomyWriter tw2 = new DirectoryTaxonomyWriter(indexDir); tw2.addCategory(new FacetLabel("hey")); tw2.close(); // See that the writer indeed wrote: TaxonomyReader newtr = TaxonomyReader.openIfChanged(tr); assertNotNull(newtr); tr.close(); tr = newtr; assertEquals(3, tr.getOrdinal(new FacetLabel("hey"))); tr.close(); tw.close(); indexDir.close(); }
/**
 * Adds every entry of categories[] to the given writer and fails unless each addition returns
 * the ordinal stored as the last element of the matching expectedPaths[] entry.
 *
 * <p>The expected ordinals assume the taxonomy was empty when this method was called; if
 * anything was added beforehand the checks will fail.
 *
 * @param tw the writer to add the categories to
 * @throws IOException if the writer throws it while adding a category
 */
public static void fillTaxonomy(TaxonomyWriter tw) throws IOException {
  for (int idx = 0; idx < categories.length; idx++) {
    int actual = tw.addCategory(new FacetLabel(categories[idx]));
    int lastPos = expectedPaths[idx].length - 1;
    int expected = expectedPaths[idx][lastPos];
    if (actual == expected) {
      continue;
    }
    fail(
        "For category "
            + showcat(categories[idx])
            + " expected ordinal "
            + expected
            + ", but got "
            + actual);
  }
}
/** * Another set of tests for the writer, which don't use an array and try to distill the different * cases, and therefore may be more helpful for debugging a problem than testWriter() which is * hard to know why or where it failed. */ @Test public void testWriterSimpler() throws Exception { Directory indexDir = newDirectory(); TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir); assertEquals(1, tw.getSize()); // the root only // Test that adding a new top-level category works assertEquals(1, tw.addCategory(new FacetLabel("a"))); assertEquals(2, tw.getSize()); // Test that adding the same category again is noticed, and the // same ordinal (and not a new one) is returned. assertEquals(1, tw.addCategory(new FacetLabel("a"))); assertEquals(2, tw.getSize()); // Test that adding another top-level category returns a new ordinal, // not the same one assertEquals(2, tw.addCategory(new FacetLabel("b"))); assertEquals(3, tw.getSize()); // Test that adding a category inside one of the above adds just one // new ordinal: assertEquals(3, tw.addCategory(new FacetLabel("a", "c"))); assertEquals(4, tw.getSize()); // Test that adding the same second-level category doesn't do anything: assertEquals(3, tw.addCategory(new FacetLabel("a", "c"))); assertEquals(4, tw.getSize()); // Test that adding a second-level category with two new components // indeed adds two categories assertEquals(5, tw.addCategory(new FacetLabel("d", "e"))); assertEquals(6, tw.getSize()); // Verify that the parents were added above in the order we expected assertEquals(4, tw.addCategory(new FacetLabel("d"))); // Similar, but inside a category that already exists: assertEquals(7, tw.addCategory(new FacetLabel("b", "d", "e"))); assertEquals(8, tw.getSize()); // And now inside two levels of categories that already exist: assertEquals(8, tw.addCategory(new FacetLabel("b", "d", "f"))); assertEquals(9, tw.getSize()); tw.close(); indexDir.close(); }
/**
 * Runs one round of the refresh-race check: while the main thread reopens a reader over a
 * deliberately slowed-down directory, a background thread keeps reading that reader's parallel
 * taxonomy arrays and asserts that the youngest child recorded for {@code abPath} is always one
 * of the two expected baseline values.
 *
 * @param abPath the category whose youngest child is monitored
 * @param abOrd ordinal of {@code abPath}
 * @param abYoungChildBase1 expected youngest child before the extra categories are visible
 * @param abYoungChildBase2 expected youngest child after the extra categories are visible
 * @param retry index of this round; used in failure messages only
 * @param numCategories number of child categories to add under {@code abPath}
 * @throws Exception if building the taxonomy, reopening the reader or joining the thread fails
 */
private void assertConsistentYoungestChild(
    final FacetLabel abPath,
    final int abOrd,
    final int abYoungChildBase1,
    final int abYoungChildBase2,
    final int retry,
    int numCategories)
    throws Exception {
  SlowRAMDirectory indexDir = new SlowRAMDirectory(-1, null); // no slowness for initialization
  TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
  tw.addCategory(new FacetLabel("a", "0"));
  tw.addCategory(abPath);
  tw.commit();

  final DirectoryTaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
  for (int i = 0; i < numCategories; i++) {
    final FacetLabel cp = new FacetLabel("a", "b", Integer.toString(i));
    tw.addCategory(cp);
    assertEquals(
        "Ordinal of " + cp + " must be invalid until Taxonomy Reader was refreshed",
        TaxonomyReader.INVALID_ORDINAL,
        tr.getOrdinal(cp));
  }
  tw.close();

  final AtomicBoolean stop = new AtomicBoolean(false);
  final Throwable[] error = new Throwable[] {null};
  // Written only by the verifier thread; the main thread reads it after join().
  final int[] retrieval = {0};

  Thread thread =
      new Thread("Child Arrays Verifier") {
        @Override
        public void run() {
          try {
            // Raise the priority by one step, clamped to the legal maximum. This is done inside
            // the try so that a failure here is reported through error[0] instead of silently
            // killing the verifier and letting the round pass unchecked.
            setPriority(Math.min(Thread.MAX_PRIORITY, 1 + getPriority()));
            while (!stop.get()) {
              int lastOrd = tr.getParallelTaxonomyArrays().parents().length - 1;
              assertNotNull(
                  "path of last-ord " + lastOrd + " is not found!", tr.getPath(lastOrd));
              assertChildrenArrays(tr.getParallelTaxonomyArrays(), retry, retrieval[0]++);
              sleep(10); // don't starve refresh()'s CPU, which sleeps every 50 bytes for 1 ms
            }
          } catch (Throwable e) {
            error[0] = e;
            stop.set(true);
          }
        }

        /** Asserts that the youngest child of abOrd is one of the two baseline values. */
        private void assertChildrenArrays(
            ParallelTaxonomyArrays ca, int retryNum, int retrievalNum) {
          // Read the value once so the condition checks exactly what the message reports.
          final int abYoungChild = ca.children()[abOrd];
          assertTrue(
              "Retry "
                  + retryNum
                  + ": retrieval: "
                  + retrievalNum
                  + ": wrong youngest child for category "
                  + abPath
                  + " (ord="
                  + abOrd
                  + ") - must be either "
                  + abYoungChildBase1
                  + " or "
                  + abYoungChildBase2
                  + " but was: "
                  + abYoungChild,
              abYoungChildBase1 == abYoungChild || abYoungChildBase2 == abYoungChild);
        }
      };
  thread.start();

  try {
    indexDir.setSleepMillis(1); // some delay for refresh
    TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(tr);
    if (newTaxoReader != null) {
      newTaxoReader.close();
    }
  } finally {
    // Always stop and wait for the verifier, even if the reopen itself failed, so that no
    // background thread is left polling the reader.
    stop.set(true);
    thread.join();
  }

  assertNull(
      "Unexpcted exception at retry "
          + retry
          + " retrieval "
          + retrieval[0]
          + ": \n"
          + stackTraceStr(error[0]),
      error[0]);

  tr.close();
  indexDir.close();
}