/** * Test TaxonomyReader's child browsing method, getChildrenArrays() This only tests for * correctness of the data on one example - we have below further tests on data refresh etc. */ @Test public void testChildrenArrays() throws Exception { Directory indexDir = newDirectory(); TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir); fillTaxonomy(tw); tw.close(); TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir); ParallelTaxonomyArrays ca = tr.getParallelTaxonomyArrays(); int[] youngestChildArray = ca.children(); assertEquals(tr.getSize(), youngestChildArray.length); int[] olderSiblingArray = ca.siblings(); assertEquals(tr.getSize(), olderSiblingArray.length); for (int i = 0; i < expectedCategories.length; i++) { // find expected children by looking at all expectedCategories // for children ArrayList<Integer> expectedChildren = new ArrayList<>(); for (int j = expectedCategories.length - 1; j >= 0; j--) { if (expectedCategories[j].length != expectedCategories[i].length + 1) { continue; // not longer by 1, so can't be a child } boolean ischild = true; for (int k = 0; k < expectedCategories[i].length; k++) { if (!expectedCategories[j][k].equals(expectedCategories[i][k])) { ischild = false; break; } } if (ischild) { expectedChildren.add(j); } } // check that children and expectedChildren are the same, with the // correct reverse (youngest to oldest) order: if (expectedChildren.size() == 0) { assertEquals(TaxonomyReader.INVALID_ORDINAL, youngestChildArray[i]); } else { int child = youngestChildArray[i]; assertEquals(expectedChildren.get(0).intValue(), child); for (int j = 1; j < expectedChildren.size(); j++) { child = olderSiblingArray[child]; assertEquals(expectedChildren.get(j).intValue(), child); // if child is INVALID_ORDINAL we should stop, but // assertEquals would fail in this case anyway. } // When we're done comparing, olderSiblingArray should now point // to INVALID_ORDINAL, saying there are no more children. 
If it // doesn't, we found too many children... assertEquals(-1, olderSiblingArray[child]); } } tr.close(); indexDir.close(); }
private void checkWriterParent(TaxonomyReader tr, TaxonomyWriter tw) throws Exception { // check that the parent of the root ordinal is the invalid ordinal: assertEquals(TaxonomyReader.INVALID_ORDINAL, tw.getParent(0)); // check parent of non-root ordinals: for (int ordinal = 1; ordinal < tr.getSize(); ordinal++) { FacetLabel me = tr.getPath(ordinal); int parentOrdinal = tw.getParent(ordinal); FacetLabel parent = tr.getPath(parentOrdinal); if (parent == null) { fail( "Parent of " + ordinal + " is " + parentOrdinal + ", but this is not a valid category."); } // verify that the parent is indeed my parent, according to the // strings if (!me.subpath(me.length - 1).equals(parent)) { fail( "Got parent " + parentOrdinal + " for ordinal " + ordinal + " but categories are " + showcat(parent) + " and " + showcat(me) + " respectively."); } } // check parent of of invalid ordinals: try { tw.getParent(-1); fail("getParent for -1 should throw exception"); } catch (ArrayIndexOutOfBoundsException e) { // ok } try { tw.getParent(TaxonomyReader.INVALID_ORDINAL); fail("getParent for INVALID_ORDINAL should throw exception"); } catch (ArrayIndexOutOfBoundsException e) { // ok } try { int parent = tw.getParent(tr.getSize()); fail("getParent for getSize() should throw exception, but returned " + parent); } catch (ArrayIndexOutOfBoundsException e) { // ok } }
/**
 * Runs {@code checkWriterParent} against a writer that has committed but is still open while the
 * reader is in use.
 */
@Test
public void testWriterParent2() throws Exception {
  Directory dir = newDirectory();
  TaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
  fillTaxonomy(writer);
  // Commit (rather than close) so the writer can still be queried below.
  writer.commit();

  TaxonomyReader reader = new DirectoryTaxonomyReader(dir);
  checkWriterParent(reader, writer);

  writer.close();
  reader.close();
  dir.close();
}
/** Test how getChildrenArrays() deals with the taxonomy's growth: */ @Test public void testChildrenArraysGrowth() throws Exception { Directory indexDir = newDirectory(); TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir); tw.addCategory(new FacetLabel("hi", "there")); tw.commit(); TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir); ParallelTaxonomyArrays ca = tr.getParallelTaxonomyArrays(); assertEquals(3, tr.getSize()); assertEquals(3, ca.siblings().length); assertEquals(3, ca.children().length); assertTrue(Arrays.equals(new int[] {1, 2, -1}, ca.children())); assertTrue(Arrays.equals(new int[] {-1, -1, -1}, ca.siblings())); tw.addCategory(new FacetLabel("hi", "ho")); tw.addCategory(new FacetLabel("hello")); tw.commit(); // Before refresh, nothing changed.. ParallelTaxonomyArrays newca = tr.getParallelTaxonomyArrays(); assertSame(newca, ca); // we got exactly the same object assertEquals(3, tr.getSize()); assertEquals(3, ca.siblings().length); assertEquals(3, ca.children().length); // After the refresh, things change: TaxonomyReader newtr = TaxonomyReader.openIfChanged(tr); assertNotNull(newtr); tr.close(); tr = newtr; ca = tr.getParallelTaxonomyArrays(); assertEquals(5, tr.getSize()); assertEquals(5, ca.siblings().length); assertEquals(5, ca.children().length); assertTrue(Arrays.equals(new int[] {4, 3, -1, -1, -1}, ca.children())); assertTrue(Arrays.equals(new int[] {-1, -1, -1, 2, 1}, ca.siblings())); tw.close(); tr.close(); indexDir.close(); }
/**
 * Variant of {@code testRootOnly} in which the writer is committed but left open while the
 * reader is used, showing that the root category is visible to a reader before the writer is
 * closed.
 */
@Test
public void testRootOnly2() throws Exception {
  Directory dir = newDirectory();
  TaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
  writer.commit();

  TaxonomyReader reader = new DirectoryTaxonomyReader(dir);
  // Only the root exists: size 1, empty path, no parent, ordinal 0.
  assertEquals(1, reader.getSize());
  assertEquals(0, reader.getPath(0).length);
  int[] parents = reader.getParallelTaxonomyArrays().parents();
  assertEquals(TaxonomyReader.INVALID_ORDINAL, parents[0]);
  FacetLabel root = new FacetLabel();
  assertEquals(0, reader.getOrdinal(root));

  writer.close();
  reader.close();
  dir.close();
}
/** * Test writing an empty index, and seeing that a reader finds in it the root category, and only * it. We check all the methods on that root category return the expected results. */ @Test public void testRootOnly() throws Exception { Directory indexDir = newDirectory(); TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir); // right after opening the index, it should already contain the // root, so have size 1: assertEquals(1, tw.getSize()); tw.close(); TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir); assertEquals(1, tr.getSize()); assertEquals(0, tr.getPath(0).length); assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParallelTaxonomyArrays().parents()[0]); assertEquals(0, tr.getOrdinal(new FacetLabel())); tr.close(); indexDir.close(); }
/**
 * Checks near-real-time visibility: a reader opened directly on the writer sees a category that
 * was added afterwards once it is reopened through {@code TaxonomyReader.openIfChanged}, with no
 * commit call in between.
 *
 * @throws Exception if the writer or reader fails
 */
@Test
public void testNRT() throws Exception {
  Directory dir = newDirectory();
  DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
  TaxonomyReader reader = new DirectoryTaxonomyReader(writer);

  FacetLabel cp = new FacetLabel("a");
  writer.addCategory(cp);
  TaxonomyReader newReader = TaxonomyReader.openIfChanged(reader);
  assertNotNull("expected a new instance", newReader);
  assertEquals(2, newReader.getSize());
  // Compare the ordinals by value. assertNotSame would box both ints and compare object
  // identity, which only gives the intended answer by virtue of the Integer cache.
  assertTrue(
      "category " + cp + " should have a valid ordinal after reopen",
      TaxonomyReader.INVALID_ORDINAL != newReader.getOrdinal(cp));
  reader.close();
  reader = newReader;

  writer.close();
  reader.close();
  dir.close();
}
// Test that getParentArrays is valid when retrieved during refresh @Test public void testTaxonomyReaderRefreshRaces() throws Exception { // compute base child arrays - after first chunk, and after the other Directory indexDirBase = newDirectory(); TaxonomyWriter twBase = new DirectoryTaxonomyWriter(indexDirBase); twBase.addCategory(new FacetLabel("a", "0")); final FacetLabel abPath = new FacetLabel("a", "b"); twBase.addCategory(abPath); twBase.commit(); TaxonomyReader trBase = new DirectoryTaxonomyReader(indexDirBase); final ParallelTaxonomyArrays ca1 = trBase.getParallelTaxonomyArrays(); final int abOrd = trBase.getOrdinal(abPath); final int abYoungChildBase1 = ca1.children()[abOrd]; final int numCategories = atLeast(800); for (int i = 0; i < numCategories; i++) { twBase.addCategory(new FacetLabel("a", "b", Integer.toString(i))); } twBase.close(); TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(trBase); assertNotNull(newTaxoReader); trBase.close(); trBase = newTaxoReader; final ParallelTaxonomyArrays ca2 = trBase.getParallelTaxonomyArrays(); final int abYoungChildBase2 = ca2.children()[abOrd]; int numRetries = atLeast(50); for (int retry = 0; retry < numRetries; retry++) { assertConsistentYoungestChild( abPath, abOrd, abYoungChildBase1, abYoungChildBase2, retry, numCategories); } trBase.close(); indexDirBase.close(); }
/** * Tests for TaxonomyReader's getParent() method. We check it by comparing its results to those we * could have gotten by looking at the category string paths (where the parentage is obvious). * Note that after testReaderBasic(), we already know we can trust the ordinal <=> category * conversions. * * <p>Note: At the moment, the parent methods in the reader are deprecated, but this does not mean * they should not be tested! Until they are removed (*if* they are removed), these tests should * remain to see that they still work correctly. */ @Test public void testReaderParent() throws Exception { Directory indexDir = newDirectory(); TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir); fillTaxonomy(tw); tw.close(); TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir); // check that the parent of the root ordinal is the invalid ordinal: int[] parents = tr.getParallelTaxonomyArrays().parents(); assertEquals(TaxonomyReader.INVALID_ORDINAL, parents[0]); // check parent of non-root ordinals: for (int ordinal = 1; ordinal < tr.getSize(); ordinal++) { FacetLabel me = tr.getPath(ordinal); int parentOrdinal = parents[ordinal]; FacetLabel parent = tr.getPath(parentOrdinal); if (parent == null) { fail( "Parent of " + ordinal + " is " + parentOrdinal + ", but this is not a valid category."); } // verify that the parent is indeed my parent, according to the strings if (!me.subpath(me.length - 1).equals(parent)) { fail( "Got parent " + parentOrdinal + " for ordinal " + ordinal + " but categories are " + showcat(parent) + " and " + showcat(me) + " respectively."); } } tr.close(); indexDir.close(); }
/** * Test what happens if we try to write to a locked taxonomy writer, and see that we can unlock it * and continue. */ @Test public void testWriterLock() throws Exception { // native fslock impl gets angry if we use it, so use RAMDirectory explicitly. Directory indexDir = new RAMDirectory(); TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir); tw.addCategory(new FacetLabel("hi", "there")); tw.commit(); // we deliberately not close the write now, and keep it open and // locked. // Verify that the writer worked: TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir); assertEquals(2, tr.getOrdinal(new FacetLabel("hi", "there"))); // Try to open a second writer, with the first one locking the directory. // We expect to get a LockObtainFailedException. try { assertNull(new DirectoryTaxonomyWriter(indexDir)); fail("should have failed to write in locked directory"); } catch (LockObtainFailedException e) { // this is what we expect to happen. } // Remove the lock, and now the open should succeed, and we can // write to the new writer. DirectoryTaxonomyWriter.unlock(indexDir); TaxonomyWriter tw2 = new DirectoryTaxonomyWriter(indexDir); tw2.addCategory(new FacetLabel("hey")); tw2.close(); // See that the writer indeed wrote: TaxonomyReader newtr = TaxonomyReader.openIfChanged(tr); assertNotNull(newtr); tr.close(); tr = newtr; assertEquals(3, tr.getOrdinal(new FacetLabel("hey"))); tr.close(); tw.close(); indexDir.close(); }
@Test public void testSeparateReaderAndWriter2() throws Exception { Directory indexDir = newDirectory(); TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir); tw.commit(); TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir); // Test getOrdinal(): FacetLabel author = new FacetLabel("Author"); assertEquals(1, tr.getSize()); // the empty taxonomy has size 1 (the root) assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author)); tw.addCategory(author); // before commit and refresh, no change: assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author)); assertEquals(1, tr.getSize()); // still root only... assertNull( TaxonomyReader.openIfChanged( tr)); // this is not enough, because tw.commit() hasn't been done yet assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author)); assertEquals(1, tr.getSize()); // still root only... tw.commit(); // still not enough before refresh: assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author)); assertEquals(1, tr.getSize()); // still root only... TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(tr); assertNotNull(newTaxoReader); tr.close(); tr = newTaxoReader; assertEquals(1, tr.getOrdinal(author)); assertEquals(2, tr.getSize()); tw.close(); tr.close(); indexDir.close(); }
/** * Test that if separate reader and writer objects are opened, new categories written into the * writer are available to a reader only after a commit(). Note that this test obviously doesn't * cover all the different concurrency scenarios, all different methods, and so on. We may want to * write more tests of this sort. * * <p>This test simulates what would happen when there are two separate processes, one doing * indexing, and the other searching, and each opens its own object (with obviously no connection * between the objects) using the same disk files. Note, though, that this test does not test what * happens when the two processes do their actual work at exactly the same time. It also doesn't * test multi-threading. */ @Test public void testSeparateReaderAndWriter() throws Exception { Directory indexDir = newDirectory(); TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir); tw.commit(); TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir); assertEquals(1, tr.getSize()); // the empty taxonomy has size 1 (the root) tw.addCategory(new FacetLabel("Author")); assertEquals(1, tr.getSize()); // still root only... assertNull( TaxonomyReader.openIfChanged( tr)); // this is not enough, because tw.commit() hasn't been done yet assertEquals(1, tr.getSize()); // still root only... tw.commit(); assertEquals(1, tr.getSize()); // still root only... 
TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(tr); assertNotNull(newTaxoReader); tr.close(); tr = newTaxoReader; int author = 1; try { assertEquals(TaxonomyReader.ROOT_ORDINAL, tr.getParallelTaxonomyArrays().parents()[author]); // ok } catch (ArrayIndexOutOfBoundsException e) { fail( "After category addition, commit() and refresh(), getParent for " + author + " should NOT throw exception"); } assertEquals(2, tr.getSize()); // finally, see there are two categories // now, add another category, and verify that after commit and refresh // the parent of this category is correct (this requires the reader // to correctly update its prefetched parent vector), and that the // old information also wasn't ruined: tw.addCategory(new FacetLabel("Author", "Richard Dawkins")); int dawkins = 2; tw.commit(); newTaxoReader = TaxonomyReader.openIfChanged(tr); assertNotNull(newTaxoReader); tr.close(); tr = newTaxoReader; int[] parents = tr.getParallelTaxonomyArrays().parents(); assertEquals(author, parents[dawkins]); assertEquals(TaxonomyReader.ROOT_ORDINAL, parents[author]); assertEquals(TaxonomyReader.INVALID_ORDINAL, parents[TaxonomyReader.ROOT_ORDINAL]); assertEquals(3, tr.getSize()); tw.close(); tr.close(); indexDir.close(); }
/**
 * Runs one round of the refresh-race scenario. It builds a taxonomy on a slow in-memory
 * directory, starts a background thread that keeps reading the children array, reopens the reader
 * on the calling thread, and then fails if the background thread saw any error or a youngest
 * child of {@code abPath} other than one of the two expected values.
 *
 * @param abPath category whose youngest child is monitored
 * @param abOrd ordinal of {@code abPath}
 * @param abYoungChildBase1 acceptable youngest child (value recorded before the bulk addition)
 * @param abYoungChildBase2 acceptable youngest child (value recorded after the bulk addition)
 * @param retry index of this round, used only in failure messages
 * @param numCategories number of children to add under {@code abPath}
 * @throws Exception if the taxonomy cannot be written or read, or the thread join is interrupted
 */
private void assertConsistentYoungestChild(
    final FacetLabel abPath,
    final int abOrd,
    final int abYoungChildBase1,
    final int abYoungChildBase2,
    final int retry,
    int numCategories)
    throws Exception {
  SlowRAMDirectory indexDir = new SlowRAMDirectory(-1, null); // no slowness for initialization
  TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
  tw.addCategory(new FacetLabel("a", "0"));
  tw.addCategory(abPath);
  tw.commit();

  final DirectoryTaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
  for (int i = 0; i < numCategories; i++) {
    final FacetLabel cp = new FacetLabel("a", "b", Integer.toString(i));
    tw.addCategory(cp);
    assertEquals(
        "Ordinal of " + cp + " must be invalid until Taxonomy Reader was refreshed",
        TaxonomyReader.INVALID_ORDINAL,
        tr.getOrdinal(cp));
  }
  tw.close();

  final AtomicBoolean stop = new AtomicBoolean(false);
  // One-element arrays let the anonymous thread hand results back to this method.
  final Throwable[] error = new Throwable[] {null};
  final int[] retrieval = {0};

  Thread thread =
      new Thread("Child Arrays Verifier") {
        @Override
        public void run() {
          setPriority(1 + getPriority());
          try {
            while (!stop.get()) {
              int lastOrd = tr.getParallelTaxonomyArrays().parents().length - 1;
              assertNotNull(
                  "path of last-ord " + lastOrd + " is not found!", tr.getPath(lastOrd));
              assertChildrenArrays(tr.getParallelTaxonomyArrays(), retry, retrieval[0]++);
              sleep(10); // don't starve refresh()'s CPU, which sleeps every 50 bytes for 1 ms
            }
          } catch (Throwable e) {
            error[0] = e;
            stop.set(true);
          }
        }

        private void assertChildrenArrays(ParallelTaxonomyArrays ca, int retry, int retrieval) {
          // Read the value once so the condition checks exactly what the message reports.
          final int abYoungChild = ca.children()[abOrd];
          assertTrue(
              "Retry "
                  + retry
                  + ": retrieval: "
                  + retrieval
                  + ": wrong youngest child for category "
                  + abPath
                  + " (ord="
                  + abOrd
                  + ") - must be either "
                  + abYoungChildBase1
                  + " or "
                  + abYoungChildBase2
                  + " but was: "
                  + abYoungChild,
              abYoungChildBase1 == abYoungChild || abYoungChildBase2 == abYoungChild);
        }
      };
  thread.start();

  try {
    indexDir.setSleepMillis(1); // some delay for refresh
    TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(tr);
    if (newTaxoReader != null) {
      newTaxoReader.close();
    }
  } finally {
    // Always stop and join the verifier, even if the reopen throws, so the thread cannot
    // outlive this method.
    stop.set(true);
    thread.join();
  }

  assertNull(
      "Unexpcted exception at retry "
          + retry
          + " retrieval "
          + retrieval[0]
          + ": \n"
          + stackTraceStr(error[0]),
      error[0]);

  tr.close();
  indexDir.close(); // the directory was previously never closed
}
/** * Similar to testChildrenArrays, except rather than look at expected results, we test for several * "invariants" that the results should uphold, e.g., that a child of a category indeed has this * category as its parent. This sort of test can more easily be extended to larger example * taxonomies, because we do not need to build the expected list of categories like we did in the * above test. */ @Test public void testChildrenArraysInvariants() throws Exception { Directory indexDir = newDirectory(); TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir); fillTaxonomy(tw); tw.close(); TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir); ParallelTaxonomyArrays ca = tr.getParallelTaxonomyArrays(); int[] children = ca.children(); assertEquals(tr.getSize(), children.length); int[] olderSiblingArray = ca.siblings(); assertEquals(tr.getSize(), olderSiblingArray.length); // test that the "youngest child" of every category is indeed a child: int[] parents = tr.getParallelTaxonomyArrays().parents(); for (int i = 0; i < tr.getSize(); i++) { int youngestChild = children[i]; if (youngestChild != TaxonomyReader.INVALID_ORDINAL) { assertEquals(i, parents[youngestChild]); } } // test that the "older sibling" of every category is indeed older (lower) // (it can also be INVALID_ORDINAL, which is lower than any ordinal) for (int i = 0; i < tr.getSize(); i++) { assertTrue("olderSiblingArray[" + i + "] should be <" + i, olderSiblingArray[i] < i); } // test that the "older sibling" of every category is indeed a sibling // (they share the same parent) for (int i = 0; i < tr.getSize(); i++) { int sibling = olderSiblingArray[i]; if (sibling == TaxonomyReader.INVALID_ORDINAL) { continue; } assertEquals(parents[i], parents[sibling]); } // And now for slightly more complex (and less "invariant-like"...) 
// tests: // test that the "youngest child" is indeed the youngest (so we don't // miss the first children in the chain) for (int i = 0; i < tr.getSize(); i++) { // Find the really youngest child: int j; for (j = tr.getSize() - 1; j > i; j--) { if (parents[j] == i) { break; // found youngest child } } if (j == i) { // no child found j = TaxonomyReader.INVALID_ORDINAL; } assertEquals(j, children[i]); } // test that the "older sibling" is indeed the least oldest one - and // not a too old one or -1 (so we didn't miss some children in the // middle or the end of the chain). for (int i = 0; i < tr.getSize(); i++) { // Find the youngest older sibling: int j; for (j = i - 1; j >= 0; j--) { if (parents[j] == parents[i]) { break; // found youngest older sibling } } if (j < 0) { // no sibling found j = TaxonomyReader.INVALID_ORDINAL; } assertEquals(j, olderSiblingArray[i]); } tr.close(); indexDir.close(); }
/** * Basic tests for TaxonomyReader's category <=> ordinal transformations (getSize(), getCategory() * and getOrdinal()). We test that after writing the index, it can be read and all the categories * and ordinals are there just as we expected them to be. */ @Test public void testReaderBasic() throws Exception { Directory indexDir = newDirectory(); TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir); fillTaxonomy(tw); tw.close(); TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir); // test TaxonomyReader.getSize(): assertEquals(expectedCategories.length, tr.getSize()); // test round trips of ordinal => category => ordinal for (int i = 0; i < tr.getSize(); i++) { assertEquals(i, tr.getOrdinal(tr.getPath(i))); } // test TaxonomyReader.getCategory(): for (int i = 1; i < tr.getSize(); i++) { FacetLabel expectedCategory = new FacetLabel(expectedCategories[i]); FacetLabel category = tr.getPath(i); if (!expectedCategory.equals(category)) { fail( "For ordinal " + i + " expected category " + showcat(expectedCategory) + ", but got " + showcat(category)); } } // (also test invalid ordinals:) assertNull(tr.getPath(-1)); assertNull(tr.getPath(tr.getSize())); assertNull(tr.getPath(TaxonomyReader.INVALID_ORDINAL)); // test TaxonomyReader.getOrdinal(): for (int i = 1; i < expectedCategories.length; i++) { int expectedOrdinal = i; int ordinal = tr.getOrdinal(new FacetLabel(expectedCategories[i])); if (expectedOrdinal != ordinal) { fail( "For category " + showcat(expectedCategories[i]) + " expected ordinal " + expectedOrdinal + ", but got " + ordinal); } } // (also test invalid categories:) assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(new FacetLabel("non-existant"))); assertEquals( TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(new FacetLabel("Author", "Jules Verne"))); tr.close(); indexDir.close(); }