public void testRandom() throws IOException { final int maxDoc = TestUtil.nextInt(random(), 1, 10000000); for (int i = 1; i < maxDoc / 2; i <<= 1) { final int numDocs = TestUtil.nextInt(random(), 1, i); final FixedBitSet docs = new FixedBitSet(maxDoc); int c = 0; while (c < numDocs) { final int d = random().nextInt(maxDoc); if (docs.get(d) == false) { docs.set(d); c += 1; } } final int[] array = new int[numDocs + random().nextInt(100)]; DocIdSetIterator it = new BitSetIterator(docs, 0L); int j = 0; for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) { array[j++] = doc; } assertEquals(numDocs, j); // add some duplicates while (j < array.length) { array[j++] = array[random().nextInt(numDocs)]; } // shuffle for (j = array.length - 1; j >= 1; --j) { final int k = random().nextInt(j); int tmp = array[j]; array[j] = array[k]; array[k] = tmp; } // add docs out of order DocIdSetBuilder builder = new DocIdSetBuilder(maxDoc); for (j = 0; j < array.length; ) { final int l = TestUtil.nextInt(random(), 1, array.length - j); DocIdSetBuilder.BulkAdder adder = null; for (int k = 0, budget = 0; k < l; ++k) { if (budget == 0 || rarely()) { budget = TestUtil.nextInt(random(), 1, l - k + 5); adder = builder.grow(budget); } adder.add(array[j++]); budget--; } } final DocIdSet expected = new BitDocIdSet(docs); final DocIdSet actual = builder.build(); assertEquals(expected, actual); } }
/**
 * Adds the iterators of several {@link RoaringDocIdSet}s to one {@link DocIdSetBuilder} and checks
 * that the built result is a {@link BitDocIdSet} equal to the union recorded in a reference
 * {@link FixedBitSet}.
 *
 * @throws IOException declared by the test; propagated from the iterator/builder calls
 */
public void testDense() throws IOException {
  final int maxDoc = 1000000 + random().nextInt(1000000);
  final DocIdSetBuilder builder = new DocIdSetBuilder(maxDoc);
  final int numIterators = 1 + random().nextInt(10);
  final FixedBitSet ref = new FixedBitSet(maxDoc);

  for (int round = 0; round < numIterators; ++round) {
    final RoaringDocIdSet.Builder roaring = new RoaringDocIdSet.Builder(maxDoc);
    // Start at a random offset and advance by a random positive stride until maxDoc is reached,
    // recording every visited doc both in the roaring set and in the reference bits.
    int doc = random().nextInt(1000);
    while (doc < maxDoc) {
      roaring.add(doc);
      ref.set(doc);
      doc += 1 + random().nextInt(100);
    }
    builder.add(roaring.build().iterator());
  }

  final DocIdSet result = builder.build();
  final boolean isBitSetBacked = result instanceof BitDocIdSet;
  assertTrue(isBitSetBacked);
  assertEquals(new BitDocIdSet(ref), result);
}
public void testMisleadingDISICost() throws IOException { final int maxDoc = TestUtil.nextInt(random(), 1000, 10000); DocIdSetBuilder builder = new DocIdSetBuilder(maxDoc); FixedBitSet expected = new FixedBitSet(maxDoc); for (int i = 0; i < 10; ++i) { final FixedBitSet docs = new FixedBitSet(maxDoc); final int numDocs = random().nextInt(maxDoc / 1000); for (int j = 0; j < numDocs; ++j) { docs.set(random().nextInt(maxDoc)); } expected.or(docs); // We provide a cost of 0 here to make sure the builder can deal with wrong costs builder.add(new BitSetIterator(docs, 0L)); } assertEquals(new BitDocIdSet(expected), builder.build()); }
/**
 * Checks how {@link DocIdSetBuilder} uses the statistics handed to it at construction time, for
 * both {@link PointValues} and {@link Terms}: the {@code numValuesPerDoc} and {@code multivalued}
 * fields it derives, and the cost reported by the iterator of the set built after adding docs 5
 * and 7.
 *
 * @throws IOException declared by the test; propagated from the builder/iterator calls
 */
public void testLeverageStats() throws IOException {
  // Points, single-valued: stats (42, 42) yield one value per doc.
  {
    final PointValues values = new DummyPointValues(42, 42);
    final DocIdSetBuilder builder = new DocIdSetBuilder(100, values, "foo");
    assertEquals(1d, builder.numValuesPerDoc, 0d);
    assertFalse(builder.multivalued);

    final DocIdSetBuilder.BulkAdder adder = builder.grow(2);
    adder.add(5);
    adder.add(7);
    final DocIdSet set = builder.build();
    assertTrue(set instanceof BitDocIdSet);
    assertEquals(2, set.iterator().cost());
  }

  // Points, multi-valued: stats (42, 63) yield 1.5 values per doc.
  {
    final PointValues values = new DummyPointValues(42, 63);
    final DocIdSetBuilder builder = new DocIdSetBuilder(100, values, "foo");
    assertEquals(1.5, builder.numValuesPerDoc, 0d);
    assertTrue(builder.multivalued);

    final DocIdSetBuilder.BulkAdder adder = builder.grow(2);
    adder.add(5);
    adder.add(7);
    final DocIdSet set = builder.build();
    assertTrue(set instanceof BitDocIdSet);
    // The builder behaves as if one doc had been added twice, hence a cost of 1.
    assertEquals(1, set.iterator().cost());
  }

  // Points, incomplete stats: a -1 in the second position.
  {
    final PointValues values = new DummyPointValues(42, -1);
    final DocIdSetBuilder builder = new DocIdSetBuilder(100, values, "foo");
    assertEquals(1d, builder.numValuesPerDoc, 0d);
    assertTrue(builder.multivalued);
  }

  // Points, incomplete stats: a -1 in the first position.
  {
    final PointValues values = new DummyPointValues(-1, 84);
    final DocIdSetBuilder builder = new DocIdSetBuilder(100, values, "foo");
    assertEquals(1d, builder.numValuesPerDoc, 0d);
    assertTrue(builder.multivalued);
  }

  // Terms, single-valued: stats (42, 42) yield one value per doc.
  {
    final Terms terms = new DummyTerms(42, 42);
    final DocIdSetBuilder builder = new DocIdSetBuilder(100, terms);
    assertEquals(1d, builder.numValuesPerDoc, 0d);
    assertFalse(builder.multivalued);

    final DocIdSetBuilder.BulkAdder adder = builder.grow(2);
    adder.add(5);
    adder.add(7);
    final DocIdSet set = builder.build();
    assertTrue(set instanceof BitDocIdSet);
    assertEquals(2, set.iterator().cost());
  }

  // Terms, multi-valued: stats (42, 63) yield 1.5 values per doc.
  {
    final Terms terms = new DummyTerms(42, 63);
    final DocIdSetBuilder builder = new DocIdSetBuilder(100, terms);
    assertEquals(1.5, builder.numValuesPerDoc, 0d);
    assertTrue(builder.multivalued);

    final DocIdSetBuilder.BulkAdder adder = builder.grow(2);
    adder.add(5);
    adder.add(7);
    final DocIdSet set = builder.build();
    assertTrue(set instanceof BitDocIdSet);
    // The builder behaves as if one doc had been added twice, hence a cost of 1.
    assertEquals(1, set.iterator().cost());
  }

  // Terms, incomplete stats: a -1 in the second position.
  {
    final Terms terms = new DummyTerms(42, -1);
    final DocIdSetBuilder builder = new DocIdSetBuilder(100, terms);
    assertEquals(1d, builder.numValuesPerDoc, 0d);
    assertTrue(builder.multivalued);
  }

  // Terms, incomplete stats: a -1 in the first position.
  {
    final Terms terms = new DummyTerms(-1, 84);
    final DocIdSetBuilder builder = new DocIdSetBuilder(100, terms);
    assertEquals(1d, builder.numValuesPerDoc, 0d);
    assertTrue(builder.multivalued);
  }
}