예제 #1
0
  /**
   * Randomized round-trip check: for a series of doubling upper bounds, picks a random set of
   * distinct doc IDs, feeds them to a {@link DocIdSetBuilder} shuffled and with duplicates through
   * bulk adders of random size, and verifies the built set matches the reference bit set.
   */
  public void testRandom() throws IOException {
    final int maxDoc = TestUtil.nextInt(random(), 1, 10000000);
    final int bound = maxDoc / 2;
    int maxCardinality = 1;
    while (maxCardinality < bound) {
      final int numDocs = TestUtil.nextInt(random(), 1, maxCardinality);

      // Reference set: keep drawing until exactly numDocs distinct bits are set.
      final FixedBitSet docs = new FixedBitSet(maxDoc);
      for (int distinct = 0; distinct < numDocs; ) {
        final int candidate = random().nextInt(maxDoc);
        if (docs.get(candidate)) {
          continue;
        }
        docs.set(candidate);
        distinct++;
      }

      // Room for every distinct doc plus up to 99 extra slots for duplicates.
      final int[] docIds = new int[numDocs + random().nextInt(100)];
      final DocIdSetIterator it = new BitSetIterator(docs, 0L);
      int filled = 0;
      int current;
      while ((current = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        docIds[filled] = current;
        filled++;
      }
      assertEquals(numDocs, filled);

      // Fill the remaining slots with copies of already-present docs.
      for (; filled < docIds.length; filled++) {
        docIds[filled] = docIds[random().nextInt(numDocs)];
      }

      // Shuffle: walking down from the end, swap each slot with a randomly chosen lower index.
      for (int hi = docIds.length - 1; hi >= 1; hi--) {
        final int lo = random().nextInt(hi);
        final int held = docIds[hi];
        docIds[hi] = docIds[lo];
        docIds[lo] = held;
      }

      // Feed the docs to the builder out of order, in batches of random size.
      final DocIdSetBuilder builder = new DocIdSetBuilder(maxDoc);
      int next = 0;
      while (next < docIds.length) {
        final int batchSize = TestUtil.nextInt(random(), 1, docIds.length - next);
        DocIdSetBuilder.BulkAdder adder = null;
        int budget = 0;
        for (int consumed = 0; consumed < batchSize; consumed++) {
          // Ask for a new adder once the budget is used up, or whenever rarely() fires while
          // budget is still left (rarely() is only consulted in that second case).
          if (budget == 0 || rarely()) {
            budget = TestUtil.nextInt(random(), 1, batchSize - consumed + 5);
            adder = builder.grow(budget);
          }
          adder.add(docIds[next]);
          next++;
          budget--;
        }
      }

      final DocIdSet reference = new BitDocIdSet(docs);
      final DocIdSet built = builder.build();
      assertEquals(reference, built);

      maxCardinality <<= 1;
    }
  }
예제 #2
0
 /**
  * Adds between 1 and 10 iterators to a single builder, each stepping through the doc space in
  * random strides of 1 to 100, and checks that the build result is a {@link BitDocIdSet} equal to
  * the union tracked in a reference bit set.
  */
 public void testDense() throws IOException {
   final int maxDoc = 1000000 + random().nextInt(1000000);
   final DocIdSetBuilder builder = new DocIdSetBuilder(maxDoc);
   final int numIterators = 1 + random().nextInt(10);
   final FixedBitSet ref = new FixedBitSet(maxDoc);

   int pending = numIterators;
   while (pending > 0) {
     final RoaringDocIdSet.Builder roaring = new RoaringDocIdSet.Builder(maxDoc);
     // Start somewhere in [0, 1000) and advance by a random stride until maxDoc is reached.
     int doc = random().nextInt(1000);
     while (doc < maxDoc) {
       roaring.add(doc);
       ref.set(doc);
       doc += 1 + random().nextInt(100);
     }
     builder.add(roaring.build().iterator());
     pending--;
   }

   final DocIdSet built = builder.build();
   assertTrue(built instanceof BitDocIdSet);
   assertEquals(new BitDocIdSet(ref), built);
 }
예제 #3
0
  /**
   * Adds ten small random bit sets to a builder through iterators that all report a cost of 0,
   * and checks that the built set still equals the union of everything that was added.
   */
  public void testMisleadingDISICost() throws IOException {
    final int maxDoc = TestUtil.nextInt(random(), 1000, 10000);
    final DocIdSetBuilder builder = new DocIdSetBuilder(maxDoc);
    final FixedBitSet union = new FixedBitSet(maxDoc);

    for (int round = 10; round > 0; round--) {
      final FixedBitSet docs = new FixedBitSet(maxDoc);
      // Number of random bits to set this round; may be 0, and repeated draws may collide.
      int remaining = random().nextInt(maxDoc / 1000);
      while (remaining > 0) {
        docs.set(random().nextInt(maxDoc));
        remaining--;
      }
      union.or(docs);

      // The reported cost is deliberately 0 so the builder has to cope with a wrong cost.
      final BitSetIterator zeroCostIterator = new BitSetIterator(docs, 0L);
      builder.add(zeroCostIterator);
    }

    final BitDocIdSet expectedSet = new BitDocIdSet(union);
    assertEquals(expectedSet, builder.build());
  }
예제 #4
0
  /**
   * Checks the {@code numValuesPerDoc} and {@code multivalued} fields a {@link DocIdSetBuilder}
   * exposes after being constructed from {@link PointValues} or {@link Terms}, and the iterator
   * cost of the set it builds after two docs are added.
   *
   * <p>NOTE(review): the two constructor arguments of the dummy stats classes look like
   * (doc count, value count), since (42, 63) yields a ratio of 1.5 - confirm against their
   * definitions.
   */
  public void testLeverageStats() throws IOException {
    DocIdSetBuilder builder;

    // Points, stats (42, 42): one value per doc, single-valued.
    final PointValues singleValuedPoints = new DummyPointValues(42, 42);
    builder = new DocIdSetBuilder(100, singleValuedPoints, "foo");
    assertEquals(1d, builder.numValuesPerDoc, 0d);
    assertFalse(builder.multivalued);
    assertCostAfterAddingDocs5And7(builder, 2);

    // Points, stats (42, 63): 1.5 values per doc, multi-valued.
    final PointValues multiValuedPoints = new DummyPointValues(42, 63);
    builder = new DocIdSetBuilder(100, multiValuedPoints, "foo");
    assertEquals(1.5, builder.numValuesPerDoc, 0d);
    assertTrue(builder.multivalued);
    // Cost is 1 here: the builder thinks the same doc was added twice.
    assertCostAfterAddingDocs5And7(builder, 1);

    // Points with incomplete stats (second argument is -1).
    final PointValues pointsWithoutSecondStat = new DummyPointValues(42, -1);
    builder = new DocIdSetBuilder(100, pointsWithoutSecondStat, "foo");
    assertEquals(1d, builder.numValuesPerDoc, 0d);
    assertTrue(builder.multivalued);

    // Points with incomplete stats (first argument is -1).
    final PointValues pointsWithoutFirstStat = new DummyPointValues(-1, 84);
    builder = new DocIdSetBuilder(100, pointsWithoutFirstStat, "foo");
    assertEquals(1d, builder.numValuesPerDoc, 0d);
    assertTrue(builder.multivalued);

    // Terms, stats (42, 42): one value per doc, single-valued.
    final Terms singleValuedTerms = new DummyTerms(42, 42);
    builder = new DocIdSetBuilder(100, singleValuedTerms);
    assertEquals(1d, builder.numValuesPerDoc, 0d);
    assertFalse(builder.multivalued);
    assertCostAfterAddingDocs5And7(builder, 2);

    // Terms, stats (42, 63): 1.5 values per doc, multi-valued.
    final Terms multiValuedTerms = new DummyTerms(42, 63);
    builder = new DocIdSetBuilder(100, multiValuedTerms);
    assertEquals(1.5, builder.numValuesPerDoc, 0d);
    assertTrue(builder.multivalued);
    // Cost is 1 here: the builder thinks the same doc was added twice.
    assertCostAfterAddingDocs5And7(builder, 1);

    // Terms with incomplete stats (second argument is -1).
    final Terms termsWithoutSecondStat = new DummyTerms(42, -1);
    builder = new DocIdSetBuilder(100, termsWithoutSecondStat);
    assertEquals(1d, builder.numValuesPerDoc, 0d);
    assertTrue(builder.multivalued);

    // Terms with incomplete stats (first argument is -1).
    final Terms termsWithoutFirstStat = new DummyTerms(-1, 84);
    builder = new DocIdSetBuilder(100, termsWithoutFirstStat);
    assertEquals(1d, builder.numValuesPerDoc, 0d);
    assertTrue(builder.multivalued);
  }

  /**
   * Requests room for two docs from {@code builder}, adds docs 5 and 7, builds the set, and
   * asserts that it is a {@link BitDocIdSet} whose iterator reports {@code expectedCost}.
   */
  private void assertCostAfterAddingDocs5And7(DocIdSetBuilder builder, long expectedCost)
      throws IOException {
    final DocIdSetBuilder.BulkAdder adder = builder.grow(2);
    adder.add(5);
    adder.add(7);
    final DocIdSet built = builder.build();
    assertTrue(built instanceof BitDocIdSet);
    assertEquals(expectedCost, built.iterator().cost());
  }