예제 #1
0
 /**
  * Creates two filters whose expected insertion counts exceed {@code Integer.MAX_VALUE}; the test
  * passes as long as neither creation throws.
  */
 @SuppressWarnings("CheckReturnValue")
 @AndroidIncompatible // OutOfMemoryError
 public void testLargeNumberOfInsertions() {
   // Deliberately terrible false positive probabilities are used so that Java does not OOM.
   long slightlyAboveIntMax = 42L + Integer.MAX_VALUE;
   long farAboveIntMax = 50L * Integer.MAX_VALUE;

   BloomFilter.create(Funnels.unencodedCharsFunnel(), slightlyAboveIntMax, 0.28);
   BloomFilter.create(Funnels.unencodedCharsFunnel(), farAboveIntMax, 0.99);
 }
예제 #2
0
  /**
   * Checks that two filters that received the same elements are equal, and that they stop being
   * equal once one of them receives an additional element.
   */
  public void testEquals() {
    BloomFilter<String> first = BloomFilter.create(Funnels.unencodedCharsFunnel(), 100);
    BloomFilter<String> second = BloomFilter.create(Funnels.unencodedCharsFunnel(), 100);

    // Feed both filters the same two elements.
    for (String element : new String[] {"1", "2"}) {
      first.put(element);
      second.put(element);
    }

    EqualsTester sameContents = new EqualsTester();
    sameContents.addEqualityGroup(first, second);
    sameContents.testEquals();

    // Diverge: only the second filter sees "3".
    second.put("3");

    EqualsTester differentContents = new EqualsTester();
    differentContents.addEqualityGroup(first);
    differentContents.addEqualityGroup(second);
    differentContents.testEquals();
  }
예제 #3
0
 /**
  * Creates a spill map over the given spill file, with an initial byte map and one initial
  * bloom filter.
  *
  * @param file spill file stored on this instance and handed to {@code
  *     MappedByteBufferMap.newMappedByteBufferMap}
  * @param thresholdBytes value stored on this instance and forwarded to {@code
  *     MappedByteBufferMap.newMappedByteBufferMap}
  * @throws IOException declared by this constructor; presumably raised while creating the byte
  *     map
  */
 public SpillMap(SpillFile file, int thresholdBytes) throws IOException {
   this.spillFile = file;
   this.thresholdBytes = thresholdBytes;

   // First byte map for this instance.
   this.byteMap = MappedByteBufferMap.newMappedByteBufferMap(thresholdBytes, spillFile);

   // Start the filter list with a single filter sized for 1000 expected insertions.
   this.bFilters = Lists.newArrayList();
   this.bFilters.add(BloomFilter.create(Funnels.byteArrayFunnel(), 1000));
 }
 /**
  * Reads the expected insertion count, false positive rate and name from {@code config} and
  * builds a new filter from the first two.
  *
  * @param config settings map; the entry under {@code NAME_KEY} is dereferenced directly, so a
  *     missing or null name results in a {@link NullPointerException}
  */
 @Override
 public void configure(Map<String, Object> config) {
   Object rawInsertions = config.get(EXPECTED_INSERTIONS_KEY);
   this.expectedInsertions = toInt(rawInsertions);

   Object rawRate = config.get(FALSE_POSITIVE_RATE_KEY);
   this.falsePositiveRate = toDouble(rawRate);

   Object rawName = config.get(NAME_KEY);
   this.name = rawName.toString();

   this.filter = BloomFilter.create(LOOKUPKEY_FUNNEL, expectedInsertions, falsePositiveRate);
 }
예제 #5
0
 /**
  * Runs {@code checkSanity} on filters built from a grid of false positive rates and expected
  * insertion counts, each axis growing by a factor of ten per step.
  */
 public void testBasic() {
   double fpr = 0.0000001;
   while (fpr < 0.1) {
     int expectedInsertions = 1;
     while (expectedInsertions <= 10000) {
       checkSanity(BloomFilter.create(HashTestUtils.BAD_FUNNEL, expectedInsertions, fpr));
       expectedInsertions *= 10;
     }
     fpr *= 10;
   }
 }
예제 #6
0
 /**
  * Checks that a filter does not report itself as compatible and that {@code putAll} with itself
  * is rejected with an {@link IllegalArgumentException}.
  */
 public void testPutAllWithSelf() {
   BloomFilter<Integer> filter = BloomFilter.create(Funnels.integerFunnel(), 1);
   try {
     boolean compatibleWithSelf = filter.isCompatible(filter);
     assertFalse(compatibleWithSelf);
     filter.putAll(filter);
     // Reaching this line means putAll did not throw.
     fail();
   } catch (IllegalArgumentException rejected) {
     // expected
   }
 }
예제 #7
0
  /**
   * Checks, in both directions, that filters created with different expected insertion counts
   * are reported as incompatible and that {@code putAll} between them throws {@link
   * IllegalArgumentException}.
   */
  public void testPutAllDifferentSizes() {
    BloomFilter<Integer> small = BloomFilter.create(Funnels.integerFunnel(), 1);
    BloomFilter<Integer> large = BloomFilter.create(Funnels.integerFunnel(), 10);

    // small <- large
    try {
      boolean compatible = small.isCompatible(large);
      assertFalse(compatible);
      small.putAll(large);
      fail();
    } catch (IllegalArgumentException rejected) {
      // expected
    }

    // large <- small
    try {
      boolean compatible = large.isCompatible(small);
      assertFalse(compatible);
      large.putAll(small);
      fail();
    } catch (IllegalArgumentException rejected) {
      // expected
    }
  }
예제 #8
0
 /**
  * Checks that empty filters differing in funnel, expected insertions or false positive
  * probability each land in their own equality group.
  */
 public void testEquals_empty() {
   EqualsTester tester = new EqualsTester();

   // Byte-array funnel variants.
   tester.addEqualityGroup(BloomFilter.create(Funnels.byteArrayFunnel(), 100, 0.01));
   tester.addEqualityGroup(BloomFilter.create(Funnels.byteArrayFunnel(), 100, 0.02));
   tester.addEqualityGroup(BloomFilter.create(Funnels.byteArrayFunnel(), 200, 0.01));
   tester.addEqualityGroup(BloomFilter.create(Funnels.byteArrayFunnel(), 200, 0.02));

   // Unencoded-chars funnel variants.
   tester.addEqualityGroup(BloomFilter.create(Funnels.unencodedCharsFunnel(), 100, 0.01));
   tester.addEqualityGroup(BloomFilter.create(Funnels.unencodedCharsFunnel(), 100, 0.02));
   tester.addEqualityGroup(BloomFilter.create(Funnels.unencodedCharsFunnel(), 200, 0.01));
   tester.addEqualityGroup(BloomFilter.create(Funnels.unencodedCharsFunnel(), 200, 0.02));

   tester.testEquals();
 }
예제 #9
0
 /**
  * For expected insertion counts 1..9999 at a fixed fpp, checks that {@code bitSize()} equals the
  * optimal bit count rounded up to a whole number of 64-bit words.
  */
 public void testBitSize() {
   final double fpp = 0.03;
   int insertions = 1;
   while (insertions < 10000) {
     long optimalBits = BloomFilter.optimalNumOfBits(insertions, fpp);
     // Number of longs needed to hold optimalBits, rounding up.
     long wordCount = LongMath.divide(optimalBits, 64, RoundingMode.CEILING);
     int arraySize = Ints.checkedCast(wordCount);
     assertEquals(
         arraySize * Long.SIZE,
         BloomFilter.create(Funnels.unencodedCharsFunnel(), insertions, fpp).bitSize());
     insertions++;
   }
 }
예제 #10
0
 // Checks whether the current page has enough space to fit the new serialized tuple.
 // If it does not, the current buffer is flushed, a new page is obtained, and a new
 // bloom filter is appended to the filter list.
 // TODO: The code does not ensure that pages are optimally packed.
 //       It only tries to fill up the current page as much as possible; once it is
 //       exhausted it requests a new page. Instead it would be nice to load the next page
 //       that could fit the new value.
 private void ensureSpace(byte[] value) {
   if (byteMap.canFit(value)) {
     // The current page still has room; nothing to do.
     return;
   }

   // Flush the current buffer before replacing it.
   byteMap.flushBuffer();
   // Obtain the next page.
   byteMap = MappedByteBufferMap.newMappedByteBufferMap(thresholdBytes, spillFile);
   // A new bloom filter is added alongside the new page.
   bFilters.add(BloomFilter.create(Funnels.byteArrayFunnel(), 1000));
 }
예제 #11
0
 /**
  * Checks that creating a filter with an extremely small false positive probability is rejected
  * with an {@link IllegalArgumentException}.
  */
 @SuppressWarnings("CheckReturnValue")
 public void testFailureWhenMoreThan255HashFunctionsAreNeeded() {
   int expectedInsertions = 1000;
   double tinyFpp =
       0.00000000000000000000000000000000000000000000000000000000000000000000000000000001;
   try {
     BloomFilter.create(Funnels.unencodedCharsFunnel(), expectedInsertions, tinyFpp);
     // Creation must not succeed.
     fail();
   } catch (IllegalArgumentException rejected) {
     // expected
   }
 }
예제 #12
0
 /**
  * Checks that {@code put} returns the opposite of what {@code mightContain} reported for the
  * same value immediately beforehand.
  */
 public void testPutReturnValue() {
   for (int round = 0; round < 10; round++) {
     BloomFilter<String> filter = BloomFilter.create(Funnels.unencodedCharsFunnel(), 100);
     for (int k = 0; k < 10; k++) {
       String value = new Object().toString();
       // Query first, then insert: the order matters for the comparison below.
       boolean presentBefore = filter.mightContain(value);
       boolean bitsChanged = filter.put(value);
       assertTrue(presentBefore != bitsChanged);
     }
   }
 }
예제 #13
0
  /**
   * Checks that {@code putAll} merges the source filter's contents into the target while leaving
   * the source filter unchanged.
   */
  public void testPutAll() {
    int first = 1;
    int second = 2;

    // Target filter: knows only the first element.
    BloomFilter<Integer> target = BloomFilter.create(Funnels.integerFunnel(), 100);
    target.put(first);
    assertTrue(target.mightContain(first));
    assertFalse(target.mightContain(second));

    // Source filter: knows only the second element.
    BloomFilter<Integer> source = BloomFilter.create(Funnels.integerFunnel(), 100);
    source.put(second);
    assertFalse(source.mightContain(first));
    assertTrue(source.mightContain(second));

    assertTrue(target.isCompatible(source));
    target.putAll(source);

    // The target now reports both elements.
    assertTrue(target.mightContain(first));
    assertTrue(target.mightContain(second));
    // The source still reports only its own element.
    assertFalse(source.mightContain(first));
    assertTrue(source.mightContain(second));
  }
예제 #14
0
  /**
   * Writes a populated filter with {@code writeTo} and checks that {@code readFrom} yields an
   * equal filter.
   */
  public void testCustomSerialization() throws Exception {
    Funnel<byte[]> funnel = Funnels.byteArrayFunnel();
    BloomFilter<byte[]> original = BloomFilter.create(funnel, 100);
    int value = 0;
    while (value < 100) {
      original.put(Ints.toByteArray(value));
      value++;
    }

    ByteArrayOutputStream sink = new ByteArrayOutputStream();
    original.writeTo(sink);

    ByteArrayInputStream source = new ByteArrayInputStream(sink.toByteArray());
    assertEquals(original, BloomFilter.readFrom(source, funnel));
  }
예제 #15
0
 /**
  * Checks that {@code expectedFpp()} starts at zero and, as elements are added, rises strictly
  * whenever {@code put} reports a change and stays the same otherwise, until it reaches 1.0.
  */
 public void testExpectedFpp() {
   BloomFilter<Object> filter = BloomFilter.create(HashTestUtils.BAD_FUNNEL, 10, 0.03);
   double fpp = filter.expectedFpp();
   assertEquals(0.0, fpp);
   // usually completed in less than 200 iterations
   while (fpp != 1.0) {
     boolean changed = filter.put(new Object());
     double updatedFpp = filter.expectedFpp();
     if (changed) {
       // A change to the filter must strictly raise the expected fpp.
       assertTrue(updatedFpp > fpp);
     } else {
       // No change to the filter means no change to the expected fpp.
       assertTrue(updatedFpp == fpp);
     }
     fpp = updatedFpp;
   }
 }
예제 #16
0
 /**
  * Checks that {@code BloomFilter.create} rejects negative expected insertions and false
  * positive probabilities of exactly 0.0 or 1.0 with {@link IllegalArgumentException}.
  */
 @SuppressWarnings("CheckReturnValue")
 public void testPreconditions() {
   // Negative expected insertions, default fpp.
   try {
     BloomFilter.create(Funnels.unencodedCharsFunnel(), -1);
     fail();
   } catch (IllegalArgumentException rejected) {
     // expected
   }

   // Negative expected insertions, explicit fpp.
   try {
     BloomFilter.create(Funnels.unencodedCharsFunnel(), -1, 0.03);
     fail();
   } catch (IllegalArgumentException rejected) {
     // expected
   }

   // fpp of exactly zero.
   try {
     BloomFilter.create(Funnels.unencodedCharsFunnel(), 1, 0.0);
     fail();
   } catch (IllegalArgumentException rejected) {
     // expected
   }

   // fpp of exactly one.
   try {
     BloomFilter.create(Funnels.unencodedCharsFunnel(), 1, 1.0);
     fail();
   } catch (IllegalArgumentException rejected) {
     // expected
   }
 }
예제 #17
0
  /**
   * Reserializes a populated filter and checks that the copy still reports every inserted
   * element and has the same expected fpp as the original.
   */
  public void testJavaSerialization() {
    BloomFilter<byte[]> original = BloomFilter.create(Funnels.byteArrayFunnel(), 100);
    int value = 0;
    while (value < 10) {
      original.put(Ints.toByteArray(value));
      value++;
    }

    BloomFilter<byte[]> roundTripped = SerializableTester.reserialize(original);
    for (int probe = 0; probe < 10; probe++) {
      byte[] key = Ints.toByteArray(probe);
      assertTrue(roundTripped.mightContain(key));
    }
    assertEquals(original.expectedFpp(), roundTripped.expectedFpp());

    SerializableTester.reserializeAndAssert(original);
  }
예제 #18
0
  /**
   * Inserts one million pseudo-random longs and then replays the same seeded sequence to check
   * that every inserted value is reported as possibly present.
   */
  public void testBloom() throws Exception {
    final int numInsertions = 1000000;
    final double fpp = 0.03D;

    BloomFilter<Long> filter = BloomFilter.create(Funnels.longFunnel(), numInsertions, fpp);

    Random writer = new Random(1L);
    int inserted = 0;
    while (inserted < numInsertions) {
      filter.put(writer.nextLong());
      inserted++;
    }

    // A second generator with the same seed reproduces exactly the inserted values.
    Random reader = new Random(1L);
    int checked = 0;
    while (checked < numInsertions) {
      assertTrue(filter.mightContain(reader.nextLong()));
      checked++;
    }
  }
예제 #19
0
  /**
   * Fills a {@code MURMUR128_MITZ_32} filter with the decimal strings of one million even numbers
   * and pins its behaviour on odd numbers: which odd values below 900 may be false positives, the
   * exact total number of false positives, and how close that rate is to {@code expectedFpp()}.
   */
  public void testCreateAndCheckMitz32BloomFilterWithKnownFalsePositives() {
    int numInsertions = 1000000;
    int limit = numInsertions * 2;
    BloomFilter<String> filter =
        BloomFilter.create(
            Funnels.unencodedCharsFunnel(),
            numInsertions,
            0.03,
            BloomFilterStrategies.MURMUR128_MITZ_32);

    // Insert every even number in [0, limit).
    for (int even = 0; even < limit; even += 2) {
      filter.put(Integer.toString(even));
    }

    // Every inserted even number must be reported as possibly present.
    for (int even = 0; even < limit; even += 2) {
      String key = Integer.toString(even);
      assertTrue(filter.mightContain(key));
    }

    // Odd numbers were never inserted. Below 900, only the values listed here may be reported
    // as present. (These are all of the false positives under 900.)
    ImmutableSet<Integer> falsePositives =
        ImmutableSet.of(
            49, 51, 59, 163, 199, 321, 325, 363, 367, 469, 545, 561, 727, 769, 773, 781);
    for (int odd = 1; odd < 900; odd += 2) {
      if (falsePositives.contains(odd)) {
        continue;
      }
      assertFalse("BF should not contain " + odd, filter.mightContain(Integer.toString(odd)));
    }

    // Across all odd numbers below the limit there must be exactly 29824 false positives.
    int knownNumberOfFalsePositives = 29824;
    int observedFalsePositives = 0;
    for (int odd = 1; odd < limit; odd += 2) {
      boolean reported = filter.mightContain(Integer.toString(odd));
      if (reported) {
        observedFalsePositives++;
      }
    }
    assertEquals(knownNumberOfFalsePositives, observedFalsePositives);

    double actualFpp = (double) knownNumberOfFalsePositives / numInsertions;
    double expectedFpp = filter.expectedFpp();
    // The normal order of (expected, actual) is reversed here on purpose.
    assertEquals(actualFpp, expectedFpp, 0.00015);
  }
예제 #20
0
  /**
   * Fills a {@code MURMUR128_MITZ_64} filter (UTF-8 string funnel) with the decimal strings of
   * one million even numbers and pins its behaviour on odd numbers: which odd values below 900
   * may be false positives, the exact total number of false positives, and how close that rate
   * is to {@code expectedFpp()}.
   */
  public void testCreateAndCheckBloomFilterWithKnownUtf8FalsePositives64() {
    int numInsertions = 1000000;
    int limit = numInsertions * 2;
    BloomFilter<String> filter =
        BloomFilter.create(
            Funnels.stringFunnel(UTF_8),
            numInsertions,
            0.03,
            BloomFilterStrategies.MURMUR128_MITZ_64);

    // Insert every even number in [0, limit).
    for (int even = 0; even < limit; even += 2) {
      filter.put(Integer.toString(even));
    }

    // Every inserted even number must be reported as possibly present.
    for (int even = 0; even < limit; even += 2) {
      String key = Integer.toString(even);
      assertTrue(filter.mightContain(key));
    }

    // Odd numbers were never inserted. Below 900, only the values listed here may be reported
    // as present. (These are all of the false positives under 900.)
    ImmutableSet<Integer> falsePositives = ImmutableSet.of(129, 471, 723, 89, 751, 835, 871);
    for (int odd = 1; odd < 900; odd += 2) {
      if (falsePositives.contains(odd)) {
        continue;
      }
      assertFalse("BF should not contain " + odd, filter.mightContain(Integer.toString(odd)));
    }

    // Across all odd numbers below the limit there must be exactly 29763 false positives.
    int knownNumberOfFalsePositives = 29763;
    int observedFalsePositives = 0;
    for (int odd = 1; odd < limit; odd += 2) {
      boolean reported = filter.mightContain(Integer.toString(odd));
      if (reported) {
        observedFalsePositives++;
      }
    }
    assertEquals(knownNumberOfFalsePositives, observedFalsePositives);

    double actualFpp = (double) knownNumberOfFalsePositives / numInsertions;
    double expectedFpp = filter.expectedFpp();
    // The normal order of (expected, actual) is reversed here on purpose.
    assertEquals(actualFpp, expectedFpp, 0.00033);
  }
  /**
   * Builds the normalizer and, when a path is given, loads redirect pairs from a
   * Snappy-compressed file of tab-separated {@code source<TAB>target} lines.
   *
   * <p>When {@code useBloomFilter} is set, only redirect sources are recorded (in {@code
   * redirectFilter}) and {@code redirects} is an LRU cache whose values are computed through
   * {@code TitleNameIndexer.normalize}. Otherwise every source/target pair is stored in a {@code
   * StringMap}. In both cases {@code redirects} is wrapped as an unmodifiable map at the end.
   *
   * @param pathToEvaluationRedirectsData path of the redirects file, or {@code null} to skip
   *     loading
   * @throws IOException if the redirects file cannot be opened or closed
   */
  private TitleNameNormalizer(String pathToEvaluationRedirectsData) throws IOException {

    if (useBloomFilter) {
      redirectFilter = BloomFilter.create(Funnels.stringFunnel(), ESTIMATED_REDIRECTS);
      redirects =
          new LRUCache<String, String>(5000) {
            protected String loadValue(String src) {
              // Normalize once and reuse the result; fall back to the raw title when
              // normalization yields nothing.
              String normalized = TitleNameIndexer.normalize(src);
              return normalized == null ? src : normalized;
            }
          };
    } else {
      redirects = new StringMap<String>();
    }
    if (showInitProgress) {
      System.out.println(
          "Loading the most recent redirect pages from Wikipedia to normalize the output links to the latest version");
    }
    if (pathToEvaluationRedirectsData != null) {
      InputStream is = CompressionUtils.readSnappyCompressed(pathToEvaluationRedirectsData);
      LineIterator iterator = IOUtils.lineIterator(is, StandardCharsets.UTF_8);
      try {
        long linecount = 0;
        while (iterator.hasNext()) {
          String line = iterator.nextLine();
          // The counter only advances when progress reporting is enabled; it is used solely
          // for the progress message.
          if (showInitProgress && linecount++ % 100000 == 0) {
            System.out.println("loading the latest redirects; linecount=" + linecount);
          }
          String[] parts = StringUtils.split(line, '\t');

          // Titles use underscores in place of spaces.
          String src = parts[0].trim().replace(' ', '_');
          String trg = parts[1].trim().replace(' ', '_');
          if (useBloomFilter) {
            redirectFilter.put(src);
          } else {
            redirects.put(src, trg);
          }
        }
      } finally {
        // Always release the iterator, even when a malformed line or a read error aborts the
        // loop part-way through.
        iterator.close();
      }
    }
    redirects = Collections.unmodifiableMap(redirects);
    if (showInitProgress) {
      System.out.println(
          "Done  - Loading the most recent redirect pages from Wikipedia to normalize the output links to the latest version");
    }
  }
예제 #22
0
  /**
   * Checks that {@code optimalNumOfBits} never returns a negative value, and that a request too
   * large to represent is rejected by {@code create} with a descriptive message.
   */
  @SuppressWarnings("CheckReturnValue")
  public void testOptimalSize() {
    // Systematic sweep over small insertion counts and a fine grid of probabilities.
    for (int insertions = 1; insertions < 1000; insertions++) {
      double fpp = Double.MIN_VALUE;
      while (fpp < 1.0) {
        assertTrue(BloomFilter.optimalNumOfBits(insertions, fpp) >= 0);
        fpp += 0.001;
      }
    }

    // some random values
    Random random = new Random(0);
    for (int attempt = 0; attempt < 10000; attempt++) {
      // Draw the int before the double, keeping the generator's sequence unchanged.
      int insertions = random.nextInt(1 << 16);
      double fpp = random.nextDouble();
      assertTrue(BloomFilter.optimalNumOfBits(insertions, fpp) >= 0);
    }

    // and some crazy values (this used to be capped to Integer.MAX_VALUE, now it can go bigger)
    assertEquals(3327428144502L, BloomFilter.optimalNumOfBits(Integer.MAX_VALUE, Double.MIN_VALUE));
    try {
      BloomFilter.create(HashTestUtils.BAD_FUNNEL, Integer.MAX_VALUE, Double.MIN_VALUE);
      fail("we can't represent such a large BF!");
    } catch (IllegalArgumentException tooLarge) {
      assertThat(tooLarge).hasMessage("Could not create BloomFilter of 3327428144502 bits");
    }
  }
 /**
  * Replaces the current filter with a newly created one built from the stored expected
  * insertions and false positive rate.
  */
 @Override
 public void reset() {
   this.filter =
       BloomFilter.create(LOOKUPKEY_FUNNEL, this.expectedInsertions, this.falsePositiveRate);
 }
예제 #24
0
 /**
  * Runs {@code NullPointerTester} over the public instance methods of a filter and the public
  * static methods of {@code BloomFilter}.
  */
 public void testNullPointers() {
   NullPointerTester nullTester = new NullPointerTester();
   nullTester.testAllPublicInstanceMethods(
       BloomFilter.create(Funnels.unencodedCharsFunnel(), 100));
   nullTester.testAllPublicStaticMethods(BloomFilter.class);
 }
 /**
  * Creates a tracker with the given name and an empty filter sized from the given parameters.
  *
  * @param name value stored as this tracker's name
  * @param expectedInsertions expected insertion count passed to {@code BloomFilter.create}
  * @param falsePositiveRate false positive rate passed to {@code BloomFilter.create}
  */
 public BloomAccessTracker(String name, int expectedInsertions, double falsePositiveRate) {
   this.expectedInsertions = expectedInsertions;
   this.falsePositiveRate = falsePositiveRate;
   this.name = name;
   this.filter = BloomFilter.create(LOOKUPKEY_FUNNEL, expectedInsertions, falsePositiveRate);
 }
예제 #26
0
 /** Checks that {@code copy()} returns a distinct instance that is equal to the original. */
 public void testCopy() {
   BloomFilter<String> source = BloomFilter.create(Funnels.unencodedCharsFunnel(), 100);
   BloomFilter<String> duplicate = source.copy();

   // Different object identity, same value.
   assertNotSame(source, duplicate);
   assertEquals(source, duplicate);
 }
예제 #27
0
 /**
  * Replaces the current bloom filter with a newly created one and then persists it.
  *
  * @param expectedInsertions expected insertion count passed to {@code BloomFilter.create}
  * @throws IOException declared by this method; presumably raised while persisting the filter
  */
 private void createNewBloomFilter(int expectedInsertions) throws IOException {
   this.bloomFilter =
       BloomFilter.create(
           Funnels.byteArrayFunnel(), expectedInsertions, FALSE_POSITIVE_PROBABILITY);
   persistBloomFilter();
 }
예제 #28
0
 /**
  * Checks that two filters built from separate {@code CustomFunnel} instances with the same
  * expected insertions are equal.
  */
 public void testEqualsWithCustomFunnel() {
   BloomFilter<Long> first = BloomFilter.create(new CustomFunnel(), 100);
   BloomFilter<Long> second = BloomFilter.create(new CustomFunnel(), 100);

   assertEquals(first, second);
 }
예제 #29
0
 /**
  * Passes a filter built on a {@code CustomFunnel} to {@code
  * SerializableTester.reserializeAndAssert}.
  */
 public void testSerializationWithCustomFunnel() {
   SerializableTester.reserializeAndAssert(
       BloomFilter.create(new CustomFunnel(), 100));
 }