/**
 * Runs basic assertions against {@code bf}: a freshly created object is asserted absent via both
 * {@code mightContain} and {@code apply}, then 100 fresh objects are inserted one at a time and
 * each is asserted present via both query methods immediately after its insertion.
 *
 * @param bf the filter under test; it is mutated by the insertions performed here
 */
private void checkSanity(BloomFilter<Object> bf) {
  // Each query gets its own never-inserted object, one per query method.
  Object probeForMightContain = new Object();
  assertFalse(bf.mightContain(probeForMightContain));
  Object probeForApply = new Object();
  assertFalse(bf.apply(probeForApply));

  int remaining = 100;
  while (remaining-- > 0) {
    Object inserted = new Object();
    bf.put(inserted);
    assertTrue(bf.mightContain(inserted));
    assertTrue(bf.apply(inserted));
  }
}
public void testCreateAndCheckMitz32BloomFilterWithKnownFalsePositives() { int numInsertions = 1000000; BloomFilter<String> bf = BloomFilter.create( Funnels.unencodedCharsFunnel(), numInsertions, 0.03, BloomFilterStrategies.MURMUR128_MITZ_32); // Insert "numInsertions" even numbers into the BF. for (int i = 0; i < numInsertions * 2; i += 2) { bf.put(Integer.toString(i)); } // Assert that the BF "might" have all of the even numbers. for (int i = 0; i < numInsertions * 2; i += 2) { assertTrue(bf.mightContain(Integer.toString(i))); } // Now we check for known false positives using a set of known false positives. // (These are all of the false positives under 900.) ImmutableSet<Integer> falsePositives = ImmutableSet.of( 49, 51, 59, 163, 199, 321, 325, 363, 367, 469, 545, 561, 727, 769, 773, 781); for (int i = 1; i < 900; i += 2) { if (!falsePositives.contains(i)) { assertFalse("BF should not contain " + i, bf.mightContain(Integer.toString(i))); } } // Check that there are exactly 29824 false positives for this BF. int knownNumberOfFalsePositives = 29824; int numFpp = 0; for (int i = 1; i < numInsertions * 2; i += 2) { if (bf.mightContain(Integer.toString(i))) { numFpp++; } } assertEquals(knownNumberOfFalsePositives, numFpp); double actualFpp = (double) knownNumberOfFalsePositives / numInsertions; double expectedFpp = bf.expectedFpp(); // The normal order of (expected, actual) is reversed here on purpose. assertEquals(actualFpp, expectedFpp, 0.00015); }
public void testCreateAndCheckBloomFilterWithKnownUtf8FalsePositives64() { int numInsertions = 1000000; BloomFilter<String> bf = BloomFilter.create( Funnels.stringFunnel(UTF_8), numInsertions, 0.03, BloomFilterStrategies.MURMUR128_MITZ_64); // Insert "numInsertions" even numbers into the BF. for (int i = 0; i < numInsertions * 2; i += 2) { bf.put(Integer.toString(i)); } // Assert that the BF "might" have all of the even numbers. for (int i = 0; i < numInsertions * 2; i += 2) { assertTrue(bf.mightContain(Integer.toString(i))); } // Now we check for known false positives using a set of known false positives. // (These are all of the false positives under 900.) ImmutableSet<Integer> falsePositives = ImmutableSet.of(129, 471, 723, 89, 751, 835, 871); for (int i = 1; i < 900; i += 2) { if (!falsePositives.contains(i)) { assertFalse("BF should not contain " + i, bf.mightContain(Integer.toString(i))); } } // Check that there are exactly 29763 false positives for this BF. int knownNumberOfFalsePositives = 29763; int numFpp = 0; for (int i = 1; i < numInsertions * 2; i += 2) { if (bf.mightContain(Integer.toString(i))) { numFpp++; } } assertEquals(knownNumberOfFalsePositives, numFpp); double actualFpp = (double) knownNumberOfFalsePositives / numInsertions; double expectedFpp = bf.expectedFpp(); // The normal order of (expected, actual) is reversed here on purpose. assertEquals(actualFpp, expectedFpp, 0.00033); }
/**
 * For ten fresh filters (each created for 100 expected insertions), inserts ten new strings and
 * asserts for every one that the value returned by {@code put} is the opposite of what
 * {@code mightContain} returned for the same string just before the insertion.
 */
public void testPutReturnValue() {
  for (int round = 0; round < 10; round++) {
    BloomFilter<String> filter = BloomFilter.create(Funnels.unencodedCharsFunnel(), 100);
    int inserted = 0;
    while (inserted < 10) {
      String candidate = new Object().toString();
      boolean reportedBeforePut = filter.mightContain(candidate);
      boolean putResult = filter.put(candidate);
      // Exactly one of the two flags may be true.
      assertTrue(reportedBeforePut ^ putResult);
      inserted++;
    }
  }
}
/**
 * Inserts the byte-array encodings of 0..9 into a filter, round-trips it through
 * {@code SerializableTester.reserialize}, and asserts that the copy still reports every inserted
 * element and has the same {@code expectedFpp()} as the original. Finishes with
 * {@code SerializableTester.reserializeAndAssert} on the original filter.
 */
public void testJavaSerialization() {
  final int elementCount = 10;
  BloomFilter<byte[]> bf = BloomFilter.create(Funnels.byteArrayFunnel(), 100);
  for (int value = 0; value < elementCount; value++) {
    byte[] encoded = Ints.toByteArray(value);
    bf.put(encoded);
  }

  BloomFilter<byte[]> copy = SerializableTester.reserialize(bf);
  for (int value = 0; value < elementCount; value++) {
    byte[] encoded = Ints.toByteArray(value);
    assertTrue(copy.mightContain(encoded));
  }
  assertEquals(bf.expectedFpp(), copy.expectedFpp());

  SerializableTester.reserializeAndAssert(bf);
}
/**
 * Inserts 1,000,000 pseudo-random longs (seed 1) into a filter created for that many insertions
 * at a requested 0.03 false-positive probability, then replays the same sequence from a new
 * generator with the same seed and asserts that every value is reported as possibly present.
 */
public void testBloom() throws Exception {
  final long seed = 1L;
  int numInsertions = 1000000;
  double fpp = 0.03D;
  BloomFilter<Long> filter = BloomFilter.create(Funnels.longFunnel(), numInsertions, fpp);

  Random writer = new Random(seed);
  int written = 0;
  while (written < numInsertions) {
    filter.put(writer.nextLong());
    written++;
  }

  // A second generator with the same seed reproduces the inserted sequence.
  Random reader = new Random(seed);
  int checked = 0;
  while (checked < numInsertions) {
    assertTrue(filter.mightContain(reader.nextLong()));
    checked++;
  }
}
/**
 * Resolves {@code title} by following entries in {@code redirects} until there is no further
 * entry, an entry maps a title to itself, or the hop cap is reached.
 *
 * <p>When {@code useBloomFilter} is set and {@code redirectFilter.mightContain(title)} is false,
 * the title is returned unchanged without consulting {@code redirects}.
 *
 * <p>A warning is printed to standard output only when the walk was actually cut short by the hop
 * cap with a further redirect still pending. Previously the warning also fired for chains that
 * ended on their own after exactly 50 or 51 hops, because the loop guard ({@code count++ <= 50})
 * and the warning check ({@code count >= 50}) used different bounds.
 *
 * @param title the title to resolve
 * @return the last title reached by the walk, or {@code title} itself if no redirect applied
 */
protected String redirect(String title) {
  if (useBloomFilter && !redirectFilter.mightContain(title)) {
    return title;
  }

  // The original guard `count++ <= 50` allowed up to 51 hops; the cap is kept at 51 so that the
  // resolved title is unchanged for every input.
  final int maxHops = 51;

  int hops = 0;
  String to = redirects.get(title);
  while (to != null && !to.equals(title) && hops < maxHops) {
    title = to;
    to = redirects.get(title);
    hops++;
  }

  // Warn only if the cap stopped the walk while another, different target was still available.
  boolean truncated = hops >= maxHops && to != null && !to.equals(title);
  if (truncated) {
    System.out.println(
        "Fixed point reached with title : "
            + title
            + " ; stopping to loop in redirects at this moment");
  }
  return title;
}
/**
 * Builds two filters with identical configuration, puts a different integer into each, and
 * checks that {@code putAll} makes the receiving filter report both elements while the argument
 * filter's answers for the two elements are unchanged.
 */
public void testPutAll() {
  final int first = 1;
  final int second = 2;

  // Receiver: holds only the first element.
  BloomFilter<Integer> target = BloomFilter.create(Funnels.integerFunnel(), 100);
  target.put(first);
  assertTrue(target.mightContain(first));
  assertFalse(target.mightContain(second));

  // Source: holds only the second element.
  BloomFilter<Integer> source = BloomFilter.create(Funnels.integerFunnel(), 100);
  source.put(second);
  assertFalse(source.mightContain(first));
  assertTrue(source.mightContain(second));

  assertTrue(target.isCompatible(source));
  target.putAll(source);

  // The receiver now reports both elements.
  assertTrue(target.mightContain(first));
  assertTrue(target.mightContain(second));

  // The source still answers as it did before the merge.
  assertFalse(source.mightContain(first));
  assertTrue(source.mightContain(second));
}
/**
 * Reports whether {@code filter} indicates that {@code key} may be present.
 *
 * @param key the key to look up
 * @return the result of {@code filter.mightContain(key)}
 */
@Override
public boolean hasSeen(LookupKey key) {
  final boolean possiblyPresent = filter.mightContain(key);
  return possiblyPresent;
}