@Test public void testBloomFilterPredicateValuesExisting() throws Exception { BloomFilter bloomFilter = new BloomFilter(TEST_VALUES.size() * 10, 0.01); for (Object o : TEST_VALUES.keySet()) { if (o instanceof Long) { bloomFilter.addLong((Long) o); } else if (o instanceof Integer) { bloomFilter.addLong((Integer) o); } else if (o instanceof String) { bloomFilter.addString((String) o); } else if (o instanceof BigDecimal) { bloomFilter.addString(o.toString()); } else if (o instanceof Slice) { bloomFilter.addString(((Slice) o).toStringUtf8()); } else if (o instanceof Timestamp) { bloomFilter.addLong(((Timestamp) o).getTime()); } else if (o instanceof Double) { bloomFilter.addDouble((Double) o); } else { fail("Unsupported type " + o.getClass()); } } for (Map.Entry<Object, Type> testValue : TEST_VALUES.entrySet()) { boolean matched = checkInBloomFilter(bloomFilter, testValue.getKey(), testValue.getValue()); assertTrue(matched, "type " + testValue.getClass()); } // test unsupported type: can be supported by ORC but is not implemented yet assertTrue( checkInBloomFilter(bloomFilter, new Date(), DATE), "unsupported type DATE should always return true"); }
@Test public void testHiveBloomFilterSerde() throws Exception { BloomFilter bloomFilter = new BloomFilter(1_000_000L, 0.05); // String bloomFilter.addString(TEST_STRING); assertTrue(bloomFilter.testString(TEST_STRING)); assertFalse(bloomFilter.testString(TEST_STRING_NOT_WRITTEN)); // Integer bloomFilter.addLong(TEST_INTEGER); assertTrue(bloomFilter.testLong(TEST_INTEGER)); assertFalse(bloomFilter.testLong(TEST_INTEGER + 1)); // Re-construct HiveBloomFilter hiveBloomFilter = new HiveBloomFilter( ImmutableList.copyOf(Longs.asList(bloomFilter.getBitSet())), bloomFilter.getBitSize(), bloomFilter.getNumHashFunctions()); // String assertTrue(hiveBloomFilter.testString(TEST_STRING)); assertFalse(hiveBloomFilter.testString(TEST_STRING_NOT_WRITTEN)); // Integer assertTrue(hiveBloomFilter.testLong(TEST_INTEGER)); assertFalse(hiveBloomFilter.testLong(TEST_INTEGER + 1)); }