@Test public void testHiveBloomFilterSerde() throws Exception { BloomFilter bloomFilter = new BloomFilter(1_000_000L, 0.05); // String bloomFilter.addString(TEST_STRING); assertTrue(bloomFilter.testString(TEST_STRING)); assertFalse(bloomFilter.testString(TEST_STRING_NOT_WRITTEN)); // Integer bloomFilter.addLong(TEST_INTEGER); assertTrue(bloomFilter.testLong(TEST_INTEGER)); assertFalse(bloomFilter.testLong(TEST_INTEGER + 1)); // Re-construct HiveBloomFilter hiveBloomFilter = new HiveBloomFilter( ImmutableList.copyOf(Longs.asList(bloomFilter.getBitSet())), bloomFilter.getBitSize(), bloomFilter.getNumHashFunctions()); // String assertTrue(hiveBloomFilter.testString(TEST_STRING)); assertFalse(hiveBloomFilter.testString(TEST_STRING_NOT_WRITTEN)); // Integer assertTrue(hiveBloomFilter.testLong(TEST_INTEGER)); assertFalse(hiveBloomFilter.testLong(TEST_INTEGER + 1)); }
@Test // simulate query on a 2 columns where 1 is used as part of the where, with and without bloom // filter public void testMatches() throws Exception { // stripe column Domain testingColumnHandleDomain = Domain.singleValue(BIGINT, 1234L); TupleDomain.ColumnDomain<String> column0 = new TupleDomain.ColumnDomain<>(COLUMN_0, testingColumnHandleDomain); // predicate consist of the bigint_0 = 1234 TupleDomain<String> effectivePredicate = TupleDomain.fromColumnDomains(Optional.of(ImmutableList.of(column0))); TupleDomain<String> emptyEffectivePredicate = TupleDomain.all(); // predicate column references List<ColumnReference<String>> columnReferences = ImmutableList.<ColumnReference<String>>builder() .add(new ColumnReference<>(COLUMN_0, 0, BIGINT)) .add(new ColumnReference<>(COLUMN_1, 1, BIGINT)) .build(); TupleDomainOrcPredicate<String> predicate = new TupleDomainOrcPredicate<>(effectivePredicate, columnReferences, true); TupleDomainOrcPredicate<String> emptyPredicate = new TupleDomainOrcPredicate<>(emptyEffectivePredicate, columnReferences, true); // assemble a matching and a non-matching bloom filter HiveBloomFilter hiveBloomFilter = new HiveBloomFilter(new BloomFilter(1000, 0.01)); OrcProto.BloomFilter emptyOrcBloomFilter = toOrcBloomFilter(hiveBloomFilter); hiveBloomFilter.addLong(1234); OrcProto.BloomFilter orcBloomFilter = toOrcBloomFilter(hiveBloomFilter); Map<Integer, ColumnStatistics> matchingStatisticsByColumnIndex = ImmutableMap.of( 0, new ColumnStatistics( null, null, new IntegerStatistics(10L, 2000L), null, null, null, null, toHiveBloomFilter(orcBloomFilter))); Map<Integer, ColumnStatistics> nonMatchingStatisticsByColumnIndex = ImmutableMap.of( 0, new ColumnStatistics( null, null, new IntegerStatistics(10L, 2000L), null, null, null, null, toHiveBloomFilter(emptyOrcBloomFilter))); Map<Integer, ColumnStatistics> withoutBloomFilterStatisticsByColumnIndex = ImmutableMap.of( 0, new ColumnStatistics( null, null, new IntegerStatistics(10L, 2000L), null, null, null, null, null)); assertTrue(predicate.matches(1L, matchingStatisticsByColumnIndex)); assertTrue(predicate.matches(1L, withoutBloomFilterStatisticsByColumnIndex)); assertFalse(predicate.matches(1L, nonMatchingStatisticsByColumnIndex)); assertTrue(emptyPredicate.matches(1L, matchingStatisticsByColumnIndex)); }