Пример #1
0
  /**
   * Populates a {@code BloomFilter} with one string and one long value, rebuilds a
   * {@code HiveBloomFilter} from the populated filter's bit set, bit size and number of hash
   * functions, and checks that the rebuilt filter passes the same membership assertions as the
   * populated one.
   */
  @Test
  public void testHiveBloomFilterSerde() throws Exception {
    BloomFilter source = new BloomFilter(1_000_000L, 0.05);

    // String value: the written one must be reported, the unwritten one must not.
    source.addString(TEST_STRING);
    assertTrue(source.testString(TEST_STRING));
    assertFalse(source.testString(TEST_STRING_NOT_WRITTEN));

    // Integer value: the written one must be reported, its successor must not.
    source.addLong(TEST_INTEGER);
    assertTrue(source.testLong(TEST_INTEGER));
    assertFalse(source.testLong(TEST_INTEGER + 1));

    // Rebuild the filter from the raw state exposed by the populated one.
    ImmutableList<Long> bits = ImmutableList.copyOf(Longs.asList(source.getBitSet()));
    HiveBloomFilter rebuilt =
        new HiveBloomFilter(bits, source.getBitSize(), source.getNumHashFunctions());

    // The rebuilt filter is checked with the same string expectations...
    assertTrue(rebuilt.testString(TEST_STRING));
    assertFalse(rebuilt.testString(TEST_STRING_NOT_WRITTEN));

    // ...and the same integer expectations.
    assertTrue(rebuilt.testLong(TEST_INTEGER));
    assertFalse(rebuilt.testLong(TEST_INTEGER + 1));
  }
Пример #2
0
  /**
   * Simulates a query over two columns where only one of them is used in the where clause, and
   * evaluates the predicate against column statistics with a matching bloom filter, with a
   * non-matching (empty) bloom filter, and without any bloom filter.
   */
  @Test
  public void testMatches() throws Exception {
    // Both columns are referenced by the predicate; only COLUMN_0 carries a constraint.
    List<ColumnReference<String>> references =
        ImmutableList.<ColumnReference<String>>of(
            new ColumnReference<>(COLUMN_0, 0, BIGINT),
            new ColumnReference<>(COLUMN_1, 1, BIGINT));

    // Constraint on the stripe column: bigint_0 = 1234.
    Domain equalsTarget = Domain.singleValue(BIGINT, 1234L);
    TupleDomain.ColumnDomain<String> constrainedColumn =
        new TupleDomain.ColumnDomain<>(COLUMN_0, equalsTarget);
    TupleDomain<String> constrained =
        TupleDomain.fromColumnDomains(Optional.of(ImmutableList.of(constrainedColumn)));
    TupleDomain<String> unconstrained = TupleDomain.all();

    TupleDomainOrcPredicate<String> predicate =
        new TupleDomainOrcPredicate<>(constrained, references, true);
    TupleDomainOrcPredicate<String> emptyPredicate =
        new TupleDomainOrcPredicate<>(unconstrained, references, true);

    // Snapshot the filter while it is still empty, then add the value and snapshot it again.
    HiveBloomFilter filter = new HiveBloomFilter(new BloomFilter(1000, 0.01));
    OrcProto.BloomFilter emptySnapshot = toOrcBloomFilter(filter);
    filter.addLong(1234);
    OrcProto.BloomFilter populatedSnapshot = toOrcBloomFilter(filter);

    // Statistics whose bloom filter was built after 1234 was added.
    Map<Integer, ColumnStatistics> matchingStats =
        ImmutableMap.of(
            0,
            new ColumnStatistics(
                null,
                null,
                new IntegerStatistics(10L, 2000L),
                null,
                null,
                null,
                null,
                toHiveBloomFilter(populatedSnapshot)));

    // Statistics whose bloom filter was captured before anything was added.
    Map<Integer, ColumnStatistics> nonMatchingStats =
        ImmutableMap.of(
            0,
            new ColumnStatistics(
                null,
                null,
                new IntegerStatistics(10L, 2000L),
                null,
                null,
                null,
                null,
                toHiveBloomFilter(emptySnapshot)));

    // Statistics that carry no bloom filter at all.
    Map<Integer, ColumnStatistics> noBloomFilterStats =
        ImmutableMap.of(
            0,
            new ColumnStatistics(
                null, null, new IntegerStatistics(10L, 2000L), null, null, null, null, null));

    assertTrue(predicate.matches(1L, matchingStats));
    assertTrue(predicate.matches(1L, noBloomFilterStats));
    assertFalse(predicate.matches(1L, nonMatchingStats));
    assertTrue(emptyPredicate.matches(1L, matchingStats));
  }