@Test public void testOrcHiveBloomFilterSerde() throws Exception { BloomFilter bloomFilterWrite = new BloomFilter(1000L, 0.05); bloomFilterWrite.addString(TEST_STRING); assertTrue(bloomFilterWrite.testString(TEST_STRING)); OrcProto.BloomFilter.Builder bloomFilterBuilder = OrcProto.BloomFilter.newBuilder(); bloomFilterBuilder.addAllBitset(Longs.asList(bloomFilterWrite.getBitSet())); bloomFilterBuilder.setNumHashFunctions(bloomFilterWrite.getNumHashFunctions()); OrcProto.BloomFilter bloomFilter = bloomFilterBuilder.build(); OrcProto.BloomFilterIndex bloomFilterIndex = OrcProto.BloomFilterIndex.getDefaultInstance(); byte[] bytes = serializeBloomFilterToIndex(bloomFilter, bloomFilterIndex); // Read through method InputStream inputStream = new ByteArrayInputStream(bytes); OrcMetadataReader metadataReader = new OrcMetadataReader(); List<HiveBloomFilter> bloomFilters = metadataReader.readBloomFilterIndexes(inputStream); assertEquals(bloomFilters.size(), 1); assertTrue(bloomFilters.get(0).testString(TEST_STRING)); assertFalse(bloomFilters.get(0).testString(TEST_STRING_NOT_WRITTEN)); assertEquals(bloomFilterWrite.getBitSize(), bloomFilters.get(0).getBitSize()); assertEquals(bloomFilterWrite.getNumHashFunctions(), bloomFilters.get(0).getNumHashFunctions()); // Validate bit set assertTrue(Arrays.equals(bloomFilters.get(0).getBitSet(), bloomFilterWrite.getBitSet())); // Read directly: allows better inspection of the bit sets (helped to fix a lot of bugs) CodedInputStream input = CodedInputStream.newInstance(bytes); OrcProto.BloomFilterIndex deserializedBloomFilterIndex = OrcProto.BloomFilterIndex.parseFrom(input); List<OrcProto.BloomFilter> bloomFilterList = deserializedBloomFilterIndex.getBloomFilterList(); assertEquals(bloomFilterList.size(), 1); OrcProto.BloomFilter bloomFilterRead = bloomFilterList.get(0); // Validate contents of ORC bloom filter bit set assertTrue( Arrays.equals( Longs.toArray(bloomFilterRead.getBitsetList()), bloomFilterWrite.getBitSet())); // hash 
functions assertEquals(bloomFilterWrite.getNumHashFunctions(), bloomFilterRead.getNumHashFunctions()); // bit size assertEquals(bloomFilterWrite.getBitSet().length, bloomFilterRead.getBitsetCount()); }
@Test public void testHiveBloomFilterSerde() throws Exception { BloomFilter bloomFilter = new BloomFilter(1_000_000L, 0.05); // String bloomFilter.addString(TEST_STRING); assertTrue(bloomFilter.testString(TEST_STRING)); assertFalse(bloomFilter.testString(TEST_STRING_NOT_WRITTEN)); // Integer bloomFilter.addLong(TEST_INTEGER); assertTrue(bloomFilter.testLong(TEST_INTEGER)); assertFalse(bloomFilter.testLong(TEST_INTEGER + 1)); // Re-construct HiveBloomFilter hiveBloomFilter = new HiveBloomFilter( ImmutableList.copyOf(Longs.asList(bloomFilter.getBitSet())), bloomFilter.getBitSize(), bloomFilter.getNumHashFunctions()); // String assertTrue(hiveBloomFilter.testString(TEST_STRING)); assertFalse(hiveBloomFilter.testString(TEST_STRING_NOT_WRITTEN)); // Integer assertTrue(hiveBloomFilter.testLong(TEST_INTEGER)); assertFalse(hiveBloomFilter.testLong(TEST_INTEGER + 1)); }
/**
 * Converts a {@code BloomFilter} into its {@code OrcProto.BloomFilter} message form by copying
 * the bit set words and the number of hash functions.
 *
 * @param bloomFilter the filter whose bit set and hash-function count are copied
 * @return the built protobuf message
 */
private static OrcProto.BloomFilter toOrcBloomFilter(BloomFilter bloomFilter) {
    return OrcProto.BloomFilter.newBuilder()
            .addAllBitset(Longs.asList(bloomFilter.getBitSet()))
            .setNumHashFunctions(bloomFilter.getNumHashFunctions())
            .build();
}