/** Smoke test for {@link HLL#cardinality()} and the proper use of the small range correction. */ @Test public void smallRangeSmokeTest() { final int log2m = 11; final int m = (1 << log2m); final int regwidth = 5; // only one register set { final HLL hll = new HLL( log2m, regwidth, 128 /*explicitThreshold, arbitrary, unused*/, 256 /*sparseThreshold, arbitrary, unused*/, HLLType.FULL); hll.addRaw(ProbabilisticTestUtil.constructHLLValue(log2m, 0 /*ix*/, 1 /*val*/)); final long cardinality = hll.cardinality(); // Trivially true that small correction conditions hold: one register // set implies zeroes exist, and estimator trivially smaller than 5m/2. // Small range correction: m * log(m/V) final long expected = (long) Math.ceil(m * Math.log((double) m / (m - 1) /*# of zeroes*/)); assertEquals(cardinality, expected); } // all but one register set { final HLL hll = new HLL( log2m, regwidth, 128 /*explicitThreshold, arbitrary, unused*/, 256 /*sparseThreshold, arbitrary, unused*/, HLLType.FULL); for (int i = 0; i < (m - 1); i++) { hll.addRaw(ProbabilisticTestUtil.constructHLLValue(log2m, i /*ix*/, 1 /*val*/)); } // Trivially true that small correction conditions hold: all but // one register set implies a zero exists, and estimator trivially // smaller than 5m/2 since it's alpha / ((m-1)/2) final long cardinality = hll.cardinality(); // Small range correction: m * log(m/V) final long expected = (long) Math.ceil(m * Math.log((double) m / 1 /*# of zeroes*/)); assertEquals(cardinality, expected); } }
/** Smoke test for {@link HLL#cardinality()} and the proper use of the large range correction. */ @Test public void largeRangeSmokeTest() { final int log2m = 12; final int regwidth = 5; // regwidth = 5, so hash space is // log2m + (2^5 - 1 - 1), so L = log2m + 30 final int l = log2m + 30; final int m = (1 << log2m); final HLL hll = new HLL( log2m, regwidth, 128 /*explicitThreshold, arbitrary, unused*/, 256 /*sparseThreshold, arbitrary, unused*/, HLLType.FULL); { final int registerValue = 31 /*chosen to ensure large correction kicks in*/; for (int i = 0; i < m; i++) { hll.addRaw(ProbabilisticTestUtil.constructHLLValue(log2m, i, registerValue)); } final long cardinality = hll.cardinality(); // Simplified estimator when all registers take same value: alpha / (m/2^val) final double estimator = HLLUtil.alphaMSquared(m) / ((double) m / Math.pow(2, registerValue)); // Assert conditions for large range assertTrue(estimator > Math.pow(2, l) / 30); // Large range correction: -2^L * log(1 - E/2^L) final long expected = (long) Math.ceil(-1.0 * Math.pow(2, l) * Math.log(1.0 - estimator / Math.pow(2, l))); assertEquals(cardinality, expected); } }
/** Smoke test for {@link HLL#cardinality()} and the proper use of the uncorrected estimator */ @Test public void normalRangeSmokeTest() { final int log2m = 11; final int regwidth = 5; // regwidth = 5, so hash space is // log2m + (2^5 - 1 - 1), so L = log2m + 30 final int l = log2m + 30; final int m = (1 << log2m); final HLL hll = new HLL( log2m, regwidth, 128 /*explicitThreshold, arbitrary, unused*/, 256 /*sparseThreshold, arbitrary, unused*/, HLLType.FULL); // all registers at 'medium' value { final int registerValue = 7 /*chosen to ensure neither correction kicks in*/; for (int i = 0; i < m; i++) { hll.addRaw(ProbabilisticTestUtil.constructHLLValue(log2m, i, registerValue)); } final long cardinality = hll.cardinality(); // Simplified estimator when all registers take same value: alpha / (m/2^val) final double estimator = HLLUtil.alphaMSquared(m) / ((double) m / Math.pow(2, registerValue)); // Assert conditions for uncorrected range assertTrue(estimator <= Math.pow(2, l) / 30); assertTrue(estimator > (5 * m / (double) 2)); final long expected = (long) Math.ceil(estimator); assertEquals(cardinality, expected); } }
/** Tests {@link HLL#toBytes(ISchemaVersion)} and {@link HLL#fromBytes(byte[])}. */ @Test public void toFromBytesTest() { final int log2m = 11 /*arbitrary*/; final int regwidth = 5; final ISchemaVersion schemaVersion = SerializationUtil.DEFAULT_SCHEMA_VERSION; final HLLType type = HLLType.FULL; final int padding = schemaVersion.paddingBytes(type); final int dataByteCount = ProbabilisticTestUtil.getRequiredBytes(regwidth, (1 << log2m) /*aka 2^log2m = m*/); final int expectedByteCount = padding + dataByteCount; { // Should work on an empty element final HLL hll = new HLL( log2m, regwidth, 128 /*explicitThreshold, arbitrary, unused*/, 256 /*sparseThreshold, arbitrary, unused*/, HLLType.FULL); final byte[] bytes = hll.toBytes(schemaVersion); // assert output length is correct assertEquals(bytes.length, expectedByteCount); final HLL inHLL = HLL.fromBytes(bytes); // assert register values correct assertElementsEqual(hll, inHLL); } { // Should work on a partially filled element final HLL hll = new HLL( log2m, regwidth, 128 /*explicitThreshold, arbitrary, unused*/, 256 /*sparseThreshold, arbitrary, unused*/, HLLType.FULL); for (int i = 0; i < 3; i++) { final long rawValue = ProbabilisticTestUtil.constructHLLValue(log2m, i, (i + 9)); hll.addRaw(rawValue); } final byte[] bytes = hll.toBytes(schemaVersion); // assert output length is correct assertEquals(bytes.length, expectedByteCount); final HLL inHLL = HLL.fromBytes(bytes); // assert register values correct assertElementsEqual(hll, inHLL); } { // Should work on a full set final HLL hll = new HLL( log2m, regwidth, 128 /*explicitThreshold, arbitrary, unused*/, 256 /*sparseThreshold, arbitrary, unused*/, HLLType.FULL); for (int i = 0; i < (1 << log2m) /*aka 2^log2m*/; i++) { final long rawValue = ProbabilisticTestUtil.constructHLLValue(log2m, i, (i % 9) + 1); hll.addRaw(rawValue); } final byte[] bytes = hll.toBytes(schemaVersion); // assert output length is correct assertEquals(bytes.length, expectedByteCount); final HLL inHLL = HLL.fromBytes(bytes); // assert register values correct assertElementsEqual(hll, inHLL); } }