コード例 #1
0
ファイル: FullHLLTest.java プロジェクト: PATRIC3/p3_solr
  /**
   * Smoke test for {@link HLL#cardinality()} that checks the small range correction at two
   * extremes: exactly one register populated, and every register but one populated.
   *
   * <p>In both scenarios the expected cardinality is computed independently as
   * {@code ceil(m * log(m / V))}, where {@code V} is the number of registers still at zero.
   */
  @Test
  public void smallRangeSmokeTest() {
    final int log2m = 11;
    final int regwidth = 5;
    final int m = 1 << log2m; // number of registers

    // --- Scenario 1: a single register is set -------------------------------------------
    final HLL oneSetHll =
        new HLL(
            log2m,
            regwidth,
            128 /*explicitThreshold, arbitrary, unused*/,
            256 /*sparseThreshold, arbitrary, unused*/,
            HLLType.FULL);
    oneSetHll.addRaw(ProbabilisticTestUtil.constructHLLValue(log2m, 0 /*ix*/, 1 /*val*/));
    final long oneSetCardinality = oneSetHll.cardinality();

    // The small correction conditions hold trivially here: with only one register set there
    // are zero-valued registers left, and the raw estimator is below 5m/2.
    final int zeroesWhenOneSet = m - 1;
    final double oneSetCorrected = m * Math.log((double) m / zeroesWhenOneSet);
    final long oneSetExpected = (long) Math.ceil(oneSetCorrected);
    // NOTE(review): arguments are passed as (actual, expected). If the assertion framework in
    // use expects (expected, actual), a failure message would label the values the wrong way
    // round -- confirm against the file's imports before swapping.
    assertEquals(oneSetCardinality, oneSetExpected);

    // --- Scenario 2: every register except the last one is set --------------------------
    final HLL nearlyFullHll =
        new HLL(
            log2m,
            regwidth,
            128 /*explicitThreshold, arbitrary, unused*/,
            256 /*sparseThreshold, arbitrary, unused*/,
            HLLType.FULL);
    for (int registerIndex = 0; registerIndex < m - 1; registerIndex++) {
      nearlyFullHll.addRaw(
          ProbabilisticTestUtil.constructHLLValue(log2m, registerIndex /*ix*/, 1 /*val*/));
    }

    // The small correction conditions still hold: one register remains at zero, and the raw
    // estimator stays below 5m/2 because all set registers hold the minimal value 1.
    final long nearlyFullCardinality = nearlyFullHll.cardinality();

    final int zeroesWhenNearlyFull = 1;
    final double nearlyFullCorrected = m * Math.log((double) m / zeroesWhenNearlyFull);
    final long nearlyFullExpected = (long) Math.ceil(nearlyFullCorrected);
    assertEquals(nearlyFullCardinality, nearlyFullExpected);
  }
コード例 #2
0
ファイル: FullHLLTest.java プロジェクト: PATRIC3/p3_solr
  /**
   * Smoke test for {@link HLL#cardinality()} that fills every register with the same high value
   * so that the large range correction applies, then compares the result with an independently
   * computed {@code ceil(-2^L * log(1 - E / 2^L))}.
   */
  @Test
  public void largeRangeSmokeTest() {
    final int log2m = 12;
    final int regwidth = 5;
    final int m = 1 << log2m; // number of registers
    // With regwidth = 5 the hash space is log2m + (2^5 - 1 - 1) bits, hence L = log2m + 30
    final int l = log2m + 30;
    final int registerValue = 31 /*chosen to ensure large correction kicks in*/;

    final HLL hll =
        new HLL(
            log2m,
            regwidth,
            128 /*explicitThreshold, arbitrary, unused*/,
            256 /*sparseThreshold, arbitrary, unused*/,
            HLLType.FULL);
    for (int registerIndex = 0; registerIndex < m; registerIndex++) {
      hll.addRaw(ProbabilisticTestUtil.constructHLLValue(log2m, registerIndex, registerValue));
    }

    final long cardinality = hll.cardinality();

    // When all registers share one value the estimator simplifies to alpha / (m / 2^val)
    final double estimator = HLLUtil.alphaMSquared(m) / ((double) m / Math.pow(2, registerValue));

    // Precondition of the large range correction: the estimator exceeds 2^L / 30
    final double twoToL = Math.pow(2, l);
    final double largeRangeCutoff = twoToL / 30;
    assertTrue(estimator > largeRangeCutoff);

    // Large range correction: -2^L * log(1 - E/2^L)
    final double corrected = -1.0 * twoToL * Math.log(1.0 - estimator / twoToL);
    final long expected = (long) Math.ceil(corrected);
    // NOTE(review): arguments are passed as (actual, expected). If the assertion framework in
    // use expects (expected, actual), a failure message would label the values the wrong way
    // round -- confirm against the file's imports before swapping.
    assertEquals(cardinality, expected);
  }
コード例 #3
0
ファイル: FullHLLTest.java プロジェクト: PATRIC3/p3_solr
  /**
   * Smoke test for {@link HLL#cardinality()} that fills every register with the same mid-sized
   * value so that neither range correction applies, then compares the result with the ceiling of
   * the uncorrected estimator.
   */
  @Test
  public void normalRangeSmokeTest() {
    final int log2m = 11;
    final int regwidth = 5;
    final int m = 1 << log2m; // number of registers
    // With regwidth = 5 the hash space is log2m + (2^5 - 1 - 1) bits, hence L = log2m + 30
    final int l = log2m + 30;
    // Every register is put at this 'medium' value
    final int registerValue = 7 /*chosen to ensure neither correction kicks in*/;

    final HLL hll =
        new HLL(
            log2m,
            regwidth,
            128 /*explicitThreshold, arbitrary, unused*/,
            256 /*sparseThreshold, arbitrary, unused*/,
            HLLType.FULL);
    for (int registerIndex = 0; registerIndex < m; registerIndex++) {
      hll.addRaw(ProbabilisticTestUtil.constructHLLValue(log2m, registerIndex, registerValue));
    }

    final long cardinality = hll.cardinality();

    // When all registers share one value the estimator simplifies to alpha / (m / 2^val)
    final double estimator = HLLUtil.alphaMSquared(m) / ((double) m / Math.pow(2, registerValue));

    // Preconditions of the uncorrected range: at most 2^L / 30 and strictly above 5m/2
    final double largeRangeCutoff = Math.pow(2, l) / 30;
    assertTrue(estimator <= largeRangeCutoff);
    final double smallRangeCutoff = 5 * m / (double) 2;
    assertTrue(estimator > smallRangeCutoff);

    final long expected = (long) Math.ceil(estimator);
    // NOTE(review): arguments are passed as (actual, expected). If the assertion framework in
    // use expects (expected, actual), a failure message would label the values the wrong way
    // round -- confirm against the file's imports before swapping.
    assertEquals(cardinality, expected);
  }
コード例 #4
0
ファイル: FullHLLTest.java プロジェクト: PATRIC3/p3_solr
  /**
   * Tests {@link HLL#toBytes(ISchemaVersion)} and {@link HLL#fromBytes(byte[])} by round-tripping
   * three instances: one with no values added, one with three registers set, and one with every
   * register set.
   */
  @Test
  public void toFromBytesTest() {
    final int log2m = 11 /*arbitrary*/;
    final int regwidth = 5;
    final int m = 1 << log2m; // aka 2^log2m

    final ISchemaVersion schemaVersion = SerializationUtil.DEFAULT_SCHEMA_VERSION;
    final HLLType type = HLLType.FULL;

    // Expected serialized size: schema padding plus the bytes needed for m registers
    final int expectedByteCount =
        schemaVersion.paddingBytes(type) + ProbabilisticTestUtil.getRequiredBytes(regwidth, m);

    // Should work on an empty element
    final HLL emptyHll =
        new HLL(
            log2m,
            regwidth,
            128 /*explicitThreshold, arbitrary, unused*/,
            256 /*sparseThreshold, arbitrary, unused*/,
            type);
    assertSurvivesRoundTrip(emptyHll, schemaVersion, expectedByteCount);

    // Should work on a partially filled element
    final HLL partialHll =
        new HLL(
            log2m,
            regwidth,
            128 /*explicitThreshold, arbitrary, unused*/,
            256 /*sparseThreshold, arbitrary, unused*/,
            type);
    for (int registerIndex = 0; registerIndex < 3; registerIndex++) {
      partialHll.addRaw(
          ProbabilisticTestUtil.constructHLLValue(log2m, registerIndex, (registerIndex + 9)));
    }
    assertSurvivesRoundTrip(partialHll, schemaVersion, expectedByteCount);

    // Should work on a full set
    final HLL fullHll =
        new HLL(
            log2m,
            regwidth,
            128 /*explicitThreshold, arbitrary, unused*/,
            256 /*sparseThreshold, arbitrary, unused*/,
            type);
    for (int registerIndex = 0; registerIndex < m; registerIndex++) {
      fullHll.addRaw(
          ProbabilisticTestUtil.constructHLLValue(log2m, registerIndex, (registerIndex % 9) + 1));
    }
    assertSurvivesRoundTrip(fullHll, schemaVersion, expectedByteCount);
  }

  /**
   * Serializes {@code hll} with {@code schemaVersion}, asserts the serialized length, then
   * deserializes the bytes and compares the result with {@code hll} via
   * {@code assertElementsEqual}.
   *
   * @param hll the instance to round-trip
   * @param schemaVersion the schema version passed to {@link HLL#toBytes(ISchemaVersion)}
   * @param expectedByteCount the length the serialized form must have
   */
  private void assertSurvivesRoundTrip(
      final HLL hll, final ISchemaVersion schemaVersion, final int expectedByteCount) {
    final byte[] serialized = hll.toBytes(schemaVersion);

    // NOTE(review): arguments are passed as (actual, expected). If the assertion framework in
    // use expects (expected, actual), a failure message would label the values the wrong way
    // round -- confirm against the file's imports before swapping.
    assertEquals(serialized.length, expectedByteCount);

    final HLL deserialized = HLL.fromBytes(serialized);
    assertElementsEqual(hll, deserialized);
  }