Example #1
  private void rescale(long newLandmarkInSeconds) {
    // rescale the weights based on a new landmark to avoid numerical overflow issues

    final double factor = Math.exp(-alpha * (newLandmarkInSeconds - landmarkInSeconds));

    weightedCount *= factor;

    postOrderTraversal(
        root,
        new Callback() {
          @Override
          public boolean process(Node node) {
            double oldWeight = node.weightedCount;

            node.weightedCount *= factor;

            if (oldWeight >= ZERO_WEIGHT_THRESHOLD && node.weightedCount < ZERO_WEIGHT_THRESHOLD) {
              --nonZeroNodeCount;
            }

            return true;
          }
        });

    landmarkInSeconds = newLandmarkInSeconds;
  }
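Judging by the rescale factor above, insertion weights grow as exp(alpha * (t - landmarkInSeconds)), and Math.exp overflows to Infinity once its argument passes roughly 709. A minimal standalone sketch of why moving the landmark avoids that (the alpha and timestamps here are made up):

public class RescaleDemo {
  public static void main(String[] args) {
    double alpha = 0.1;
    long landmark = 0;
    long now = 8_000; // seconds since the old landmark

    // Without rescaling, the exponent grows without bound and overflows:
    System.out.println(Math.exp(alpha * (now - landmark))); // Infinity

    // Advancing the landmark and multiplying every stored weight by
    // exp(-alpha * (newLandmark - landmark)) preserves all ratios, because
    // exp(alpha * (t - L)) * exp(-alpha * (L' - L)) == exp(alpha * (t - L')).
    long newLandmark = now;
    System.out.println(Math.exp(alpha * (now - newLandmark))); // 1.0
  }
}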
Example #2
  public void serialize(final DataOutput output) {
    try {
      output.writeDouble(maxError);
      output.writeDouble(alpha);
      output.writeLong(landmarkInSeconds);
      output.writeLong(min);
      output.writeLong(max);
      output.writeInt(totalNodeCount);

      postOrderTraversal(
          root,
          new Callback() {
            @Override
            public boolean process(Node node) {
              try {
                serializeNode(output, node);
              } catch (IOException e) {
                throw Throwables.propagate(e);
              }
              return true;
            }
          });
    } catch (IOException e) {
      throw Throwables.propagate(e);
    }
  }
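Since DataOutputStream implements DataOutput, the digest can be snapshotted into a byte array. A minimal usage sketch, assuming the enclosing QuantileDigest class from the Presto/Airlift implementation this code resembles:

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;

static byte[] snapshot(QuantileDigest digest) {
  ByteArrayOutputStream bytes = new ByteArrayOutputStream();
  // writes the header fields (maxError, alpha, landmark, min, max, node count),
  // then each node in post-order
  digest.serialize(new DataOutputStream(bytes));
  return bytes.toByteArray();
}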
Example #3
  @VisibleForTesting
  void compress() {
    ++compressions;

    final int compressionFactor = calculateCompressionFactor();

    postOrderTraversal(
        root,
        new Callback() {
          @Override
          public boolean process(Node node) {
            if (node.isLeaf()) {
              return true;
            }

            // if the children's weights are ~0, remove them and shift their weight to the parent

            double leftWeight = 0;
            if (node.left != null) {
              leftWeight = node.left.weightedCount;
            }

            double rightWeight = 0;
            if (node.right != null) {
              rightWeight = node.right.weightedCount;
            }

            boolean shouldCompress =
                node.weightedCount + leftWeight + rightWeight
                    < (int) (weightedCount / compressionFactor);

            double oldNodeWeight = node.weightedCount;
            if (shouldCompress || leftWeight < ZERO_WEIGHT_THRESHOLD) {
              node.left = tryRemove(node.left);

              weightedCount += leftWeight;
              node.weightedCount += leftWeight;
            }

            if (shouldCompress || rightWeight < ZERO_WEIGHT_THRESHOLD) {
              node.right = tryRemove(node.right);

              weightedCount += rightWeight;
              node.weightedCount += rightWeight;
            }

            if (oldNodeWeight < ZERO_WEIGHT_THRESHOLD
                && node.weightedCount >= ZERO_WEIGHT_THRESHOLD) {
              ++nonZeroNodeCount;
            }

            return true;
          }
        });

    if (root != null && root.weightedCount < ZERO_WEIGHT_THRESHOLD) {
      root = tryRemove(root);
    }
  }
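To make the merge condition concrete, here is a toy calculation (calculateCompressionFactor() is not shown, so the factor below is an assumed value): a node and its children are collapsed whenever their combined weight falls below weightedCount / compressionFactor.

public class CompressDemo {
  public static void main(String[] args) {
    double weightedCount = 10_000; // total weight held by the digest (made up)
    int compressionFactor = 50;    // assumed result of calculateCompressionFactor()

    double node = 120, left = 30, right = 40;
    boolean shouldCompress =
        node + left + right < (int) (weightedCount / compressionFactor); // 190 < 200

    System.out.println(shouldCompress);      // true: both children merge into the parent
    System.out.println(node + left + right); // 190.0, the parent's weight afterwards
  }
}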
Example #4
  public String toGraphviz() {
    StringBuilder builder = new StringBuilder();

    builder.append("digraph QuantileDigest {\n").append("\tgraph [ordering=\"out\"];");

    final List<Node> nodes = new ArrayList<>();
    postOrderTraversal(
        root,
        new Callback() {
          @Override
          public boolean process(Node node) {
            nodes.add(node);
            return true;
          }
        });

    Multimap<Integer, Node> nodesByLevel =
        Multimaps.index(
            nodes,
            new Function<Node, Integer>() {
              @Override
              public Integer apply(Node input) {
                return input.level;
              }
            });

    for (Map.Entry<Integer, Collection<Node>> entry : nodesByLevel.asMap().entrySet()) {
      builder.append("\tsubgraph level_" + entry.getKey() + " {\n").append("\t\trank = same;\n");

      for (Node node : entry.getValue()) {
        builder.append(
            String.format(
                "\t\t%s [label=\"[%s..%s]@%s\\n%s\", shape=rect, style=filled,color=%s];\n",
                idFor(node),
                node.getLowerBound(),
                node.getUpperBound(),
                node.level,
                node.weightedCount,
                node.weightedCount > 0 ? "salmon2" : "white"));
      }

      builder.append("\t}\n");
    }

    for (Node node : nodes) {
      if (node.left != null) {
        builder.append(format("\t%s -> %s;\n", idFor(node), idFor(node.left)));
      }
      if (node.right != null) {
        builder.append(format("\t%s -> %s;\n", idFor(node), idFor(node.right)));
      }
    }

    builder.append("}\n");

    return builder.toString();
  }
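A short usage sketch: write the DOT source to disk and render it with the Graphviz CLI (Files.writeString requires Java 11+; the digest instance is assumed to exist):

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

static void dumpDigest(QuantileDigest digest) throws IOException {
  Files.writeString(Path.of("digest.dot"), digest.toGraphviz());
  // then render from a shell: dot -Tpng digest.dot -o digest.png
}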
Example #5
  /**
   * Gets the exponentially decayed approximate counts of values in multiple buckets. The elements
   * in the provided list denote the upper bound of each bucket and must be sorted in ascending
   * order.
   *
   * The approximate count in each bucket is guaranteed to be within 2 * totalCount * maxError of
   * the real count.
   */
  public List<Bucket> getHistogram(List<Long> bucketUpperBounds) {
    checkArgument(
        Ordering.natural().isOrdered(bucketUpperBounds),
        "buckets must be sorted in increasing order");

    final ImmutableList.Builder<Bucket> builder = ImmutableList.builder();
    final PeekingIterator<Long> iterator = Iterators.peekingIterator(bucketUpperBounds.iterator());

    final AtomicDouble sum = new AtomicDouble();
    final AtomicDouble lastSum = new AtomicDouble();

    // for computing the weighted average of values in each bucket
    final AtomicDouble bucketWeightedSum = new AtomicDouble();

    final double normalizationFactor = weight(TimeUnit.NANOSECONDS.toSeconds(ticker.read()));

    postOrderTraversal(
        root,
        new Callback() {
          @Override
          public boolean process(Node node) {

            while (iterator.hasNext() && iterator.peek() <= node.getUpperBound()) {
              double bucketCount = sum.get() - lastSum.get();

              Bucket bucket =
                  new Bucket(
                      bucketCount / normalizationFactor, bucketWeightedSum.get() / bucketCount);

              builder.add(bucket);
              lastSum.set(sum.get());
              bucketWeightedSum.set(0);
              iterator.next();
            }

            bucketWeightedSum.addAndGet(node.getMiddle() * node.weightedCount);
            sum.addAndGet(node.weightedCount);
            return iterator.hasNext();
          }
        });

    while (iterator.hasNext()) {
      double bucketCount = sum.get() - lastSum.get();
      Bucket bucket =
          new Bucket(bucketCount / normalizationFactor, bucketWeightedSum.get() / bucketCount);

      builder.add(bucket);

      iterator.next();
    }

    return builder.build();
  }
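A usage sketch for getHistogram: one Bucket comes back per upper bound, covering (previous bound, bound], and values above the last bound are not counted. The getCount()/getMean() accessors on Bucket are assumed from the Presto/Airlift version:

import com.google.common.collect.ImmutableList;

static void printHistogram(QuantileDigest digest) {
  // buckets: (-inf, 10], (10, 100], (100, 1000]
  for (Bucket bucket : digest.getHistogram(ImmutableList.of(10L, 100L, 1_000L))) {
    System.out.println(bucket.getCount() + " values, mean " + bucket.getMean());
  }
}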
Example #6
  public long getMax() {
    final AtomicLong chosen = new AtomicLong(max);
    postOrderTraversal(
        root,
        new Callback() {
          @Override
          public boolean process(Node node) {
            if (node.weightedCount >= ZERO_WEIGHT_THRESHOLD) {
              chosen.set(node.getUpperBound());
              return false;
            }
            return true;
          }
        },
        TraversalOrder.REVERSE);

    return Math.min(max, chosen.get());
  }
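The reverse post-order traversal stops at the first node whose decayed weight is still non-negligible, so the reported value never exceeds the true maximum ever observed. A trivial usage sketch (the two-argument constructor taking maxError and the decay rate alpha is assumed):

static void demoMax() {
  QuantileDigest digest = new QuantileDigest(0.01, 0.1); // maxError, alpha (assumed)
  digest.add(5);
  digest.add(9_000);
  System.out.println(digest.getMax()); // 9000 while the entry's weight is still fresh
}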
Example #7
  @VisibleForTesting
  void validate() {
    final AtomicDouble sumOfWeights = new AtomicDouble();
    final AtomicInteger actualNodeCount = new AtomicInteger();
    final AtomicInteger actualNonZeroNodeCount = new AtomicInteger();

    if (root != null) {
      validateStructure(root);

      postOrderTraversal(
          root,
          new Callback() {
            @Override
            public boolean process(Node node) {
              sumOfWeights.addAndGet(node.weightedCount);
              actualNodeCount.incrementAndGet();

              if (node.weightedCount >= ZERO_WEIGHT_THRESHOLD) {
                actualNonZeroNodeCount.incrementAndGet();
              }

              return true;
            }
          });
    }

    checkState(
        Math.abs(sumOfWeights.get() - weightedCount) < ZERO_WEIGHT_THRESHOLD,
        "Computed weight (%s) doesn't match summary (%s)",
        sumOfWeights.get(),
        weightedCount);

    checkState(
        actualNodeCount.get() == totalNodeCount,
        "Actual node count (%s) doesn't match summary (%s)",
        actualNodeCount.get(),
        totalNodeCount);

    checkState(
        actualNonZeroNodeCount.get() == nonZeroNodeCount,
        "Actual non-zero node count (%s) doesn't match summary (%s)",
        actualNonZeroNodeCount.get(),
        nonZeroNodeCount);
  }
Example #8
  /**
   * Gets the values at the specified quantiles +/- maxError. The list of quantiles must be sorted
   * in increasing order, and each value must be in the range [0, 1].
   */
  public List<Long> getQuantiles(List<Double> quantiles) {
    checkArgument(
        Ordering.natural().isOrdered(quantiles), "quantiles must be sorted in increasing order");
    for (double quantile : quantiles) {
      checkArgument(quantile >= 0 && quantile <= 1, "quantile must be between [0,1]");
    }

    final ImmutableList.Builder<Long> builder = ImmutableList.builder();
    final PeekingIterator<Double> iterator = Iterators.peekingIterator(quantiles.iterator());

    postOrderTraversal(
        root,
        new Callback() {
          private double sum = 0;

          @Override
          public boolean process(Node node) {
            sum += node.weightedCount;

            while (iterator.hasNext() && sum > iterator.peek() * weightedCount) {
              iterator.next();

              // we know the max value ever seen, so cap the percentile to provide better error
              // bounds in this case
              long value = Math.min(node.getUpperBound(), max);

              builder.add(value);
            }

            return iterator.hasNext();
          }
        });

    // we finished the traversal without consuming all quantiles. This means the remaining quantiles
    // correspond to the max known value
    while (iterator.hasNext()) {
      builder.add(max);
      iterator.next();
    }

    return builder.build();
  }
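Finally, a usage sketch for getQuantiles, reading the median and tail values back out (the constructor and add(long) are assumed, as in the sketches above):

import com.google.common.collect.ImmutableList;
import java.util.List;

static void printQuantiles(long[] latencies) {
  QuantileDigest digest = new QuantileDigest(0.01); // maxError of 1% (assumed)
  for (long latency : latencies) {
    digest.add(latency);
  }

  List<Long> cuts = digest.getQuantiles(ImmutableList.of(0.5, 0.95, 0.99));
  // each returned value is within maxError of the true quantile
  System.out.println("p50=" + cuts.get(0) + " p95=" + cuts.get(1) + " p99=" + cuts.get(2));
}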