/**
 * Assigns sequential identifiers to the provided <code>clusters</code> (and their
 * sub-clusters). If a cluster already has an identifier, the identifier will not be changed.
 *
 * @param clusters Clusters to assign identifiers to.
 * @throws IllegalArgumentException if the provided clusters contain non-unique identifiers
 */
public static void assignClusterIds(Collection<Cluster> clusters) {
    final ArrayList<Cluster> flattened = Lists.newArrayListWithExpectedSize(clusters.size());
    flatten(flattened, clusters);

    synchronized (clusters) {
        final HashSet<Integer> ids = Sets.newHashSet();

        // First, find the start value for the id and check uniqueness of the ids
        // already provided.
        int maxId = Integer.MIN_VALUE;
        for (final Cluster cluster : flattened) {
            if (cluster.id != null) {
                if (!ids.add(cluster.id)) {
                    throw new IllegalArgumentException("Non-unique cluster id found: " + cluster.id);
                }
                maxId = Math.max(maxId, cluster.id);
            }
        }

        // We'd rather start with 0
        maxId = Math.max(maxId, -1);

        // Assign missing ids
        for (final Cluster c : flattened) {
            if (c.id == null) {
                c.id = ++maxId;
            }
        }
    }
}
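The same two-pass idea in isolation, as a minimal sketch built around a hypothetical Item type (the Cluster, Lists, and Sets types above come from Carrot2 and Guava and are not reproduced here): existing ids are checked for uniqueness while tracking the running maximum, and missing ids continue from that maximum.

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

final class AssignIdsSketch {
    // Hypothetical stand-in for Cluster: only the nullable id matters here.
    static final class Item {
        Integer id;
        Item(Integer id) { this.id = id; }
    }

    static void assignIds(List<Item> items) {
        Set<Integer> seen = new HashSet<>();
        int maxId = -1; // so that the first generated id is 0

        // Pass 1: validate existing ids and find the largest one.
        for (Item item : items) {
            if (item.id != null) {
                if (!seen.add(item.id)) {
                    throw new IllegalArgumentException("Non-unique id found: " + item.id);
                }
                maxId = Math.max(maxId, item.id);
            }
        }

        // Pass 2: fill in missing ids, continuing past the current maximum.
        for (Item item : items) {
            if (item.id == null) {
                item.id = ++maxId;
            }
        }
    }

    public static void main(String[] args) {
        List<Item> items = new ArrayList<>();
        items.add(new Item(5));
        items.add(new Item(null));
        items.add(new Item(2));
        items.add(new Item(null));

        assignIds(items);
        items.forEach(item -> System.out.println(item.id)); // prints 5, 6, 2, 7
    }
}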
private int calculateCompressionFactor() {
    if (root == null) {
        return 1;
    }

    return Math.max((int) ((root.level + 1) / maxError), 1);
}
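For a sense of scale (illustrative numbers only): with maxError = 0.01 and a root at level 63, the factor is max((63 + 1) / 0.01, 1) = 6400. The add() method below uses 3 * this factor as the expected number of non-zero nodes and only forces a compression once the count exceeds MAX_SIZE_FACTOR times that, so the error bound directly controls how large the digest is allowed to grow.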
/**
 * Computes the max "weight" of any path starting at node and ending at a leaf in the
 * hypothetical complete tree. The weight is the sum of counts in the ancestors of a given node
 */
private double computeMaxPathWeight(Node node) {
    if (node == null || node.level == 0) {
        return 0;
    }

    double leftMaxWeight = computeMaxPathWeight(node.left);
    double rightMaxWeight = computeMaxPathWeight(node.right);

    return Math.max(leftMaxWeight, rightMaxWeight) + node.weightedCount;
}
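A small worked example of the recursion (illustrative values): for a node at level 2 with weightedCount 1 whose children at level 1 have weightedCounts 2 and 5, each with only leaf children below, the left and right calls return 2 and 5, so the result is max(2, 5) + 1 = 6. Leaf counts themselves are excluded, since the level == 0 base case returns 0.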
public void merge(QuantileDigest other) {
    // 1. rescale this and other to a common landmark so their decayed weights are comparable
    rescaleToCommonLandmark(this, other);

    // 2. merge other into this (don't modify other)
    root = merge(root, other.root);

    max = Math.max(max, other.max);
    min = Math.min(min, other.min);

    // 3. compress to remove unnecessary nodes
    compress();
}
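A hedged sketch of the intended call pattern, combining independently built digests (the QuantileDigest(double maxError) constructor is assumed here and does not appear in these excerpts):

QuantileDigest combined = new QuantileDigest(0.01);   // assumed constructor: 1% max error
for (QuantileDigest shard : shards) {                 // "shards" is a hypothetical collection
    combined.merge(shard);                            // each shard is left unmodified
}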
private Node merge(Node node, Node other) {
    if (node == null) {
        return copyRecursive(other);
    } else if (other == null) {
        return node;
    } else if (!inSameSubtree(node.bits, other.bits, Math.max(node.level, other.level))) {
        return makeSiblings(node, copyRecursive(other));
    } else if (node.level > other.level) {
        long branch = other.bits & node.getBranchMask();

        if (branch == 0) {
            node.left = merge(node.left, other);
        } else {
            node.right = merge(node.right, other);
        }
        return node;
    } else if (node.level < other.level) {
        Node result = createNode(other.bits, other.level, other.weightedCount);

        long branch = node.bits & other.getBranchMask();
        if (branch == 0) {
            result.left = merge(node, other.left);
            result.right = copyRecursive(other.right);
        } else {
            result.left = copyRecursive(other.left);
            result.right = merge(node, other.right);
        }
        return result;
    }

    // else, they must be at the same level and on the same path, so just bump the counts
    double oldWeight = node.weightedCount;

    // "weightedCount" (unqualified) is the digest-wide total; node.weightedCount is per-node
    weightedCount += other.weightedCount;
    node.weightedCount = node.weightedCount + other.weightedCount;
    node.left = merge(node.left, other.left);
    node.right = merge(node.right, other.right);

    if (oldWeight < ZERO_WEIGHT_THRESHOLD && node.weightedCount >= ZERO_WEIGHT_THRESHOLD) {
        nonZeroNodeCount++;
    }

    return node;
}
private void rescaleToCommonLandmark(QuantileDigest one, QuantileDigest two) {
    long nowInSeconds = TimeUnit.NANOSECONDS.toSeconds(ticker.read());

    // Use the more recent of the two landmarks as the common target; if even that one is
    // stale, rescale both digests to the current time.
    long targetLandmark = Math.max(one.landmarkInSeconds, two.landmarkInSeconds);

    if (nowInSeconds - targetLandmark >= RESCALE_THRESHOLD_SECONDS) {
        targetLandmark = nowInSeconds;
    }

    if (targetLandmark != one.landmarkInSeconds) {
        one.rescale(targetLandmark);
    }

    if (targetLandmark != two.landmarkInSeconds) {
        two.rescale(targetLandmark);
    }
}
public long getMin() {
    final AtomicLong chosen = new AtomicLong(min);
    postOrderTraversal(root, new Callback() {
        @Override
        public boolean process(Node node) {
            if (node.weightedCount >= ZERO_WEIGHT_THRESHOLD) {
                chosen.set(node.getLowerBound());
                return false;
            }
            return true;
        }
    }, TraversalOrder.FORWARD);

    return Math.max(min, chosen.get());
}
/** Adds a value to this digest. The value must be {@code >= 0} */
public void add(long value, long count) {
    checkArgument(count > 0, "count must be > 0");

    long nowInSeconds = TimeUnit.NANOSECONDS.toSeconds(ticker.read());

    int maxExpectedNodeCount = 3 * calculateCompressionFactor();
    if (nowInSeconds - landmarkInSeconds >= RESCALE_THRESHOLD_SECONDS) {
        rescale(nowInSeconds);
        compress(); // need to compress to get rid of nodes that may have decayed to ~ 0
    } else if (nonZeroNodeCount > MAX_SIZE_FACTOR * maxExpectedNodeCount && compressAutomatically) {
        // The size (number of non-zero nodes) of the digest is at most 3 * compression factor
        // If we're over MAX_SIZE_FACTOR of the expected size, compress
        // Note: we don't compress as soon as we go over expectedNodeCount to avoid unnecessarily
        // running a compression for every new added element when we're close to boundary
        compress();
    }

    double weight = weight(TimeUnit.NANOSECONDS.toSeconds(ticker.read())) * count;

    max = Math.max(max, value);
    min = Math.min(min, value);

    insert(longToBits(value), weight);
}
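And a hedged sketch of the basic ingestion path (again assuming the maxError-only constructor, plus a getQuantile(double) accessor that is not part of these excerpts; adjust to the API of the QuantileDigest version in use):

QuantileDigest digest = new QuantileDigest(0.01);
for (long latencyNanos : new long[] {120, 340, 95, 800, 410}) {
    digest.add(latencyNanos, 1);            // each observation counted once; values must be >= 0
}
long median = digest.getQuantile(0.5);      // approximate, within the configured error bound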