/**
 * Assigns sequential identifiers to the provided <code>clusters</code> (and their sub-clusters).
 * If a cluster already has an identifier, the identifier will not be changed.
 *
 * @param clusters Clusters to assign identifiers to.
 * @throws IllegalArgumentException if the provided clusters contain non-unique identifiers
 */
public static void assignClusterIds(Collection<Cluster> clusters) {
    final ArrayList<Cluster> flattened = Lists.newArrayListWithExpectedSize(clusters.size());

    flatten(flattened, clusters);

    synchronized (clusters) {
        final HashSet<Integer> ids = Sets.newHashSet();

        // First, find the start value for the id and check uniqueness of the ids
        // already provided.
        int maxId = Integer.MIN_VALUE;
        for (final Cluster cluster : flattened) {
            if (cluster.id != null) {
                if (!ids.add(cluster.id)) {
                    throw new IllegalArgumentException("Non-unique cluster id found: " + cluster.id);
                }
                maxId = Math.max(maxId, cluster.id);
            }
        }

        // We'd rather start with 0
        maxId = Math.max(maxId, -1);

        // Assign missing ids
        for (final Cluster c : flattened) {
            if (c.id == null) {
                c.id = ++maxId;
            }
        }
    }
}
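// A minimal usage sketch for assignClusterIds. The no-arg Cluster constructor and the
// direct access to the 'id' field are assumptions based on the snippet above, not a
// documented API.
static void assignClusterIdsExample() {
    Cluster first = new Cluster();
    first.id = 7;                         // pre-assigned id is preserved
    Cluster second = new Cluster();       // id is null and will be assigned
    assignClusterIds(Arrays.asList(first, second));
    // second.id is now 8: numbering continues after the largest existing id
}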
public void merge(QuantileDigest other) {
    // 1. rescale both digests to a common landmark
    rescaleToCommonLandmark(this, other);

    // 2. merge other into this (don't modify other)
    root = merge(root, other.root);

    max = Math.max(max, other.max);
    min = Math.min(min, other.min);

    // 3. compress to remove unnecessary nodes
    compress();
}
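// Hypothetical merge usage; the QuantileDigest(double maxError) constructor shown here
// is an assumption and is not part of this excerpt.
static void mergeExample() {
    QuantileDigest serverA = new QuantileDigest(0.01);
    QuantileDigest serverB = new QuantileDigest(0.01);
    serverA.add(10, 1);
    serverB.add(20, 1);
    serverA.merge(serverB);   // serverA now summarizes both sample sets; serverB is unchanged
}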
private int calculateCompressionFactor() {
    if (root == null) {
        return 1;
    }

    return Math.max((int) ((root.level + 1) / maxError), 1);
}
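// Worked example (values assumed, not from the source): for a digest whose root sits at
// level 63 (64-bit keys) and a target maxError of 0.01, the factor is
// max((int) (64 / 0.01), 1) = 6400, which bounds the expected node count used by add()
// to roughly 3 * 6400.
static int compressionFactorExample() {
    int rootLevel = 63;
    double maxError = 0.01;
    return Math.max((int) ((rootLevel + 1) / maxError), 1);   // 6400
}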
private void rescale(long newLandmarkInSeconds) {
    // rescale the weights based on a new landmark to avoid numerical overflow issues
    final double factor = Math.exp(-alpha * (newLandmarkInSeconds - landmarkInSeconds));

    weightedCount *= factor;

    postOrderTraversal(root, new Callback() {
        @Override
        public boolean process(Node node) {
            double oldWeight = node.weightedCount;

            node.weightedCount *= factor;

            if (oldWeight >= ZERO_WEIGHT_THRESHOLD && node.weightedCount < ZERO_WEIGHT_THRESHOLD) {
                --nonZeroNodeCount;
            }

            return true;
        }
    });

    landmarkInSeconds = newLandmarkInSeconds;
}
private void updateEstimatedCompactionsByTasks(List<List<SSTableReader>> tasks) {
    int n = 0;
    for (List<SSTableReader> bucket : tasks) {
        if (bucket.size() >= cfs.getMinimumCompactionThreshold())
            n += Math.ceil((double) bucket.size() / cfs.getMaximumCompactionThreshold());
    }
    estimatedRemainingTasks = n;
}
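// Worked example (thresholds assumed, not from the source): with a minimum compaction
// threshold of 4 and a maximum of 32, a bucket of 40 sstables counts as
// ceil(40 / 32.0) = 2 estimated tasks, while a bucket of 3 sstables is below the
// minimum and contributes nothing.
static int estimatedTasksExample() {
    int bucketSize = 40;
    int maxThreshold = 32;
    return (int) Math.ceil((double) bucketSize / maxThreshold);   // 2
}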
/**
 * Computes the max "weight" of any path starting at node and ending at a leaf in the
 * hypothetical complete tree. The weight is the sum of counts in the ancestors of a given node.
 */
private double computeMaxPathWeight(Node node) {
    if (node == null || node.level == 0) {
        return 0;
    }

    double leftMaxWeight = computeMaxPathWeight(node.left);
    double rightMaxWeight = computeMaxPathWeight(node.right);

    return Math.max(leftMaxWeight, rightMaxWeight) + node.weightedCount;
}
// purchase product and return relevant product recommendations
public List<Product> purchase(Customer customer, Product product) {
    purchasesCache.put(customer.id, product.id);

    ProductAssociativityGraph graph = productAssociativityGraphMap.get(product.category);
    Vertex v = Vertex.create(product.id);
    List<Vertex> associations = graph.getProductAssociations(v);

    int recommendSize = Math.min(associations.size(), maxNumRecommendations);
    return associations.stream()
            .map(vertex -> productCache.get(vertex.productId))
            .limit(recommendSize)
            .collect(Collectors.toList());
}
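// Hypothetical call site; the RecommendationEngine type, the Customer/Product instances,
// and the 'name' field are illustrative assumptions, not part of the snippet above.
static void purchaseExample(RecommendationEngine recommender, Customer customer, Product product) {
    List<Product> recommendations = recommender.purchase(customer, product);
    for (Product suggestion : recommendations) {
        System.out.println("Customers also bought: " + suggestion.name);
    }
}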
/** Adds a value to this digest. The value must be {@code >= 0} */
public void add(long value, long count) {
    checkArgument(count > 0, "count must be > 0");

    long nowInSeconds = TimeUnit.NANOSECONDS.toSeconds(ticker.read());

    int maxExpectedNodeCount = 3 * calculateCompressionFactor();
    if (nowInSeconds - landmarkInSeconds >= RESCALE_THRESHOLD_SECONDS) {
        rescale(nowInSeconds);
        compress(); // need to compress to get rid of nodes that may have decayed to ~ 0
    } else if (nonZeroNodeCount > MAX_SIZE_FACTOR * maxExpectedNodeCount && compressAutomatically) {
        // The size (number of non-zero nodes) of the digest is at most 3 * compression factor
        // If we're over MAX_SIZE_FACTOR of the expected size, compress
        // Note: we don't compress as soon as we go over expectedNodeCount to avoid unnecessarily
        // running a compression for every new added element when we're close to boundary
        compress();
    }

    double weight = weight(TimeUnit.NANOSECONDS.toSeconds(ticker.read())) * count;

    max = Math.max(max, value);
    min = Math.min(min, value);

    insert(longToBits(value), weight);
}
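// A minimal usage sketch; the QuantileDigest(maxError, alpha) constructor and the
// getQuantile(double) accessor are assumptions, not shown in this excerpt.
static long addExample() {
    QuantileDigest digest = new QuantileDigest(0.01, 0.1);
    digest.add(42, 1);           // record the value 42 once
    digest.add(100, 5);          // record the value 100 with a count of 5
    return digest.getQuantile(0.5);
}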
private Node merge(Node node, Node other) {
    if (node == null) {
        return copyRecursive(other);
    } else if (other == null) {
        return node;
    } else if (!inSameSubtree(node.bits, other.bits, Math.max(node.level, other.level))) {
        return makeSiblings(node, copyRecursive(other));
    } else if (node.level > other.level) {
        long branch = other.bits & node.getBranchMask();

        if (branch == 0) {
            node.left = merge(node.left, other);
        } else {
            node.right = merge(node.right, other);
        }
        return node;
    } else if (node.level < other.level) {
        Node result = createNode(other.bits, other.level, other.weightedCount);

        long branch = node.bits & other.getBranchMask();
        if (branch == 0) {
            result.left = merge(node, other.left);
            result.right = copyRecursive(other.right);
        } else {
            result.left = copyRecursive(other.left);
            result.right = merge(node, other.right);
        }

        return result;
    }

    // else, they must be at the same level and on the same path, so just bump the counts
    double oldWeight = node.weightedCount;

    weightedCount += other.weightedCount;
    node.weightedCount = node.weightedCount + other.weightedCount;
    node.left = merge(node.left, other.left);
    node.right = merge(node.right, other.right);

    if (oldWeight < ZERO_WEIGHT_THRESHOLD && node.weightedCount >= ZERO_WEIGHT_THRESHOLD) {
        nonZeroNodeCount++;
    }

    return node;
}
private void rescaleToCommonLandmark(QuantileDigest one, QuantileDigest two) {
    long nowInSeconds = TimeUnit.NANOSECONDS.toSeconds(ticker.read());

    // 1. rescale this and other to common landmark
    long targetLandmark = Math.max(one.landmarkInSeconds, two.landmarkInSeconds);

    if (nowInSeconds - targetLandmark >= RESCALE_THRESHOLD_SECONDS) {
        targetLandmark = nowInSeconds;
    }

    if (targetLandmark != one.landmarkInSeconds) {
        one.rescale(targetLandmark);
    }

    if (targetLandmark != two.landmarkInSeconds) {
        two.rescale(targetLandmark);
    }
}
public long getMax() {
    final AtomicLong chosen = new AtomicLong(max);
    postOrderTraversal(root, new Callback() {
        @Override
        public boolean process(Node node) {
            if (node.weightedCount >= ZERO_WEIGHT_THRESHOLD) {
                chosen.set(node.getUpperBound());
                return false;
            }
            return true;
        }
    }, TraversalOrder.REVERSE);

    return Math.min(max, chosen.get());
}
@VisibleForTesting
void validate() {
    final AtomicDouble sumOfWeights = new AtomicDouble();
    final AtomicInteger actualNodeCount = new AtomicInteger();
    final AtomicInteger actualNonZeroNodeCount = new AtomicInteger();

    if (root != null) {
        validateStructure(root);

        postOrderTraversal(root, new Callback() {
            @Override
            public boolean process(Node node) {
                sumOfWeights.addAndGet(node.weightedCount);
                actualNodeCount.incrementAndGet();

                if (node.weightedCount >= ZERO_WEIGHT_THRESHOLD) {
                    actualNonZeroNodeCount.incrementAndGet();
                }

                return true;
            }
        });
    }

    checkState(Math.abs(sumOfWeights.get() - weightedCount) < ZERO_WEIGHT_THRESHOLD,
            "Computed weight (%s) doesn't match summary (%s)", sumOfWeights.get(), weightedCount);

    checkState(actualNodeCount.get() == totalNodeCount,
            "Actual node count (%s) doesn't match summary (%s)", actualNodeCount.get(), totalNodeCount);

    checkState(actualNonZeroNodeCount.get() == nonZeroNodeCount,
            "Actual non-zero node count (%s) doesn't match summary (%s)",
            actualNonZeroNodeCount.get(), nonZeroNodeCount);
}
/**
 * @param bucket list of sstables, ordered from newest to oldest by getMinTimestamp().
 * @param maxThreshold maximum number of sstables in a single compaction task.
 * @return A bucket trimmed to the <code>maxThreshold</code> newest sstables.
 */
@VisibleForTesting
static List<SSTableReader> trimToThreshold(List<SSTableReader> bucket, int maxThreshold) {
    // Trim the oldest sstables off the end to meet the maxThreshold
    return bucket.subList(0, Math.min(bucket.size(), maxThreshold));
}
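// Illustration (the threshold value 32 is assumed): given a bucket already ordered
// newest-to-oldest, only the 32 newest sstables are kept for a single task.
static List<SSTableReader> trimExample(List<SSTableReader> bucket) {
    List<SSTableReader> trimmed = trimToThreshold(bucket, 32);
    // trimmed.size() == Math.min(bucket.size(), 32)
    return trimmed;
}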
private double weight(long timestamp) {
    return Math.exp(alpha * (timestamp - landmarkInSeconds));
}
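// Worked example (alpha assumed): with alpha = 0.1 and a sample arriving 60 seconds after
// the landmark, weight(timestamp) = e^(0.1 * 60) ≈ 403.4, so recent samples dominate older
// ones; rescale() periodically moves the landmark forward and scales every stored weight
// down by the matching factor to keep values from overflowing.
static double weightExample() {
    double alpha = 0.1;
    long secondsSinceLandmark = 60;
    return Math.exp(alpha * secondsSinceLandmark);   // ≈ 403.43
}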