Example #1
  /**
   * Assigns sequential identifiers to the provided <code>clusters</code> (and their sub-clusters).
   * If a cluster already has an identifier, the identifier will not be changed.
   *
   * @param clusters Clusters to assign identifiers to.
   * @throws IllegalArgumentException if the provided clusters contain non-unique identifiers
   */
  public static void assignClusterIds(Collection<Cluster> clusters) {
    final ArrayList<Cluster> flattened = Lists.newArrayListWithExpectedSize(clusters.size());

    flatten(flattened, clusters);

    synchronized (clusters) {
      final HashSet<Integer> ids = Sets.newHashSet();

      // First, find the start value for the id and check uniqueness of the ids
      // already provided.
      int maxId = Integer.MIN_VALUE;
      for (final Cluster cluster : flattened) {
        if (cluster.id != null) {
          if (!ids.add(cluster.id)) {
            throw new IllegalArgumentException("Non-unique cluster id found: " + cluster.id);
          }
          maxId = Math.max(maxId, cluster.id);
        }
      }

      // Start ids at 0 when none were assigned (maxId stays -1, so ++maxId yields 0)
      maxId = Math.max(maxId, -1);

      // Assign missing ids
      for (final Cluster c : flattened) {
        if (c.id == null) {
          c.id = ++maxId;
        }
      }
    }
  }
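The flatten helper called above is not shown in this example. A minimal sketch of what it plausibly does, assuming Cluster exposes a getSubclusters() accessor (an assumption, not confirmed by the snippet):

  // Hypothetical sketch of the flatten helper: depth-first collection of
  // clusters and all of their sub-clusters into a single list.
  private static void flatten(List<Cluster> flattened, Collection<Cluster> clusters) {
    for (final Cluster cluster : clusters) {
      flattened.add(cluster);
      flatten(flattened, cluster.getSubclusters()); // getSubclusters() is assumed
    }
  }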
Example #2
  public void merge(QuantileDigest other) {
    // 1. rescale this and other to a common landmark
    rescaleToCommonLandmark(this, other);

    // 2. merge other into this (don't modify other)
    root = merge(root, other.root);

    max = Math.max(max, other.max);
    min = Math.min(min, other.min);

    // 3. compress to remove unnecessary nodes
    compress();
  }
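A usage sketch for merge with hypothetical values; the single-argument constructor taking the maximum rank error is an assumption:

    QuantileDigest a = new QuantileDigest(0.01); // constructor signature assumed
    QuantileDigest b = new QuantileDigest(0.01);
    a.add(10, 1);
    b.add(20, 1);
    a.merge(b); // a now summarizes both streams; b is left unmodified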
Example #3
  private int calculateCompressionFactor() {
    if (root == null) {
      return 1;
    }

    return Math.max((int) ((root.level + 1) / maxError), 1);
  }
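As a worked example: for a tree spanning the full range of a long (root.level == 63) and maxError == 0.01, the factor is (63 + 1) / 0.01 = 6400, and add() below treats 3 * 6400 nodes as the expected size ceiling.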
Example #4
  private void rescale(long newLandmarkInSeconds) {
    // rescale the weights based on a new landmark to avoid numerical overflow issues

    final double factor = Math.exp(-alpha * (newLandmarkInSeconds - landmarkInSeconds));

    weightedCount *= factor;

    postOrderTraversal(
        root,
        new Callback() {
          @Override
          public boolean process(Node node) {
            double oldWeight = node.weightedCount;

            node.weightedCount *= factor;

            if (oldWeight >= ZERO_WEIGHT_THRESHOLD && node.weightedCount < ZERO_WEIGHT_THRESHOLD) {
              --nonZeroNodeCount;
            }

            return true;
          }
        });

    landmarkInSeconds = newLandmarkInSeconds;
  }
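A worked example of the factor: if alpha were ln(2) / 60 (a hypothetical setting), advancing the landmark by 60 seconds yields factor = exp(-alpha * 60) = 0.5, halving every weight. Relative weights are preserved; only their common scale shrinks.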
Example #5
  // Estimate the number of remaining compaction tasks: each bucket that meets the
  // minimum threshold contributes ceil(bucket size / maximum threshold) tasks.
  private void updateEstimatedCompactionsByTasks(List<List<SSTableReader>> tasks) {
    int n = 0;
    for (List<SSTableReader> bucket : tasks) {
      if (bucket.size() >= cfs.getMinimumCompactionThreshold()) {
        n += Math.ceil((double) bucket.size() / cfs.getMaximumCompactionThreshold());
      }
    }
    estimatedRemainingTasks = n;
  }
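For instance, with a hypothetical minimum threshold of 4 and maximum threshold of 32, a bucket of 3 sstables contributes no tasks, a bucket of 12 contributes ceil(12 / 32) = 1, and a bucket of 50 contributes ceil(50 / 32) = 2.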
Example #6
  /**
   * Computes the maximum "weight" of any path starting at {@code node} and ending at a leaf in
   * the hypothetical complete tree. The weight of a path is the sum of the counts of the
   * ancestors of the leaf.
   */
  private double computeMaxPathWeight(Node node) {
    if (node == null || node.level == 0) {
      return 0;
    }

    double leftMaxWeight = computeMaxPathWeight(node.left);
    double rightMaxWeight = computeMaxPathWeight(node.right);

    return Math.max(leftMaxWeight, rightMaxWeight) + node.weightedCount;
  }
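A worked example: for a root at level 2 with weightedCount 1.0 whose level-1 children carry 3.0 and 0.5, the result is 1.0 + max(3.0, 0.5) = 4.0; level-0 leaves contribute nothing.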
Example #7
  // Record the purchase and return relevant product recommendations.
  public List<Product> purchase(Customer customer, Product product) {
    purchasesCache.put(customer.id, product.id);

    ProductAssociativityGraph graph = productAssociativityGraphMap.get(product.category);
    Vertex v = Vertex.create(product.id);
    List<Vertex> associations = graph.getProductAssociations(v);

    int recommendSize = Math.min(associations.size(), maxNumRecommendations);
    return associations
        .stream()
        .map(vertex -> productCache.get(vertex.productId))
        .limit(recommendSize)
        .collect(Collectors.toList());
  }
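Note that limit(recommendSize) is effectively redundant given the Math.min above: a stream over associations can never yield more than associations.size() elements, so limit(maxNumRecommendations) alone would cap the result identically.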
Example #8
  /** Adds a value to this digest. The value must be {@code >= 0} */
  public void add(long value, long count) {
    checkArgument(count > 0, "count must be > 0");

    long nowInSeconds = TimeUnit.NANOSECONDS.toSeconds(ticker.read());

    int maxExpectedNodeCount = 3 * calculateCompressionFactor();
    if (nowInSeconds - landmarkInSeconds >= RESCALE_THRESHOLD_SECONDS) {
      rescale(nowInSeconds);
      compress(); // need to compress to get rid of nodes that may have decayed to ~ 0
    } else if (nonZeroNodeCount > MAX_SIZE_FACTOR * maxExpectedNodeCount && compressAutomatically) {
      // The size (number of non-zero nodes) of the digest is at most 3 * compression factor
      // If we're over MAX_SIZE_FACTOR of the expected size, compress
      // Note: we don't compress as soon as we go over expectedNodeCount to avoid unnecessarily
      // running a compression for every new added element when we're close to boundary
      compress();
    }

    double weight = weight(TimeUnit.NANOSECONDS.toSeconds(ticker.read())) * count;

    max = Math.max(max, value);
    min = Math.min(min, value);

    insert(longToBits(value), weight);
  }
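A usage sketch with hypothetical values (the constructor signature is an assumption; the two-argument add is the method above):

    QuantileDigest digest = new QuantileDigest(0.01);
    digest.add(42, 3); // record the value 42 three times, weighted by the current decay factor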
Example #9
  private Node merge(Node node, Node other) {
    if (node == null) {
      return copyRecursive(other);
    } else if (other == null) {
      return node;
    } else if (!inSameSubtree(node.bits, other.bits, Math.max(node.level, other.level))) {
      return makeSiblings(node, copyRecursive(other));
    } else if (node.level > other.level) {
      long branch = other.bits & node.getBranchMask();

      if (branch == 0) {
        node.left = merge(node.left, other);
      } else {
        node.right = merge(node.right, other);
      }
      return node;
    } else if (node.level < other.level) {
      Node result = createNode(other.bits, other.level, other.weightedCount);

      long branch = node.bits & other.getBranchMask();
      if (branch == 0) {
        result.left = merge(node, other.left);
        result.right = copyRecursive(other.right);
      } else {
        result.left = copyRecursive(other.left);
        result.right = merge(node, other.right);
      }

      return result;
    }

    // else, they must be at the same level and on the same path, so just bump the counts
    double oldWeight = node.weightedCount;

    weightedCount += other.weightedCount;
    node.weightedCount = node.weightedCount + other.weightedCount;
    node.left = merge(node.left, other.left);
    node.right = merge(node.right, other.right);

    if (oldWeight < ZERO_WEIGHT_THRESHOLD && node.weightedCount >= ZERO_WEIGHT_THRESHOLD) {
      nonZeroNodeCount++;
    }

    return node;
  }
Example #10
  private void rescaleToCommonLandmark(QuantileDigest one, QuantileDigest two) {
    long nowInSeconds = TimeUnit.NANOSECONDS.toSeconds(ticker.read());

    // 1. rescale this and other to common landmark
    long targetLandmark = Math.max(one.landmarkInSeconds, two.landmarkInSeconds);

    if (nowInSeconds - targetLandmark >= RESCALE_THRESHOLD_SECONDS) {
      targetLandmark = nowInSeconds;
    }

    if (targetLandmark != one.landmarkInSeconds) {
      one.rescale(targetLandmark);
    }

    if (targetLandmark != two.landmarkInSeconds) {
      two.rescale(targetLandmark);
    }
  }
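Note that targetLandmark starts as the newer of the two landmarks, so in the common case only the staler digest pays for a rescale; both digests are rescaled only when even the newer landmark is older than RESCALE_THRESHOLD_SECONDS.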
Example #11
  public long getMax() {
    final AtomicLong chosen = new AtomicLong(max);
    postOrderTraversal(
        root,
        new Callback() {
          @Override
          public boolean process(Node node) {
            if (node.weightedCount >= ZERO_WEIGHT_THRESHOLD) {
              chosen.set(node.getUpperBound());
              return false;
            }
            return true;
          }
        },
        TraversalOrder.REVERSE);

    return Math.min(max, chosen.get());
  }
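The final Math.min clamp matters in both directions: a surviving node's getUpperBound() marks the top of its bucket and can exceed the largest value actually observed, while if the topmost nodes have decayed below ZERO_WEIGHT_THRESHOLD, chosen ends up below the stale max field.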
Example #12
  @VisibleForTesting
  void validate() {
    final AtomicDouble sumOfWeights = new AtomicDouble();
    final AtomicInteger actualNodeCount = new AtomicInteger();
    final AtomicInteger actualNonZeroNodeCount = new AtomicInteger();

    if (root != null) {
      validateStructure(root);

      postOrderTraversal(
          root,
          new Callback() {
            @Override
            public boolean process(Node node) {
              sumOfWeights.addAndGet(node.weightedCount);
              actualNodeCount.incrementAndGet();

              if (node.weightedCount >= ZERO_WEIGHT_THRESHOLD) {
                actualNonZeroNodeCount.incrementAndGet();
              }

              return true;
            }
          });
    }

    checkState(
        Math.abs(sumOfWeights.get() - weightedCount) < ZERO_WEIGHT_THRESHOLD,
        "Computed weight (%s) doesn't match summary (%s)",
        sumOfWeights.get(),
        weightedCount);

    checkState(
        actualNodeCount.get() == totalNodeCount,
        "Actual node count (%s) doesn't match summary (%s)",
        actualNodeCount.get(),
        totalNodeCount);

    checkState(
        actualNonZeroNodeCount.get() == nonZeroNodeCount,
        "Actual non-zero node count (%s) doesn't match summary (%s)",
        actualNonZeroNodeCount.get(),
        nonZeroNodeCount);
  }
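In a test, this would typically be invoked after a sequence of add, merge, and compress calls to confirm that the cached counters (weightedCount, totalNodeCount, nonZeroNodeCount) still agree with a full traversal of the tree.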
Example #13
 /**
  * @param bucket list of sstables, ordered from newest to oldest by getMinTimestamp().
  * @param maxThreshold maximum number of sstables in a single compaction task.
  * @return A bucket trimmed to the <code>maxThreshold</code> newest sstables.
  */
 @VisibleForTesting
 static List<SSTableReader> trimToThreshold(List<SSTableReader> bucket, int maxThreshold) {
   // Trim the oldest sstables off the end to meet the maxThreshold
   return bucket.subList(0, Math.min(bucket.size(), maxThreshold));
 }
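A behavior sketch with plain integers standing in for sstables (hypothetical; SSTableReader construction is elided), showing that only the newest maxThreshold entries survive:

   // Timestamps ordered newest to oldest, maxThreshold = 3.
   List<Integer> bucket = List.of(500, 400, 300, 200, 100);
   List<Integer> trimmed = bucket.subList(0, Math.min(bucket.size(), 3)); // [500, 400, 300]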
Example #14
 // Relative weight of a sample observed at timestamp: values newer than the landmark
 // get exponentially larger weights; rescale() periodically advances the landmark and
 // renormalizes to avoid numerical overflow.
 private double weight(long timestamp) {
   return Math.exp(alpha * (timestamp - landmarkInSeconds));
 }
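For example, if alpha were ln(2) / 60, a sample arriving 60 seconds after the landmark would receive weight exp(ln(2)) = 2.0: newer samples count exponentially more, and rescale() keeps these factors bounded by periodically advancing the landmark.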