Beispiel #1
0
  /**
   * Assigns sequential identifiers to the provided <code>clusters</code> (and their sub-clusters).
   * If a cluster already has an identifier, the identifier will not be changed.
   *
   * @param clusters Clusters to assign identifiers to.
   * @throws IllegalArgumentException if the provided clusters contain non-unique identifiers
   */
  public static void assignClusterIds(Collection<Cluster> clusters) {
    final ArrayList<Cluster> flattened = Lists.newArrayListWithExpectedSize(clusters.size());

    flatten(flattened, clusters);

    synchronized (clusters) {
      final HashSet<Integer> ids = Sets.newHashSet();

      // First, find the start value for the id and check uniqueness of the ids
      // already provided.
      int maxId = Integer.MIN_VALUE;
      for (final Cluster cluster : flattened) {
        if (cluster.id != null) {
          if (!ids.add(cluster.id)) {
            throw new IllegalArgumentException("Non-unique cluster id found: " + cluster.id);
          }
          maxId = Math.max(maxId, cluster.id);
        }
      }

      // We'd rather start with 0
      maxId = Math.max(maxId, -1);

      // Assign missing ids
      for (final Cluster c : flattened) {
        if (c.id == null) {
          c.id = ++maxId;
        }
      }
    }
  }
Beispiel #2
0
  private int calculateCompressionFactor() {
    if (root == null) {
      return 1;
    }

    return Math.max((int) ((root.level + 1) / maxError), 1);
  }
Beispiel #3
0
  /**
   * Computes the max "weight" of any path starting at node and ending at a leaf in the hypothetical
   * complete tree. The weight is the sum of counts in the ancestors of a given node
   */
  private double computeMaxPathWeight(Node node) {
    if (node == null || node.level == 0) {
      return 0;
    }

    double leftMaxWeight = computeMaxPathWeight(node.left);
    double rightMaxWeight = computeMaxPathWeight(node.right);

    return Math.max(leftMaxWeight, rightMaxWeight) + node.weightedCount;
  }
Beispiel #4
0
  public void merge(QuantileDigest other) {
    rescaleToCommonLandmark(this, other);

    // 2. merge other into this (don't modify other)
    root = merge(root, other.root);

    max = Math.max(max, other.max);
    min = Math.min(min, other.min);

    // 3. compress to remove unnecessary nodes
    compress();
  }
Beispiel #5
0
  private Node merge(Node node, Node other) {
    if (node == null) {
      return copyRecursive(other);
    } else if (other == null) {
      return node;
    } else if (!inSameSubtree(node.bits, other.bits, Math.max(node.level, other.level))) {
      return makeSiblings(node, copyRecursive(other));
    } else if (node.level > other.level) {
      long branch = other.bits & node.getBranchMask();

      if (branch == 0) {
        node.left = merge(node.left, other);
      } else {
        node.right = merge(node.right, other);
      }
      return node;
    } else if (node.level < other.level) {
      Node result = createNode(other.bits, other.level, other.weightedCount);

      long branch = node.bits & other.getBranchMask();
      if (branch == 0) {
        result.left = merge(node, other.left);
        result.right = copyRecursive(other.right);
      } else {
        result.left = copyRecursive(other.left);
        result.right = merge(node, other.right);
      }

      return result;
    }

    // else, they must be at the same level and on the same path, so just bump the counts
    double oldWeight = node.weightedCount;

    weightedCount += other.weightedCount;
    node.weightedCount = node.weightedCount + other.weightedCount;
    node.left = merge(node.left, other.left);
    node.right = merge(node.right, other.right);

    if (oldWeight < ZERO_WEIGHT_THRESHOLD && node.weightedCount >= ZERO_WEIGHT_THRESHOLD) {
      nonZeroNodeCount++;
    }

    return node;
  }
Beispiel #6
0
  private void rescaleToCommonLandmark(QuantileDigest one, QuantileDigest two) {
    long nowInSeconds = TimeUnit.NANOSECONDS.toSeconds(ticker.read());

    // 1. rescale this and other to common landmark
    long targetLandmark = Math.max(one.landmarkInSeconds, two.landmarkInSeconds);

    if (nowInSeconds - targetLandmark >= RESCALE_THRESHOLD_SECONDS) {
      targetLandmark = nowInSeconds;
    }

    if (targetLandmark != one.landmarkInSeconds) {
      one.rescale(targetLandmark);
    }

    if (targetLandmark != two.landmarkInSeconds) {
      two.rescale(targetLandmark);
    }
  }
Beispiel #7
0
  public long getMin() {
    final AtomicLong chosen = new AtomicLong(min);
    postOrderTraversal(
        root,
        new Callback() {
          @Override
          public boolean process(Node node) {
            if (node.weightedCount >= ZERO_WEIGHT_THRESHOLD) {
              chosen.set(node.getLowerBound());
              return false;
            }
            return true;
          }
        },
        TraversalOrder.FORWARD);

    return Math.max(min, chosen.get());
  }
Beispiel #8
0
  /** Adds a value to this digest. The value must be {@code >= 0} */
  public void add(long value, long count) {
    checkArgument(count > 0, "count must be > 0");

    long nowInSeconds = TimeUnit.NANOSECONDS.toSeconds(ticker.read());

    int maxExpectedNodeCount = 3 * calculateCompressionFactor();
    if (nowInSeconds - landmarkInSeconds >= RESCALE_THRESHOLD_SECONDS) {
      rescale(nowInSeconds);
      compress(); // need to compress to get rid of nodes that may have decayed to ~ 0
    } else if (nonZeroNodeCount > MAX_SIZE_FACTOR * maxExpectedNodeCount && compressAutomatically) {
      // The size (number of non-zero nodes) of the digest is at most 3 * compression factor
      // If we're over MAX_SIZE_FACTOR of the expected size, compress
      // Note: we don't compress as soon as we go over expectedNodeCount to avoid unnecessarily
      // running a compression for every new added element when we're close to boundary
      compress();
    }

    double weight = weight(TimeUnit.NANOSECONDS.toSeconds(ticker.read())) * count;

    max = Math.max(max, value);
    min = Math.min(min, value);

    insert(longToBits(value), weight);
  }