/** * Fill the list recursively with collideNum numbers that sum to <i>sum</i>. Assuming items * already in the array p sum to <i>usedSum</i>. The numbers must be > min. It stops if it finds * <i>limit</i> patterns and the probability of those found patterns become very small comparing * to the other patterns found (probability <1/freq) * * @param patterns * @param collideNum * @param sum * @param usedSum sum of items in array <i>p</i> * @param min: is used to make p a sorted array * @param level what is the current level of recursion (number of items in array <i>p</i>) * @param p keeps track of already decided items in the pattern, it will be a sorted array * @param oldDistribution * @param limit the number of patterns to be found * @param freqI the frequency of the sum items in the sketch * @return */ private boolean getPatternRecursive( List<Map<Integer, Integer>> patterns, int collideNum, int sum, int usedSum, int min, int level, int[] p, Distribution oldDistribution, double limit, int freqI) { if (min > sum - usedSum) { // repetitive return false; } if (level == collideNum - 1) { // create the pattern with whatever remained out of sum p[level] = sum - usedSum; if (oldDistribution.getFreq(p[level]) == 0) { return false; } Map<Integer, Integer> pattern = getPattern(p); patterns.add(pattern); double prob = getProb(oldDistribution, pattern); if (sumProb == 0) { meanProb = prob * (level + 1); } else { meanProb = alpha * meanProb + (1 - alpha) * prob * (level + 1); } sumProb += prob; } else { for (int j = min; j <= sum - usedSum - (collideNum - level - 1); j++) { p[level] = j; if (oldDistribution.getFreq(p[level]) == 0) { continue; } // if I have found enough patterns and the probability of found items are very small if (patterns.size() > limit && meanProb / sumProb < 1.0 / freqI) { return true; } getPatternRecursive( patterns, collideNum, sum, usedSum + j, Math.max(min, j), level + 1, p, oldDistribution, limit, freqI); } } return false; }
/**
 * Returns the combined weight of a pattern as the product of one factor per map entry.
 *
 * <p>For an entry {@code key -> count} the rate is {@code distribution.getFreq(key) / capwidth}.
 * A rate of zero yields a factor of {@code 0}; otherwise the factor is the Poisson probability of
 * {@code count} at that rate divided by {@code exp(-rate)}. See the paper for the derivation.
 *
 * <p>NOTE(review): if {@code getFreq} and {@code capwidth} are both integral, the division
 * truncates before being widened to {@code double} - confirm the operand types.
 *
 * @param distribution source of the per-key frequencies
 * @param pattern the pattern to evaluate; it must contain at least one entry, because the
 *     product is reduced without an identity value
 * @return the product of the per-entry factors
 */
private double getProb(Distribution distribution, Map<Integer, Integer> pattern) {
  return pattern.entrySet().stream()
      .mapToDouble(
          entry -> {
            double rate = distribution.getFreq(entry.getKey()) / capwidth;
            if (rate == 0) {
              return 0.0;
            }
            double poissonMass = new PoissonDistribution(rate).probability(entry.getValue());
            return poissonMass / FastMath.exp(-rate);
          })
      .reduce((left, right) -> left * right)
      .getAsDouble();
}