private static Bucket sumBucket(Collection<Bucket> dist, double numGroups) { Constant sumLow = null, sumHigh = new DoubleConstant(1.0); double totalFreq = 0.0; Map<Constant, Bucket> highs = new HashMap<Constant, Bucket>(); for (Bucket bkt : dist) { // estimate sumLow as the only one smallest value in a group if (sumLow == null || bkt.valueRange().low().compareTo(sumLow) < 0) sumLow = bkt.valueRange().low(); totalFreq += bkt.frequency(); highs.put(bkt.valueRange().high(), bkt); } SortedSet<Constant> desc = new TreeSet<Constant>(highs.keySet()).descendingSet(); // estimate sumHigh as the sum of top maxGroupSize values double maxGroupSize = totalFreq - numGroups + 1; double currSize = 0.0; for (Constant high : desc) { Bucket bkt = highs.get(high); double recsToSum = Math.min(bkt.frequency(), maxGroupSize - currSize); sumHigh = sumHigh.add(high.mul(new DoubleConstant(recsToSum))); currSize += recsToSum; if (Double.compare(currSize, maxGroupSize) >= 0) break; } ConstantRange sumRange = ConstantRange.newInstance(sumLow, true, sumHigh, true); // discard percentiles return new Bucket(sumRange, numGroups, numGroups); }
private static Bucket countBucket(Collection<Bucket> dist, double numGroups) { Constant cntLow = new DoubleConstant(1.0); Double totalFreq = 0.0; for (Bucket bkt : dist) { totalFreq += bkt.frequency(); } double maxGroupSize = totalFreq - numGroups + 1; Constant cntHigh = new DoubleConstant(maxGroupSize); ConstantRange countRange = ConstantRange.newInstance(cntLow, true, cntHigh, true); // discard percentiles return new Bucket(countRange, numGroups, numGroups); }
private static Bucket avgBucket(Collection<Bucket> dist, double numGroups) { Constant avgLow = null, avgHigh = null; for (Bucket bkt : dist) { if (avgLow == null || bkt.valueRange().low().compareTo(avgLow) < 0) avgLow = bkt.valueRange().low(); if (avgHigh == null || bkt.valueRange().high().compareTo(avgHigh) > 0) avgHigh = bkt.valueRange().high(); } ConstantRange avgRange = ConstantRange.newInstance(avgLow, true, avgHigh, true); // discard percentiles return new Bucket(avgRange, numGroups, numGroups); }
private static Bucket distinctCountBucket(Collection<Bucket> dist, double numGroups) { Constant dcLow = new DoubleConstant(1.0); Double totalFreq = 0.0, dv = 0.0; for (Bucket bkt : dist) { totalFreq += bkt.frequency(); dv += bkt.distinctValues(); } double maxGroupSize = totalFreq - numGroups + 1; Constant dcHigh = new DoubleConstant(Math.min(maxGroupSize, dv)); ConstantRange distinctCountRange = ConstantRange.newInstance(dcLow, true, dcHigh, true); // discard percentiles return new Bucket(distinctCountRange, numGroups, numGroups); }
private static Bucket maxBucket(Collection<Bucket> dist, double numGroups) { Constant maxLow = null, maxHigh = null; Double dv = 0.0; for (Bucket bkt : dist) { if (maxLow == null || bkt.valueRange().low().compareTo(maxLow) < 0) maxLow = bkt.valueRange().low(); if (maxHigh == null || bkt.valueRange().high().compareTo(maxHigh) > 0) maxHigh = bkt.valueRange().high(); dv += bkt.distinctValues(); } ConstantRange maxRange = ConstantRange.newInstance(maxLow, true, maxHigh, true); // discard percentiles return new Bucket(maxRange, numGroups, Math.min(numGroups, dv)); }