Ejemplo n.º 1
0
 /**
  * Builds a single bucket estimating the value range of a SUM aggregate over the given
  * distribution.
  *
  * <p>The lower bound is the smallest {@code low()} found among the input buckets, i.e. the sum
  * of a group that holds only that one value. The upper bound is computed by visiting the
  * distinct bucket highs in descending order and accumulating {@code high * records} until
  * {@code totalFreq - numGroups + 1} records (the "max group size" used by this estimate) have
  * been counted.
  *
  * @param dist buckets describing the distribution of the summed field
  * @param numGroups estimated number of groups; passed as both the second and third constructor
  *     argument of the returned bucket (presumably frequency and distinct values; confirm
  *     against {@code Bucket})
  * @return a bucket whose range is built from the estimated low and high sums, with both bound
  *     flags set to {@code true}
  */
 private static Bucket sumBucket(Collection<Bucket> dist, double numGroups) {
   // The accumulator for the upper bound must start at the additive identity; starting at any
   // other value shifts every estimated high sum by that amount.
   Constant sumLow = null, sumHigh = new DoubleConstant(0.0);
   double totalFreq = 0.0;
   // NOTE(review): buckets sharing the same high() overwrite each other here, so only the last
   // one seen contributes its frequency to the upper bound below.
   Map<Constant, Bucket> highs = new HashMap<Constant, Bucket>();
   for (Bucket bkt : dist) {
     // estimate sumLow as the only one smallest value in a group
     Constant low = bkt.valueRange().low();
     if (sumLow == null || low.compareTo(sumLow) < 0) {
       sumLow = low;
     }
     totalFreq += bkt.frequency();
     highs.put(bkt.valueRange().high(), bkt);
   }
   SortedSet<Constant> desc = new TreeSet<Constant>(highs.keySet()).descendingSet();
   // estimate sumHigh as the sum of top maxGroupSize values
   double maxGroupSize = totalFreq - numGroups + 1;
   double currSize = 0.0;
   for (Constant high : desc) {
     Bucket bkt = highs.get(high);
     // Take the whole bucket, or only as many records as are still missing from the group.
     double recsToSum = Math.min(bkt.frequency(), maxGroupSize - currSize);
     sumHigh = sumHigh.add(high.mul(new DoubleConstant(recsToSum)));
     currSize += recsToSum;
     if (Double.compare(currSize, maxGroupSize) >= 0) {
       break;
     }
   }
   ConstantRange sumRange = ConstantRange.newInstance(sumLow, true, sumHigh, true);
   // discard percentiles
   return new Bucket(sumRange, numGroups, numGroups);
 }
Ejemplo n.º 2
0
 /**
  * Builds a single bucket estimating the value range of a COUNT aggregate over the given
  * distribution.
  *
  * <p>The lower bound is fixed at 1.0. The upper bound is
  * {@code totalFreq - numGroups + 1}, where {@code totalFreq} is the sum of the frequencies of
  * all input buckets.
  *
  * @param dist buckets describing the distribution of the counted field
  * @param numGroups estimated number of groups; passed as both the second and third constructor
  *     argument of the returned bucket (presumably frequency and distinct values; confirm
  *     against {@code Bucket})
  * @return a bucket whose range is built from the two count bounds, with both bound flags set
  *     to {@code true}
  */
 private static Bucket countBucket(Collection<Bucket> dist, double numGroups) {
   Constant cntLow = new DoubleConstant(1.0);
   // Primitive accumulator: a boxed Double would be unboxed and re-boxed on every addition.
   double totalFreq = 0.0;
   for (Bucket bkt : dist) {
     totalFreq += bkt.frequency();
   }
   double maxGroupSize = totalFreq - numGroups + 1;
   Constant cntHigh = new DoubleConstant(maxGroupSize);
   ConstantRange countRange = ConstantRange.newInstance(cntLow, true, cntHigh, true);
   // discard percentiles
   return new Bucket(countRange, numGroups, numGroups);
 }
Ejemplo n.º 3
0
 /**
  * Builds a single bucket estimating the value range of an AVG aggregate over the given
  * distribution.
  *
  * <p>The range spans from the smallest {@code low()} to the largest {@code high()} found among
  * the input buckets. Both stay {@code null} when {@code dist} is empty.
  *
  * @param dist buckets describing the distribution of the averaged field
  * @param numGroups estimated number of groups; passed as both the second and third constructor
  *     argument of the returned bucket
  * @return a bucket whose range is built from the overall low and high values, with both bound
  *     flags set to {@code true}
  */
 private static Bucket avgBucket(Collection<Bucket> dist, double numGroups) {
   Constant smallest = null;
   Constant largest = null;
   for (Bucket current : dist) {
     Constant lo = current.valueRange().low();
     if (smallest == null || lo.compareTo(smallest) < 0) {
       smallest = lo;
     }
     Constant hi = current.valueRange().high();
     if (largest == null || hi.compareTo(largest) > 0) {
       largest = hi;
     }
   }
   // percentiles are not carried over into the aggregate bucket
   return new Bucket(
       ConstantRange.newInstance(smallest, true, largest, true), numGroups, numGroups);
 }
Ejemplo n.º 4
0
 /**
  * Builds a single bucket estimating the value range of a distinct-count aggregate over the
  * given distribution.
  *
  * <p>The lower bound is fixed at 1.0. The upper bound is the smaller of
  * {@code totalFreq - numGroups + 1} and the sum of {@code distinctValues()} over all input
  * buckets.
  *
  * @param dist buckets describing the distribution of the counted field
  * @param numGroups estimated number of groups; passed as both the second and third constructor
  *     argument of the returned bucket (presumably frequency and distinct values; confirm
  *     against {@code Bucket})
  * @return a bucket whose range is built from the two distinct-count bounds, with both bound
  *     flags set to {@code true}
  */
 private static Bucket distinctCountBucket(Collection<Bucket> dist, double numGroups) {
   Constant dcLow = new DoubleConstant(1.0);
   // Primitive accumulators: boxed Doubles would be unboxed and re-boxed on every addition.
   double totalFreq = 0.0;
   double dv = 0.0;
   for (Bucket bkt : dist) {
     totalFreq += bkt.frequency();
     dv += bkt.distinctValues();
   }
   double maxGroupSize = totalFreq - numGroups + 1;
   // A group cannot hold more distinct values than it has records, nor more than exist overall.
   Constant dcHigh = new DoubleConstant(Math.min(maxGroupSize, dv));
   ConstantRange distinctCountRange = ConstantRange.newInstance(dcLow, true, dcHigh, true);
   // discard percentiles
   return new Bucket(distinctCountRange, numGroups, numGroups);
 }
Ejemplo n.º 5
0
 /**
  * Builds a single bucket estimating the value range of a MAX aggregate over the given
  * distribution.
  *
  * <p>The range spans from the smallest {@code low()} to the largest {@code high()} found among
  * the input buckets; both stay {@code null} when {@code dist} is empty. The third constructor
  * argument of the returned bucket is capped at the sum of {@code distinctValues()} over all
  * input buckets.
  *
  * @param dist buckets describing the distribution of the field whose maximum is taken
  * @param numGroups estimated number of groups; passed as the second constructor argument of the
  *     returned bucket and, capped as described above, as the third
  * @return a bucket whose range is built from the overall low and high values, with both bound
  *     flags set to {@code true}
  */
 private static Bucket maxBucket(Collection<Bucket> dist, double numGroups) {
   Constant maxLow = null, maxHigh = null;
   // Primitive accumulator: a boxed Double would be unboxed and re-boxed on every addition.
   double dv = 0.0;
   for (Bucket bkt : dist) {
     Constant low = bkt.valueRange().low();
     if (maxLow == null || low.compareTo(maxLow) < 0) {
       maxLow = low;
     }
     Constant high = bkt.valueRange().high();
     if (maxHigh == null || high.compareTo(maxHigh) > 0) {
       maxHigh = high;
     }
     dv += bkt.distinctValues();
   }
   ConstantRange maxRange = ConstantRange.newInstance(maxLow, true, maxHigh, true);
   // discard percentiles
   return new Bucket(maxRange, numGroups, Math.min(numGroups, dv));
 }