/**
 * Evaluates a percentile over a collection of arbitrary values.
 *
 * <p>Every element is first converted with
 * {@code TypeUtil.parseOrCast(DataType.DOUBLE, ...)}; the converted values are then unboxed,
 * sorted in ascending order and handed to a {@link Percentile} instance.
 *
 * @param values the elements to convert to doubles and evaluate
 * @param quantile the percentile to evaluate, passed unchanged to {@code Percentile.evaluate}
 *     (presumably expected on a 0-100 scale; verify against the Percentile documentation)
 * @return the value reported by {@code Percentile.evaluate} for the converted data
 */
private static Double evaluate(Collection<?> values, int quantile) {
    List<Double> parsed = new ArrayList<>();
    for (Object raw : values) {
      parsed.add((Double) TypeUtil.parseOrCast(DataType.DOUBLE, raw));
    }

    // Unbox into a primitive array and sort ascending so Percentile receives ordered input.
    double[] sorted = Doubles.toArray(parsed);
    Arrays.sort(sorted);

    Percentile estimator = new Percentile();
    estimator.setData(sorted);
    return estimator.evaluate(quantile);
  }
예제 #2
0
파일: OptIgSplit.java 프로젝트: mapr/mahout
 /**
  * Chooses the candidate thresholds on which a numeric feature's values may be split.
  *
  * <ul>
  *   <li>With zero or one value, the input array itself is returned unchanged.
  *   <li>With at most {@code MAX_NUMERIC_SPLITS + 1} values, the result holds the average of
  *       each pair of adjacent values, i.e. {@code values.length - 1} split points.
  *   <li>With more values, the result holds {@code MAX_NUMERIC_SPLITS} percentiles of the
  *       data, taken at evenly spaced ranks {@code 100 * (i + 1) / (MAX_NUMERIC_SPLITS + 1)}.
  * </ul>
  *
  * @param values the numeric feature values (presumably already sorted ascending, since
  *     adjacent elements are averaged; confirm against the caller)
  * @return the array of split points described above
  */
 private static double[] chooseNumericSplitPoints(double[] values) {
   int count = values.length;
   if (count <= 1) {
     return values;
   }

   if (count > MAX_NUMERIC_SPLITS + 1) {
     // Too many values to split between every pair: fall back to evenly spaced percentiles.
     Percentile distribution = new Percentile();
     distribution.setData(values);
     double denominator = MAX_NUMERIC_SPLITS + 1.0;
     double[] percentiles = new double[MAX_NUMERIC_SPLITS];
     for (int rank = 0; rank < percentiles.length; rank++) {
       double p = 100.0 * ((rank + 1.0) / denominator);
       percentiles[rank] = distribution.evaluate(p);
     }
     return percentiles;
   }

   // Few enough values: use the midpoint of every adjacent pair.
   double[] midpoints = new double[count - 1];
   for (int left = 0; left < midpoints.length; left++) {
     midpoints[left] = (values[left + 1] + values[left]) / 2.0;
   }
   return midpoints;
 }