/**
 * Computes a percentile estimate over a collection of loosely typed values.
 *
 * <p>Each element is first converted with {@code TypeUtil.parseOrCast(DataType.DOUBLE, ...)}. The
 * converted values are then sorted in ascending order and handed to a freshly created
 * {@code Percentile} instance, which produces the estimate.
 *
 * @param values the elements to convert to doubles; iterated exactly once
 * @param quantile the argument forwarded to {@code Percentile.evaluate}
 *     (NOTE(review): presumably expressed on the percent scale -- confirm against callers)
 * @return the boxed result of {@code Percentile.evaluate(quantile)} over the converted values
 */
private static Double evaluate(Collection<?> values, int quantile) {
    List<Double> converted = new ArrayList<>();
    for (Object raw : values) {
      converted.add((Double) TypeUtil.parseOrCast(DataType.DOUBLE, raw));
    }

    // The estimator is fed data that is already in ascending order.
    double[] sorted = Doubles.toArray(converted);
    Arrays.sort(sorted);

    Percentile estimator = new Percentile();
    estimator.setData(sorted);

    double estimate = estimator.evaluate(quantile);
    return estimate;
  }
Пример #2
0
 /**
  * Chooses the thresholds on which a numeric feature's values are split when building candidate
  * splits.
  *
  * <ul>
  *   <li>With zero or one input value, the input array itself is returned (not a copy).
  *   <li>With at most {@code MAX_NUMERIC_SPLITS + 1} input values, the result holds the average of
  *       each pair of successive values, i.e. one fewer element than the input.
  *   <li>With more input values, the result holds {@code MAX_NUMERIC_SPLITS} percentiles of the
  *       data, taken at evenly spaced ranks strictly between 0 and 100.
  * </ul>
  *
  * <p>NOTE(review): averaging neighbouring elements presumably expects {@code values} to be
  * sorted already; nothing here sorts it -- confirm against the caller.
  *
  * @param values the numeric feature's values
  * @return the candidate split points as described above
  */
 private static double[] chooseNumericSplitPoints(double[] values) {
   int count = values.length;
   if (count <= 1) {
     return values;
   }

   if (count > MAX_NUMERIC_SPLITS + 1) {
     // Too many values to split between every pair: sample evenly spaced percentiles instead.
     Percentile distribution = new Percentile();
     distribution.setData(values);
     double[] percentiles = new double[MAX_NUMERIC_SPLITS];
     double bucketCount = MAX_NUMERIC_SPLITS + 1.0;
     for (int rank = 1; rank <= percentiles.length; rank++) {
       percentiles[rank - 1] = distribution.evaluate(100.0 * (rank / bucketCount));
     }
     return percentiles;
   }

   // Few enough values: one split point halfway between each adjacent pair.
   double[] midpoints = new double[count - 1];
   for (int left = 0; left < midpoints.length; left++) {
     midpoints[left] = (values[left + 1] + values[left]) / 2.0;
   }
   return midpoints;
 }
Пример #3
0
 /**
  * Estimates the <code>p</code>th percentile of a slice of the <code>values</code> array. The
  * slice starts at (0-based) index <code>begin</code> and spans <code>length</code> elements.
  *
  * <p>Special cases:
  *
  * <ul>
  *   <li><code>length = 0</code>: the result is <code>Double.NaN</code>.
  *   <li><code>length = 1</code>: the result is <code>values[begin]</code>, whatever the value of
  *       <code>p</code>.
  *   <li><code>values</code> is null, <code>begin</code> or <code>length</code> is invalid, or
  *       <code>p</code> is not a valid quantile value (it must be greater than 0 and less than or
  *       equal to 100): a <code>MathIllegalArgumentException</code> is thrown.
  * </ul>
  *
  * <p>The estimation algorithm is described in
  * {@link org.apache.commons.math3.stat.descriptive.rank.Percentile}; this method simply delegates
  * to the shared <code>PERCENTILE</code> instance.
  *
  * @param values array of input values
  * @param begin index of the first (0-based) element to include in the computation
  * @param length number of array elements to include
  * @param p the percentile to compute
  * @return the percentile value
  * @throws MathIllegalArgumentException if the parameters are not valid or the input array is null
  */
 public static double percentile(
     final double[] values, final int begin, final int length, final double p)
     throws MathIllegalArgumentException {
   final double estimate = PERCENTILE.evaluate(values, begin, length, p);
   return estimate;
 }