private static Double evaluate(Collection<?> values, int quantile) { List<Double> doubleValues = new ArrayList<>(); for (Object value : values) { Double doubleValue = (Double) TypeUtil.parseOrCast(DataType.DOUBLE, value); doubleValues.add(doubleValue); } double[] data = Doubles.toArray(doubleValues); // The data must be (at least partially) ordered Arrays.sort(data); Percentile percentile = new Percentile(); percentile.setData(data); return percentile.evaluate(quantile); }
/**
 * Picks the candidate split points for a numeric feature.
 *
 * @param values the numeric feature's values
 * @return an array of values to split the numeric feature's values on when building candidate
 *     splits. With at most one input value, the input array itself is returned. When the input
 *     has at most {@code MAX_NUMERIC_SPLITS + 1} elements, the averages between successive
 *     values are returned as split points. When larger, {@code MAX_NUMERIC_SPLITS} approximate
 *     percentiles through the data are returned.
 */
private static double[] chooseNumericSplitPoints(double[] values) {
  int count = values.length;
  if (count <= 1) {
    return values;
  }

  if (count > MAX_NUMERIC_SPLITS + 1) {
    // Too many values to split between each pair: sample evenly spaced percentiles instead.
    Percentile distribution = new Percentile();
    distribution.setData(values);
    double[] percentiles = new double[MAX_NUMERIC_SPLITS];
    for (int rank = 1; rank <= percentiles.length; rank++) {
      double p = 100.0 * (rank / (MAX_NUMERIC_SPLITS + 1.0));
      percentiles[rank - 1] = distribution.evaluate(p);
    }
    return percentiles;
  }

  // Small input: one split point at the midpoint of each adjacent pair.
  double[] midpoints = new double[count - 1];
  for (int left = 0; left < midpoints.length; left++) {
    int right = left + 1;
    midpoints[left] = (values[right] + values[left]) / 2.0;
  }
  return midpoints;
}
/**
 * Estimates the <code>p</code>th percentile of a segment of the <code>values</code> array. The
 * segment starts at (0-based) position <code>begin</code> and spans <code>length</code>
 * elements.
 *
 * <ul>
 *   <li>Returns <code>Double.NaN</code> if <code>length = 0</code>
 *   <li>Returns (for any value of <code>p</code>) <code>values[begin]</code> if
 *       <code>length = 1</code>
 *   <li>Throws <code>MathIllegalArgumentException</code> if <code>values</code> is null,
 *       <code>begin</code> or <code>length</code> is invalid, or <code>p</code> is not a valid
 *       quantile value (p must be greater than 0 and less than or equal to 100)
 * </ul>
 *
 * <p>See {@link org.apache.commons.math3.stat.descriptive.rank.Percentile} for a description of
 * the percentile estimation algorithm used.
 *
 * @param values array of input values
 * @param begin the first (0-based) element to include in the computation
 * @param length the number of array elements to include
 * @param p the percentile to compute
 * @return the percentile value
 * @throws MathIllegalArgumentException if the parameters are not valid or the input array is null
 */
public static double percentile(
    final double[] values, final int begin, final int length, final double p)
    throws MathIllegalArgumentException {
  // Pure delegation: all validation and estimation is done by the shared PERCENTILE instance.
  final double estimate = PERCENTILE.evaluate(values, begin, length, p);
  return estimate;
}