/**
 * Returns an aggregator that concatenates the string form of every aggregated value.
 *
 * <p>Each value is rendered with {@code toString()}, except values for which {@code Is.NA}
 * holds, which are rendered as the literal {@code "NA"}.
 *
 * @param delimit the separator placed between consecutive values
 * @param prefix the text placed in front of the joined values
 * @param suffix the text placed after the joined values
 * @return a joining aggregator
 */
public static Aggregator<Object, String, ?> join(
    CharSequence delimit, CharSequence prefix, CharSequence suffix) {
  return Aggregator.of(
      () -> new StringJoiner(delimit, prefix, suffix),
      (joiner, value) -> {
        String text = Is.NA(value) ? "NA" : value.toString();
        joiner.add(text);
      },
      StringJoiner::toString);
}
/**
 * Returns an aggregator that computes the median of the aggregated numbers.
 *
 * <p>Values for which {@code Is.NA} holds are ignored, matching the other numerical aggregators
 * in this class ({@code sum()}, {@code var()}, {@code min()}, {@code max()}). If no values
 * remain, the result is {@code Na.from(Double.class)}. For an even number of values the median is
 * the mean of the two middle values; for an odd number it is the middle value.
 *
 * @return an aggregator that computes the median.
 */
public static Aggregator<Number, Double, ?> median() {
  Aggregator<Number, Double, ? extends List<Number>> of =
      Aggregator.of(
          ArrayList::new,
          (list, v) -> {
            // Missing values must not take part in the ordering, otherwise they shift the
            // position of the middle element.
            if (!Is.NA(v)) {
              list.add(v);
            }
          },
          (list) -> {
            int size = list.size();
            if (size == 0) {
              return Na.from(Double.class);
            }
            Collections.sort(list, (a, b) -> Double.compare(a.doubleValue(), b.doubleValue()));
            // Lower-middle position; for even sizes the upper-middle is index + 1.
            int index = (size - 1) / 2;
            if (size % 2 == 0) {
              return (list.get(index).doubleValue() + list.get(index + 1).doubleValue()) / 2;
            }
            return list.get(index).doubleValue();
          });
  return of;
}
/**
 * Returns an aggregator that counts the values for which {@code Is.NA} does not hold.
 *
 * @param <T> the input type
 * @return a counting aggregator
 */
public static <T> Aggregator<T, Integer, ?> count() {
  return Aggregator.of(
      // A one-element array serves as a mutable int cell.
      () -> new int[1],
      (int[] counter, T value) -> {
        if (Is.NA(value)) {
          return;
        }
        counter[0]++;
      },
      (int[] counter) -> counter[0]);
}
/**
 * Returns an aggregator that sums the {@code double} value of every aggregated number.
 *
 * <p>Values for which {@code Is.NA} holds are skipped. If nothing is added, the result is
 * {@code 0.0}.
 *
 * @return a summing aggregator
 */
public static Aggregator<Number, Double, ?> sum() {
  return Aggregator.of(
      // A one-element array serves as a mutable double cell. It must have length 1: a
      // zero-length array makes every access to index 0 throw ArrayIndexOutOfBoundsException.
      () -> new double[1],
      (s, v) -> {
        if (!Is.NA(v)) {
          s[0] += v.doubleValue();
        }
      },
      s -> s[0]);
}
/**
 * Returns an aggregator that adds every value {@code copies} times in a row to the builder and
 * finally builds the vector.
 *
 * @param vb supplies the vector builder that receives the values
 * @param copies the number of copies of each element
 * @param <T> the input type
 * @return an aggregator that repeats each value {@code copies} times.
 */
public static <T> Aggregator<T, Vector, ?> each(Supplier<Vector.Builder> vb, int copies) {
  return Aggregator.of(
      vb,
      (builder, value) -> {
        int added = 0;
        while (added < copies) {
          builder.add(value);
          added++;
        }
      },
      Vector.Builder::build);
}
/**
 * Returns an aggregator that keeps only the values accepted by a predicate.
 *
 * @param supplier supplies the vector builder that receives the accepted values
 * @param predicate the predicate; a value is added only if it tests {@code true}
 * @param <T> the input type
 * @return a filtering aggregator
 */
public static <T> Aggregator<T, Vector, ?> filter(
    Supplier<Vector.Builder> supplier, Predicate<T> predicate) {
  return Aggregator.of(
      supplier,
      (builder, value) -> {
        if (!predicate.test(value)) {
          return;
        }
        builder.add(value);
      },
      Vector.Builder::build);
}
/**
 * Returns an aggregator that computes the variance as reported by {@code FastStatistics}.
 *
 * <p>Values for which {@code Is.NA} holds are skipped. If no value was added, the result is
 * {@code Na.from(Double.class)}.
 *
 * @return a variance aggregator
 */
public static Aggregator<Number, Double, ?> var() {
  return Aggregator.of(
      FastStatistics::new,
      (stats, value) -> {
        if (Is.NA(value)) {
          return;
        }
        stats.addValue(value.doubleValue());
      },
      (stats) -> {
        if (stats.getN() != 0) {
          return stats.getVariance();
        }
        return Na.from(Double.class);
      });
}
/**
 * Returns an aggregator that collects all values and then appends the collected sequence again
 * until it has been added {@code copies} times in total.
 *
 * <p>For {@code copies} of one or less the collected values are built as-is, because the
 * repetition loop starts counting at one.
 *
 * @param vb supplies the vector builder that receives the values
 * @param copies the total number of times the collected sequence should appear
 * @param <T> the input type
 * @return a repeating aggregator
 */
public static <T> Aggregator<T, Vector, ?> repeat(Supplier<Vector.Builder> vb, int copies) {
  return Aggregator.of(
      vb,
      Vector.Builder::add,
      (builder) -> {
        // NOTE(review): presumably a view of what has been accumulated so far - confirm.
        Vector collected = builder.getTemporaryVector();
        // Fix the length up front so values appended below are not themselves repeated.
        int length = collected.size();
        for (int round = 1; round < copies; round++) {
          for (int position = 0; position < length; position++) {
            builder.add(collected, position);
          }
        }
        return builder.build();
      });
}
/**
 * Returns an aggregator that computes the minimum as reported by {@code FastStatistics}.
 *
 * <p>Values for which {@code Is.NA} holds are skipped. If no value was added, the result is
 * {@code Na.from(Double.class)}.
 *
 * @return a minimum aggregator
 */
public static Aggregator<Number, Number, ?> min() {
  return Aggregator.of(
      FastStatistics::new,
      (stats, value) -> {
        if (Is.NA(value)) {
          return;
        }
        stats.addValue(value.doubleValue());
      },
      (stats) -> {
        if (stats.getN() != 0) {
          return stats.getMin();
        }
        return Na.from(Double.class);
      });
}
/**
 * Returns an aggregator that computes the maximum as reported by {@code FastStatistics}.
 *
 * <p>Values for which {@code Is.NA} holds are skipped. If no value was added, the result is
 * {@code Na.from(Double.class)}.
 *
 * @return a maximum aggregator
 */
public static Aggregator<Double, Double, ?> max() {
  return Aggregator.of(
      FastStatistics::new,
      (stats, value) -> {
        if (Is.NA(value)) {
          return;
        }
        stats.addValue(value);
      },
      (stats) -> {
        if (stats.getN() != 0) {
          return stats.getMax();
        }
        return Na.from(Double.class);
      });
}
/**
 * Returns an aggregator that folds all values into one by repeatedly applying a binary operator.
 *
 * <p>The first value seeds the running result; every later value {@code t} replaces the running
 * result with {@code operator.apply(result, t)}. A running result of {@code null} is treated as
 * "no value yet", so the next value re-seeds it. If no value is aggregated, the result is
 * {@code null}.
 *
 * @param operator combines the running result (first argument) with the next value
 * @param <T> the input and result type
 * @return a reducing aggregator
 */
public static <T> Aggregator<T, T, ?> reducing(BinaryOperator<T> operator) {
  class Value implements Consumer<T> {
    private T value;

    @Override
    public void accept(T t) {
      if (value == null) {
        value = t;
      } else {
        // The operator returns the combined value; it has to be stored, otherwise the
        // reduction never progresses past the first element.
        value = operator.apply(value, t);
      }
    }
  }
  return Aggregator.of(Value::new, Value::accept, (acc) -> acc.value);
}
/**
 * Returns an aggregator that counts how often each distinct value occurs.
 *
 * <p>The resulting vector holds the counts and is indexed by the distinct values. Entries appear
 * in the iteration order of the underlying {@code HashMap}.
 *
 * @param <T> the input type
 * @return a value-counting aggregator
 */
public static <T> Aggregator<T, Vector, ?> valueCounts() {
  return Aggregator.of(
      () -> new HashMap<T, Integer>(),
      (counts, value) -> {
        counts.merge(value, 1, Integer::sum);
      },
      (counts) -> {
        Vector.Builder countBuilder = Vec.inferringBuilder();
        Index.Builder indexBuilder = new HashIndex.Builder();
        counts.forEach(
            (key, occurrences) -> {
              countBuilder.add(occurrences);
              indexBuilder.add(key);
            });
        Vector result = countBuilder.build();
        result.setIndex(indexBuilder.build());
        return result;
      });
}
/**
 * Returns an aggregator that computes the relative frequency of each distinct value.
 *
 * <p>Every aggregated value increases both its own count and the total; the result maps each
 * distinct value to its count divided by the total.
 *
 * @param <T> the input type
 * @return an aggregator producing a map from value to relative frequency
 */
public static <T> Aggregator<T, Map<T, Double>, ?> normalizedValueCounts() {
  // Mutable accumulator: per-value counts plus the total number of values seen.
  class MapCounter {
    private HashMap<T, Integer> occurrences = new HashMap<>();
    private int total = 0;
  }
  return Aggregator.of(
      MapCounter::new,
      (MapCounter counter, T value) -> {
        counter.total++;
        counter.occurrences.merge(value, 1, Integer::sum);
      },
      counter -> {
        Map<T, Double> frequencies = new HashMap<>();
        for (Map.Entry<T, Integer> entry : counter.occurrences.entrySet()) {
          frequencies.put(entry.getKey(), entry.getValue() / (double) counter.total);
        }
        return frequencies;
      });
}
/**
 * Returns an aggregator that maps each element with {@code function}, adds the mapped value to
 * the supplied {@code Vector.Builder} and finally builds a new {@code Vector}.
 *
 * @param supplier supplies the vector builder that receives the mapped values
 * @param function the mapper applied to every element
 * @param <T> the input type
 * @param <O> the output type
 * @return a transformation aggregator
 */
public static <T, O> Aggregator<T, Vector, ?> transform(
    Supplier<Vector.Builder> supplier, Function<? super T, ? extends O> function) {
  return Aggregator.of(
      supplier,
      (builder, element) -> {
        builder.add(function.apply(element));
      },
      Vector.Builder::build);
}
/**
 * Returns an aggregator that counts the number of distinct values, as determined by a
 * {@code HashSet}.
 *
 * @param <T> the input type
 * @return an aggregator producing the number of distinct values
 */
public static <T> Aggregator<T, Integer, ?> nunique() {
  return Aggregator.of(
      () -> new HashSet<T>(),
      (seen, value) -> {
        seen.add(value);
      },
      (seen) -> seen.size());
}
/**
 * Returns an aggregator that collects the distinct values, as determined by a {@code HashSet},
 * into a vector created with {@code Vec.inferringBuilder()}.
 *
 * <p>The values appear in the iteration order of the set.
 *
 * @param <T> the input type
 * @return an aggregator producing a vector of the distinct values
 */
public static <T> Aggregator<T, Vector, ?> unique() {
  return Aggregator.of(
      () -> new HashSet<T>(),
      (seen, value) -> {
        seen.add(value);
      },
      (seen) -> {
        Vector distinct = Vec.inferringBuilder().addAll(seen).build();
        return distinct;
      });
}