/**
 * Builds the subscribe function for zipping two observables.
 *
 * <p>An {@code Aggregator} is created around {@code zipFunction} (adapted through {@code
 * Functions.fromFunc}), one {@code ZipObserver} per source is registered on it, and the
 * aggregator itself is returned as the {@code Func1}.
 *
 * @param w0 the first source
 * @param w1 the second source
 * @param zipFunction the function applied to the paired values
 * @return the aggregator, typed as the function that subscribes an observer
 */
public static <T0, T1, R> Func1<Observer<R>, Subscription> zip(
    Observable<T0> w0, Observable<T1> w1, Func2<T0, T1, R> zipFunction) {
  Aggregator<R> aggregator = new Aggregator<R>(Functions.fromFunc(zipFunction));

  // Registration order follows the argument order: w0 first, then w1.
  ZipObserver<R, T0> first = new ZipObserver<R, T0>(aggregator, w0);
  aggregator.addObserver(first);
  ZipObserver<R, T1> second = new ZipObserver<R, T1>(aggregator, w1);
  aggregator.addObserver(second);

  return aggregator;
}
/**
 * Builds the subscribe function for zipping an arbitrary collection of observables.
 *
 * <p>One {@code ZipObserver} is registered on a new {@code Aggregator} for every element of
 * {@code ws}, in the collection's iteration order.
 *
 * @param ws the sources to zip
 * @param zipFunction the function handed to the aggregator
 * @return the aggregator, typed as the function that subscribes an observer
 */
@SuppressWarnings("unchecked")
public static <R> Func1<Observer<R>, Subscription> zip(
    Collection<Observable<?>> ws, FuncN<R> zipFunction) {
  // Raw types are used on purpose: the element types of the sources are unknown here.
  Aggregator aggregator = new Aggregator(zipFunction);
  for (Observable source : ws) {
    aggregator.addObserver(new ZipObserver(aggregator, source));
  }
  return aggregator;
}
/**
 * Returns an aggregator that joins the string form of every value with a {@link StringJoiner}.
 *
 * <p>Values for which {@code Is.NA} is true are written as the literal {@code "NA"}; all others
 * contribute their {@code toString()}.
 *
 * @param delimit the separator placed between values
 * @param prefix the text placed before the first value
 * @param suffix the text placed after the last value
 * @return a joining aggregator
 */
public static Aggregator<Object, String, ?> join(
    CharSequence delimit, CharSequence prefix, CharSequence suffix) {
  return Aggregator.of(
      () -> new StringJoiner(delimit, prefix, suffix),
      (joiner, value) -> {
        String text = Is.NA(value) ? "NA" : value.toString();
        joiner.add(text);
      },
      StringJoiner::toString);
}
/**
 * Returns an aggregator that collects all values into a list and computes their median.
 *
 * <p>An empty input yields {@code Na.from(Double.class)}. For an even number of values the two
 * middle values are averaged.
 *
 * @return an aggregator that computes the median
 */
public static Aggregator<Number, Double, ?> median() {
  // The explicitly typed local pins the accumulator to a List<Number> for type inference.
  Aggregator<Number, Double, ? extends List<Number>> medianAggregator =
      Aggregator.of(
          ArrayList::new,
          ArrayList::add,
          (values) -> {
            int n = values.size();
            if (n == 0) {
              return Na.from(Double.class);
            }
            if (n == 1) {
              return values.get(0).doubleValue();
            }
            if (n == 2) {
              // Two values need no ordering: the median is their mean.
              return (values.get(0).doubleValue() + values.get(1).doubleValue()) / 2;
            }

            Collections.sort(
                values, (x, y) -> Double.compare(x.doubleValue(), y.doubleValue()));
            int mid = (n - 1) / 2;
            if (n % 2 != 0) {
              return values.get(mid).doubleValue();
            }
            return (values.get(mid).doubleValue() + values.get(mid + 1).doubleValue()) / 2;
          });
  return medianAggregator;
}
/**
 * Passes the received value on to the aggregator, identifying this observer as its origin.
 *
 * <p>Any {@link Exception} thrown while doing so is redirected to {@code onError} instead of
 * propagating to the caller.
 */
@Override
public void onNext(T args) {
  try {
    a.next(this, args);
  } catch (Exception failure) {
    onError(failure);
  }
}
/**
 * Feeds one value each of three different types (String, Integer, int[]) into the aggregator
 * and verifies that exactly one combined value is emitted, with no error and no completion.
 */
@SuppressWarnings("unchecked")
/* mock calls don't do generics */
@Test
public void testAggregate3Types() {
  FuncN<String> concatZipr = getConcatZipr();

  /* the aggregator runs the zip function once every registered observer has supplied a value */
  Aggregator<String> aggregator = new Aggregator<String>(concatZipr);

  /* downstream observer receiving the aggregated events */
  Observer<String> downstream = mock(Observer.class);
  aggregator.call(downstream);

  /* mocked upstream observers, one per value type */
  ZipObserver<String, String> stringSource = mock(ZipObserver.class);
  ZipObserver<String, Integer> intSource = mock(ZipObserver.class);
  ZipObserver<String, int[]> arraySource = mock(ZipObserver.class);

  /* register the sources with the aggregator */
  aggregator.addObserver(stringSource);
  aggregator.addObserver(intSource);
  aggregator.addObserver(arraySource);

  /* push one value per source */
  aggregator.next(stringSource, "hello");
  aggregator.next(intSource, 2);
  aggregator.next(arraySource, new int[] {5, 6, 7});

  verify(downstream, never()).onError(any(Exception.class));
  verify(downstream, never()).onCompleted();
  verify(downstream, times(1)).onNext("hello2[5, 6, 7]");
}
/**
 * Finishes the group definition and hands it to the owning aggregator.
 *
 * <p>The entries of {@code id} and then of every collected operator are merged into a single
 * object, which is wrapped under the {@code "$group"} key and added to the aggregator.
 *
 * @return the aggregator this group was added to
 */
public Aggregator<T, R> end() {
  BasicDBObject groupSpec = new BasicDBObject();
  groupSpec.putAll((DBObject) id);
  for (BasicDBObject operator : operators) {
    // Later operators overwrite earlier entries that share a key.
    groupSpec.putAll((DBObject) operator);
  }

  DBObject groupStage = new BasicDBObject("$group", groupSpec);
  aggregator.addOperator(groupStage);
  return aggregator;
}
/**
 * Returns an aggregator that sums the {@code doubleValue()} of every non-NA input.
 *
 * <p>The running total is kept in a one-element {@code double[]} so the accumulator lambda can
 * mutate it. With no (non-NA) input the result is {@code 0.0}.
 *
 * @return a summing aggregator
 */
public static Aggregator<Number, Double, ?> sum() {
  return Aggregator.of(
      // One slot is required: a zero-length array would make every s[0] access throw
      // ArrayIndexOutOfBoundsException.
      () -> new double[1],
      (s, v) -> {
        if (!Is.NA(v)) {
          s[0] += v.doubleValue();
        }
      },
      s -> s[0]);
}
/**
 * Returns an aggregator that adds every incoming value {@code copies} times in a row to the
 * builder and finally builds the vector.
 *
 * @param vb supplies the vector builder used as accumulator
 * @param copies the number of copies of each element; values below one add nothing
 * @param <T> the input type
 * @return an aggregator that repeats each value {@code copies} times
 */
public static <T> Aggregator<T, Vector, ?> each(Supplier<Vector.Builder> vb, int copies) {
  return Aggregator.of(
      vb,
      (builder, value) -> {
        for (int remaining = copies; remaining > 0; remaining--) {
          builder.add(value);
        }
      },
      Vector.Builder::build);
}
/**
 * Returns an aggregator that counts the inputs for which {@code Is.NA} is false.
 *
 * <p>The counter lives in a one-element {@code int[]} so the accumulator lambda can mutate it.
 *
 * @param <T> the input type
 * @return a counting aggregator
 */
public static <T> Aggregator<T, Integer, ?> count() {
  return Aggregator.of(
      () -> new int[1],
      (int[] counter, T value) -> {
        if (Is.NA(value)) {
          return;
        }
        counter[0] += 1;
      },
      (int[] counter) -> counter[0]);
}
/**
 * Instantiates the given aggregator class and lets it compute a result over {@code values}.
 *
 * @param values the values to aggregate; when {@code null}, {@code null} is returned before
 *     the aggregator class is even inspected
 * @param aggregator the aggregator class, instantiated through its no-argument constructor
 * @return the result of {@code compute(values)}, or {@code null} if {@code values} is null
 * @throws IllegalArgumentException if {@code aggregator} is null (and {@code values} is not)
 * @throws APIException if the aggregator class cannot be instantiated
 */
public static Number aggregate(
    Collection<Number> values, Class<? extends Aggregator> aggregator) {
  if (values == null) {
    return null;
  }
  if (aggregator == null) {
    throw new IllegalArgumentException("Aggregator must not be null.");
  }

  Aggregator instance;
  try {
    instance = aggregator.newInstance();
  } catch (Exception e) {
    throw new APIException("Unable to instantiate aggregator " + aggregator, e);
  }
  // Kept outside the try so failures of compute() are not rewrapped as APIException.
  return instance.compute(values);
}
/**
 * Returns an aggregator that keeps only the values accepted by a predicate.
 *
 * @param supplier supplies the vector builder used as accumulator
 * @param predicate decides per value whether it is added ({@code true}) or dropped
 * @param <T> the input type
 * @return a filtering aggregator whose result is the built vector
 */
public static <T> Aggregator<T, Vector, ?> filter(
    Supplier<Vector.Builder> supplier, Predicate<T> predicate) {
  return Aggregator.of(
      supplier,
      (builder, value) -> {
        if (!predicate.test(value)) {
          return;
        }
        builder.add(value);
      },
      Vector.Builder::build);
}
/**
 * Tests getting the data for a desktop client: the product returned by the aggregator must
 * carry the title and the inventory count reported by the stubbed clients.
 */
@Test
public void testGetProduct() {
  String expectedTitle = "The Product Title.";
  int expectedInventories = 5;

  // Stub both downstream clients.
  when(informationClient.getProductTitle()).thenReturn(expectedTitle);
  when(inventoryClient.getProductInventories()).thenReturn(expectedInventories);

  Product product = aggregator.getProduct();

  assertEquals(expectedTitle, product.getTitle());
  assertEquals(expectedInventories, product.getProductInventories());
}
@SuppressWarnings("unchecked") /* mock calls don't do generics */ @Test public void testAggregatorUnsubscribe() { FuncN<String> zipr = getConcatZipr(); /* create the aggregator which will execute the zip function when all Observables provide values */ Aggregator<String> a = new Aggregator<String>(zipr); /* define a Observer to receive aggregated events */ Observer<String> aObserver = mock(Observer.class); Subscription subscription = a.call(aObserver); /* mock the Observable Observers that are 'pushing' data for us */ ZipObserver<String, String> r1 = mock(ZipObserver.class); ZipObserver<String, String> r2 = mock(ZipObserver.class); /* pretend we're starting up */ a.addObserver(r1); a.addObserver(r2); /* simulate the Observables pushing data into the aggregator */ a.next(r1, "hello"); a.next(r2, "world"); verify(aObserver, never()).onError(any(Exception.class)); verify(aObserver, never()).onCompleted(); verify(aObserver, times(1)).onNext("helloworld"); subscription.unsubscribe(); a.next(r1, "hello"); a.next(r2, "again"); verify(aObserver, times(0)).onError(any(Exception.class)); verify(aObserver, never()).onCompleted(); // we don't want to be called again after an error verify(aObserver, times(0)).onNext("helloagain"); }
/**
 * Returns an aggregator that collects all values and then appends the collected sequence
 * again until it occurs {@code copies} times in total.
 *
 * <p>NOTE(review): this relies on {@code getTemporaryVector()} exposing the values added so
 * far; its exact view/copy semantics are not visible here. The size is read once, before any
 * repetition is appended.
 *
 * @param vb supplies the vector builder used as accumulator
 * @param copies total number of occurrences of the sequence; values of one or less append
 *     nothing
 * @param <T> the input type
 * @return a repeating aggregator whose result is the built vector
 */
public static <T> Aggregator<T, Vector, ?> repeat(Supplier<Vector.Builder> vb, int copies) {
  return Aggregator.of(
      vb,
      Vector.Builder::add,
      (builder) -> {
        Vector collected = builder.getTemporaryVector();
        int length = collected.size();
        // Start at 1: the first occurrence is already in the builder.
        for (int pass = 1; pass < copies; pass++) {
          for (int pos = 0; pos < length; pos++) {
            builder.add(collected, pos);
          }
        }
        return builder.build();
      });
}
/**
 * Returns an aggregator that feeds every non-NA value into a {@code FastStatistics} instance
 * and reports its variance.
 *
 * @return a variance aggregator; yields {@code Na.from(Double.class)} when no value was added
 */
public static Aggregator<Number, Double, ?> var() {
  return Aggregator.of(
      FastStatistics::new,
      (stats, value) -> {
        if (Is.NA(value)) {
          return;
        }
        stats.addValue(value.doubleValue());
      },
      (stats) -> {
        if (stats.getN() != 0) {
          return stats.getVariance();
        }
        return Na.from(Double.class);
      });
}
/**
 * Returns an aggregator that counts how often each distinct value occurs.
 *
 * <p>The result is a vector of the counts whose index holds the corresponding values, both in
 * the iteration order of the underlying {@link HashMap}.
 *
 * @param <T> the input type
 * @return a value-counting aggregator
 */
public static <T> Aggregator<T, Vector, ?> valueCounts() {
  return Aggregator.of(
      () -> new HashMap<T, Integer>(),
      (counts, value) -> counts.merge(value, 1, Integer::sum),
      (counts) -> {
        Vector.Builder countBuilder = Vec.inferringBuilder();
        Index.Builder indexBuilder = new HashIndex.Builder();
        for (Map.Entry<T, Integer> entry : counts.entrySet()) {
          countBuilder.add(entry.getValue());
          indexBuilder.add(entry.getKey());
        }
        Vector result = countBuilder.build();
        result.setIndex(indexBuilder.build());
        return result;
      });
}
/**
 * Returns an aggregator that folds all values into one using {@code operator}.
 *
 * <p>The first value seeds the running result; every later value {@code t} replaces it with
 * {@code operator.apply(result, t)}. {@code null} marks "no result yet", so a {@code null}
 * running result is re-seeded by the next value, and an aggregation without input yields
 * {@code null}.
 *
 * @param operator the associative combining function
 * @param <T> the input and result type
 * @return a reducing aggregator
 */
public static <T> Aggregator<T, T, ?> reducing(BinaryOperator<T> operator) {
  class Value implements Consumer<T> {
    private T value;

    @Override
    public void accept(T t) {
      if (value == null) {
        value = t;
      } else {
        // The combined result must be stored; discarding it would leave the first element
        // as the final answer.
        value = operator.apply(value, t);
      }
    }
  }
  return Aggregator.of(Value::new, Value::accept, (acc) -> acc.value);
}
@Override public void processElement(ProcessContext c) { if (c.element().trim().isEmpty()) { emptyLines.addValue(1L); } // Split the line into words. String[] words = c.element().split("[^a-zA-Z']+"); // Output each word encountered into the output PCollection. for (String word : words) { if (!word.isEmpty()) { c.output(word); } } }
/**
 * Returns an aggregator that feeds every non-NA value into a {@code FastStatistics} instance
 * and reports its maximum.
 *
 * @return a maximum aggregator; yields {@code Na.from(Double.class)} when no value was added
 */
public static Aggregator<Double, Double, ?> max() {
  return Aggregator.of(
      FastStatistics::new,
      (stats, value) -> {
        if (Is.NA(value)) {
          return;
        }
        stats.addValue(value);
      },
      (stats) -> {
        if (stats.getN() != 0) {
          return stats.getMax();
        }
        return Na.from(Double.class);
      });
}
/**
 * Returns an aggregator that feeds the {@code doubleValue()} of every non-NA value into a
 * {@code FastStatistics} instance and reports its minimum.
 *
 * @return a minimum aggregator; yields {@code Na.from(Double.class)} when no value was added
 */
public static Aggregator<Number, Number, ?> min() {
  return Aggregator.of(
      FastStatistics::new,
      (stats, value) -> {
        if (Is.NA(value)) {
          return;
        }
        stats.addValue(value.doubleValue());
      },
      (stats) -> {
        if (stats.getN() != 0) {
          return stats.getMin();
        }
        return Na.from(Double.class);
      });
}
/**
 * Returns an aggregator that maps each distinct value to its relative frequency, i.e. its
 * number of occurrences divided by the total number of values seen.
 *
 * @param <T> the input type
 * @return an aggregator producing a value-to-frequency map
 */
public static <T> Aggregator<T, Map<T, Double>, ?> normalizedValueCounts() {
  /** Accumulator: per-value occurrence counts plus the overall number of values. */
  class MapCounter {
    private HashMap<T, Integer> map = new HashMap<>();
    private int count = 0;
  }
  return Aggregator.of(
      MapCounter::new,
      (MapCounter counter, T value) -> {
        counter.count++;
        counter.map.merge(value, 1, Integer::sum);
      },
      counter -> {
        Map<T, Double> frequencies = new HashMap<>();
        for (Map.Entry<T, Integer> entry : counter.map.entrySet()) {
          // The cast forces floating-point division.
          frequencies.put(entry.getKey(), entry.getValue() / (double) counter.count);
        }
        return frequencies;
      });
}
/**
 * Verifies the behavior when one source completes while it still has a value pending: the
 * first pair is emitted, and completing the second source triggers completion without any
 * further value.
 */
@SuppressWarnings("unchecked")
/* mock calls don't do generics */
@Test
public void testAggregatorEarlyCompletion() {
  FuncN<String> concatZipr = getConcatZipr();

  /* the aggregator runs the zip function once every registered observer has supplied a value */
  Aggregator<String> aggregator = new Aggregator<String>(concatZipr);

  /* downstream observer receiving the aggregated events */
  Observer<String> downstream = mock(Observer.class);
  aggregator.call(downstream);

  /* mocked upstream observers */
  ZipObserver<String, String> early = mock(ZipObserver.class);
  ZipObserver<String, String> late = mock(ZipObserver.class);

  /* register the sources with the aggregator */
  aggregator.addObserver(early);
  aggregator.addObserver(late);

  /* the first source pushes two values and completes before the second pushes anything */
  aggregator.next(early, "one");
  aggregator.next(early, "two");
  aggregator.complete(early);
  aggregator.next(late, "A");

  InOrder ordered = inOrder(downstream);

  ordered.verify(downstream, never()).onError(any(Exception.class));
  ordered.verify(downstream, never()).onCompleted();
  ordered.verify(downstream, times(1)).onNext("oneA");

  aggregator.complete(late);

  ordered.verify(downstream, never()).onError(any(Exception.class));
  ordered.verify(downstream, times(1)).onCompleted();
  ordered.verify(downstream, never()).onNext(anyString());
}
@Override public ExampleSet apply(ExampleSet exampleSet) throws OperatorException { // creating data structures for building aggregates List<AggregationFunction> aggregationFunctions = createAggreationFunctions(exampleSet); // getting attributes that define groups and weights Attribute[] groupAttributes = getMatchingAttributes( exampleSet.getAttributes(), getParameterAsString(PARAMETER_GROUP_BY_ATTRIBUTES)); Attribute weightAttribute = exampleSet.getAttributes().getWeight(); boolean useWeights = weightAttribute != null; // running over exampleSet and aggregate data of each example AggregationTreeNode rootNode = new AggregationTreeNode(); LeafAggregationTreeNode leafNode = null; if (groupAttributes.length == 0) { // if no grouping, we will directly insert into leaf node leafNode = new LeafAggregationTreeNode(aggregationFunctions); } for (Example example : exampleSet) { if (groupAttributes.length > 0) { AggregationTreeNode currentNode = rootNode; // now traversing aggregation tree for m-1 group attributes for (int i = 0; i < groupAttributes.length - 1; i++) { Attribute currentAttribute = groupAttributes[i]; if (currentAttribute.isNominal()) { currentNode = currentNode.getOrCreateChild(example.getValueAsString(currentAttribute)); } else { currentNode = currentNode.getOrCreateChild(example.getValue(currentAttribute)); } } // now we have to get the leaf node containing the aggregators Attribute currentAttribute = groupAttributes[groupAttributes.length - 1]; if (currentAttribute.isNominal()) { leafNode = currentNode.getOrCreateLeaf( example.getValueAsString(currentAttribute), aggregationFunctions); } else { leafNode = currentNode.getOrCreateLeaf(example.getValue(currentAttribute), aggregationFunctions); } } // now count current example if (!useWeights) leafNode.count(example); else leafNode.count(example, example.getValue(weightAttribute)); } // now derive new example set from aggregated values boolean isCountingAllCombinations = 
getParameterAsBoolean(PARAMETER_ALL_COMBINATIONS); // building new attributes from grouping attributes and aggregation functions Attribute[] newAttributes = new Attribute[groupAttributes.length + aggregationFunctions.size()]; for (int i = 0; i < groupAttributes.length; i++) { newAttributes[i] = AttributeFactory.createAttribute(groupAttributes[i]); } int i = groupAttributes.length; for (AggregationFunction function : aggregationFunctions) { newAttributes[i] = function.getTargetAttribute(); i++; } // creating example table MemoryExampleTable table = new MemoryExampleTable(newAttributes); ; DataRowFactory factory = new DataRowFactory(DataRowFactory.TYPE_DOUBLE_ARRAY, '.'); double[] dataOfUpperLevels = new double[groupAttributes.length]; // prepare empty lists ArrayList<List<Aggregator>> allAggregators = new ArrayList<List<Aggregator>>(); for (int aggregatorIdx = 0; aggregatorIdx < aggregationFunctions.size(); ++aggregatorIdx) { allAggregators.add(new ArrayList<Aggregator>()); } ArrayList<double[]> allGroupCombinations = new ArrayList<double[]>(); if (groupAttributes.length > 0) { // going through all possible groups recursively parseTree( rootNode, groupAttributes, dataOfUpperLevels, 0, allGroupCombinations, allAggregators, factory, newAttributes, isCountingAllCombinations, aggregationFunctions); } else { // just enter values from single leaf node parseLeaf( leafNode, dataOfUpperLevels, allGroupCombinations, allAggregators, factory, newAttributes, aggregationFunctions); } // apply post-processing int currentFunctionIdx = 0; for (AggregationFunction aggregationFunction : aggregationFunctions) { aggregationFunction.postProcessing(allAggregators.get(currentFunctionIdx)); ++currentFunctionIdx; } // write data into table int currentRow = 0; for (double[] groupValues : allGroupCombinations) { double[] rowData = new double[newAttributes.length]; // copy group values into row System.arraycopy(groupValues, 0, rowData, 0, groupValues.length); DoubleArrayDataRow dataRow = new 
DoubleArrayDataRow(rowData); // copy aggregated values into row int currentColumn = groupValues.length; for (List<Aggregator> aggregatorsForColumn : allAggregators) { Aggregator aggregatorForCurrentCell = aggregatorsForColumn.get(currentRow); Attribute currentAttribute = newAttributes[currentColumn]; if (aggregatorForCurrentCell != null) { aggregatorForCurrentCell.set(currentAttribute, dataRow); } else { aggregationFunctions .get(currentColumn - groupAttributes.length) .setDefault(currentAttribute, dataRow); } ++currentColumn; } table.addDataRow(dataRow); ++currentRow; } // postprocessing for remaining compatibility: Old versions automatically added group "all". // Must remain this way for old operator // version if (getCompatibilityLevel().isAtMost(VERSION_5_1_6)) { if (groupAttributes.length == 0) { Attribute resultGroupAttribute = AttributeFactory.createAttribute(GENERIC_GROUP_NAME, Ontology.NOMINAL); table.addAttribute(resultGroupAttribute); table .getDataRow(0) .set( resultGroupAttribute, resultGroupAttribute.getMapping().mapString(GENERIC_ALL_NAME)); ExampleSet resultSet = table.createExampleSet(); resultSet.getAnnotations().addAll(exampleSet.getAnnotations()); for (Attribute attribute : newAttributes) { resultSet.getAttributes().remove(attribute); resultSet.getAttributes().addRegular(attribute); } return resultSet; } else { // make attributes nominal ExampleSet resultSet = table.createExampleSet(); resultSet.getAnnotations().addAll(exampleSet.getAnnotations()); try { NumericToNominal toNominalOperator = OperatorService.createOperator(NumericToPolynominal.class); toNominalOperator.setParameter( AttributeSubsetSelector.PARAMETER_FILTER_TYPE, AttributeSubsetSelector.CONDITION_REGULAR_EXPRESSION + ""); toNominalOperator.setParameter( RegexpAttributeFilter.PARAMETER_REGULAR_EXPRESSION, getParameterAsString(PARAMETER_GROUP_BY_ATTRIBUTES)); toNominalOperator.setParameter( AttributeSubsetSelector.PARAMETER_INCLUDE_SPECIAL_ATTRIBUTES, "true"); return 
toNominalOperator.apply(resultSet); } catch (OperatorCreationException e) { // otherwise compatibility could not be ensured return resultSet; } } } // for recent version table is correct: Deliver example set ExampleSet resultSet = table.createExampleSet(); resultSet.getAnnotations().addAll(exampleSet.getAnnotations()); return resultSet; }
/**
 * Counts the given example with the given weight for every registered {@link Aggregator}.
 *
 * <p>If no weight attribute is available, prefer {@link #count(Example)}, which might be
 * implemented more efficiently.
 *
 * @param example the example to count
 * @param weight the weight passed to each aggregator together with the example
 */
public void count(Example example, double weight) {
  for (Aggregator registered : aggregators) {
    registered.count(example, weight);
  }
}
/**
 * Counts the given example for every registered {@link Aggregator}, without a weight.
 *
 * @param example the example to count
 */
public void count(Example example) {
  for (Aggregator registered : aggregators) {
    registered.count(example);
  }
}
/** The aggregator created by the factory's sampler method must be named "sampler". */
@Test
public void test_createSamplerAggregator() {
  assertThat(AggregatorFactory.createSamplerAggregator().getName(), equalTo("sampler"));
}
/** The aggregator created by the factory's diff method must be named "diff". */
@Test
public void test_createDiffAggregator() {
  assertThat(AggregatorFactory.createDiffAggregator().getName(), equalTo("diff"));
}
/**
 * {@inheritDoc}
 *
 * <p>After delegating to the superclass, the running sum is reset to zero for the new key.
 */
@Override
public void start(final EmitKey key) {
  super.start(key);

  // a new key starts with a fresh sum
  this.sum = 0;
}
/**
 * Reports the error to the aggregator, identifying this observer as its origin.
 *
 * @param e the exception to report
 */
@Override
public void onError(Exception e) {
  a.error(this, e);
}