/**
  * Returns a {@code PTable} that contains the unique elements of this collection mapped to a count
  * of their occurrences.
  */
 public static <S> PTable<S, Long> count(PCollection<S> collect) {
   PTypeFamily tf = collect.getTypeFamily();
   return collect
       .parallelDo(
           "Aggregate.count",
           new MapFn<S, Pair<S, Long>>() {
             public Pair<S, Long> map(S input) {
               return Pair.of(input, 1L);
             }
           },
           tf.tableOf(collect.getPType(), tf.longs()))
       .groupByKey()
       .combineValues(Aggregators.SUM_LONGS());
 }
 /**
  * Returns the number of elements in the provided PCollection.
  *
  * @param collect The PCollection whose elements should be counted.
  * @param <S> The type of the PCollection.
  * @return A {@code PObject} containing the number of elements in the {@code PCollection}.
  */
 public static <S> PObject<Long> length(PCollection<S> collect) {
   PTypeFamily tf = collect.getTypeFamily();
   PTable<Integer, Long> countTable =
       collect
           .parallelDo(
               "Aggregate.count",
               new MapFn<S, Pair<Integer, Long>>() {
                 public Pair<Integer, Long> map(S input) {
                   return Pair.of(1, 1L);
                 }
               },
               tf.tableOf(tf.ints(), tf.longs()))
           .groupByKey()
           .combineValues(Aggregators.SUM_LONGS());
   PCollection<Long> count = countTable.values();
   return new FirstElementPObject<Long>(count);
 }
Beispiel #3
0
 @Override
 public PTable<K, V> combineValues(Aggregator<V> combineAgg, Aggregator<V> reduceAgg) {
   return combineValues(
       Aggregators.<K, V>toCombineFn(combineAgg, parent.getValueType()),
       Aggregators.<K, V>toCombineFn(reduceAgg, parent.getValueType()));
 }