コード例 #1
0
ファイル: CrunchDatasets.java プロジェクト: rbrush/kite
 private static <E> PCollection<E> partition(PCollection<E> collection, int numReducers) {
   PType<E> type = collection.getPType();
   PTableType<E, Void> tableType = Avros.tableOf(type, Avros.nulls());
   PTable<E, Void> table = collection.parallelDo(new AsKeyTable<E>(), tableType);
   PGroupedTable<E, Void> grouped =
       numReducers > 0 ? table.groupByKey(numReducers) : table.groupByKey();
   return grouped.ungroup().keys();
 }
コード例 #2
0
  /** Returns the smallest numerical element from the input collection. */
  public static <S> PObject<S> min(PCollection<S> collect) {
    Class<S> clazz = collect.getPType().getTypeClass();
    if (!clazz.isPrimitive() && !Comparable.class.isAssignableFrom(clazz)) {
      throw new IllegalArgumentException(
          "Can only get min for Comparable elements, not for: "
              + collect.getPType().getTypeClass());
    }
    PTypeFamily tf = collect.getTypeFamily();
    PCollection<S> minCollect =
        PTables.values(
            collect
                .parallelDo(
                    "min",
                    new DoFn<S, Pair<Boolean, S>>() {
                      private transient S min = null;

                      public void process(S input, Emitter<Pair<Boolean, S>> emitter) {
                        if (min == null || ((Comparable<S>) min).compareTo(input) > 0) {
                          min = input;
                        }
                      }

                      public void cleanup(Emitter<Pair<Boolean, S>> emitter) {
                        if (min != null) {
                          emitter.emit(Pair.of(false, min));
                        }
                      }
                    },
                    tf.tableOf(tf.booleans(), collect.getPType()))
                .groupByKey()
                .combineValues(
                    new CombineFn<Boolean, S>() {
                      public void process(
                          Pair<Boolean, Iterable<S>> input, Emitter<Pair<Boolean, S>> emitter) {
                        S min = null;
                        for (S v : input.second()) {
                          if (min == null || ((Comparable<S>) min).compareTo(v) > 0) {
                            min = v;
                          }
                        }
                        emitter.emit(Pair.of(input.first(), min));
                      }
                    }));
    return new FirstElementPObject<S>(minCollect);
  }
コード例 #3
0
 /**
  * Returns a {@code PTable} that contains the unique elements of this collection mapped to a count
  * of their occurrences.
  */
 public static <S> PTable<S, Long> count(PCollection<S> collect) {
   PTypeFamily tf = collect.getTypeFamily();
   return collect
       .parallelDo(
           "Aggregate.count",
           new MapFn<S, Pair<S, Long>>() {
             public Pair<S, Long> map(S input) {
               return Pair.of(input, 1L);
             }
           },
           tf.tableOf(collect.getPType(), tf.longs()))
       .groupByKey()
       .combineValues(Aggregators.SUM_LONGS());
 }