Example #1
0
  @Test
  public void testGroupByColumn() {
    List<String> data = Arrays.asList("a", "foo", "bar");
    Dataset<String> ds = context.createDataset(data, Encoders.STRING());
    GroupedDataset<Integer, String> grouped =
        ds.groupBy(length(col("value"))).keyAs(Encoders.INT());

    Dataset<String> mapped =
        grouped.mapGroups(
            new MapGroupsFunction<Integer, String, String>() {
              @Override
              public String call(Integer key, Iterator<String> data) throws Exception {
                StringBuilder sb = new StringBuilder(key.toString());
                while (data.hasNext()) {
                  sb.append(data.next());
                }
                return sb.toString();
              }
            },
            Encoders.STRING());

    Assert.assertEquals(Arrays.asList("1a", "3foobar"), mapped.collectAsList());
  }
Example #2
0
  @Test
  public void testGroupBy() {
    List<String> data = Arrays.asList("a", "foo", "bar");
    Dataset<String> ds = context.createDataset(data, Encoders.STRING());
    GroupedDataset<Integer, String> grouped =
        ds.groupBy(
            new MapFunction<String, Integer>() {
              @Override
              public Integer call(String v) throws Exception {
                return v.length();
              }
            },
            Encoders.INT());

    Dataset<String> mapped =
        grouped.mapGroups(
            new MapGroupsFunction<Integer, String, String>() {
              @Override
              public String call(Integer key, Iterator<String> values) throws Exception {
                StringBuilder sb = new StringBuilder(key.toString());
                while (values.hasNext()) {
                  sb.append(values.next());
                }
                return sb.toString();
              }
            },
            Encoders.STRING());

    Assert.assertEquals(Arrays.asList("1a", "3foobar"), mapped.collectAsList());

    Dataset<String> flatMapped =
        grouped.flatMapGroups(
            new FlatMapGroupsFunction<Integer, String, String>() {
              @Override
              public Iterable<String> call(Integer key, Iterator<String> values) throws Exception {
                StringBuilder sb = new StringBuilder(key.toString());
                while (values.hasNext()) {
                  sb.append(values.next());
                }
                return Collections.singletonList(sb.toString());
              }
            },
            Encoders.STRING());

    Assert.assertEquals(Arrays.asList("1a", "3foobar"), flatMapped.collectAsList());

    Dataset<Tuple2<Integer, String>> reduced =
        grouped.reduce(
            new ReduceFunction<String>() {
              @Override
              public String call(String v1, String v2) throws Exception {
                return v1 + v2;
              }
            });

    Assert.assertEquals(
        Arrays.asList(tuple2(1, "a"), tuple2(3, "foobar")), reduced.collectAsList());

    List<Integer> data2 = Arrays.asList(2, 6, 10);
    Dataset<Integer> ds2 = context.createDataset(data2, Encoders.INT());
    GroupedDataset<Integer, Integer> grouped2 =
        ds2.groupBy(
            new MapFunction<Integer, Integer>() {
              @Override
              public Integer call(Integer v) throws Exception {
                return v / 2;
              }
            },
            Encoders.INT());

    Dataset<String> cogrouped =
        grouped.cogroup(
            grouped2,
            new CoGroupFunction<Integer, String, Integer, String>() {
              @Override
              public Iterable<String> call(
                  Integer key, Iterator<String> left, Iterator<Integer> right) throws Exception {
                StringBuilder sb = new StringBuilder(key.toString());
                while (left.hasNext()) {
                  sb.append(left.next());
                }
                sb.append("#");
                while (right.hasNext()) {
                  sb.append(right.next());
                }
                return Collections.singletonList(sb.toString());
              }
            },
            Encoders.STRING());

    Assert.assertEquals(Arrays.asList("1a#2", "3foobar#6", "5#10"), cogrouped.collectAsList());
  }