Example #1
0
  @Test
  public void testTypedAggregation() {
    Encoder<Tuple2<String, Integer>> encoder = Encoders.tuple(Encoders.STRING(), Encoders.INT());
    List<Tuple2<String, Integer>> data =
        Arrays.asList(tuple2("a", 1), tuple2("a", 2), tuple2("b", 3));
    Dataset<Tuple2<String, Integer>> ds = context.createDataset(data, encoder);

    GroupedDataset<String, Tuple2<String, Integer>> grouped =
        ds.groupBy(
            new MapFunction<Tuple2<String, Integer>, String>() {
              @Override
              public String call(Tuple2<String, Integer> value) throws Exception {
                return value._1();
              }
            },
            Encoders.STRING());

    Dataset<Tuple2<String, Integer>> agged =
        grouped.agg(new IntSumOf().toColumn(Encoders.INT(), Encoders.INT()));
    Assert.assertEquals(Arrays.asList(tuple2("a", 3), tuple2("b", 3)), agged.collectAsList());

    Dataset<Tuple2<String, Integer>> agged2 =
        grouped
            .agg(new IntSumOf().toColumn(Encoders.INT(), Encoders.INT()))
            .as(Encoders.tuple(Encoders.STRING(), Encoders.INT()));
    Assert.assertEquals(
        Arrays.asList(new Tuple2<>("a", 3), new Tuple2<>("b", 3)), agged2.collectAsList());
  }
Example #2
0
  @Test
  public void testTupleEncoder() {
    Encoder<Tuple2<Integer, String>> encoder2 = Encoders.tuple(Encoders.INT(), Encoders.STRING());
    List<Tuple2<Integer, String>> data2 = Arrays.asList(tuple2(1, "a"), tuple2(2, "b"));
    Dataset<Tuple2<Integer, String>> ds2 = context.createDataset(data2, encoder2);
    Assert.assertEquals(data2, ds2.collectAsList());

    Encoder<Tuple3<Integer, Long, String>> encoder3 =
        Encoders.tuple(Encoders.INT(), Encoders.LONG(), Encoders.STRING());
    List<Tuple3<Integer, Long, String>> data3 =
        Arrays.asList(new Tuple3<Integer, Long, String>(1, 2L, "a"));
    Dataset<Tuple3<Integer, Long, String>> ds3 = context.createDataset(data3, encoder3);
    Assert.assertEquals(data3, ds3.collectAsList());

    Encoder<Tuple4<Integer, String, Long, String>> encoder4 =
        Encoders.tuple(Encoders.INT(), Encoders.STRING(), Encoders.LONG(), Encoders.STRING());
    List<Tuple4<Integer, String, Long, String>> data4 =
        Arrays.asList(new Tuple4<Integer, String, Long, String>(1, "b", 2L, "a"));
    Dataset<Tuple4<Integer, String, Long, String>> ds4 = context.createDataset(data4, encoder4);
    Assert.assertEquals(data4, ds4.collectAsList());

    Encoder<Tuple5<Integer, String, Long, String, Boolean>> encoder5 =
        Encoders.tuple(
            Encoders.INT(),
            Encoders.STRING(),
            Encoders.LONG(),
            Encoders.STRING(),
            Encoders.BOOLEAN());
    List<Tuple5<Integer, String, Long, String, Boolean>> data5 =
        Arrays.asList(new Tuple5<Integer, String, Long, String, Boolean>(1, "b", 2L, "a", true));
    Dataset<Tuple5<Integer, String, Long, String, Boolean>> ds5 =
        context.createDataset(data5, encoder5);
    Assert.assertEquals(data5, ds5.collectAsList());
  }
Example #3
0
  @Test
  public void testNestedTupleEncoder() {
    // test ((int, string), string)
    Encoder<Tuple2<Tuple2<Integer, String>, String>> encoder =
        Encoders.tuple(Encoders.tuple(Encoders.INT(), Encoders.STRING()), Encoders.STRING());
    List<Tuple2<Tuple2<Integer, String>, String>> data =
        Arrays.asList(tuple2(tuple2(1, "a"), "a"), tuple2(tuple2(2, "b"), "b"));
    Dataset<Tuple2<Tuple2<Integer, String>, String>> ds = context.createDataset(data, encoder);
    Assert.assertEquals(data, ds.collectAsList());

    // test (int, (string, string, long))
    Encoder<Tuple2<Integer, Tuple3<String, String, Long>>> encoder2 =
        Encoders.tuple(
            Encoders.INT(), Encoders.tuple(Encoders.STRING(), Encoders.STRING(), Encoders.LONG()));
    List<Tuple2<Integer, Tuple3<String, String, Long>>> data2 =
        Arrays.asList(tuple2(1, new Tuple3<String, String, Long>("a", "b", 3L)));
    Dataset<Tuple2<Integer, Tuple3<String, String, Long>>> ds2 =
        context.createDataset(data2, encoder2);
    Assert.assertEquals(data2, ds2.collectAsList());

    // test (int, ((string, long), string))
    Encoder<Tuple2<Integer, Tuple2<Tuple2<String, Long>, String>>> encoder3 =
        Encoders.tuple(
            Encoders.INT(),
            Encoders.tuple(Encoders.tuple(Encoders.STRING(), Encoders.LONG()), Encoders.STRING()));
    List<Tuple2<Integer, Tuple2<Tuple2<String, Long>, String>>> data3 =
        Arrays.asList(tuple2(1, tuple2(tuple2("a", 2L), "b")));
    Dataset<Tuple2<Integer, Tuple2<Tuple2<String, Long>, String>>> ds3 =
        context.createDataset(data3, encoder3);
    Assert.assertEquals(data3, ds3.collectAsList());
  }
Example #4
0
  @Test
  public void testSelect() {
    List<Integer> data = Arrays.asList(2, 6);
    Dataset<Integer> ds = context.createDataset(data, Encoders.INT());

    Dataset<Tuple2<Integer, String>> selected =
        ds.select(expr("value + 1"), col("value").cast("string"))
            .as(Encoders.tuple(Encoders.INT(), Encoders.STRING()));

    Assert.assertEquals(Arrays.asList(tuple2(3, "2"), tuple2(7, "6")), selected.collectAsList());
  }
Example #5
0
 @Test
 public void testPrimitiveEncoder() {
   Encoder<Tuple5<Double, BigDecimal, Date, Timestamp, Float>> encoder =
       Encoders.tuple(
           Encoders.DOUBLE(),
           Encoders.DECIMAL(),
           Encoders.DATE(),
           Encoders.TIMESTAMP(),
           Encoders.FLOAT());
   List<Tuple5<Double, BigDecimal, Date, Timestamp, Float>> data =
       Arrays.asList(
           new Tuple5<Double, BigDecimal, Date, Timestamp, Float>(
               1.7976931348623157E308,
               new BigDecimal("0.922337203685477589"),
               Date.valueOf("1970-01-01"),
               new Timestamp(System.currentTimeMillis()),
               Float.MAX_VALUE));
   Dataset<Tuple5<Double, BigDecimal, Date, Timestamp, Float>> ds =
       context.createDataset(data, encoder);
   Assert.assertEquals(data, ds.collectAsList());
 }