Exemplo n.º 1
0
  @Test
  public void testSave() {
    DataFrame df = (new XmlReader()).withRowTag(booksFileTag).xmlFile(sqlContext, booksFile);
    TestUtils.deleteRecursively(new File(tempDir));
    df.select("price", "description").save(tempDir, "com.databricks.spark.xml");

    DataFrame newDf = (new XmlReader()).xmlFile(sqlContext, tempDir);
    int result = newDf.select("price").collect().length;
    Assert.assertEquals(result, numBooks);
  }
Exemplo n.º 2
0
 void validateDataFrameWithBeans(Bean bean, DataFrame df) {
   StructType schema = df.schema();
   Assert.assertEquals(
       new StructField("a", DoubleType$.MODULE$, false, Metadata.empty()), schema.apply("a"));
   Assert.assertEquals(
       new StructField("b", new ArrayType(IntegerType$.MODULE$, true), true, Metadata.empty()),
       schema.apply("b"));
   ArrayType valueType = new ArrayType(DataTypes.IntegerType, false);
   MapType mapType = new MapType(DataTypes.StringType, valueType, true);
   Assert.assertEquals(new StructField("c", mapType, true, Metadata.empty()), schema.apply("c"));
   Assert.assertEquals(
       new StructField("d", new ArrayType(DataTypes.StringType, true), true, Metadata.empty()),
       schema.apply("d"));
   Row first = df.select("a", "b", "c", "d").first();
   Assert.assertEquals(bean.getA(), first.getDouble(0), 0.0);
   // Now Java lists and maps are converted to Scala Seq's and Map's. Once we get a Seq below,
   // verify that it has the expected length, and contains expected elements.
   Seq<Integer> result = first.getAs(1);
   Assert.assertEquals(bean.getB().length, result.length());
   for (int i = 0; i < result.length(); i++) {
     Assert.assertEquals(bean.getB()[i], result.apply(i));
   }
   @SuppressWarnings("unchecked")
   Seq<Integer> outputBuffer = (Seq<Integer>) first.getJavaMap(2).get("hello");
   Assert.assertArrayEquals(
       bean.getC().get("hello"),
       Ints.toArray(JavaConverters.seqAsJavaListConverter(outputBuffer).asJava()));
   Seq<String> d = first.getAs(3);
   Assert.assertEquals(bean.getD().size(), d.length());
   for (int i = 0; i < d.length(); i++) {
     Assert.assertEquals(bean.getD().get(i), d.apply(i));
   }
 }
Exemplo n.º 3
0
  @Test
  public void testLoad() {
    HashMap<String, String> options = new HashMap<String, String>();
    options.put("rowTag", booksFileTag);
    options.put("path", booksFile);

    DataFrame df = sqlContext.load("com.databricks.spark.xml", options);
    int result = df.select("description").collect().length;
    Assert.assertEquals(result, numBooks);
  }
Exemplo n.º 4
0
  /** See SPARK-5904. Abstract vararg methods defined in Scala do not work in Java. */
  @Test
  public void testVarargMethods() {
    DataFrame df = context.table("testData");

    df.toDF("key1", "value1");

    df.select("key", "value");
    df.select(col("key"), col("value"));
    df.selectExpr("key", "value + 1");

    df.sort("key", "value");
    df.sort(col("key"), col("value"));
    df.orderBy("key", "value");
    df.orderBy(col("key"), col("value"));

    df.groupBy("key", "value").agg(col("key"), col("value"), sum("value"));
    df.groupBy(col("key"), col("value")).agg(col("key"), col("value"), sum("value"));
    df.agg(first("key"), sum("value"));

    df.groupBy().avg("key");
    df.groupBy().mean("key");
    df.groupBy().max("key");
    df.groupBy().min("key");
    df.groupBy().stddev("key");
    df.groupBy().sum("key");

    // Varargs in column expressions
    df.groupBy().agg(countDistinct("key", "value"));
    df.groupBy().agg(countDistinct(col("key"), col("value")));
    df.select(coalesce(col("key")));

    // Varargs with mathfunctions
    DataFrame df2 = context.table("testData2");
    df2.select(exp("a"), exp("b"));
    df2.select(exp(log("a")));
    df2.select(pow("a", "a"), pow("b", 2.0));
    df2.select(pow(col("a"), col("b")), exp("b"));
    df2.select(sin("a"), acos("b"));

    df2.select(rand(), acos("b"));
    df2.select(col("*"), randn(5L));
  }
Exemplo n.º 5
0
 @Test
 public void testCollectAndTake() {
   DataFrame df = context.table("testData").filter("key = 1 or key = 2 or key = 3");
   Assert.assertEquals(3, df.select("key").collectAsList().size());
   Assert.assertEquals(2, df.select("key").takeAsList(2).size());
 }
Exemplo n.º 6
0
 @Test
 public void testExecution() {
   DataFrame df = context.table("testData").filter("key = 1");
   Assert.assertEquals(1, df.select("key").collect()[0].get(0));
 }
Exemplo n.º 7
0
 @Test
 public void testXmlParser() {
   DataFrame df = (new XmlReader()).withRowTag(booksFileTag).xmlFile(sqlContext, booksFile);
   int result = df.select("id").collect().length;
   Assert.assertEquals(result, numBooks);
 }