/**
 * Runs {@code freqItems} over column "a" of "testData2" with a support of 0.2 and checks
 * that the value 1 appears in the sequence held by the first column of the first result row.
 */
@Test
public void testFrequentItems() {
  DataFrame source = context.table("testData2");
  DataFrame frequent = source.stat().freqItems(new String[] {"a"}, 0.2);
  Row[] collected = frequent.collect();
  Assert.assertTrue(collected[0].getSeq(0).contains(1));
}
@Ignore public void testShow() { // This test case is intended ignored, but to make sure it compiles correctly DataFrame df = context.table("testData"); df.show(); df.show(1000); }
/** See SPARK-5904. Abstract vararg methods defined in Scala do not work in Java. */ @Test public void testVarargMethods() { DataFrame df = context.table("testData"); df.toDF("key1", "value1"); df.select("key", "value"); df.select(col("key"), col("value")); df.selectExpr("key", "value + 1"); df.sort("key", "value"); df.sort(col("key"), col("value")); df.orderBy("key", "value"); df.orderBy(col("key"), col("value")); df.groupBy("key", "value").agg(col("key"), col("value"), sum("value")); df.groupBy(col("key"), col("value")).agg(col("key"), col("value"), sum("value")); df.agg(first("key"), sum("value")); df.groupBy().avg("key"); df.groupBy().mean("key"); df.groupBy().max("key"); df.groupBy().min("key"); df.groupBy().stddev("key"); df.groupBy().sum("key"); // Varargs in column expressions df.groupBy().agg(countDistinct("key", "value")); df.groupBy().agg(countDistinct(col("key"), col("value"))); df.select(coalesce(col("key"))); // Varargs with mathfunctions DataFrame df2 = context.table("testData2"); df2.select(exp("a"), exp("b")); df2.select(exp(log("a"))); df2.select(pow("a", "a"), pow("b", 2.0)); df2.select(pow(col("a"), col("b")), exp("b")); df2.select(sin("a"), acos("b")); df2.select(rand(), acos("b")); df2.select(col("*"), randn(5L)); }
/**
 * Groups "courseSales" by year, pivots on course over the values "dotNET" and "Java",
 * sums earnings, and checks the two resulting rows (ordered by year) against the
 * expected year and per-course totals.
 */
@Test
public void pivot() {
  DataFrame sales = context.table("courseSales");
  DataFrame pivoted = sales.groupBy("year")
    .pivot("course", Arrays.<Object>asList("dotNET", "Java"))
    .agg(sum("earnings"))
    .orderBy("year");
  Row[] rows = pivoted.collect();

  // Expected values per output row: the year, then the dotNET and Java totals.
  int[] expectedYears = {2012, 2013};
  double[][] expectedEarnings = {
    {15000.0, 20000.0},
    {48000.0, 30000.0}
  };

  for (int i = 0; i < expectedYears.length; i++) {
    Assert.assertEquals(expectedYears[i], rows[i].getInt(0));
    Assert.assertEquals(expectedEarnings[i][0], rows[i].getDouble(1), 0.01);
    Assert.assertEquals(expectedEarnings[i][1], rows[i].getDouble(2), 0.01);
  }
}
/**
 * Builds the crosstab of columns "a" and "b" of "testData2" and checks its column names
 * and contents.
 *
 * <p>The collected rows are sorted with {@code crosstabRowComparator} and then expected to
 * carry the labels "1", "2", ... in their first column, with a count of 1 in each of the
 * two remaining columns.
 */
@Test
public void testCrosstab() {
  DataFrame df = context.table("testData2");
  DataFrame crosstab = df.stat().crosstab("a", "b");

  String[] columnNames = crosstab.schema().fieldNames();
  Assert.assertEquals("a_b", columnNames[0]);
  Assert.assertEquals("2", columnNames[1]);
  Assert.assertEquals("1", columnNames[2]);

  Row[] rows = crosstab.collect();
  Arrays.sort(rows, crosstabRowComparator);

  // Primitive counter: avoids re-boxing an Integer on every iteration.
  int count = 1;
  for (Row row : rows) {
    // JUnit's contract is assertEquals(expected, actual); keeping that order makes
    // the failure message report the two values the right way round.
    Assert.assertEquals(Integer.toString(count), row.get(0).toString());
    Assert.assertEquals(1L, row.getLong(1));
    Assert.assertEquals(1L, row.getLong(2));
    count++;
  }
}
/**
 * Filters "testData" down to keys 1, 2 and 3, then checks that {@code collectAsList}
 * returns three rows and that {@code takeAsList(2)} returns two.
 */
@Test
public void testCollectAndTake() {
  DataFrame filtered = context.table("testData").filter("key = 1 or key = 2 or key = 3");

  int collectedSize = filtered.select("key").collectAsList().size();
  Assert.assertEquals(3, collectedSize);

  int takenSize = filtered.select("key").takeAsList(2).size();
  Assert.assertEquals(2, takenSize);
}
/**
 * Filters "testData" to {@code key = 1}, selects the key column, and checks that the
 * first collected row holds the value 1.
 */
@Test
public void testExecution() {
  DataFrame matching = context.table("testData").filter("key = 1");
  Row[] rows = matching.select("key").collect();
  Object firstKey = rows[0].get(0);
  Assert.assertEquals(1, firstKey);
}
/**
 * Checks that the covariance of columns "a" and "b" of "testData2", as reported by
 * {@code stat().cov}, is zero to within 1.0e-6.
 */
@Test
public void testCovariance() {
  DataFrame df = context.table("testData2");
  // Primitive double: there is no reason to box the result.
  double result = df.stat().cov("a", "b");
  // The delta form of assertEquals reports the actual value on failure, which a bare
  // assertTrue(Math.abs(result) < eps) does not.
  Assert.assertEquals(0.0, result, 1.0e-6);
}
/**
 * Checks that the "pearson" correlation of columns "a" and "b" of "testData2", as
 * reported by {@code stat().corr}, is zero to within 1.0e-6.
 */
@Test
public void testCorrelation() {
  DataFrame df = context.table("testData2");
  // Primitive double: there is no reason to box the result.
  double pearsonCorr = df.stat().corr("a", "b", "pearson");
  // The delta form of assertEquals reports the actual value on failure, which a bare
  // assertTrue(Math.abs(pearsonCorr) < eps) does not.
  Assert.assertEquals(0.0, pearsonCorr, 1.0e-6);
}