/**
 * Runs {@code analyzer} over a DataFrame built from {@code testData} and checks that,
 * in every resulting row, the {@code "tokens"} column equals the {@code "wantedTokens"} column.
 *
 * @param analyzer the analyzer whose {@code transform} output is being verified
 * @param testData the input records; must contain at least one element, because the
 *                 runtime class of the first element is handed to {@code createDataFrame}
 * @param <T>      the element type of {@code testData}
 */
private <T> void assertExpectedTokens(LuceneAnalyzer analyzer, List<T> testData) {
  JavaRDD<T> inputRdd = jsc.parallelize(testData);
  // The first element's runtime class is passed to createDataFrame together with the RDD.
  Class<?> elementClass = testData.get(0).getClass();

  Row[] collected = analyzer
      .transform(jsql.createDataFrame(inputRdd, elementClass))
      .select("wantedTokens", "tokens")
      .collect();

  for (int i = 0; i < collected.length; i++) {
    Row current = collected[i];
    // Column 0 is "wantedTokens" (expected), column 1 is "tokens" (actual).
    Assert.assertEquals(current.get(0), current.get(1));
  }
}
/**
 * Checks the cross-tabulation of columns {@code "a"} and {@code "b"} of table
 * {@code "testData2"}.
 *
 * <p>Asserts that the first three result columns are named {@code "a_b"}, {@code "2"} and
 * {@code "1"} (in that order), and that after sorting the rows with
 * {@code crosstabRowComparator} the i-th row (1-based) has {@code i} as the string form of
 * its first column and a count of {@code 1} in each of the next two columns.
 */
@Test
public void testCrosstab() {
  DataFrame df = context.table("testData2");
  DataFrame crosstab = df.stat().crosstab("a", "b");

  String[] columnNames = crosstab.schema().fieldNames();
  Assert.assertEquals("a_b", columnNames[0]);
  Assert.assertEquals("2", columnNames[1]);
  Assert.assertEquals("1", columnNames[2]);

  Row[] rows = crosstab.collect();
  // Sort so the rows can be compared against a running counter.
  // NOTE(review): crosstabRowComparator is defined elsewhere; presumably it orders rows by
  // their first column - confirm.
  Arrays.sort(rows, crosstabRowComparator);

  // Primitive counter: avoids unboxing/reboxing an Integer on every iteration.
  int count = 1;
  for (Row row : rows) {
    // Expected value goes first so that a failure message reports expected/actual correctly.
    Assert.assertEquals(String.valueOf(count), row.get(0).toString());
    Assert.assertEquals(1L, row.getLong(1));
    Assert.assertEquals(1L, row.getLong(2));
    count++;
  }
}