/**
 * Runs the left-join script over three comma-separated inputs and checks alias {@code data5}.
 *
 * <p>Exactly three rows are expected, keyed 1, 2 and 5. For a key that is absent from one of the
 * joined inputs the test asserts {@code 0L} in the corresponding column.
 *
 * @throws Exception if writing the inputs or running the script fails
 */
@Test
public void leftJoinTest() throws Exception {
  PigTest test = createPigTestFromString(leftJoinTest);

  this.writeLinesToFile("input1", "1,1", "2,2", "5,5");
  this.writeLinesToFile("input2", "1,10", "3,30", "5,50");
  this.writeLinesToFile("input3", "2,100", "5,500");

  test.runScript();

  List<Tuple> lines = this.getLinesForAlias(test, "data5");
  Assert.assertEquals(3, lines.size());

  for (Tuple t : lines) {
    // Check the key explicitly: switching on a null Integer would surface as an unboxing
    // NullPointerException rather than as a readable assertion failure.
    Integer key = (Integer) t.get(0);
    Assert.assertNotNull(key);
    switch (key) {
      case 1:
        Assert.assertEquals(10L, t.get(1));
        Assert.assertEquals(0L, t.get(2));
        break;
      case 2:
        Assert.assertEquals(0L, t.get(1));
        Assert.assertEquals(100L, t.get(2));
        break;
      case 5:
        Assert.assertEquals(50L, t.get(1));
        Assert.assertEquals(500L, t.get(2));
        break;
      default:
        Assert.fail("Did not expect: " + t.get(0));
    }
  }
}
/**
 * Runs the lazy int-to-datetime coalesce script and checks alias {@code data3}.
 *
 * <p>Row 1 carries an epoch-millisecond value and row 2 has an empty second field. Both results
 * are converted to UTC before being rendered, so the string comparison does not depend on the
 * time zone attached to the returned {@link DateTime}.
 *
 * @throws Exception if writing the input or running the script fails
 */
@Test
public void coalesceCastIntToDatetimeLazyTest() throws Exception {
  PigTest test = createPigTestFromString(coalesceCastIntToDatetimeLazyTest);

  this.writeLinesToFile("input", "1,1375826183000", "2,");

  test.runScript();

  List<Tuple> lines = this.getLinesForAlias(test, "data3");
  Assert.assertEquals(2, lines.size());

  for (Tuple t : lines) {
    Integer testcase = (Integer) t.get(0);
    Assert.assertNotNull(testcase);
    switch (testcase) {
      case 1:
        Assert.assertEquals(
            "2013-08-06T21:56:23.000Z",
            ((DateTime) t.get(1)).toDateTime(DateTimeZone.UTC).toString());
        break;
      case 2:
        // Normalize to UTC exactly like case 1; a bare toString() renders the DateTime in
        // whatever zone it carries and would only match when that zone is UTC.
        Assert.assertEquals(
            "1970-01-01T00:00:00.000Z",
            ((DateTime) t.get(1)).toDateTime(DateTimeZone.UTC).toString());
        break;
      default:
        // Report the unexpected test-case key, which is what selected this branch.
        Assert.fail("Did not expect: " + testcase);
    }
  }
}
/**
 * Expects a {@code FrontendException} while running the bag-coalesce script with incompatible
 * types and reading alias {@code data3}.
 *
 * @throws Exception the expected {@code FrontendException}, or any other failure
 */
@Test(expectedExceptions = FrontendException.class)
public void coalesceBagIncompatibleTypeTest() throws Exception {
  final PigTest pigTest = createPigTestFromString(coalesceBagIncompatibleTypeTest);

  writeLinesToFile("input", "1,1,2L}");

  pigTest.runScript();

  // The result is intentionally discarded; only the exception matters to this test.
  getLinesForAlias(pigTest, "data3");
}
/**
 * Feeds two text lines to the {@code /pig/MostSeenStartLetter.pig} classpath script as alias
 * {@code lines} and asserts that alias {@code result} is the single row {@code (t,4)}.
 *
 * @throws IOException propagated from PigUnit
 * @throws ParseException propagated from PigUnit
 */
@Test
public void testMostSeenLetterScript() throws IOException, ParseException {
  final String scriptPath = this.getClass().getResource("/pig/MostSeenStartLetter.pig").getPath();
  final PigTest scriptUnderTest = new PigTest(scriptPath);

  final String[] inputLines = {
    "input text for the script",
    "second line for the input text"
  };
  final String[] expectedRows = {"(t,4)"};

  scriptUnderTest.assertOutput("lines", inputLines, "result", expectedRows);
}
@Test public void testStore() throws ParseException, IOException { String[] args = { "n=3", "reducers=1", "input=top_queries_input_data.txt", "output=top_3_queries", }; test = new PigTest(PIG_SCRIPT, args); // By default PigUnit removes all the STORE and DUMP test.unoverride("STORE"); test.runScript(); TestCase.assertTrue(cluster.delete(new Path("top_3_queries"))); }
/**
 * Overrides alias {@code queries_limit} with a {@code LIMIT ... 2} statement and asserts that
 * only two rows are produced.
 *
 * @throws ParseException propagated from PigUnit
 * @throws IOException propagated from PigUnit
 */
@Test
public void testOverride() throws ParseException, IOException {
  final String[] scriptArgs = {
    "n=3", "reducers=1", "input=top_queries_input_data.txt", "output=top_3_queries",
  };
  final String[] expectedRows = {"(yahoo,25)", "(facebook,15)"};

  test = new PigTest(PIG_SCRIPT, scriptArgs);
  test.override("queries_limit", "queries_limit = LIMIT queries_ordered 2;");

  test.assertOutput(expectedRows);
}
/**
 * Supplies tab-separated rows for alias {@code data} and asserts the three rows expected for
 * alias {@code queries_limit}.
 *
 * @throws ParseException propagated from PigUnit
 * @throws IOException propagated from PigUnit
 */
@Test
public void testTextInput() throws ParseException, IOException {
  final String[] scriptArgs = {
    "n=3", "reducers=1", "input=top_queries_input_data.txt", "output=top_3_queries",
  };
  test = new PigTest(PIG_SCRIPT, scriptArgs);

  final String[] queryRows = {
    "yahoo\t10",
    "twitter\t7",
    "facebook\t10",
    "yahoo\t15",
    "facebook\t5",
    "a\t1",
    "b\t2",
    "c\t3",
    "d\t4",
    "e\t5",
  };
  final String[] expectedTopQueries = {"(yahoo,25)", "(facebook,15)", "(twitter,7)"};

  test.assertOutput("data", queryRows, "queries_limit", expectedTopQueries);
}
/**
 * Builds the test from a parameter file instead of inline arguments and compares the output with
 * the expected-results file.
 *
 * @throws ParseException propagated from PigUnit
 * @throws IOException propagated from PigUnit
 */
@Test
public void testArgFiles() throws ParseException, IOException {
  final String[] paramFiles = new String[] {"test/data/pigunit/top_queries_params.txt"};
  final File expectedOutput = new File("test/data/pigunit/top_queries_expected_top_3.txt");

  // No inline arguments are passed; all parameters come from the parameter file.
  test = new PigTest(PIG_SCRIPT, null, paramFiles);
  test.assertOutput(expectedOutput);
}
/**
 * Writes 32 tab-separated rows, runs the simple-random-sample script with a sampling probability
 * of 0.3, and asserts that alias {@code sampled} holds the single value
 * {@code ceil(0.3 * 32)}.
 *
 * @throws Exception if writing the input or running the script fails
 */
@Test
public void simpleRandomSampleTest() throws Exception {
  writeLinesToFile(
      "input",
      "A1\tB1\t1", "A1\tB1\t4", "A1\tB3\t4", "A1\tB4\t4",
      "A2\tB1\t4", "A2\tB2\t4",
      "A3\tB1\t3", "A3\tB1\t1", "A3\tB3\t77",
      "A4\tB1\t3", "A4\tB2\t3", "A4\tB3\t59", "A4\tB4\t29",
      "A5\tB1\t4",
      "A6\tB2\t3", "A6\tB2\t55", "A6\tB3\t1",
      "A7\tB1\t39", "A7\tB2\t27", "A7\tB3\t85",
      "A8\tB1\t4", "A8\tB2\t45",
      "A9\tB3\t92", "A9\tB3\t0", "A9\tB6\t42", "A9\tB5\t1",
      "A10\tB1\t7", "A10\tB2\t23", "A10\tB2\t1", "A10\tB2\t31", "A10\tB6\t41", "A10\tB7\t52");

  // rowCount matches the number of rows written above.
  final int rowCount = 32;
  final double probability = 0.3;
  final int expectedSampleSize = (int) Math.ceil(probability * rowCount);

  final PigTest sampler =
      createPigTestFromString(simpleRandomSampleTest, "SAMPLING_PROBABILITY=" + probability);
  sampler.runScript();

  assertOutput(sampler, "sampled", "(" + expectedSampleSize + ")");
}
/**
 * One-time fixture: obtains the PigUnit cluster and calls {@code update} with the local query
 * data file and the name the scripts load it under.
 *
 * @throws IOException propagated from the cluster calls
 */
@BeforeClass
public static void setUpOnce() throws IOException {
  cluster = PigTest.getCluster();

  final Path localData = new Path("test/data/pigunit/top_queries_input_data.txt");
  final Path clusterData = new Path("top_queries_input_data.txt");
  cluster.update(localData, clusterData);
}
/**
 * Writes 32 tab-separated rows, runs the stratified-sample script with a sampling probability of
 * 0.5, and asserts the per-group counts listed for alias {@code sampled}.
 *
 * @throws Exception if writing the input or running the script fails
 */
@Test
public void stratifiedSampleTest() throws Exception {
  writeLinesToFile(
      "input",
      "A1\tB1\t1", "A1\tB1\t4", "A1\tB3\t4", "A1\tB4\t4",
      "A2\tB1\t4", "A2\tB2\t4",
      "A3\tB1\t3", "A3\tB1\t1", "A3\tB3\t77",
      "A4\tB1\t3", "A4\tB2\t3", "A4\tB3\t59", "A4\tB4\t29",
      "A5\tB1\t4",
      "A6\tB2\t3", "A6\tB2\t55", "A6\tB3\t1",
      "A7\tB1\t39", "A7\tB2\t27", "A7\tB3\t85",
      "A8\tB1\t4", "A8\tB2\t45",
      "A9\tB3\t92", "A9\tB3\t0", "A9\tB6\t42", "A9\tB5\t1",
      "A10\tB1\t7", "A10\tB2\t23", "A10\tB2\t1", "A10\tB2\t31", "A10\tB6\t41", "A10\tB7\t52");

  final double probability = 0.5;
  final PigTest sampler =
      createPigTestFromString(stratifiedSampleTest, "SAMPLING_PROBABILITY=" + probability);
  sampler.runScript();

  // One expected row per first-column group, in the order asserted by the test.
  assertOutput(
      sampler,
      "sampled",
      "(A1,2)",
      "(A10,3)",
      "(A2,1)",
      "(A3,2)",
      "(A4,2)",
      "(A5,1)",
      "(A6,2)",
      "(A7,2)",
      "(A8,1)",
      "(A9,2)");
}
/**
 * Runs the script with inline arguments and compares its output with the expected-results file.
 *
 * @throws ParseException propagated from PigUnit
 * @throws IOException propagated from PigUnit
 */
@Test
public void testFileOutput() throws ParseException, IOException {
  final String[] scriptArgs =
      new String[] {
        "n=3", "reducers=1", "input=top_queries_input_data.txt", "output=top_3_queries",
      };
  test = new PigTest(PIG_SCRIPT, scriptArgs);

  final File expectedOutput = new File("test/data/pigunit/top_queries_expected_top_3.txt");
  test.assertOutput(expectedOutput);
}
/**
 * Expects a {@code FrontendException} when the transpose script is run on rows whose last value
 * column is written as a decimal ({@code 12.0}, {@code 22.0}).
 *
 * <p>NOTE(review): with {@code expectedExceptions} set, the per-bag checks after
 * {@code runScript()} cannot make the test pass by succeeding; presumably they mirror the
 * well-typed transpose test. Confirm whether they are still wanted.
 *
 * @throws Exception the expected {@code FrontendException}, or any other failure
 */
@Test(expectedExceptions = {org.apache.pig.impl.logicalLayer.FrontendException.class})
public void transposeBadTypeTest() throws Exception {
  final PigTest pigTest = createPigTestFromString(transposeBadTypeTest);
  writeLinesToFile("input", "1,10,11,12.0", "2,20,21,22.0");
  pigTest.runScript();

  final List<Tuple> rows = getLinesForAlias(pigTest, "data3");
  for (Tuple row : rows) {
    int caseId = (Integer) row.get(0);
    DataBag transposed = (DataBag) row.get(1);
    Assert.assertEquals(transposed.size(), 3);

    // Entry number k (zero-based) is checked against key "val<k+1>" and value caseId * 10 + k.
    int position = 0;
    for (Tuple entry : transposed) {
      String wantedKey = String.format("val%d", position + 1);
      Assert.assertEquals((String) entry.get(0), wantedKey);

      int value = (Integer) entry.get(1);
      Assert.assertEquals(value, caseId * 10 + position);

      position++;
    }
  }
}
/**
 * Runs the script with inline arguments and asserts its three expected rows, without naming
 * either an input or an output alias.
 *
 * @throws ParseException propagated from PigUnit
 * @throws IOException propagated from PigUnit
 */
@Test
public void testImplicitNtoN() throws ParseException, IOException {
  final String[] scriptArgs = {
    "n=3", "reducers=1", "input=top_queries_input_data.txt", "output=top_3_queries",
  };
  final String[] expectedRows = {"(yahoo,25)", "(facebook,15)", "(twitter,7)"};

  test = new PigTest(PIG_SCRIPT, scriptArgs);
  test.assertOutput(expectedRows);
}
/**
 * Placeholder for running the script against a mocked {@code PigServer} and {@code Cluster}.
 * Both are currently {@code null} and the test is ignored.
 *
 * @throws ParseException propagated from PigUnit
 * @throws IOException propagated from PigUnit
 */
@Ignore("Not ready yet")
@Test
public void testWithMock() throws ParseException, IOException {
  final String[] scriptArgs =
      new String[] {
        "n=3", "reducers=1", "input=top_queries_input_data.txt", "output=top_3_queries",
      };

  // No mocks exist yet; typed nulls stand in for them.
  final PigServer mockServer = null;
  final Cluster mockCluster = null;

  test = new PigTest(PIG_SCRIPT, scriptArgs, mockServer, mockCluster);

  final File expectedOutput = new File("data/top_queries_expected_top_3.txt");
  test.assertOutput(expectedOutput);
}
// The first parameter is an int, but the fixed parameter is a long. // They are merged to a long. @Test public void coalesceCastIntToDoubleTest() throws Exception { PigTest test = createPigTestFromString(coalesceIntAndDoubleTest); this.writeLinesToFile("input", "1,5", "2,"); test.runScript(); List<Tuple> lines = this.getLinesForAlias(test, "data4"); Assert.assertEquals(2, lines.size()); for (Tuple t : lines) { switch ((Integer) t.get(0)) { case 1: Assert.assertEquals(500.0, t.get(1)); break; case 2: Assert.assertEquals(10000.0, t.get(1)); break; default: Assert.fail("Did not expect: " + t.get(1)); } } }
/**
 * Runs the int coalesce script over six rows and checks alias {@code data3}.
 *
 * <p>Each row is {@code id,a,b,c} with some values left empty. The asserted result is the first
 * non-empty value among {@code a}, {@code b} and {@code c}, or {@code null} when all three are
 * empty (row 4).
 *
 * @throws Exception if writing the input or running the script fails
 */
@Test
public void coalesceIntTest() throws Exception {
  PigTest test = createPigTestFromString(coalesceIntTest);

  this.writeLinesToFile("input", "1,1,2,3", "2,,2,3", "3,,,3", "4,,,", "5,1,,3", "6,1,,");

  test.runScript();

  List<Tuple> lines = this.getLinesForAlias(test, "data3");
  Assert.assertEquals(6, lines.size());

  for (Tuple t : lines) {
    // Check the key explicitly: switching on a null Integer would surface as an unboxing
    // NullPointerException rather than as a readable assertion failure.
    Integer testcase = (Integer) t.get(0);
    Assert.assertNotNull(testcase);
    switch (testcase) {
      case 1:
      case 5:
      case 6:
        // Rows whose first value column is populated.
        Assert.assertEquals(1, t.get(1));
        break;
      case 2:
        Assert.assertEquals(2, t.get(1));
        break;
      case 3:
        Assert.assertEquals(3, t.get(1));
        break;
      case 4:
        Assert.assertNull(t.get(1));
        break;
      default:
        // Report the unexpected test-case key, which is what selected this branch.
        Assert.fail("Did not expect: " + testcase);
    }
  }
}
/**
 * Runs an inline script and compares the newline-joined contents of alias
 * {@code queries_limit} with the expected three rows.
 *
 * @throws ParseException propagated from PigUnit
 * @throws IOException propagated from PigUnit
 */
@Test
public void testGetLastAlias() throws ParseException, IOException {
  final String[] pigLines =
      new String[] {
        "data = LOAD 'top_queries_input_data.txt' AS (query:CHARARRAY, count:INT);",
        "queries_group = GROUP data BY query PARALLEL 1;",
        "queries_sum = FOREACH queries_group GENERATE group AS query, SUM(data.count) AS count;",
        "queries_ordered = ORDER queries_sum BY count DESC PARALLEL 1;",
        "queries_limit = LIMIT queries_ordered 3;",
        "STORE queries_limit INTO 'top_3_queries';",
      };
  test = new PigTest(pigLines);

  final String expectedRows = "(yahoo,25)\n" + "(facebook,15)\n" + "(twitter,7)";
  final String actualRows = StringUtils.join(test.getAlias("queries_limit"), "\n");

  TestCase.assertEquals(expectedRows, actualRows);
}
/**
 * Builds the test from script lines held in an array rather than from a script file and asserts
 * the three expected rows.
 *
 * @throws ParseException propagated from PigUnit
 * @throws IOException propagated from PigUnit
 */
@Test
public void testInlinePigScript() throws ParseException, IOException {
  final String[] pigLines = {
    "data = LOAD 'top_queries_input_data.txt' AS (query:CHARARRAY, count:INT);",
    "queries_group = GROUP data BY query PARALLEL 1;",
    "queries_sum = FOREACH queries_group GENERATE group AS query, SUM(data.count) AS count;",
    "queries_ordered = ORDER queries_sum BY count DESC PARALLEL 1;",
    "queries_limit = LIMIT queries_ordered 3;",
    "STORE queries_limit INTO 'top_3_queries';",
  };
  final String[] expectedRows = {"(yahoo,25)", "(facebook,15)", "(twitter,7)"};

  test = new PigTest(pigLines);
  test.assertOutput(expectedRows);
}
@Test public void testWithUdf() throws ParseException, IOException { String[] script = { // "REGISTER myIfNeeded.jar;", "DEFINE TOKENIZE TOKENIZE();", "data = LOAD 'top_queries_input_data.txt' AS (query:CHARARRAY, count:INT);", "queries = FOREACH data GENERATE query, TOKENIZE(query) AS query_tokens;", "queries_ordered = ORDER queries BY query DESC PARALLEL 1;", "queries_limit = LIMIT queries_ordered 3;", "STORE queries_limit INTO 'top_3_queries';", }; test = new PigTest(script); String[] output = { "(yahoo,{(yahoo)})", "(yahoo,{(yahoo)})", "(twitter,{(twitter)})", }; test.assertOutput(output); }
/**
 * Test for default bootup statements (PIG-2456).
 *
 * <p>Writes a {@code pig.properties} file in the working directory whose
 * {@code pig.load.default.statements} entry points at a bootup file containing the {@code LOAD}
 * statement. The script itself omits that statement, and the test asserts the usual three rows
 * for alias {@code queries_limit}.
 *
 * <p>The three temporary files are removed in a {@code finally} block so that a failing run does
 * not leave them behind for later tests.
 *
 * @throws ParseException propagated from PigUnit
 * @throws IOException if a temporary file cannot be written, or propagated from PigUnit
 */
@Test
public void testDefaultBootup() throws ParseException, IOException {
  // Test with properties file
  final String pigProps = "pig.properties";
  final String bootupPath = "/tmp/.temppigbootup";
  final String scriptPath = "/tmp/tempScript";

  final File propertyFile = new File(pigProps);
  final File bootupFile = new File(bootupPath);
  final File scriptFile = new File(scriptPath);

  try {
    writeBootupTestLines(propertyFile, "pig.load.default.statements=" + bootupPath);
    writeBootupTestLines(
        bootupFile, "data = LOAD 'top_queries_input_data.txt' AS (query:CHARARRAY, count:INT);");

    String[] script = {
      // The LOAD statement is left out on purpose: the test creates a bootup file which
      // contains it instead. PigTests (and Pig scripts in general) will read the bootup file
      // to load default statements.
      "queries_group = GROUP data BY query PARALLEL 1;",
      "queries_sum = FOREACH queries_group GENERATE group AS query, SUM(data.count) AS count;",
      "queries_ordered = ORDER queries_sum BY count DESC PARALLEL 1;",
      "queries_limit = LIMIT queries_ordered 3;",
      "STORE queries_limit INTO 'top_3_queries';",
    };
    writeBootupTestLines(scriptFile, script);

    String[] args = {
      "n=3", "reducers=1", "input=top_queries_input_data.txt", "output=top_3_queries",
    };

    // Create a pigunit.pig.PigServer and Cluster to run this test.
    final PigServer pig;
    if (System.getProperties().containsKey("pigunit.exectype.cluster")) {
      LOG.info("Using cluster mode");
      pig = new PigServer(ExecType.MAPREDUCE);
    } else {
      LOG.info("Using default local mode");
      pig = new PigServer(ExecType.LOCAL);
    }

    // Named differently from the class-level cluster so the shared one is not shadowed.
    final Cluster bootupCluster = new Cluster(pig.getPigContext());

    test = new PigTest(scriptPath, args, pig, bootupCluster);

    String[] output = {
      "(yahoo,25)", "(facebook,15)", "(twitter,7)",
    };

    test.assertOutput("queries_limit", output);
  } finally {
    // Best-effort cleanup; the return values are ignored as before.
    propertyFile.delete();
    scriptFile.delete();
    bootupFile.delete();
  }
}

/** Writes each line to {@code target}, closing the writer even if a write fails. */
private static void writeBootupTestLines(File target, String... lines) throws IOException {
  PrintWriter out = new PrintWriter(new FileWriter(target));
  try {
    for (String line : lines) {
      out.println(line);
    }
  } finally {
    out.close();
  }
}