// See PIG-1434 @Test public void testScalarAliasesJoinClause() throws Exception { String[] inputA = {"1\t5", "2\t10", "3\t20"}; String[] inputB = {"Total3\tthree", "Total2\ttwo", "Total1\tone"}; // Test the use of scalars in expressions String inputPathA = BUILD_TEST_TMP + "table_testScalarAliasesJoinClauseA"; TestScalarAliases.createLocalInputFile(inputPathA, inputA); String inputPathB = BUILD_TEST_TMP + "table_testScalarAliasesJoinClauseB"; TestScalarAliases.createLocalInputFile(inputPathB, inputB); // Test in script mode pigServer.registerQuery("A = LOAD '" + inputPathA + "' as (a0, a1);"); pigServer.registerQuery("G = group A all;"); pigServer.registerQuery("C = foreach G generate COUNT(A) as count;"); pigServer.registerQuery("B = LOAD '" + inputPathB + "' as (b0:chararray, b1:chararray);"); pigServer.registerQuery("Y = join A by CONCAT('Total', (chararray)C.count), B by $0;"); Iterator<Tuple> iter = pigServer.openIterator("Y"); String[] expected = new String[] {"(1,5,Total3,three)", "(2,10,Total3,three)", "(3,20,Total3,three)"}; Util.checkQueryOutputsAfterSortRecursive( iter, expected, org.apache.pig.newplan.logical.Util.translateSchema(pigServer.dumpSchema("Y"))); }
// See PIG-1434 @Test public void testScalarWithNoSchemaDollarProj() throws Exception { String[] scalarInput = {"1\t5"}; String[] input = {"1\t5", "2\t10", "3\t20"}; String inputPath = BUILD_TEST_TMP + "table_testScalarWithNoSchemaDollarProj"; TestScalarAliases.createLocalInputFile(inputPath, input); String inputPathScalar = BUILD_TEST_TMP + "table_testScalarWithNoSchemaDollarProjScalar"; TestScalarAliases.createLocalInputFile(inputPathScalar, scalarInput); // Load A as a scalar pigServer.registerQuery("A = LOAD '" + inputPath + "';"); pigServer.registerQuery("scalar = LOAD '" + inputPathScalar + "';"); pigServer.registerQuery("B = foreach A generate 5 / scalar.$1;"); Iterator<Tuple> iter = pigServer.openIterator("B"); Tuple t = iter.next(); assertTrue(t.get(0).toString().equals("1")); t = iter.next(); assertTrue(t.get(0).toString().equals("1")); t = iter.next(); assertTrue(t.get(0).toString().equals("1")); assertFalse(iter.hasNext()); }
// See PIG-1434 @Test public void testFilteredScalarDollarProj() throws Exception { String output = BUILD_TEST_TMP + "table_testFilteredScalarDollarProjDir"; TestScalarAliases.deleteDirectory(new File(output)); String[] input = { "1\t5\t[state#maine,city#portland]\t{(a),(b)}\t(a,b)", "2\t10\t\t\t", "3\t20\t\t\t" }; // Test the use of scalars in expressions String inputPath = BUILD_TEST_TMP + "table_testFilteredScalarDollarProj"; TestScalarAliases.createLocalInputFile(inputPath, input); // Test in script mode pigServer.setBatchOn(); pigServer.registerQuery( "A = LOAD '" + inputPath + "'" + " as (a0: long, a1: double, a2 : bytearray, " + "a3: bag{ t : tuple(tc : chararray)}, " + "a4: tuple(c1 : chararray, c2 : chararray) );"); pigServer.registerQuery("B = filter A by $1 < 8;"); pigServer.registerQuery( "Y = foreach A generate (a0 * B.$0), (a1 / B.$1), B.$2, B.$2#'state', B.$3, B.a4;"); pigServer.registerQuery("Store Y into '" + output + "';"); pigServer.explain("Y", System.err); pigServer.executeBatch(); // Check output pigServer.registerQuery("Z = LOAD '" + output + "' as (a0: int, a1: double);"); pigServer.explain("Z", System.err); Iterator<Tuple> iter = pigServer.openIterator("Z"); Tuple t = iter.next(); assertTrue(t.toString().equals("(1,1.0)")); t = iter.next(); assertTrue(t.toString().equals("(2,2.0)")); t = iter.next(); assertTrue(t.toString().equals("(3,4.0)")); assertFalse(iter.hasNext()); // Check in non-batch mode iter = pigServer.openIterator("Y"); t = iter.next(); assertEquals(t.toString(), "(1,1.0,[state#maine,city#portland],maine,{(a),(b)},(a,b))"); t = iter.next(); assertEquals(t.toString(), "(2,2.0,[state#maine,city#portland],maine,{(a),(b)},(a,b))"); t = iter.next(); assertEquals(t.toString(), "(3,4.0,[state#maine,city#portland],maine,{(a),(b)},(a,b))"); assertFalse(iter.hasNext()); }
// See PIG-1434 @Test public void testScalarWithTwoBranches() throws Exception { String[] inputA = {"1\t5", "2\t10", "3\t20"}; String[] inputX = {"pig", "hadoop", "rocks"}; String output = BUILD_TEST_TMP + "testScalarWithTwoBranchesDir"; TestScalarAliases.deleteDirectory(new File(output)); // Test the use of scalars in expressions String inputPathA = BUILD_TEST_TMP + "testScalarWithTwoBranchesA"; TestScalarAliases.createLocalInputFile(inputPathA, inputA); String inputPathX = BUILD_TEST_TMP + "testScalarWithTwoBranchesX"; TestScalarAliases.createLocalInputFile(inputPathX, inputX); // Test in script mode pigServer.setBatchOn(); pigServer.registerQuery("A = LOAD '" + inputPathA + "' as (a0: long, a1: double);"); pigServer.registerQuery("B = group A all;"); pigServer.registerQuery("C = foreach B generate COUNT(A) as count, MAX(A.$1) as max;"); pigServer.registerQuery("X = LOAD '" + inputPathX + "' as (names: chararray);"); pigServer.registerQuery("Y = foreach X generate names, C.max;"); pigServer.registerQuery("Store Y into '" + output + "';"); pigServer.executeBatch(); // Check output pigServer.registerQuery("Z = LOAD '" + output + "' as (a0: chararray, a1: double);"); Iterator<Tuple> iter = pigServer.openIterator("Z"); Tuple t = iter.next(); assertTrue(t.toString().equals("(pig,20.0)")); t = iter.next(); assertTrue(t.toString().equals("(hadoop,20.0)")); t = iter.next(); assertTrue(t.toString().equals("(rocks,20.0)")); assertFalse(iter.hasNext()); // Check in non-batch mode iter = pigServer.openIterator("Y"); t = iter.next(); assertTrue(t.toString().equals("(pig,20.0)")); t = iter.next(); assertTrue(t.toString().equals("(hadoop,20.0)")); t = iter.next(); assertTrue(t.toString().equals("(rocks,20.0)")); assertFalse(iter.hasNext()); pigServer.getPigContext().getProperties().remove("tez.am.inline.task.execution.max-tasks"); }
// See PIG-1434 @Test public void testScalarAliasesBatchNobatch() throws Exception { String[] input = {"1\t5", "2\t10", "3\t20"}; String output = BUILD_TEST_TMP + "table_testScalarAliasesDir"; TestScalarAliases.deleteDirectory(new File(output)); // Test the use of scalars in expressions String inputPath = BUILD_TEST_TMP + "table_testScalarAliasesBatch"; TestScalarAliases.createLocalInputFile(inputPath, input); // Test in script mode pigServer.setBatchOn(); pigServer.registerQuery("A = LOAD '" + inputPath + "' as (a0: long, a1: double);"); pigServer.registerQuery("B = group A all;"); pigServer.registerQuery("C = foreach B generate COUNT(A) as count, MAX(A.$1) as max;"); pigServer.registerQuery("Y = foreach A generate (a0 * C.count), (a1 / C.max);"); pigServer.registerQuery("Store Y into '" + output + "';"); pigServer.executeBatch(); // Check output pigServer.registerQuery("Z = LOAD '" + output + "' as (a0: int, a1: double);"); Iterator<Tuple> iter; Tuple t; iter = pigServer.openIterator("Z"); t = iter.next(); assertTrue(t.toString().equals("(3,0.25)")); t = iter.next(); assertTrue(t.toString().equals("(6,0.5)")); t = iter.next(); assertTrue(t.toString().equals("(9,1.0)")); assertFalse(iter.hasNext()); iter = pigServer.openIterator("Y"); t = iter.next(); assertTrue(t.toString().equals("(3,0.25)")); t = iter.next(); assertTrue(t.toString().equals("(6,0.5)")); t = iter.next(); assertTrue(t.toString().equals("(9,1.0)")); assertFalse(iter.hasNext()); }
// See PIG-1636 @Test public void testScalarAliasesLimit() throws Exception { String[] input = {"a\t1", "b\t2", "c\t3", "a\t4", "c\t5"}; // Test the use of scalars in expressions String inputPath = BUILD_TEST_TMP + "table_testScalarAliasesLimit"; TestScalarAliases.createLocalInputFile(inputPath, input); // Test in script mode pigServer.registerQuery("A = LOAD '" + inputPath + "' as (a0:chararray, a1: int);"); pigServer.registerQuery("G = group A all;"); pigServer.registerQuery("C = foreach G generate SUM(A.$1) as total;"); pigServer.registerQuery("C1 = limit C 1;"); pigServer.registerQuery("Y = foreach A generate a0, a1 * (double)C1.total;"); Iterator<Tuple> iter = pigServer.openIterator("Y"); // Average is 11 Tuple t = iter.next(); assertTrue(t.toString().equals("(a,15.0)")); t = iter.next(); assertTrue(t.toString().equals("(b,30.0)")); t = iter.next(); assertTrue(t.toString().equals("(c,45.0)")); t = iter.next(); assertTrue(t.toString().equals("(a,60.0)")); t = iter.next(); assertTrue(t.toString().equals("(c,75.0)")); assertFalse(iter.hasNext()); }
// See PIG-1434 @Test public void testScalarAliasesFilterClause() throws Exception { String[] input = {"1\t5", "2\t10", "3\t20", "4\t12", "5\t8"}; // Test the use of scalars in expressions String inputPath = BUILD_TEST_TMP + "table_testScalarAliasesFilterClause"; TestScalarAliases.createLocalInputFile(inputPath, input); // Test in script mode pigServer.registerQuery("A = LOAD '" + inputPath + "' as (a0, a1);"); pigServer.registerQuery("G = group A all;"); pigServer.registerQuery("C = foreach G generate AVG(A.$1) as average;"); pigServer.registerQuery("Y = filter A by a1 > C.average;"); Iterator<Tuple> iter = pigServer.openIterator("Y"); // Average is 11 Tuple t = iter.next(); assertTrue(t.toString().equals("(3,20)")); t = iter.next(); assertTrue(t.toString().equals("(4,12)")); assertFalse(iter.hasNext()); }
// See PIG-1434 @Test public void testScalarAliasesGrammarNegative() throws Exception { String[] input = {"1\t5", "2\t10", "3\t20"}; String inputPath = BUILD_TEST_TMP + "table_testScalarAliasesGrammar"; TestScalarAliases.createLocalInputFile(inputPath, input); try { pigServer.registerQuery("A = LOAD '" + inputPath + "' as (a0: long, a1: double);"); pigServer.registerQuery("B = group A all;"); pigServer.registerQuery("C = foreach B generate COUNT(A);"); // Only projections of C are supported pigServer.registerQuery("Y = foreach A generate C;"); pigServer.openIterator("Y"); // Control should not reach here fail("Scalar projections are only supported"); } catch (IOException pe) { assertTrue(pe.getMessage().contains("Invalid scalar projection: C")); } }
// See PIG-1434 @Test public void testUseScalarMultipleTimes() throws Exception { String[] input = {"1\t5", "2\t10", "3\t20"}; String outputY = BUILD_TEST_TMP + "table_testUseScalarMultipleTimesOutY"; TestScalarAliases.deleteDirectory(new File(outputY)); String outputZ = BUILD_TEST_TMP + "table_testUseScalarMultipleTimesOutZ"; TestScalarAliases.deleteDirectory(new File(outputZ)); // Test the use of scalars in expressions String inputPath = BUILD_TEST_TMP + "table_testUseScalarMultipleTimes"; TestScalarAliases.createLocalInputFile(inputPath, input); pigServer.setBatchOn(); pigServer.registerQuery("A = LOAD '" + inputPath + "' as (a0: long, a1: double);"); pigServer.registerQuery("B = group A all;"); pigServer.registerQuery("C = foreach B generate COUNT(A) as count, MAX(A.$1) as max;"); pigServer.registerQuery("Y = foreach A generate (a0 * C.count), (a1 / C.max);"); pigServer.registerQuery("Store Y into '" + outputY + "';"); pigServer.registerQuery("Z = foreach A generate (a1 + C.count), (a0 * C.max);"); pigServer.registerQuery("Store Z into '" + outputZ + "';"); // Test Multiquery store pigServer.executeBatch(); // Check output pigServer.registerQuery("M = LOAD '" + outputY + "' as (a0: int, a1: double);"); Iterator<Tuple> iter; Tuple t; iter = pigServer.openIterator("M"); t = iter.next(); assertTrue(t.toString().equals("(3,0.25)")); t = iter.next(); assertTrue(t.toString().equals("(6,0.5)")); t = iter.next(); assertTrue(t.toString().equals("(9,1.0)")); assertFalse(iter.hasNext()); // Check output pigServer.registerQuery("N = LOAD '" + outputZ + "' as (a0: double, a1: double);"); iter = pigServer.openIterator("N"); t = iter.next(); assertTrue(t.toString().equals("(8.0,20.0)")); t = iter.next(); assertTrue(t.toString().equals("(13.0,40.0)")); t = iter.next(); assertTrue(t.toString().equals("(23.0,60.0)")); assertFalse(iter.hasNext()); // Non batch mode iter = pigServer.openIterator("Y"); t = iter.next(); assertTrue(t.toString().equals("(3,0.25)")); t = iter.next(); assertTrue(t.toString().equals("(6,0.5)")); t = iter.next(); assertTrue(t.toString().equals("(9,1.0)")); assertFalse(iter.hasNext()); // Check in non-batch mode iter = pigServer.openIterator("Z"); t = iter.next(); assertTrue(t.toString().equals("(8.0,20.0)")); t = iter.next(); assertTrue(t.toString().equals("(13.0,40.0)")); t = iter.next(); assertTrue(t.toString().equals("(23.0,60.0)")); assertFalse(iter.hasNext()); }