/**
 * Runs a two-store multi-query batch with output compression enabled and the
 * BZip2 codec configured, then checks that each store location (one named with
 * a ".bz2" suffix, one without) holds a non-empty "part-m-00000.bz2" file.
 */
@Test
public void testBzipStoreInMultiQuery2() throws Exception {
    final String inputFileName = "input2.txt";
    Util.createInputFile(cluster, inputFileName, new String[] {"1\t2\r3\t4"});

    PigServer pig = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());

    // Turn on compressed output for every store issued by this server.
    PigContext context = pig.getPigContext();
    context.getProperties().setProperty("output.compression.enabled", "true");
    context
        .getProperties()
        .setProperty("output.compression.codec", "org.apache.hadoop.io.compress.BZip2Codec");

    // Both stores are queued and executed together as a single batch.
    pig.setBatchOn();
    pig.registerQuery("a = load '" + inputFileName + "';");
    pig.registerQuery("store a into 'output2.bz2';");
    pig.registerQuery("store a into 'output2';");
    pig.executeBatch();

    FileSystem fs =
        FileSystem.get(ConfigurationUtil.toConfiguration(pig.getPigContext().getProperties()));

    // The part file carries the .bz2 extension in both output directories.
    for (String outputDir : new String[] {"output2", "output2.bz2"}) {
        FileStatus partFile = fs.getFileStatus(new Path(outputDir + "/part-m-00000.bz2"));
        assertTrue(partFile.getLen() > 0);
    }
}
// See PIG-1434 @Test public void testFilteredScalarDollarProj() throws Exception { String output = BUILD_TEST_TMP + "table_testFilteredScalarDollarProjDir"; TestScalarAliases.deleteDirectory(new File(output)); String[] input = { "1\t5\t[state#maine,city#portland]\t{(a),(b)}\t(a,b)", "2\t10\t\t\t", "3\t20\t\t\t" }; // Test the use of scalars in expressions String inputPath = BUILD_TEST_TMP + "table_testFilteredScalarDollarProj"; TestScalarAliases.createLocalInputFile(inputPath, input); // Test in script mode pigServer.setBatchOn(); pigServer.registerQuery( "A = LOAD '" + inputPath + "'" + " as (a0: long, a1: double, a2 : bytearray, " + "a3: bag{ t : tuple(tc : chararray)}, " + "a4: tuple(c1 : chararray, c2 : chararray) );"); pigServer.registerQuery("B = filter A by $1 < 8;"); pigServer.registerQuery( "Y = foreach A generate (a0 * B.$0), (a1 / B.$1), B.$2, B.$2#'state', B.$3, B.a4;"); pigServer.registerQuery("Store Y into '" + output + "';"); pigServer.explain("Y", System.err); pigServer.executeBatch(); // Check output pigServer.registerQuery("Z = LOAD '" + output + "' as (a0: int, a1: double);"); pigServer.explain("Z", System.err); Iterator<Tuple> iter = pigServer.openIterator("Z"); Tuple t = iter.next(); assertTrue(t.toString().equals("(1,1.0)")); t = iter.next(); assertTrue(t.toString().equals("(2,2.0)")); t = iter.next(); assertTrue(t.toString().equals("(3,4.0)")); assertFalse(iter.hasNext()); // Check in non-batch mode iter = pigServer.openIterator("Y"); t = iter.next(); assertEquals(t.toString(), "(1,1.0,[state#maine,city#portland],maine,{(a),(b)},(a,b))"); t = iter.next(); assertEquals(t.toString(), "(2,2.0,[state#maine,city#portland],maine,{(a),(b)},(a,b))"); t = iter.next(); assertEquals(t.toString(), "(3,4.0,[state#maine,city#portland],maine,{(a),(b)},(a,b))"); assertFalse(iter.hasNext()); }
// See PIG-1434 @Test public void testScalarWithTwoBranches() throws Exception { String[] inputA = {"1\t5", "2\t10", "3\t20"}; String[] inputX = {"pig", "hadoop", "rocks"}; String output = BUILD_TEST_TMP + "testScalarWithTwoBranchesDir"; TestScalarAliases.deleteDirectory(new File(output)); // Test the use of scalars in expressions String inputPathA = BUILD_TEST_TMP + "testScalarWithTwoBranchesA"; TestScalarAliases.createLocalInputFile(inputPathA, inputA); String inputPathX = BUILD_TEST_TMP + "testScalarWithTwoBranchesX"; TestScalarAliases.createLocalInputFile(inputPathX, inputX); // Test in script mode pigServer.setBatchOn(); pigServer.registerQuery("A = LOAD '" + inputPathA + "' as (a0: long, a1: double);"); pigServer.registerQuery("B = group A all;"); pigServer.registerQuery("C = foreach B generate COUNT(A) as count, MAX(A.$1) as max;"); pigServer.registerQuery("X = LOAD '" + inputPathX + "' as (names: chararray);"); pigServer.registerQuery("Y = foreach X generate names, C.max;"); pigServer.registerQuery("Store Y into '" + output + "';"); pigServer.executeBatch(); // Check output pigServer.registerQuery("Z = LOAD '" + output + "' as (a0: chararray, a1: double);"); Iterator<Tuple> iter = pigServer.openIterator("Z"); Tuple t = iter.next(); assertTrue(t.toString().equals("(pig,20.0)")); t = iter.next(); assertTrue(t.toString().equals("(hadoop,20.0)")); t = iter.next(); assertTrue(t.toString().equals("(rocks,20.0)")); assertFalse(iter.hasNext()); // Check in non-batch mode iter = pigServer.openIterator("Y"); t = iter.next(); assertTrue(t.toString().equals("(pig,20.0)")); t = iter.next(); assertTrue(t.toString().equals("(hadoop,20.0)")); t = iter.next(); assertTrue(t.toString().equals("(rocks,20.0)")); assertFalse(iter.hasNext()); pigServer.getPigContext().getProperties().remove("tez.am.inline.task.execution.max-tasks"); }
// See PIG-1434 @Test public void testScalarAliasesBatchNobatch() throws Exception { String[] input = {"1\t5", "2\t10", "3\t20"}; String output = BUILD_TEST_TMP + "table_testScalarAliasesDir"; TestScalarAliases.deleteDirectory(new File(output)); // Test the use of scalars in expressions String inputPath = BUILD_TEST_TMP + "table_testScalarAliasesBatch"; TestScalarAliases.createLocalInputFile(inputPath, input); // Test in script mode pigServer.setBatchOn(); pigServer.registerQuery("A = LOAD '" + inputPath + "' as (a0: long, a1: double);"); pigServer.registerQuery("B = group A all;"); pigServer.registerQuery("C = foreach B generate COUNT(A) as count, MAX(A.$1) as max;"); pigServer.registerQuery("Y = foreach A generate (a0 * C.count), (a1 / C.max);"); pigServer.registerQuery("Store Y into '" + output + "';"); pigServer.executeBatch(); // Check output pigServer.registerQuery("Z = LOAD '" + output + "' as (a0: int, a1: double);"); Iterator<Tuple> iter; Tuple t; iter = pigServer.openIterator("Z"); t = iter.next(); assertTrue(t.toString().equals("(3,0.25)")); t = iter.next(); assertTrue(t.toString().equals("(6,0.5)")); t = iter.next(); assertTrue(t.toString().equals("(9,1.0)")); assertFalse(iter.hasNext()); iter = pigServer.openIterator("Y"); t = iter.next(); assertTrue(t.toString().equals("(3,0.25)")); t = iter.next(); assertTrue(t.toString().equals("(6,0.5)")); t = iter.next(); assertTrue(t.toString().equals("(9,1.0)")); assertFalse(iter.hasNext()); }
/**
 * Uses a scalar taken from a relation whose filter (a == 'c') matches none of
 * the two input rows, and expects the arithmetic that depends on it to yield
 * null for every row of the stored output.
 */
@Test
public void testScalarNullValue() throws Exception {
    Storage.Data mockData = Storage.resetData(pigServer);
    mockData.set("input", Storage.tuple("a", 1), Storage.tuple("b", 2));

    pigServer.setBatchOn();
    String[] script = {
        "A = load 'input' using mock.Storage() as (a:chararray, b:int);",
        "B = FILTER A by a == 'c';",
        "C = FOREACH A generate a, b + B.b;",
        "store C into 'output' using mock.Storage();"
    };
    for (String statement : script) {
        pigServer.registerQuery(statement);
    }
    pigServer.executeBatch();

    List<Tuple> stored = mockData.get("output");

    String[] expectedRows = {"('a', null)", "('b', null)"};
    List<Tuple> expected = Util.getTuplesFromConstantTupleStrings(expectedRows);

    // Comparison is done after sorting, so row order in the output is irrelevant.
    Util.checkQueryOutputsAfterSort(stored.iterator(), expected);
}
/**
 * Stores a two-row local input through MongoUpdateStorage into the
 * "update_simple" collection and checks that the collection then holds two
 * documents carrying the new f2 and f3 values.
 *
 * <p>The MongoClient and DBCursor opened for verification are closed in
 * {@code finally} blocks so they are released even when an assertion fails.
 */
@Test
public void testUpdate() throws Exception {
    BasicDBObject obj1 = new BasicDBObject().append("f1", "a").append("f2", "value1");
    BasicDBObject obj2 = new BasicDBObject().append("f1", "b").append("f2", "value2");
    insertData("testUpdate", obj1, obj2);

    String[] input = {"a\tnewValue1\t1", "b\tnewValue2\t2"};
    Util.createLocalInputFile("simple_input", input);

    pigServerLocal = new PigServer(ExecType.LOCAL);
    pigServerLocal.registerQuery(
        "A = LOAD 'simple_input' as (f1:chararray, f2:chararray, f3:int);");
    // Storage arguments, in order: query document, update document, schema.
    pigServerLocal.registerQuery(
        String.format(
            "STORE A INTO 'mongodb://localhost:27017/%s.%s' USING com.mongodb.hadoop.pig.MongoUpdateStorage("
                + " '{f1:\"\\\\$f1\"}',"
                + " '{\\\\$set:{f2:\"\\\\$f2\", f3:\"\\\\$f3\"}}',"
                + " 'f1:chararray, f2:chararray, f3:int'"
                + ");",
            dbName,
            "update_simple"));
    pigServerLocal.setBatchOn();
    pigServerLocal.executeBatch();

    MongoClient mc = new MongoClient();
    try {
        DBCollection col = mc.getDB(dbName).getCollection("update_simple");
        DBCursor cursor = col.find();
        try {
            assertEquals(2, cursor.size());

            DBObject result1 = cursor.next();
            assertEquals("a", result1.get("f1"));
            assertEquals("newValue1", result1.get("f2"));
            assertEquals(1, result1.get("f3"));

            DBObject result2 = cursor.next();
            assertEquals("b", result2.get("f1"));
            assertEquals("newValue2", result2.get("f2"));
            assertEquals(2, result2.get("f3"));
        } finally {
            // Release the server-side cursor.
            cursor.close();
        }
    } finally {
        // Release the client's connections regardless of the test outcome.
        mc.close();
    }
}
// org.apache.pig.test.udf.storefunc.PigPerformanceLoader() @Test public void testScriptL1() throws Exception { System.out.println("testScriptL1"); PigServer pigServer = new PigServer(pigContext); pigServer.setBatchOn(); // pigServer.registerJar("/home/kaituo/code/pig3/trunk/pigperf.jar"); pigServer.registerQuery( "A = load '" + page_viewsX.toString() + "/part-m-00000' using org.apache.pig.test.udf.storefunc.PigPerformanceLoader() as (user: chararray, action:int, timespent:int, query_term:chararray, ip_addr:long, timestamp:long,estimated_revenue:double, page_info:map[], page_links:bag{t:(p:map[])});"); // as (user, action, timespent, query_term, ip_addr, timestamp, estimated_revenue, page_info, // page_links);"); pigServer.registerQuery( "B = foreach A generate user, action, page_info, flatten(page_links) as page_links;"); // user, (int)action as action, (map[])page_info as page_info, // flatten((bag{tuple(map[])})page_links) as page_links;"); pigServer.registerQuery( "C = foreach B generate user, (action == 1 ? page_info#'a' : page_links#'b') as header;"); pigServer.registerQuery("D = group C by user parallel 40;"); pigServer.registerQuery("E = foreach D generate group, COUNT(C) as cnt;"); Map<Operator, DataBag> derivedData = pigServer.getExamples2("E"); assertTrue(derivedData != null); }
// See PIG-1434 @Test public void testUseScalarMultipleTimes() throws Exception { String[] input = {"1\t5", "2\t10", "3\t20"}; String outputY = BUILD_TEST_TMP + "table_testUseScalarMultipleTimesOutY"; TestScalarAliases.deleteDirectory(new File(outputY)); String outputZ = BUILD_TEST_TMP + "table_testUseScalarMultipleTimesOutZ"; TestScalarAliases.deleteDirectory(new File(outputZ)); // Test the use of scalars in expressions String inputPath = BUILD_TEST_TMP + "table_testUseScalarMultipleTimes"; TestScalarAliases.createLocalInputFile(inputPath, input); pigServer.setBatchOn(); pigServer.registerQuery("A = LOAD '" + inputPath + "' as (a0: long, a1: double);"); pigServer.registerQuery("B = group A all;"); pigServer.registerQuery("C = foreach B generate COUNT(A) as count, MAX(A.$1) as max;"); pigServer.registerQuery("Y = foreach A generate (a0 * C.count), (a1 / C.max);"); pigServer.registerQuery("Store Y into '" + outputY + "';"); pigServer.registerQuery("Z = foreach A generate (a1 + C.count), (a0 * C.max);"); pigServer.registerQuery("Store Z into '" + outputZ + "';"); // Test Multiquery store pigServer.executeBatch(); // Check output pigServer.registerQuery("M = LOAD '" + outputY + "' as (a0: int, a1: double);"); Iterator<Tuple> iter; Tuple t; iter = pigServer.openIterator("M"); t = iter.next(); assertTrue(t.toString().equals("(3,0.25)")); t = iter.next(); assertTrue(t.toString().equals("(6,0.5)")); t = iter.next(); assertTrue(t.toString().equals("(9,1.0)")); assertFalse(iter.hasNext()); // Check output pigServer.registerQuery("N = LOAD '" + outputZ + "' as (a0: double, a1: double);"); iter = pigServer.openIterator("N"); t = iter.next(); assertTrue(t.toString().equals("(8.0,20.0)")); t = iter.next(); assertTrue(t.toString().equals("(13.0,40.0)")); t = iter.next(); assertTrue(t.toString().equals("(23.0,60.0)")); assertFalse(iter.hasNext()); // Non batch mode iter = pigServer.openIterator("Y"); t = iter.next(); assertTrue(t.toString().equals("(3,0.25)")); t = iter.next(); 
assertTrue(t.toString().equals("(6,0.5)")); t = iter.next(); assertTrue(t.toString().equals("(9,1.0)")); assertFalse(iter.hasNext()); // Check in non-batch mode iter = pigServer.openIterator("Z"); t = iter.next(); assertTrue(t.toString().equals("(8.0,20.0)")); t = iter.next(); assertTrue(t.toString().equals("(13.0,40.0)")); t = iter.next(); assertTrue(t.toString().equals("(23.0,60.0)")); assertFalse(iter.hasNext()); }