public void testSimplePlan() throws Exception { LogicalPlanTester lpt = new LogicalPlanTester(); lpt.buildPlan("a = load 'd.txt';"); lpt.buildPlan("b = filter a by $0==NULL;"); LogicalPlan plan = lpt.buildPlan("store b into 'empty';"); // check basics org.apache.pig.experimental.logical.relational.LogicalPlan newPlan = migratePlan(plan); assertEquals(3, newPlan.size()); assertEquals(newPlan.getSources().size(), 1); // check load LogicalRelationalOperator op = (LogicalRelationalOperator) newPlan.getSources().get(0); assertEquals(op.getClass(), org.apache.pig.experimental.logical.relational.LOLoad.class); // check filter op = (LogicalRelationalOperator) newPlan.getSuccessors(op).get(0); assertEquals(op.getClass(), org.apache.pig.experimental.logical.relational.LOFilter.class); LogicalExpressionPlan exp = ((org.apache.pig.experimental.logical.relational.LOFilter) op).getFilterPlan(); EqualExpression eq = (EqualExpression) exp.getSources().get(0); assertEquals(eq.getLhs().getClass(), ProjectExpression.class); assertEquals(((ProjectExpression) eq.getLhs()).getColNum(), 0); assertEquals(((ProjectExpression) eq.getLhs()).getInputNum(), 0); assertEquals(eq.getRhs().getClass(), ConstantExpression.class); // check store op = (LogicalRelationalOperator) newPlan.getSuccessors(op).get(0); assertEquals(op.getClass(), org.apache.pig.experimental.logical.relational.LOStore.class); }
/**
 * Migrates a single-input {@code group a by name} script and checks the cogroup operator:
 * its two-field output schema (group key plus a bag of the load's fields carrying the
 * load's uids) and its single grouping expression plan.
 *
 * @throws Exception if building or migrating the plan fails
 */
public void testCoGroup() throws Exception {
    LogicalPlanTester tester = new LogicalPlanTester();
    tester.buildPlan("a = load '/test/d.txt' as (name:chararray, age:int, gpa:float);");
    tester.buildPlan("b = group a by name;");
    LogicalPlan oldPlan = tester.buildPlan("store b into '/test/empty';");

    // Migrate and collect the uids handed out to the load's fields.
    org.apache.pig.experimental.logical.relational.LogicalPlan newPlan = migratePlan(oldPlan);
    LogicalRelationalOperator load = (LogicalRelationalOperator) newPlan.getSources().get(0);
    LogicalSchema loadSchema = load.getSchema();
    Set<Long> uids = getAllUids(loadSchema);

    LogicalRelationalOperator op =
            (LogicalRelationalOperator) newPlan.getSuccessors(load).get(0);
    assertEquals(LOCogroup.class, op.getClass());

    LogicalSchema schema = op.getSchema();
    assertEquals(2, schema.size());

    // Field 0: the group key, which must carry a uid not used by the load.
    LogicalSchema.LogicalFieldSchema groupField = schema.getField(0);
    assertEquals(DataType.CHARARRAY, groupField.type);
    assertEquals(false, uids.contains(groupField.uid));
    assertEquals(0, groupField.alias.compareTo("group"));

    // Field 1: a bag whose inner fields mirror the load schema, uids included.
    LogicalSchema.LogicalFieldSchema bagField = schema.getField(1);
    assertEquals(DataType.BAG, bagField.type);

    String[] expectedAliases = {"name", "age", "gpa"};
    byte[] expectedTypes = {DataType.CHARARRAY, DataType.INTEGER, DataType.FLOAT};
    for (int col = 0; col < expectedAliases.length; col++) {
        LogicalSchema.LogicalFieldSchema inner = bagField.schema.getField(col);
        assertEquals(expectedTypes[col], inner.type);
        assertEquals(0, inner.alias.compareTo(expectedAliases[col]));
        assertEquals(loadSchema.getField(col).uid, inner.uid);
    }

    // The bag's own uid must differ from every load uid and from the group key's uid.
    uids.add(Long.valueOf(groupField.uid));
    assertEquals(false, uids.contains(bagField.uid));

    assertEquals(LOCogroup.class, newPlan.getSuccessors(load).get(0).getClass());
    LOCogroup cogroup = (LOCogroup) newPlan.getSuccessors(load).get(0);

    // One input, one grouping expression: a projection of column 0 of input 0.
    MultiMap<Integer, LogicalExpressionPlan> expressionPlans = cogroup.getExpressionPlans();
    assertEquals(1, expressionPlans.size());
    List<LogicalExpressionPlan> keyPlans =
            (List<LogicalExpressionPlan>) expressionPlans.get(Integer.valueOf(0));
    assertEquals(1, keyPlans.size());

    LogicalExpressionPlan keyPlan = keyPlans.get(0);
    assertEquals(1, keyPlan.getSinks().size());
    assertEquals(ProjectExpression.class, keyPlan.getSinks().get(0).getClass());

    ProjectExpression projection = (ProjectExpression) keyPlan.getSinks().get(0);
    assertEquals(loadSchema.getField(0).uid, projection.getUid());
    assertEquals(0, projection.getColNum());
    assertEquals(0, projection.getInputNum());
}
/**
 * Checks that handing an empty physical plan to the map-reduce compiler raises an
 * {@code MRCompilerException} whose error code is 2053.
 *
 * @throws Exception if building the logical or physical plan fails
 */
@Test
public void testMRCompilerErr() throws Exception {
    planTester.buildPlan("a = load 'input';");
    LogicalPlan logical = planTester.buildPlan("b = filter a by $0 > 5;");

    PhysicalPlan physical = Util.buildPhysicalPlan(logical, pc);
    physical.remove(physical.getRoots().get(0));

    // NOTE(review): the plan prepared above is never passed to the compiler; a fresh,
    // empty PhysicalPlan is compiled instead. Confirm this is intentional.
    try {
        PhysicalPlan emptyPlan = new PhysicalPlan();
        Util.buildMRPlan(emptyPlan, pc);
        fail("Expected failure.");
    } catch (MRCompilerException mrce) {
        int errorCode = mrce.getErrorCode();
        assertTrue(errorCode == 2053);
    }
}
/**
 * Ensures that an order-by with {@code parallel} followed by a limit (i.e. top k) always
 * compiles to the expected number of map-reduce jobs: the chain of operators reachable
 * from the first root must contain exactly four.
 *
 * @throws Exception if any plan in the pipeline cannot be built
 */
@Test
public void testNumReducersInLimitWithParallel() throws Exception {
    planTester.buildPlan("a = load 'input';");
    planTester.buildPlan("b = order a by $0 parallel 2;");
    planTester.buildPlan("c = limit b 10;");
    LogicalPlan logical = planTester.buildPlan("store c into '/tmp';");

    PhysicalPlan physical = Util.buildPhysicalPlan(logical, pc);
    MROperPlan mrPlan = Util.buildMRPlan(physical, pc);

    // Walk the chain from the first root, following the first successor each time.
    int jobs = 1;
    for (MapReduceOper current = mrPlan.getRoots().get(0);
            mrPlan.getSuccessors(current) != null;
            current = mrPlan.getSuccessors(current).get(0)) {
        ++jobs;
    }

    assertTrue(jobs == 4);
}
@Test public void testImplicitSplitInCoGroup2() throws Exception { // this query is similar to the one reported in JIRA - PIG-537 LogicalPlanTester planTester = new LogicalPlanTester(); planTester.buildPlan("a = load 'file1' using PigStorage(':') as (name:chararray, marks:int);"); planTester.buildPlan( "b = load 'file2' using PigStorage(':') as (name:chararray, rank:chararray);"); planTester.buildPlan("c = cogroup a by name, b by name;"); planTester.buildPlan("d = foreach c generate group, FLATTEN(a.marks) as newmarks;"); planTester.buildPlan("e = cogroup a by marks, d by newmarks;"); LogicalPlan plan = planTester.buildPlan("f = foreach e generate group, flatten(a), flatten(d);"); // Set the logical plan values correctly in all the operators PlanSetter ps = new PlanSetter(plan); ps.visit(); // run through validator CompilationMessageCollector collector = new CompilationMessageCollector(); TypeCheckingValidator typeValidator = new TypeCheckingValidator(); typeValidator.validate(plan, collector); printMessageCollector(collector); printTypeGraph(plan); if (collector.hasError()) { throw new Exception("Error during type checking"); } // this will run ImplicitSplitInserter TestLogicalOptimizer.optimizePlan(plan); // get Schema of leaf and compare: Schema expectedSchema = Util.getSchemaFromString( "grp: int,A::username: chararray,A::marks: int,AB::group: chararray,AB::newmarks: int"); assertTrue(Schema.equals(expectedSchema, plan.getLeaves().get(0).getSchema(), false, true)); }
public void testCoGroup4() throws Exception { LogicalPlanTester lpt = new LogicalPlanTester(); lpt.buildPlan("a = load '/test/d.txt' as (name:chararray, age:int, gpa:float);"); lpt.buildPlan("b = load '/test/e.txt' as (name:chararray, age:int, blah:chararray );"); lpt.buildPlan("c = group a by ( name, age ), b by ( name, age );"); LogicalPlan plan = lpt.buildPlan("store c into '/test/empty';"); // check basics org.apache.pig.experimental.logical.relational.LogicalPlan newPlan = migratePlan(plan); assertEquals( LOCogroup.class, newPlan.getSuccessors(newPlan.getSources().get(0)).get(0).getClass()); LOCogroup cogroup = (LOCogroup) newPlan.getSuccessors(newPlan.getSources().get(0)).get(0); // Reason for this strange way of getting the load schema is to maintain the sequence correctly LogicalSchema loadSchema = ((LogicalRelationalOperator) newPlan.getPredecessors(cogroup).get(0)).getSchema(); LogicalSchema load2Schema = ((LogicalRelationalOperator) newPlan.getPredecessors(cogroup).get(1)).getSchema(); Set<Long> uids = getAllUids(loadSchema); uids.addAll(getAllUids(load2Schema)); LogicalRelationalOperator op = (LogicalRelationalOperator) newPlan.getSuccessors(newPlan.getSources().get(0)).get(0); assertEquals(LOCogroup.class, op.getClass()); LogicalSchema schema = op.getSchema(); assertEquals(3, schema.size()); assertEquals(DataType.TUPLE, schema.getField(0).type); assertEquals(false, uids.contains(schema.getField(0).uid)); assertEquals(0, schema.getField(0).alias.compareTo("group")); assertEquals(DataType.CHARARRAY, schema.getField(0).schema.getField(0).type); assertEquals(0, schema.getField(0).schema.getField(0).alias.compareTo("name")); assertEquals(DataType.INTEGER, schema.getField(0).schema.getField(1).type); assertEquals(0, schema.getField(0).schema.getField(1).alias.compareTo("age")); assertEquals(DataType.BAG, schema.getField(1).type); assertEquals(DataType.CHARARRAY, schema.getField(1).schema.getField(0).type); assertEquals(0, 
schema.getField(1).schema.getField(0).alias.compareTo("name")); assertEquals(loadSchema.getField(0).uid, schema.getField(1).schema.getField(0).uid); assertEquals(DataType.INTEGER, schema.getField(1).schema.getField(1).type); assertEquals(0, schema.getField(1).schema.getField(1).alias.compareTo("age")); assertEquals(loadSchema.getField(1).uid, schema.getField(1).schema.getField(1).uid); assertEquals(DataType.FLOAT, schema.getField(1).schema.getField(2).type); assertEquals(0, schema.getField(1).schema.getField(2).alias.compareTo("gpa")); assertEquals(loadSchema.getField(2).uid, schema.getField(1).schema.getField(2).uid); assertEquals(DataType.BAG, schema.getField(2).type); assertEquals(DataType.CHARARRAY, schema.getField(2).schema.getField(0).type); assertEquals(0, schema.getField(2).schema.getField(0).alias.compareTo("name")); assertEquals(load2Schema.getField(0).uid, schema.getField(2).schema.getField(0).uid); assertEquals(DataType.INTEGER, schema.getField(2).schema.getField(1).type); assertEquals(0, schema.getField(2).schema.getField(1).alias.compareTo("age")); assertEquals(load2Schema.getField(1).uid, schema.getField(2).schema.getField(1).uid); assertEquals(DataType.CHARARRAY, schema.getField(2).schema.getField(2).type); assertEquals(0, schema.getField(2).schema.getField(2).alias.compareTo("blah")); assertEquals(load2Schema.getField(2).uid, schema.getField(2).schema.getField(2).uid); // We are doing Uid tests at the end as the uids should not repeat assertEquals(false, uids.contains(schema.getField(0).schema.getField(0).uid)); assertEquals(false, uids.contains(schema.getField(0).schema.getField(1).uid)); assertEquals(false, uids.contains(schema.getField(1).uid)); uids.add(schema.getField(1).uid); assertEquals(false, uids.contains(schema.getField(2).uid)); MultiMap<Integer, LogicalExpressionPlan> expressionPlans = cogroup.getExpressionPlans(); assertEquals(2, expressionPlans.size()); List<LogicalExpressionPlan> plans = (List<LogicalExpressionPlan>) 
expressionPlans.get(Integer.valueOf(0)); assertEquals(2, plans.size()); List<LogicalExpressionPlan> plans2 = (List<LogicalExpressionPlan>) expressionPlans.get(Integer.valueOf(1)); assertEquals(2, plans2.size()); LogicalExpressionPlan exprPlan = plans.get(0); assertEquals(1, exprPlan.getSinks().size()); assertEquals(ProjectExpression.class, exprPlan.getSinks().get(0).getClass()); ProjectExpression prj = (ProjectExpression) exprPlan.getSinks().get(0); assertEquals(loadSchema.getField(0).uid, prj.getUid()); assertEquals(0, prj.getColNum()); assertEquals(0, prj.getInputNum()); LogicalExpressionPlan exprPlan2 = plans.get(1); assertEquals(1, exprPlan2.getSinks().size()); assertEquals(ProjectExpression.class, exprPlan2.getSinks().get(0).getClass()); ProjectExpression prj2 = (ProjectExpression) exprPlan2.getSinks().get(0); assertEquals(loadSchema.getField(1).uid, prj2.getUid()); assertEquals(1, prj2.getColNum()); assertEquals(0, prj2.getInputNum()); LogicalExpressionPlan exprPlan3 = plans2.get(0); assertEquals(1, exprPlan3.getSinks().size()); assertEquals(ProjectExpression.class, exprPlan3.getSinks().get(0).getClass()); ProjectExpression prj3 = (ProjectExpression) exprPlan3.getSinks().get(0); assertEquals(load2Schema.getField(0).uid, prj3.getUid()); assertEquals(0, prj3.getColNum()); assertEquals(1, prj3.getInputNum()); LogicalExpressionPlan exprPlan4 = plans2.get(1); assertEquals(1, exprPlan4.getSinks().size()); assertEquals(ProjectExpression.class, exprPlan4.getSinks().get(0).getClass()); ProjectExpression prj4 = (ProjectExpression) exprPlan4.getSinks().get(0); assertEquals(load2Schema.getField(1).uid, prj4.getUid()); assertEquals(1, prj4.getColNum()); assertEquals(1, prj4.getInputNum()); }
/**
 * Migrates a {@code foreach ... generate id, FLATTEN(d)} script, hand-builds the plan the
 * migration is expected to produce, and checks that the two plans are equal. Also checks
 * the hand-built foreach's schema and its lookup by alias.
 *
 * @throws Exception if building, migrating, or uid-stamping a plan fails
 */
public void testForeachPlan2() throws Exception {
    LogicalPlanTester tester = new LogicalPlanTester();
    tester.buildPlan("a = load '/test/d.txt' as (id, d:bag{t:(id:int, s)});");
    tester.buildPlan("b = foreach a generate id, FLATTEN(d);");
    LogicalPlan oldPlan = tester.buildPlan("store b into '/test/empty';");

    org.apache.pig.experimental.logical.relational.LogicalPlan newPlan = migratePlan(oldPlan);

    org.apache.pig.experimental.logical.relational.LogicalPlan expected =
            new org.apache.pig.experimental.logical.relational.LogicalPlan();

    // Load schema: (id: bytearray, d: bag{t: (id: int, s: bytearray)}).
    LogicalSchema loadSchema = new LogicalSchema();
    loadSchema.addField(new LogicalSchema.LogicalFieldSchema("id", null, DataType.BYTEARRAY));
    LogicalSchema bagSchema = new LogicalSchema();
    LogicalSchema tupleSchema = new LogicalSchema();
    tupleSchema.addField(new LogicalSchema.LogicalFieldSchema("id", null, DataType.INTEGER));
    tupleSchema.addField(new LogicalSchema.LogicalFieldSchema("s", null, DataType.BYTEARRAY));
    bagSchema.addField(new LogicalSchema.LogicalFieldSchema("t", tupleSchema, DataType.TUPLE));
    loadSchema.addField(new LogicalSchema.LogicalFieldSchema("d", bagSchema, DataType.BAG));

    LOLoad load =
            new LOLoad(
                    new FileSpec(
                            "file:///test/d.txt",
                            new FuncSpec("org.apache.pig.builtin.PigStorage")),
                    loadSchema,
                    expected);
    expected.add(load);

    // Foreach with an inner plan of two inner loads feeding one generate.
    LOForEach foreach2 = new LOForEach(expected);
    org.apache.pig.experimental.logical.relational.LogicalPlan innerPlan =
            new org.apache.pig.experimental.logical.relational.LogicalPlan();
    LOInnerLoad innerLoad0 = new LOInnerLoad(innerPlan, foreach2, 0);
    innerPlan.add(innerLoad0);
    // NOTE(review): unlike the first inner load, this one is only connected below and
    // never passed to innerPlan.add(...) - confirm whether that is intentional.
    LOInnerLoad innerLoad1 = new LOInnerLoad(innerPlan, foreach2, 1);

    // The projections are constructed only for their effect on the expression plans.
    List<LogicalExpressionPlan> generatePlans = new ArrayList<LogicalExpressionPlan>();
    LogicalExpressionPlan idPlan = new LogicalExpressionPlan();
    new ProjectExpression(idPlan, DataType.BYTEARRAY, 0, 0);
    LogicalExpressionPlan bagPlan = new LogicalExpressionPlan();
    new ProjectExpression(bagPlan, DataType.BAG, 1, 0);
    generatePlans.add(idPlan);
    generatePlans.add(bagPlan);

    LOGenerate generate = new LOGenerate(innerPlan, generatePlans, new boolean[] {false, true});
    innerPlan.add(generate);
    innerPlan.connect(innerLoad0, generate);
    innerPlan.connect(innerLoad1, generate);

    foreach2.setInnerPlan(innerPlan);
    expected.add(foreach2);

    LOStore store =
            new LOStore(
                    expected,
                    new FileSpec(
                            "file:///test/empty",
                            new FuncSpec("org.apache.pig.builtin.PigStorage")));
    expected.add(store);

    expected.connect(load, foreach2);
    expected.connect(foreach2, store);

    // Stamp uids onto the hand-built plan before comparing it with the migrated one.
    try {
        UidStamper stamper = new UidStamper(expected);
        stamper.visit();
    } catch (Exception e) {
        throw new VisitorException(e);
    }

    assertTrue(expected.isEqual(newPlan));

    // Flattened output schema: (id: bytearray, d::id: int, s: bytearray).
    LogicalSchema actualSchema = foreach2.getSchema();
    LogicalSchema flattened = new LogicalSchema();
    flattened.addField(new LogicalSchema.LogicalFieldSchema("id", null, DataType.BYTEARRAY));
    flattened.addField(new LogicalSchema.LogicalFieldSchema("d::id", null, DataType.INTEGER));
    flattened.addField(new LogicalSchema.LogicalFieldSchema("s", null, DataType.BYTEARRAY));
    assertTrue(actualSchema.isEqual(flattened));

    // Alias lookup must return the very same field objects as positional lookup.
    assertTrue(actualSchema.getField("id") == actualSchema.getField(0));
    assertTrue(actualSchema.getField("d::id") == actualSchema.getField(1));
}
public void testForeachSchema() throws Exception { // test flatten LogicalPlanTester lpt = new LogicalPlanTester(); lpt.buildPlan("a = load '/test/d.txt' as (id, d:tuple(v, s));"); LogicalPlan plan = lpt.buildPlan("b = foreach a generate id, FLATTEN(d);"); org.apache.pig.experimental.logical.relational.LogicalPlan newPlan = migratePlan(plan); LogicalRelationalOperator op = (LogicalRelationalOperator) newPlan.getSinks().get(0); LogicalSchema s2 = new LogicalSchema(); s2.addField(new LogicalSchema.LogicalFieldSchema("id", null, DataType.BYTEARRAY)); s2.addField(new LogicalSchema.LogicalFieldSchema("v", null, DataType.BYTEARRAY)); s2.addField(new LogicalSchema.LogicalFieldSchema("s", null, DataType.BYTEARRAY)); assertTrue(s2.isEqual(op.getSchema())); // test no flatten lpt = new LogicalPlanTester(); lpt.buildPlan("a = load '/test/d.txt' as (id, d:bag{t:(v, s)});"); plan = lpt.buildPlan("b = foreach a generate id, d;"); newPlan = migratePlan(plan); op = (LogicalRelationalOperator) newPlan.getSinks().get(0); LogicalSchema aschema = new LogicalSchema(); aschema.addField(new LogicalSchema.LogicalFieldSchema("id", null, DataType.BYTEARRAY)); LogicalSchema aschema2 = new LogicalSchema(); LogicalSchema aschema3 = new LogicalSchema(); aschema3.addField(new LogicalSchema.LogicalFieldSchema("v", null, DataType.BYTEARRAY)); aschema3.addField(new LogicalSchema.LogicalFieldSchema("s", null, DataType.BYTEARRAY)); aschema2.addField(new LogicalSchema.LogicalFieldSchema("t", aschema3, DataType.TUPLE)); aschema.addField(new LogicalSchema.LogicalFieldSchema("d", aschema2, DataType.BAG)); assertTrue(aschema.isEqual(op.getSchema())); // check with defined data type lpt = new LogicalPlanTester(); lpt.buildPlan("a = load '/test/d.txt' as (id, d:bag{t:(v:int, s)});"); lpt.buildPlan("b = foreach a generate id, FLATTEN(d);"); plan = lpt.buildPlan("store b into '/test/empty';"); newPlan = migratePlan(plan); op = (LogicalRelationalOperator) newPlan.getSinks().get(0); op = 
(LogicalRelationalOperator) newPlan.getPredecessors(op).get(0); LogicalSchema schema = op.getSchema(); aschema = new LogicalSchema(); aschema.addField(new LogicalSchema.LogicalFieldSchema("id", null, DataType.BYTEARRAY)); aschema.addField(new LogicalSchema.LogicalFieldSchema("v", null, DataType.INTEGER)); aschema.addField(new LogicalSchema.LogicalFieldSchema("s", null, DataType.BYTEARRAY)); assertTrue(schema.isEqual(aschema)); // test with add lpt = new LogicalPlanTester(); lpt.buildPlan("a = load '/test/d.txt' as (id, v:int, s:int);"); lpt.buildPlan("b = foreach a generate id, v+s;"); plan = lpt.buildPlan("store b into '/test/empty';"); newPlan = migratePlan(plan); op = (LogicalRelationalOperator) newPlan.getSinks().get(0); op = (LogicalRelationalOperator) newPlan.getPredecessors(op).get(0); schema = op.getSchema(); aschema = new LogicalSchema(); aschema.addField(new LogicalSchema.LogicalFieldSchema("id", null, DataType.BYTEARRAY)); aschema.addField(new LogicalSchema.LogicalFieldSchema(null, null, DataType.INTEGER)); assertTrue(schema.isEqual(aschema)); }
public void testJoinPlan() throws Exception { LogicalPlanTester lpt = new LogicalPlanTester(); lpt.buildPlan("a = load 'd1.txt' as (id, c);"); lpt.buildPlan("b = load 'd2.txt'as (id, c);"); lpt.buildPlan("c = join a by id, b by c;"); lpt.buildPlan("d = filter c by a::id==NULL AND b::c==NULL;"); LogicalPlan plan = lpt.buildPlan("store d into 'empty';"); // check basics org.apache.pig.experimental.logical.relational.LogicalPlan newPlan = migratePlan(plan); assertEquals(5, newPlan.size()); assertEquals(newPlan.getSources().size(), 2); // check load and join LogicalRelationalOperator op = (LogicalRelationalOperator) newPlan.getSuccessors(newPlan.getSources().get(0)).get(0); assertEquals(op.getClass(), org.apache.pig.experimental.logical.relational.LOJoin.class); assertEquals(((LOJoin) op).getJoinType(), LOJoin.JOINTYPE.HASH); LogicalRelationalOperator l1 = (LogicalRelationalOperator) newPlan.getPredecessors(op).get(0); assertEquals(l1.getClass(), org.apache.pig.experimental.logical.relational.LOLoad.class); assertEquals(l1.getAlias(), "a"); LogicalRelationalOperator l2 = (LogicalRelationalOperator) newPlan.getPredecessors(op).get(1); assertEquals(l2.getClass(), org.apache.pig.experimental.logical.relational.LOLoad.class); assertEquals(l2.getAlias(), "b"); // check join input plans LogicalExpressionPlan p1 = ((LOJoin) op).getJoinPlan(0).iterator().next(); assertEquals(p1.size(), 1); ProjectExpression prj = (ProjectExpression) p1.getSources().get(0); assertEquals(prj.getInputNum(), 0); assertEquals(prj.getColNum(), 0); LogicalExpressionPlan p2 = ((LOJoin) op).getJoinPlan(1).iterator().next(); assertEquals(p2.size(), 1); prj = (ProjectExpression) p2.getSources().get(0); assertEquals(prj.getInputNum(), 1); assertEquals(prj.getColNum(), 1); // check filter op = (LogicalRelationalOperator) newPlan.getSuccessors(op).get(0); assertEquals(op.getClass(), org.apache.pig.experimental.logical.relational.LOFilter.class); LogicalExpressionPlan exp = 
((org.apache.pig.experimental.logical.relational.LOFilter) op).getFilterPlan(); AndExpression ae = (AndExpression) exp.getSources().get(0); EqualExpression eq = (EqualExpression) exp.getSuccessors(ae).get(0); assertEquals(eq.getLhs().getClass(), ProjectExpression.class); assertEquals(((ProjectExpression) eq.getLhs()).getColNum(), 0); assertEquals(((ProjectExpression) eq.getLhs()).getInputNum(), 0); assertEquals(eq.getRhs().getClass(), ConstantExpression.class); eq = (EqualExpression) exp.getSuccessors(ae).get(1); assertEquals(eq.getLhs().getClass(), ProjectExpression.class); assertEquals(((ProjectExpression) eq.getLhs()).getColNum(), 3); assertEquals(((ProjectExpression) eq.getLhs()).getInputNum(), 0); assertEquals(eq.getRhs().getClass(), ConstantExpression.class); // check store op = (LogicalRelationalOperator) newPlan.getSuccessors(op).get(0); assertEquals(op.getClass(), org.apache.pig.experimental.logical.relational.LOStore.class); }