/**
 * Migrates a load / foreach-with-FLATTEN / store script and compares the
 * result with an expected plan that is assembled by hand.
 *
 * After the plan comparison, the schema of the hand-built foreach is checked
 * to be (id:bytearray, d::id:int, s:bytearray), and lookups by alias are
 * checked to return the very same field objects as lookups by position.
 *
 * @throws Exception if building, migrating or stamping a plan fails
 */
public void testForeachPlan2() throws Exception {
    LogicalPlanTester lpt = new LogicalPlanTester();
    lpt.buildPlan("a = load '/test/d.txt' as (id, d:bag{t:(id:int, s)});");
    lpt.buildPlan("b = foreach a generate id, FLATTEN(d);");
    LogicalPlan plan = lpt.buildPlan("store b into '/test/empty';");

    // check basics
    org.apache.pig.experimental.logical.relational.LogicalPlan newPlan = migratePlan(plan);

    org.apache.pig.experimental.logical.relational.LogicalPlan expected =
        new org.apache.pig.experimental.logical.relational.LogicalPlan();

    // Schema of the load: (id:bytearray, d:bag{t:tuple(id:int, s:bytearray)}).
    LogicalSchema aschema = new LogicalSchema();
    aschema.addField(new LogicalSchema.LogicalFieldSchema("id", null, DataType.BYTEARRAY));
    LogicalSchema aschema2 = new LogicalSchema();
    LogicalSchema aschema3 = new LogicalSchema();
    aschema3.addField(new LogicalSchema.LogicalFieldSchema("id", null, DataType.INTEGER));
    aschema3.addField(new LogicalSchema.LogicalFieldSchema("s", null, DataType.BYTEARRAY));
    aschema2.addField(new LogicalSchema.LogicalFieldSchema("t", aschema3, DataType.TUPLE));
    aschema.addField(new LogicalSchema.LogicalFieldSchema("d", aschema2, DataType.BAG));

    LOLoad load = new LOLoad(
        new FileSpec("file:///test/d.txt", new FuncSpec("org.apache.pig.builtin.PigStorage")),
        aschema,
        expected);
    expected.add(load);

    // Foreach with an inner plan: two inner loads (columns 0 and 1) feeding one generate.
    LOForEach foreach2 = new LOForEach(expected);
    org.apache.pig.experimental.logical.relational.LogicalPlan innerPlan =
        new org.apache.pig.experimental.logical.relational.LogicalPlan();
    LOInnerLoad l1 = new LOInnerLoad(innerPlan, foreach2, 0);
    innerPlan.add(l1);
    LOInnerLoad l2 = new LOInnerLoad(innerPlan, foreach2, 1);
    // Every other operator in this test is added to its plan before it is
    // connected; l2 used to be connected without being added.
    innerPlan.add(l2);

    List<LogicalExpressionPlan> eps = new ArrayList<LogicalExpressionPlan>();
    LogicalExpressionPlan p1 = new LogicalExpressionPlan();
    // The expression object is not kept; presumably the constructor attaches
    // it to the plan passed in - TODO confirm.
    new ProjectExpression(p1, DataType.BYTEARRAY, 0, 0);
    LogicalExpressionPlan p2 = new LogicalExpressionPlan();
    new ProjectExpression(p2, DataType.BAG, 1, 0);
    eps.add(p1);
    eps.add(p2);

    // One flag per expression; the second is true, which appears to mark the
    // FLATTEN(d) output of the script - TODO confirm against LOGenerate.
    LOGenerate gen = new LOGenerate(innerPlan, eps, new boolean[] {false, true});
    innerPlan.add(gen);
    innerPlan.connect(l1, gen);
    innerPlan.connect(l2, gen);

    foreach2.setInnerPlan(innerPlan);
    expected.add(foreach2);

    LOStore s = new LOStore(
        expected,
        new FileSpec("file:///test/empty", new FuncSpec("org.apache.pig.builtin.PigStorage")));
    expected.add(s);

    expected.connect(load, foreach2);
    expected.connect(foreach2, s);

    // Run the uid stamper over the expected plan before comparing it.
    try {
        UidStamper stamper = new UidStamper(expected);
        stamper.visit();
    } catch (Exception e) {
        throw new VisitorException(e);
    }

    assertTrue(expected.isEqual(newPlan));

    // Schema produced by the hand-built foreach.
    LogicalSchema schema = foreach2.getSchema();
    aschema = new LogicalSchema();
    aschema.addField(new LogicalSchema.LogicalFieldSchema("id", null, DataType.BYTEARRAY));
    aschema.addField(new LogicalSchema.LogicalFieldSchema("d::id", null, DataType.INTEGER));
    aschema.addField(new LogicalSchema.LogicalFieldSchema("s", null, DataType.BYTEARRAY));
    assertTrue(schema.isEqual(aschema));
    // Identity (==) is intended: alias lookup must return the same object as index lookup.
    assertTrue(schema.getField("id") == schema.getField(0));
    assertTrue(schema.getField("d::id") == schema.getField(1));
}
public void testForeachSchema() throws Exception { // test flatten LogicalPlanTester lpt = new LogicalPlanTester(); lpt.buildPlan("a = load '/test/d.txt' as (id, d:tuple(v, s));"); LogicalPlan plan = lpt.buildPlan("b = foreach a generate id, FLATTEN(d);"); org.apache.pig.experimental.logical.relational.LogicalPlan newPlan = migratePlan(plan); LogicalRelationalOperator op = (LogicalRelationalOperator) newPlan.getSinks().get(0); LogicalSchema s2 = new LogicalSchema(); s2.addField(new LogicalSchema.LogicalFieldSchema("id", null, DataType.BYTEARRAY)); s2.addField(new LogicalSchema.LogicalFieldSchema("v", null, DataType.BYTEARRAY)); s2.addField(new LogicalSchema.LogicalFieldSchema("s", null, DataType.BYTEARRAY)); assertTrue(s2.isEqual(op.getSchema())); // test no flatten lpt = new LogicalPlanTester(); lpt.buildPlan("a = load '/test/d.txt' as (id, d:bag{t:(v, s)});"); plan = lpt.buildPlan("b = foreach a generate id, d;"); newPlan = migratePlan(plan); op = (LogicalRelationalOperator) newPlan.getSinks().get(0); LogicalSchema aschema = new LogicalSchema(); aschema.addField(new LogicalSchema.LogicalFieldSchema("id", null, DataType.BYTEARRAY)); LogicalSchema aschema2 = new LogicalSchema(); LogicalSchema aschema3 = new LogicalSchema(); aschema3.addField(new LogicalSchema.LogicalFieldSchema("v", null, DataType.BYTEARRAY)); aschema3.addField(new LogicalSchema.LogicalFieldSchema("s", null, DataType.BYTEARRAY)); aschema2.addField(new LogicalSchema.LogicalFieldSchema("t", aschema3, DataType.TUPLE)); aschema.addField(new LogicalSchema.LogicalFieldSchema("d", aschema2, DataType.BAG)); assertTrue(aschema.isEqual(op.getSchema())); // check with defined data type lpt = new LogicalPlanTester(); lpt.buildPlan("a = load '/test/d.txt' as (id, d:bag{t:(v:int, s)});"); lpt.buildPlan("b = foreach a generate id, FLATTEN(d);"); plan = lpt.buildPlan("store b into '/test/empty';"); newPlan = migratePlan(plan); op = (LogicalRelationalOperator) newPlan.getSinks().get(0); op = 
(LogicalRelationalOperator) newPlan.getPredecessors(op).get(0); LogicalSchema schema = op.getSchema(); aschema = new LogicalSchema(); aschema.addField(new LogicalSchema.LogicalFieldSchema("id", null, DataType.BYTEARRAY)); aschema.addField(new LogicalSchema.LogicalFieldSchema("v", null, DataType.INTEGER)); aschema.addField(new LogicalSchema.LogicalFieldSchema("s", null, DataType.BYTEARRAY)); assertTrue(schema.isEqual(aschema)); // test with add lpt = new LogicalPlanTester(); lpt.buildPlan("a = load '/test/d.txt' as (id, v:int, s:int);"); lpt.buildPlan("b = foreach a generate id, v+s;"); plan = lpt.buildPlan("store b into '/test/empty';"); newPlan = migratePlan(plan); op = (LogicalRelationalOperator) newPlan.getSinks().get(0); op = (LogicalRelationalOperator) newPlan.getPredecessors(op).get(0); schema = op.getSchema(); aschema = new LogicalSchema(); aschema.addField(new LogicalSchema.LogicalFieldSchema("id", null, DataType.BYTEARRAY)); aschema.addField(new LogicalSchema.LogicalFieldSchema(null, null, DataType.INTEGER)); assertTrue(schema.isEqual(aschema)); }