/**
   * Collects the uid of every field reachable from the given schema.
   *
   * <p>Bag and tuple fields that carry a nested schema are descended into recursively. The uid of
   * such a container field is recorded in addition to the uids of its nested fields, so the
   * result covers the whole schema tree.
   *
   * @param schema schema to walk; {@code null} is treated like a schema without fields
   * @return a new {@link HashSet} holding the uids that were found; empty when {@code schema} is
   *     {@code null}
   */
  private Set<Long> getAllUids(LogicalSchema schema) {
    Set<Long> uids = new HashSet<Long>();
    if (schema == null) {
      return uids;
    }

    for (LogicalFieldSchema fieldSchema : schema.getFields()) {
      boolean isContainer =
          fieldSchema.type == DataType.BAG || fieldSchema.type == DataType.TUPLE;
      if (isContainer && fieldSchema.schema != null) {
        uids.addAll(getAllUids(fieldSchema.schema));
      }
      // Always record the field's own uid: a bag/tuple field has a uid of its own besides the
      // uids of the fields nested inside it.
      uids.add(fieldSchema.uid);
    }
    return uids;
  }
  /**
   * Migrates a plan that cogroups two loads, each keyed on {@code (name, age)}, and checks the
   * resulting {@code LOCogroup}.
   *
   * <p>The test verifies:
   *
   * <ul>
   *   <li>the output schema: a {@code group} tuple followed by one bag per input;
   *   <li>that the columns inside each bag carry the uids of the matching load columns;
   *   <li>that the uids of the group tuple, of its key columns and of the bags do not appear
   *       among the load uids;
   *   <li>that each input has two key expression plans, each ending in a single projection of
   *       the expected column.
   * </ul>
   *
   * @throws Exception if building or migrating the plan fails
   */
  public void testCoGroup4() throws Exception {
    LogicalPlanTester lpt = new LogicalPlanTester();
    lpt.buildPlan("a = load '/test/d.txt' as (name:chararray, age:int, gpa:float);");
    lpt.buildPlan("b = load '/test/e.txt' as (name:chararray, age:int, blah:chararray );");
    lpt.buildPlan("c = group a by ( name, age ), b by ( name, age );");
    LogicalPlan plan = lpt.buildPlan("store c into '/test/empty';");

    org.apache.pig.experimental.logical.relational.LogicalPlan newPlan = migratePlan(plan);

    // The operator right after the first source has to be the cogroup. It is fetched once and
    // reused for the schema checks and the expression plan checks.
    LogicalRelationalOperator op =
        (LogicalRelationalOperator) newPlan.getSuccessors(newPlan.getSources().get(0)).get(0);
    assertEquals(LOCogroup.class, op.getClass());
    LOCogroup cogroup = (LOCogroup) op;

    // The load schemas are taken from the cogroup's predecessors rather than from the plan
    // sources in order to maintain the sequence of the two loads.
    LogicalSchema loadSchema =
        ((LogicalRelationalOperator) newPlan.getPredecessors(cogroup).get(0)).getSchema();
    LogicalSchema load2Schema =
        ((LogicalRelationalOperator) newPlan.getPredecessors(cogroup).get(1)).getSchema();

    Set<Long> uids = getAllUids(loadSchema);
    uids.addAll(getAllUids(load2Schema));

    LogicalSchema schema = op.getSchema();
    assertEquals(3, schema.size());

    // Field 0: the group key, a tuple of (name, age) with a uid unknown to the loads.
    assertEquals(DataType.TUPLE, schema.getField(0).type);
    assertFalse(uids.contains(schema.getField(0).uid));
    assertEquals("group", schema.getField(0).alias);
    LogicalSchema keySchema = schema.getField(0).schema;
    assertEquals(DataType.CHARARRAY, keySchema.getField(0).type);
    assertEquals("name", keySchema.getField(0).alias);
    assertEquals(DataType.INTEGER, keySchema.getField(1).type);
    assertEquals("age", keySchema.getField(1).alias);

    // Field 1: bag with the columns of the first load, uids carried over unchanged.
    assertEquals(DataType.BAG, schema.getField(1).type);
    LogicalSchema firstBag = schema.getField(1).schema;
    assertEquals(DataType.CHARARRAY, firstBag.getField(0).type);
    assertEquals("name", firstBag.getField(0).alias);
    assertEquals(loadSchema.getField(0).uid, firstBag.getField(0).uid);
    assertEquals(DataType.INTEGER, firstBag.getField(1).type);
    assertEquals("age", firstBag.getField(1).alias);
    assertEquals(loadSchema.getField(1).uid, firstBag.getField(1).uid);
    assertEquals(DataType.FLOAT, firstBag.getField(2).type);
    assertEquals("gpa", firstBag.getField(2).alias);
    assertEquals(loadSchema.getField(2).uid, firstBag.getField(2).uid);

    // Field 2: bag with the columns of the second load, uids carried over unchanged.
    assertEquals(DataType.BAG, schema.getField(2).type);
    LogicalSchema secondBag = schema.getField(2).schema;
    assertEquals(DataType.CHARARRAY, secondBag.getField(0).type);
    assertEquals("name", secondBag.getField(0).alias);
    assertEquals(load2Schema.getField(0).uid, secondBag.getField(0).uid);
    assertEquals(DataType.INTEGER, secondBag.getField(1).type);
    assertEquals("age", secondBag.getField(1).alias);
    assertEquals(load2Schema.getField(1).uid, secondBag.getField(1).uid);
    assertEquals(DataType.CHARARRAY, secondBag.getField(2).type);
    assertEquals("blah", secondBag.getField(2).alias);
    assertEquals(load2Schema.getField(2).uid, secondBag.getField(2).uid);

    // We are doing the uid tests at the end as the uids should not repeat.
    assertFalse(uids.contains(keySchema.getField(0).uid));
    assertFalse(uids.contains(keySchema.getField(1).uid));
    assertFalse(uids.contains(schema.getField(1).uid));
    uids.add(schema.getField(1).uid);
    assertFalse(uids.contains(schema.getField(2).uid));

    // One list of key expression plans per input, two keys each. Every plan must end in a
    // single projection of key column `col` of input `input`.
    MultiMap<Integer, LogicalExpressionPlan> expressionPlans = cogroup.getExpressionPlans();
    assertEquals(2, expressionPlans.size());
    for (int input = 0; input < 2; input++) {
      LogicalSchema inputSchema = (input == 0) ? loadSchema : load2Schema;
      List<LogicalExpressionPlan> keyPlans =
          (List<LogicalExpressionPlan>) expressionPlans.get(Integer.valueOf(input));
      assertEquals(2, keyPlans.size());

      for (int col = 0; col < 2; col++) {
        LogicalExpressionPlan exprPlan = keyPlans.get(col);
        assertEquals(1, exprPlan.getSinks().size());
        assertEquals(ProjectExpression.class, exprPlan.getSinks().get(0).getClass());
        ProjectExpression prj = (ProjectExpression) exprPlan.getSinks().get(0);
        assertEquals(inputSchema.getField(col).uid, prj.getUid());
        assertEquals(col, prj.getColNum());
        assertEquals(input, prj.getInputNum());
      }
    }
  }
  /**
   * Migrates a foreach that generates {@code id} and {@code FLATTEN(d)}, where {@code d} is a bag
   * of {@code (id:int, s)} tuples, and compares the result with a plan built by hand.
   *
   * <p>The expected plan consists of a load, a foreach whose inner plan has two inner loads
   * feeding one generate (second output flattened), and a store. After the plans have been
   * compared, the schema of the hand-built foreach is checked: it must consist of {@code id},
   * {@code d::id} and {@code s}, and lookup by alias must return the same field objects as
   * lookup by position.
   *
   * @throws Exception if building, migrating or stamping a plan fails
   */
  public void testForeachPlan2() throws Exception {
    LogicalPlanTester lpt = new LogicalPlanTester();
    lpt.buildPlan("a = load '/test/d.txt' as (id, d:bag{t:(id:int, s)});");
    lpt.buildPlan("b = foreach a generate id, FLATTEN(d);");
    LogicalPlan plan = lpt.buildPlan("store b into '/test/empty';");

    org.apache.pig.experimental.logical.relational.LogicalPlan newPlan = migratePlan(plan);

    org.apache.pig.experimental.logical.relational.LogicalPlan expected =
        new org.apache.pig.experimental.logical.relational.LogicalPlan();

    // Load schema: (id:bytearray, d:bag{t:tuple(id:int, s:bytearray)})
    LogicalSchema aschema = new LogicalSchema();
    aschema.addField(new LogicalSchema.LogicalFieldSchema("id", null, DataType.BYTEARRAY));
    LogicalSchema aschema2 = new LogicalSchema();
    LogicalSchema aschema3 = new LogicalSchema();
    aschema3.addField(new LogicalSchema.LogicalFieldSchema("id", null, DataType.INTEGER));
    aschema3.addField(new LogicalSchema.LogicalFieldSchema("s", null, DataType.BYTEARRAY));
    aschema2.addField(new LogicalSchema.LogicalFieldSchema("t", aschema3, DataType.TUPLE));
    aschema.addField(new LogicalSchema.LogicalFieldSchema("d", aschema2, DataType.BAG));

    LOLoad load =
        new LOLoad(
            new FileSpec("file:///test/d.txt", new FuncSpec("org.apache.pig.builtin.PigStorage")),
            aschema,
            expected);
    expected.add(load);

    LOForEach foreach2 = new LOForEach(expected);
    org.apache.pig.experimental.logical.relational.LogicalPlan innerPlan =
        new org.apache.pig.experimental.logical.relational.LogicalPlan();
    LOInnerLoad l1 = new LOInnerLoad(innerPlan, foreach2, 0);
    innerPlan.add(l1);
    LOInnerLoad l2 = new LOInnerLoad(innerPlan, foreach2, 1);
    // Register the second inner load with the inner plan too, exactly like l1 and gen, before
    // it gets connected to the generate below.
    innerPlan.add(l2);

    // One expression plan per generated column; each holds a single projection.
    List<LogicalExpressionPlan> eps = new ArrayList<LogicalExpressionPlan>();
    LogicalExpressionPlan p1 = new LogicalExpressionPlan();
    new ProjectExpression(p1, DataType.BYTEARRAY, 0, 0);
    LogicalExpressionPlan p2 = new LogicalExpressionPlan();
    new ProjectExpression(p2, DataType.BAG, 1, 0);
    eps.add(p1);
    eps.add(p2);

    // Flatten flags: the first output is kept as is, the second one is flattened.
    LOGenerate gen = new LOGenerate(innerPlan, eps, new boolean[] {false, true});
    innerPlan.add(gen);
    innerPlan.connect(l1, gen);
    innerPlan.connect(l2, gen);

    foreach2.setInnerPlan(innerPlan);
    expected.add(foreach2);

    LOStore s =
        new LOStore(
            expected,
            new FileSpec("file:///test/empty", new FuncSpec("org.apache.pig.builtin.PigStorage")));
    expected.add(s);

    expected.connect(load, foreach2);
    expected.connect(foreach2, s);

    // Run the UidStamper visitor over the expected plan before comparing. Any failure simply
    // propagates; the test method already declares `throws Exception`.
    UidStamper stamper = new UidStamper(expected);
    stamper.visit();

    assertTrue(expected.isEqual(newPlan));

    // Schema of the foreach after flattening d: (id:bytearray, d::id:int, s:bytearray)
    LogicalSchema schema = foreach2.getSchema();
    LogicalSchema flattened = new LogicalSchema();
    flattened.addField(new LogicalSchema.LogicalFieldSchema("id", null, DataType.BYTEARRAY));
    flattened.addField(new LogicalSchema.LogicalFieldSchema("d::id", null, DataType.INTEGER));
    flattened.addField(new LogicalSchema.LogicalFieldSchema("s", null, DataType.BYTEARRAY));
    assertTrue(schema.isEqual(flattened));

    // Lookup by alias must hand back the very same field object as lookup by position.
    assertSame(schema.getField(0), schema.getField("id"));
    assertSame(schema.getField(1), schema.getField("d::id"));
  }
  /**
   * Migrates a plan that groups a single load by its {@code name} column and checks the
   * resulting {@code LOCogroup}.
   *
   * <p>The test verifies the output schema (a chararray {@code group} column followed by a bag
   * with the load columns), that the bag columns carry the uids of the load columns, that the
   * uids of the group column and of the bag are not reused from elsewhere, and that the only key
   * expression plan is a single projection of column 0 of input 0.
   *
   * @throws Exception if building or migrating the plan fails
   */
  public void testCoGroup() throws Exception {
    LogicalPlanTester lpt = new LogicalPlanTester();
    lpt.buildPlan("a = load '/test/d.txt' as (name:chararray, age:int, gpa:float);");
    lpt.buildPlan("b = group a by name;");
    LogicalPlan plan = lpt.buildPlan("store b into '/test/empty';");

    org.apache.pig.experimental.logical.relational.LogicalPlan newPlan = migratePlan(plan);

    LogicalSchema loadSchema =
        ((LogicalRelationalOperator) newPlan.getSources().get(0)).getSchema();
    Set<Long> uids = getAllUids(loadSchema);

    // The operator right after the load has to be the cogroup. It is fetched once and reused
    // for the schema checks and the expression plan checks.
    LogicalRelationalOperator op =
        (LogicalRelationalOperator) newPlan.getSuccessors(newPlan.getSources().get(0)).get(0);
    assertEquals(LOCogroup.class, op.getClass());
    LOCogroup cogroup = (LOCogroup) op;

    LogicalSchema schema = op.getSchema();
    assertEquals(2, schema.size());

    // Field 0: the group key, a chararray with a uid unknown to the load.
    assertEquals(DataType.CHARARRAY, schema.getField(0).type);
    assertFalse(uids.contains(schema.getField(0).uid));
    assertEquals("group", schema.getField(0).alias);

    // Field 1: bag with the columns of the load, uids carried over unchanged.
    assertEquals(DataType.BAG, schema.getField(1).type);
    LogicalSchema bagSchema = schema.getField(1).schema;
    assertEquals(DataType.CHARARRAY, bagSchema.getField(0).type);
    assertEquals("name", bagSchema.getField(0).alias);
    assertEquals(loadSchema.getField(0).uid, bagSchema.getField(0).uid);
    assertEquals(DataType.INTEGER, bagSchema.getField(1).type);
    assertEquals("age", bagSchema.getField(1).alias);
    assertEquals(loadSchema.getField(1).uid, bagSchema.getField(1).uid);
    assertEquals(DataType.FLOAT, bagSchema.getField(2).type);
    assertEquals("gpa", bagSchema.getField(2).alias);
    assertEquals(loadSchema.getField(2).uid, bagSchema.getField(2).uid);

    // The bag's uid must differ from the load uids and from the group column's uid.
    uids.add(schema.getField(0).uid);
    assertFalse(uids.contains(schema.getField(1).uid));

    // A single input with a single key: one expression plan ending in one projection.
    MultiMap<Integer, LogicalExpressionPlan> expressionPlans = cogroup.getExpressionPlans();
    assertEquals(1, expressionPlans.size());
    List<LogicalExpressionPlan> plans =
        (List<LogicalExpressionPlan>) expressionPlans.get(Integer.valueOf(0));
    assertEquals(1, plans.size());

    LogicalExpressionPlan exprPlan = plans.get(0);
    assertEquals(1, exprPlan.getSinks().size());
    assertEquals(ProjectExpression.class, exprPlan.getSinks().get(0).getClass());
    ProjectExpression prj = (ProjectExpression) exprPlan.getSinks().get(0);
    assertEquals(loadSchema.getField(0).uid, prj.getUid());
    assertEquals(0, prj.getColNum());
    assertEquals(0, prj.getInputNum());
  }
  /**
   * Checks the schema of a migrated foreach for four different queries:
   *
   * <ol>
   *   <li>flattening a tuple column,
   *   <li>passing a bag column through without flattening,
   *   <li>flattening a bag whose tuple has a column with a declared type,
   *   <li>generating the sum of two int columns.
   * </ol>
   *
   * <p>In the first two queries the foreach is the sink of the plan; in the last two a store
   * follows it, so the foreach is looked up as the predecessor of the sink.
   *
   * @throws Exception if building or migrating a plan fails
   */
  public void testForeachSchema() throws Exception {
    // 1. FLATTEN on a tuple: the tuple's columns become top-level columns.
    LogicalPlanTester tester = new LogicalPlanTester();
    tester.buildPlan("a = load '/test/d.txt' as (id, d:tuple(v, s));");
    org.apache.pig.experimental.logical.relational.LogicalPlan migrated =
        migratePlan(tester.buildPlan("b = foreach a generate id, FLATTEN(d);"));
    LogicalRelationalOperator sink = (LogicalRelationalOperator) migrated.getSinks().get(0);

    LogicalSchema flatTuple = new LogicalSchema();
    flatTuple.addField(new LogicalSchema.LogicalFieldSchema("id", null, DataType.BYTEARRAY));
    flatTuple.addField(new LogicalSchema.LogicalFieldSchema("v", null, DataType.BYTEARRAY));
    flatTuple.addField(new LogicalSchema.LogicalFieldSchema("s", null, DataType.BYTEARRAY));
    assertTrue(flatTuple.isEqual(sink.getSchema()));

    // 2. No FLATTEN: the bag column keeps its nested bag{t:(v, s)} structure.
    tester = new LogicalPlanTester();
    tester.buildPlan("a = load '/test/d.txt' as (id, d:bag{t:(v, s)});");
    migrated = migratePlan(tester.buildPlan("b = foreach a generate id, d;"));
    sink = (LogicalRelationalOperator) migrated.getSinks().get(0);

    LogicalSchema tupleColumns = new LogicalSchema();
    tupleColumns.addField(new LogicalSchema.LogicalFieldSchema("v", null, DataType.BYTEARRAY));
    tupleColumns.addField(new LogicalSchema.LogicalFieldSchema("s", null, DataType.BYTEARRAY));
    LogicalSchema bagContent = new LogicalSchema();
    bagContent.addField(new LogicalSchema.LogicalFieldSchema("t", tupleColumns, DataType.TUPLE));
    LogicalSchema withBag = new LogicalSchema();
    withBag.addField(new LogicalSchema.LogicalFieldSchema("id", null, DataType.BYTEARRAY));
    withBag.addField(new LogicalSchema.LogicalFieldSchema("d", bagContent, DataType.BAG));
    assertTrue(withBag.isEqual(sink.getSchema()));

    // 3. FLATTEN on a bag whose tuple declares v as int: the declared type must survive.
    tester = new LogicalPlanTester();
    tester.buildPlan("a = load '/test/d.txt' as (id, d:bag{t:(v:int, s)});");
    tester.buildPlan("b = foreach a generate id, FLATTEN(d);");
    migrated = migratePlan(tester.buildPlan("store b into '/test/empty';"));
    LogicalRelationalOperator storeOp = (LogicalRelationalOperator) migrated.getSinks().get(0);
    LogicalRelationalOperator beforeStore =
        (LogicalRelationalOperator) migrated.getPredecessors(storeOp).get(0);
    LogicalSchema actual = beforeStore.getSchema();

    LogicalSchema typedFlat = new LogicalSchema();
    typedFlat.addField(new LogicalSchema.LogicalFieldSchema("id", null, DataType.BYTEARRAY));
    typedFlat.addField(new LogicalSchema.LogicalFieldSchema("v", null, DataType.INTEGER));
    typedFlat.addField(new LogicalSchema.LogicalFieldSchema("s", null, DataType.BYTEARRAY));
    assertTrue(actual.isEqual(typedFlat));

    // 4. Addition of two int columns: the second output is an int column without an alias.
    tester = new LogicalPlanTester();
    tester.buildPlan("a = load '/test/d.txt' as (id, v:int, s:int);");
    tester.buildPlan("b = foreach a generate id, v+s;");
    migrated = migratePlan(tester.buildPlan("store b into '/test/empty';"));
    storeOp = (LogicalRelationalOperator) migrated.getSinks().get(0);
    beforeStore = (LogicalRelationalOperator) migrated.getPredecessors(storeOp).get(0);
    actual = beforeStore.getSchema();

    LogicalSchema withSum = new LogicalSchema();
    withSum.addField(new LogicalSchema.LogicalFieldSchema("id", null, DataType.BYTEARRAY));
    withSum.addField(new LogicalSchema.LogicalFieldSchema(null, null, DataType.INTEGER));
    assertTrue(actual.isEqual(withSum));
  }