コード例 #1
0
  /**
   * Checks migration of a single-key {@code group} statement.
   *
   * <p>Builds {@code load -> group by name -> store}, migrates the plan with {@code migratePlan},
   * and asserts that:
   *
   * <ul>
   *   <li>the operator following the first source is an {@code LOCogroup};
   *   <li>its schema has two fields: a chararray named {@code group} whose uid is not one of the
   *       load schema's uids, and a bag whose uid is also new;
   *   <li>the bag's inner fields match the load schema in type, alias and uid;
   *   <li>the only grouping expression plan is a projection of column 0 of input 0.
   * </ul>
   *
   * @throws Exception propagated from plan construction or migration
   */
  public void testCoGroup() throws Exception {
    LogicalPlanTester lpt = new LogicalPlanTester();
    lpt.buildPlan("a = load '/test/d.txt' as (name:chararray, age:int, gpa:float);");
    lpt.buildPlan("b = group a by name;");
    LogicalPlan plan = lpt.buildPlan("store b into '/test/empty';");

    // check basics
    org.apache.pig.experimental.logical.relational.LogicalPlan newPlan = migratePlan(plan);

    LogicalSchema loadSchema =
        ((LogicalRelationalOperator) newPlan.getSources().get(0)).getSchema();

    // Uids already used by the load; the cogroup's own fields must not reuse them.
    Set<Long> uids = getAllUids(loadSchema);

    LogicalRelationalOperator op =
        (LogicalRelationalOperator) newPlan.getSuccessors(newPlan.getSources().get(0)).get(0);
    assertEquals(LOCogroup.class, op.getClass());
    LogicalSchema schema = op.getSchema();

    // Field 0: the group key.
    assertEquals(2, schema.size());
    assertEquals(DataType.CHARARRAY, schema.getField(0).type);
    assertEquals(false, uids.contains(schema.getField(0).uid));
    assertEquals("group", schema.getField(0).alias);

    // Field 1: the bag holding the grouped input tuples.
    assertEquals(DataType.BAG, schema.getField(1).type);
    LogicalSchema bagSchema = schema.getField(1).schema;

    assertEquals(DataType.CHARARRAY, bagSchema.getField(0).type);
    assertEquals("name", bagSchema.getField(0).alias);
    assertEquals(loadSchema.getField(0).uid, bagSchema.getField(0).uid);

    assertEquals(DataType.INTEGER, bagSchema.getField(1).type);
    assertEquals("age", bagSchema.getField(1).alias);
    assertEquals(loadSchema.getField(1).uid, bagSchema.getField(1).uid);

    assertEquals(DataType.FLOAT, bagSchema.getField(2).type);
    assertEquals("gpa", bagSchema.getField(2).alias);
    assertEquals(loadSchema.getField(2).uid, bagSchema.getField(2).uid);

    // The bag's uid must differ from the load uids and from the group key's uid.
    uids.add(Long.valueOf(schema.getField(0).uid));
    assertEquals(false, uids.contains(schema.getField(1).uid));

    // op was already verified to be an LOCogroup above, so it can be cast directly.
    LOCogroup cogroup = (LOCogroup) op;

    MultiMap<Integer, LogicalExpressionPlan> expressionPlans = cogroup.getExpressionPlans();
    assertEquals(1, expressionPlans.size());
    List<LogicalExpressionPlan> plans =
        (List<LogicalExpressionPlan>) expressionPlans.get(Integer.valueOf(0));
    assertEquals(1, plans.size());

    // The single grouping expression projects column 0 ("name") of input 0.
    LogicalExpressionPlan exprPlan = plans.get(0);
    assertEquals(1, exprPlan.getSinks().size());
    assertEquals(ProjectExpression.class, exprPlan.getSinks().get(0).getClass());
    ProjectExpression prj = (ProjectExpression) exprPlan.getSinks().get(0);
    assertEquals(loadSchema.getField(0).uid, prj.getUid());
    assertEquals(0, prj.getColNum());
    assertEquals(0, prj.getInputNum());
  }
コード例 #2
0
  /**
   * Checks migration of a two-input, two-key {@code group} (cogroup) statement.
   *
   * <p>Builds two loads grouped by {@code (name, age)}, migrates the plan with
   * {@code migratePlan}, and asserts that:
   *
   * <ul>
   *   <li>the operator following the first source is an {@code LOCogroup};
   *   <li>its schema has three fields: a tuple named {@code group} with inner fields
   *       {@code name:chararray} and {@code age:int}, followed by one bag per input;
   *   <li>each bag's inner fields match the corresponding load schema in type, alias and uid;
   *   <li>the group tuple, its inner fields and both bags carry uids not found in either load
   *       schema;
   *   <li>each input has two grouping expression plans, projecting columns 0 and 1 of that input.
   * </ul>
   *
   * @throws Exception propagated from plan construction or migration
   */
  public void testCoGroup4() throws Exception {
    LogicalPlanTester lpt = new LogicalPlanTester();
    lpt.buildPlan("a = load '/test/d.txt' as (name:chararray, age:int, gpa:float);");
    lpt.buildPlan("b = load '/test/e.txt' as (name:chararray, age:int, blah:chararray );");
    lpt.buildPlan("c = group a by ( name, age ), b by ( name, age );");
    LogicalPlan plan = lpt.buildPlan("store c into '/test/empty';");

    // check basics
    org.apache.pig.experimental.logical.relational.LogicalPlan newPlan = migratePlan(plan);

    assertEquals(
        LOCogroup.class, newPlan.getSuccessors(newPlan.getSources().get(0)).get(0).getClass());
    LOCogroup cogroup = (LOCogroup) newPlan.getSuccessors(newPlan.getSources().get(0)).get(0);

    // Reason for this strange way of getting the load schema is to maintain the sequence correctly
    LogicalSchema loadSchema =
        ((LogicalRelationalOperator) newPlan.getPredecessors(cogroup).get(0)).getSchema();

    LogicalSchema load2Schema =
        ((LogicalRelationalOperator) newPlan.getPredecessors(cogroup).get(1)).getSchema();

    // Uids already used by either load; the cogroup's own fields must not reuse them.
    Set<Long> uids = getAllUids(loadSchema);
    uids.addAll(getAllUids(load2Schema));

    // The operator was already verified to be an LOCogroup above; read its schema directly.
    LogicalSchema schema = cogroup.getSchema();

    // Field 0: the (name, age) group key tuple.
    assertEquals(3, schema.size());
    assertEquals(DataType.TUPLE, schema.getField(0).type);
    assertEquals(false, uids.contains(schema.getField(0).uid));
    assertEquals("group", schema.getField(0).alias);
    LogicalSchema groupSchema = schema.getField(0).schema;
    assertEquals(DataType.CHARARRAY, groupSchema.getField(0).type);
    assertEquals("name", groupSchema.getField(0).alias);
    assertEquals(DataType.INTEGER, groupSchema.getField(1).type);
    assertEquals("age", groupSchema.getField(1).alias);

    // Field 1: bag of tuples from the first input.
    assertEquals(DataType.BAG, schema.getField(1).type);
    LogicalSchema bagSchema = schema.getField(1).schema;

    assertEquals(DataType.CHARARRAY, bagSchema.getField(0).type);
    assertEquals("name", bagSchema.getField(0).alias);
    assertEquals(loadSchema.getField(0).uid, bagSchema.getField(0).uid);
    assertEquals(DataType.INTEGER, bagSchema.getField(1).type);
    assertEquals("age", bagSchema.getField(1).alias);
    assertEquals(loadSchema.getField(1).uid, bagSchema.getField(1).uid);
    assertEquals(DataType.FLOAT, bagSchema.getField(2).type);
    assertEquals("gpa", bagSchema.getField(2).alias);
    assertEquals(loadSchema.getField(2).uid, bagSchema.getField(2).uid);

    // Field 2: bag of tuples from the second input.
    assertEquals(DataType.BAG, schema.getField(2).type);
    LogicalSchema bag2Schema = schema.getField(2).schema;

    assertEquals(DataType.CHARARRAY, bag2Schema.getField(0).type);
    assertEquals("name", bag2Schema.getField(0).alias);
    assertEquals(load2Schema.getField(0).uid, bag2Schema.getField(0).uid);
    assertEquals(DataType.INTEGER, bag2Schema.getField(1).type);
    assertEquals("age", bag2Schema.getField(1).alias);
    assertEquals(load2Schema.getField(1).uid, bag2Schema.getField(1).uid);
    assertEquals(DataType.CHARARRAY, bag2Schema.getField(2).type);
    assertEquals("blah", bag2Schema.getField(2).alias);
    assertEquals(load2Schema.getField(2).uid, bag2Schema.getField(2).uid);

    // We are doing Uid tests at the end as the uids should not repeat
    assertEquals(false, uids.contains(groupSchema.getField(0).uid));
    assertEquals(false, uids.contains(groupSchema.getField(1).uid));
    assertEquals(false, uids.contains(schema.getField(1).uid));
    uids.add(schema.getField(1).uid);
    assertEquals(false, uids.contains(schema.getField(2).uid));

    // One list of grouping expression plans per input, two keys each.
    MultiMap<Integer, LogicalExpressionPlan> expressionPlans = cogroup.getExpressionPlans();
    assertEquals(2, expressionPlans.size());
    List<LogicalExpressionPlan> plans =
        (List<LogicalExpressionPlan>) expressionPlans.get(Integer.valueOf(0));
    assertEquals(2, plans.size());

    List<LogicalExpressionPlan> plans2 =
        (List<LogicalExpressionPlan>) expressionPlans.get(Integer.valueOf(1));
    assertEquals(2, plans2.size());

    // Input 0, key 0: projects column 0 ("name").
    LogicalExpressionPlan exprPlan = plans.get(0);
    assertEquals(1, exprPlan.getSinks().size());
    assertEquals(ProjectExpression.class, exprPlan.getSinks().get(0).getClass());
    ProjectExpression prj = (ProjectExpression) exprPlan.getSinks().get(0);
    assertEquals(loadSchema.getField(0).uid, prj.getUid());
    assertEquals(0, prj.getColNum());
    assertEquals(0, prj.getInputNum());

    // Input 0, key 1: projects column 1 ("age").
    LogicalExpressionPlan exprPlan2 = plans.get(1);
    assertEquals(1, exprPlan2.getSinks().size());
    assertEquals(ProjectExpression.class, exprPlan2.getSinks().get(0).getClass());
    ProjectExpression prj2 = (ProjectExpression) exprPlan2.getSinks().get(0);
    assertEquals(loadSchema.getField(1).uid, prj2.getUid());
    assertEquals(1, prj2.getColNum());
    assertEquals(0, prj2.getInputNum());

    // Input 1, key 0: projects column 0 ("name").
    LogicalExpressionPlan exprPlan3 = plans2.get(0);
    assertEquals(1, exprPlan3.getSinks().size());
    assertEquals(ProjectExpression.class, exprPlan3.getSinks().get(0).getClass());
    ProjectExpression prj3 = (ProjectExpression) exprPlan3.getSinks().get(0);
    assertEquals(load2Schema.getField(0).uid, prj3.getUid());
    assertEquals(0, prj3.getColNum());
    assertEquals(1, prj3.getInputNum());

    // Input 1, key 1: projects column 1 ("age").
    LogicalExpressionPlan exprPlan4 = plans2.get(1);
    assertEquals(1, exprPlan4.getSinks().size());
    assertEquals(ProjectExpression.class, exprPlan4.getSinks().get(0).getClass());
    ProjectExpression prj4 = (ProjectExpression) exprPlan4.getSinks().get(0);
    assertEquals(load2Schema.getField(1).uid, prj4.getUid());
    assertEquals(1, prj4.getColNum());
    assertEquals(1, prj4.getInputNum());
  }
コード例 #3
0
  /**
   * Checks migration of a {@code load, load -> join -> filter -> store} plan.
   *
   * <p>After migrating with {@code migratePlan}, asserts that the new plan has five operators and
   * two sources; that the operator following the first source is a hash {@code LOJoin} whose
   * predecessors are the loads aliased {@code a} and {@code b}; that the join key plans project
   * column 0 of input 0 and column 1 of input 1; that the filter plan is an {@code AndExpression}
   * over two {@code EqualExpression}s, each comparing a projection with a constant; and that the
   * filter is followed by an {@code LOStore}.
   *
   * @throws Exception propagated from plan construction or migration
   */
  public void testJoinPlan() throws Exception {
    LogicalPlanTester lpt = new LogicalPlanTester();
    lpt.buildPlan("a = load 'd1.txt' as (id, c);");
    lpt.buildPlan("b = load 'd2.txt'as (id, c);");
    lpt.buildPlan("c = join a by id, b by c;");
    lpt.buildPlan("d = filter c by a::id==NULL AND b::c==NULL;");
    LogicalPlan plan = lpt.buildPlan("store d into 'empty';");

    // check basics
    org.apache.pig.experimental.logical.relational.LogicalPlan newPlan = migratePlan(plan);
    assertEquals(5, newPlan.size());
    assertEquals(2, newPlan.getSources().size());

    // check load and join
    LogicalRelationalOperator op =
        (LogicalRelationalOperator) newPlan.getSuccessors(newPlan.getSources().get(0)).get(0);
    assertEquals(org.apache.pig.experimental.logical.relational.LOJoin.class, op.getClass());
    LOJoin join = (LOJoin) op;
    assertEquals(LOJoin.JOINTYPE.HASH, join.getJoinType());

    LogicalRelationalOperator l1 = (LogicalRelationalOperator) newPlan.getPredecessors(op).get(0);
    assertEquals(org.apache.pig.experimental.logical.relational.LOLoad.class, l1.getClass());
    assertEquals("a", l1.getAlias());

    LogicalRelationalOperator l2 = (LogicalRelationalOperator) newPlan.getPredecessors(op).get(1);
    assertEquals(org.apache.pig.experimental.logical.relational.LOLoad.class, l2.getClass());
    assertEquals("b", l2.getAlias());

    // check join input plans: "a by id" is column 0 of input 0
    LogicalExpressionPlan p1 = join.getJoinPlan(0).iterator().next();
    assertEquals(1, p1.size());

    ProjectExpression prj = (ProjectExpression) p1.getSources().get(0);

    assertEquals(0, prj.getInputNum());
    assertEquals(0, prj.getColNum());

    // "b by c" is column 1 of input 1
    LogicalExpressionPlan p2 = join.getJoinPlan(1).iterator().next();
    assertEquals(1, p2.size());

    prj = (ProjectExpression) p2.getSources().get(0);

    assertEquals(1, prj.getInputNum());
    assertEquals(1, prj.getColNum());

    // check filter
    op = (LogicalRelationalOperator) newPlan.getSuccessors(op).get(0);
    assertEquals(org.apache.pig.experimental.logical.relational.LOFilter.class, op.getClass());
    LogicalExpressionPlan exp =
        ((org.apache.pig.experimental.logical.relational.LOFilter) op).getFilterPlan();

    AndExpression ae = (AndExpression) exp.getSources().get(0);

    // First operand of AND: projection of column 0 compared with a constant.
    EqualExpression eq = (EqualExpression) exp.getSuccessors(ae).get(0);
    assertEquals(ProjectExpression.class, eq.getLhs().getClass());
    ProjectExpression lhs = (ProjectExpression) eq.getLhs();
    assertEquals(0, lhs.getColNum());
    assertEquals(0, lhs.getInputNum());

    assertEquals(ConstantExpression.class, eq.getRhs().getClass());

    // Second operand of AND: projection of column 3 compared with a constant.
    eq = (EqualExpression) exp.getSuccessors(ae).get(1);
    assertEquals(ProjectExpression.class, eq.getLhs().getClass());
    lhs = (ProjectExpression) eq.getLhs();
    assertEquals(3, lhs.getColNum());
    assertEquals(0, lhs.getInputNum());

    assertEquals(ConstantExpression.class, eq.getRhs().getClass());

    // check store
    op = (LogicalRelationalOperator) newPlan.getSuccessors(op).get(0);
    assertEquals(org.apache.pig.experimental.logical.relational.LOStore.class, op.getClass());
  }