/**
   * Compiles {@code pp} with {@code MRCompiler}, prints the resulting map-reduce plan, and
   * asserts that the printed text equals the golden copy stored in {@code expectedFile}.
   *
   * <p>When the {@code generate} field is set, the printed plan is written to
   * {@code expectedFile} instead and the method returns without comparing anything.
   *
   * <p>A single trailing {@code '\n'} in the golden file is ignored. Both the compiled plan and
   * the golden file are decoded with the platform default charset.
   *
   * @param pp physical plan to compile
   * @param expectedFile path of the golden file to read (or to overwrite when generating)
   * @throws Exception if compilation, printing, or file I/O fails
   */
  private void run(PhysicalPlan pp, String expectedFile) throws Exception {
    MRCompiler comp = new MRCompiler(pp, pc);
    comp.compile();

    MROperPlan mrp = comp.getMRPlan();
    PlanPrinter ppp = new PlanPrinter(mrp);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    ppp.print(baos);
    String compiledPlan = baos.toString();

    if (generate) {
      // try-with-resources: the golden file is closed even if the write fails.
      try (FileOutputStream fos = new FileOutputStream(expectedFile)) {
        fos.write(baos.toByteArray());
      }
      return;
    }

    // Read the golden file to EOF. A single read() may return fewer bytes than are available,
    // and a fixed-size buffer would silently truncate large golden files.
    ByteArrayOutputStream goldenBytes = new ByteArrayOutputStream();
    try (FileInputStream fis = new FileInputStream(expectedFile)) {
      byte[] chunk = new byte[8192];
      int read;
      while ((read = fis.read(chunk)) != -1) {
        goldenBytes.write(chunk, 0, read);
      }
    }
    String goldenPlan = goldenBytes.toString();

    // Drop one trailing newline. The check is made on the decoded string, not on the byte count,
    // so it is safe for empty files and for multi-byte encodings.
    if (goldenPlan.endsWith("\n")) {
      goldenPlan = goldenPlan.substring(0, goldenPlan.length() - 1);
    }

    // Dump both plans so a mismatch can be diagnosed from the test output.
    pp.explain(System.out);
    System.out.println();
    System.out.println("<<<" + compiledPlan + ">>>");
    System.out.println("-------------");
    System.out.println("Golden");
    System.out.println("<<<" + goldenPlan + ">>>");
    System.out.println("-------------");
    assertEquals(goldenPlan, compiledPlan);
  }
Beispiel #2
0
  /**
   * Checks the shape of the map-reduce plan compiled for a merge join of two loads.
   *
   * <p>Layout asserted below:
   * <ul>
   *   <li>the plan holds two map-reduce operators;</li>
   *   <li>the root operator has a two-operator map plan (a load followed by a local rearrange)
   *       and a three-operator reduce plan (package, foreach, store);</li>
   *   <li>the function instantiated from that load's func spec is a {@code MergeJoinIndexer}
   *       whose private {@code lr} field holds exactly one plan, made of a single projection
   *       of column 0;</li>
   *   <li>the successor operator has a three-operator map plan (load, merge join, store) and
   *       an empty reduce plan.</li>
   * </ul>
   *
   * @throws Exception if the query cannot be built or compiled, or reflection on {@code lr} fails
   */
  @Test
  public void testMergeJoin() throws Exception {
    String loadLeft = "a = load '/tmp/input1';";
    String loadRight = "b = load '/tmp/input2';";
    String mergeJoin = "c = join a by $0, b by $0 using 'merge';";
    String storeResult = "store c into '/tmp/output1';";
    String query = loadLeft + loadRight + mergeJoin + storeResult;

    PhysicalPlan physicalPlan = Util.buildPp(pigServer, query);
    MRCompiler compiler = new MRCompiler(physicalPlan, pc);
    compiler.compile();
    MROperPlan mrPlan = compiler.getMRPlan();
    assertTrue(mrPlan.size() == 2);

    // First map-reduce operator: map side.
    MapReduceOper firstJob = mrPlan.getRoots().get(0);
    assertTrue(firstJob.mapPlan.size() == 2);
    PhysicalOperator firstLoad = firstJob.mapPlan.getRoots().get(0);
    POLoad indexLoad = (POLoad) firstLoad;
    MergeJoinIndexer indexer =
        (MergeJoinIndexer)
            PigContext.instantiateFuncFromSpec(indexLoad.getLFile().getFuncSpec());

    // The indexer's local rearrange is a private field, so it is read through reflection.
    Field rearrangeField = MergeJoinIndexer.class.getDeclaredField("lr");
    rearrangeField.setAccessible(true);
    POLocalRearrange indexerRearrange = (POLocalRearrange) rearrangeField.get(indexer);
    List<PhysicalPlan> rearrangePlans = indexerRearrange.getPlans();

    PhysicalOperator firstRearrange = firstJob.mapPlan.getSuccessors(firstLoad).get(0);
    assertTrue(firstRearrange instanceof POLocalRearrange);

    // First map-reduce operator: reduce side.
    assertTrue(firstJob.reducePlan.size() == 3);
    PhysicalOperator firstPackage = firstJob.reducePlan.getRoots().get(0);
    assertTrue(firstPackage instanceof POPackage);
    PhysicalOperator firstForEach = firstJob.reducePlan.getSuccessors(firstPackage).get(0);
    assertTrue(firstForEach instanceof POForEach);
    PhysicalOperator firstStore = firstJob.reducePlan.getSuccessors(firstForEach).get(0);
    assertTrue(firstStore instanceof POStore);

    // Plans carried by the indexer's local rearrange.
    assertTrue(rearrangePlans.size() == 1);
    PhysicalPlan keyPlan = rearrangePlans.get(0);
    assertTrue(keyPlan.size() == 1);
    PhysicalOperator keyProjection = keyPlan.getRoots().get(0);
    assertTrue(keyProjection instanceof POProject);
    assertTrue(((POProject) keyProjection).getColumn() == 0);

    // Second map-reduce operator: map-only, ending in a store.
    MapReduceOper secondJob = mrPlan.getSuccessors(firstJob).get(0);
    assertTrue(secondJob.mapPlan.size() == 3);
    PhysicalOperator secondLoad = secondJob.mapPlan.getRoots().get(0);
    assertTrue(secondLoad instanceof POLoad);
    PhysicalOperator joinOp = secondJob.mapPlan.getSuccessors(secondLoad).get(0);
    assertTrue(joinOp instanceof POMergeJoin);
    PhysicalOperator secondStore = secondJob.mapPlan.getSuccessors(joinOp).get(0);
    assertTrue(secondStore instanceof POStore);
    assertTrue(secondJob.reducePlan.isEmpty());
  }