Exemple #1
0
  /**
   * Runs {@link LimitAdjuster} over the MR plan built for a parallel order-by followed by a limit
   * and checks two things: the first-successor chain starting at the first root contains four
   * operators, and the first leaf operator lists the store function spec string among its UDFs.
   */
  @Test
  public void testLimitAdjusterFuncShipped() throws Exception {
    final String storeFuncClass = PigStorageNoDefCtor.class.getName();
    final String script =
        "a = load 'input';"
            + "b = order a by $0 parallel 2;"
            + "c = limit b 7;"
            + "store c into 'output' using "
            + storeFuncClass
            + "('\t');";

    PhysicalPlan physicalPlan = Util.buildPp(pigServerMR, script);
    MROperPlan mrPlan = Util.buildMRPlan(physicalPlan, pc);

    LimitAdjuster adjuster = new LimitAdjuster(mrPlan, pc);
    adjuster.visit();
    adjuster.adjust();

    // Start at 1 for the root, then add one for every hop to a first successor.
    int jobCount = 1;
    for (MapReduceOper cursor = mrPlan.getRoots().get(0);
        mrPlan.getSuccessors(cursor) != null;
        cursor = mrPlan.getSuccessors(cursor).get(0)) {
      jobCount++;
    }
    assertEquals(4, jobCount);

    MapReduceOper leaf = mrPlan.getLeaves().get(0);
    String expectedUdf = new FuncSpec(storeFuncClass) + "('\t')";
    assertTrue(leaf.UDFs.contains(expectedUdf));
  }
Exemple #2
0
  /**
   * Verifies the number of map reduce jobs produced for an order by <em>without</em> a parallel
   * clause followed by a limit (i.e., top k). Because the unit test runs locally, the reduce
   * parallelism is 1, so the extra MR job that would perform a final limit is NOT introduced and
   * the chain is expected to contain three operators.
   */
  @Test
  public void testNumReducersInLimit() throws Exception {
    String script =
        "a = load 'input';"
            + "b = order a by $0;"
            + "c = limit b 10;"
            + "store c into 'output';";

    PhysicalPlan physicalPlan = Util.buildPp(pigServer, script);
    MROperPlan mrPlan = Util.buildMRPlan(physicalPlan, pc);

    // Start at 1 for the root, then add one for every hop to a first successor.
    int jobCount = 1;
    for (MapReduceOper cursor = mrPlan.getRoots().get(0);
        mrPlan.getSuccessors(cursor) != null;
        cursor = mrPlan.getSuccessors(cursor).get(0)) {
      jobCount++;
    }
    assertEquals(3, jobCount);
  }
  /**
   * Test to ensure that the order by with parallel followed by a limit, i.e., top k always produces
   * the correct number of map reduce jobs. For the {@code parallel 2} script below, the chain of
   * operators reached by following first successors from the first root must have length four.
   */
  @Test
  public void testNumReducersInLimitWithParallel() throws Exception {
    planTester.buildPlan("a = load 'input';");
    planTester.buildPlan("b = order a by $0 parallel 2;");
    planTester.buildPlan("c = limit b 10;");
    LogicalPlan lp = planTester.buildPlan("store c into '/tmp';");

    PhysicalPlan pp = Util.buildPhysicalPlan(lp, pc);
    MROperPlan mrPlan = Util.buildMRPlan(pp, pc);
    MapReduceOper mrOper = mrPlan.getRoots().get(0);
    int count = 1;

    // Count the root plus one operator per hop to a first successor.
    while (mrPlan.getSuccessors(mrOper) != null) {
      mrOper = mrPlan.getSuccessors(mrOper).get(0);
      ++count;
    }
    // assertEquals reports expected vs. actual on failure, unlike assertTrue(count == 4).
    assertEquals(4, count);
  }
Exemple #4
0
  /**
   * Compiles a merge join of two loads and inspects the resulting two-operator MR plan: the map and
   * reduce plans of the first operator, the local rearrange held by the {@link MergeJoinIndexer}
   * instantiated from the first load's function spec, and the second operator, whose map plan
   * contains the {@link POMergeJoin} and whose reduce plan is empty.
   */
  @Test
  public void testMergeJoin() throws Exception {
    String script =
        "a = load '/tmp/input1';"
            + "b = load '/tmp/input2';"
            + "c = join a by $0, b by $0 using 'merge';"
            + "store c into '/tmp/output1';";

    PhysicalPlan physicalPlan = Util.buildPp(pigServer, script);
    MRCompiler compiler = new MRCompiler(physicalPlan, pc);
    compiler.compile();
    MROperPlan compiled = compiler.getMRPlan();
    assertTrue(compiled.size() == 2);

    // ---- First MR operator: map plan ----
    MapReduceOper firstJob = compiled.getRoots().get(0);
    assertTrue(firstJob.mapPlan.size() == 2);
    PhysicalOperator firstLoad = firstJob.mapPlan.getRoots().get(0);

    // Instantiate the load's function and pull its "lr" field out via reflection.
    Object loadFunc =
        PigContext.instantiateFuncFromSpec(((POLoad) firstLoad).getLFile().getFuncSpec());
    MergeJoinIndexer indexer = (MergeJoinIndexer) loadFunc;
    Field rearrangeField = MergeJoinIndexer.class.getDeclaredField("lr");
    rearrangeField.setAccessible(true);
    POLocalRearrange indexerRearrange = (POLocalRearrange) rearrangeField.get(indexer);
    List<PhysicalPlan> rearrangePlans = indexerRearrange.getPlans();

    PhysicalOperator mapRearrange = firstJob.mapPlan.getSuccessors(firstLoad).get(0);
    assertTrue(mapRearrange instanceof POLocalRearrange);

    // ---- First MR operator: reduce plan (package -> foreach -> store) ----
    assertTrue(firstJob.reducePlan.size() == 3);
    PhysicalOperator reducePackage = firstJob.reducePlan.getRoots().get(0);
    assertTrue(reducePackage instanceof POPackage);
    PhysicalOperator reduceForEach = firstJob.reducePlan.getSuccessors(reducePackage).get(0);
    assertTrue(reduceForEach instanceof POForEach);
    PhysicalOperator reduceStore = firstJob.reducePlan.getSuccessors(reduceForEach).get(0);
    assertTrue(reduceStore instanceof POStore);

    // ---- Indexer's local rearrange: a single plan projecting column 0 ----
    assertTrue(rearrangePlans.size() == 1);
    PhysicalPlan onlyPlan = rearrangePlans.get(0);
    assertTrue(onlyPlan.size() == 1);
    PhysicalOperator projection = onlyPlan.getRoots().get(0);
    assertTrue(projection instanceof POProject);
    assertTrue(((POProject) projection).getColumn() == 0);

    // ---- Second MR operator: map-only (load -> merge join -> store) ----
    MapReduceOper secondJob = compiled.getSuccessors(firstJob).get(0);
    assertTrue(secondJob.mapPlan.size() == 3);
    PhysicalOperator secondLoad = secondJob.mapPlan.getRoots().get(0);
    assertTrue(secondLoad instanceof POLoad);
    PhysicalOperator mergeJoin = secondJob.mapPlan.getSuccessors(secondLoad).get(0);
    assertTrue(mergeJoin instanceof POMergeJoin);
    PhysicalOperator finalStore = secondJob.mapPlan.getSuccessors(mergeJoin).get(0);
    assertTrue(finalStore instanceof POStore);
    assertTrue(secondJob.reducePlan.isEmpty());
  }
Exemple #5
0
  /**
   * Builds the MR plan for a merge cogroup of two loads that use custom load functions and checks
   * the UDF lists: the first root operator must list {@code TestCollectableLoadFunc}, and its first
   * successor must list both {@code TestCollectableLoadFunc} and {@code TestIndexableLoadFunc}.
   */
  @Test
  public void testUDFInMergedCoGroup() throws Exception {
    final String collectableLoader = TestCollectableLoadFunc.class.getName();
    final String indexableLoader = TestIndexableLoadFunc.class.getName();

    String script =
        "a = load 'input1' using "
            + collectableLoader
            + "();"
            + "b = load 'input2' using "
            + indexableLoader
            + "();"
            + "c = cogroup a by $0, b by $0 using 'merge';"
            + "store c into 'output';";

    PhysicalPlan physicalPlan = Util.buildPp(pigServer, script);
    MROperPlan mrPlan = Util.buildMRPlan(physicalPlan, pc);

    // First operator in the plan.
    MapReduceOper firstJob = mrPlan.getRoots().get(0);
    assertTrue(firstJob.UDFs.contains(collectableLoader));

    // Its first successor must carry both load functions.
    MapReduceOper secondJob = mrPlan.getSuccessors(firstJob).get(0);
    assertTrue(secondJob.UDFs.contains(collectableLoader));
    assertTrue(secondJob.UDFs.contains(indexableLoader));
  }