コード例 #1
0
ファイル: CombinerOptimizerUtil.java プロジェクト: scr/pig
  /**
   * Replace old POLocalRearrange with new pre-combine LR, add new map foreach, new
   * map-local-rearrange, and connect them
   *
   * @param mapPlan
   * @param preCombinerLR
   * @param mfe
   * @param mapAgg
   * @param mlr
   * @throws PlanException
   */
  private static void patchUpMap(
      PhysicalPlan mapPlan,
      POPreCombinerLocalRearrange preCombinerLR,
      POForEach mfe,
      POPartialAgg mapAgg,
      POLocalRearrange mlr)
      throws PlanException {
    POLocalRearrange oldLR = (POLocalRearrange) mapPlan.getLeaves().get(0);
    mapPlan.replace(oldLR, preCombinerLR);

    mapPlan.add(mfe);
    mapPlan.connect(preCombinerLR, mfe);

    // the operator before local rearrange
    PhysicalOperator opBeforeLR = mfe;

    if (mapAgg != null) {
      mapPlan.add(mapAgg);
      mapPlan.connect(mfe, mapAgg);
      opBeforeLR = mapAgg;
    }

    mapPlan.add(mlr);
    mapPlan.connect(opBeforeLR, mlr);
  }
コード例 #2
0
ファイル: TestMRCompiler.java プロジェクト: scr/pig
  // Tests Single input case for both blocking and non-blocking
  // with both map and reduce phases
  @Test
  public void testSim1() throws Exception {
    PhysicalPlan php = new PhysicalPlan();
    POLoad ld = GenPhyOp.topLoadOp();
    php.add(ld);
    PhysicalPlan grpChain1 = GenPhyOp.grpChain();
    php.merge(grpChain1);

    php.connect(ld, grpChain1.getRoots().get(0));

    PhysicalOperator leaf = php.getLeaves().get(0);

    PhysicalPlan grpChain2 = GenPhyOp.grpChain();
    php.merge(grpChain2);

    php.connect(leaf, grpChain2.getRoots().get(0));

    leaf = php.getLeaves().get(0);
    POFilter fl = GenPhyOp.topFilterOp();
    php.add(fl);

    php.connect(leaf, fl);

    POStore st = GenPhyOp.topStoreOp();
    php.add(st);

    php.connect(fl, st);
    run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC1.gld");
  }
コード例 #3
0
ファイル: CombinerOptimizerUtil.java プロジェクト: scr/pig
  /**
   * add algebraic functions with appropriate projection to new foreach in combiner
   *
   * @param cfe - the new foreach in combiner
   * @param op2newpos - mapping of physical operator to position in input
   * @throws CloneNotSupportedException
   * @throws PlanException
   */
  private static void addAlgebraicFuncToCombineFE(
      POForEach cfe, Map<PhysicalOperator, Integer> op2newpos)
      throws CloneNotSupportedException, PlanException {
    // an array that we will first populate with physical operators in order
    // of their position in input. Used while adding plans to combine
    // foreach just so that output of combine foreach same positions as
    // input. That means the same operator to position mapping can be used
    // by reduce as well
    PhysicalOperator[] opsInOrder = new PhysicalOperator[op2newpos.size() + 1];
    for (Map.Entry<PhysicalOperator, Integer> op2pos : op2newpos.entrySet()) {
      opsInOrder[op2pos.getValue()] = op2pos.getKey();
    }

    // first position is used by group column and a plan has been added for
    // it, so start with 1
    for (int i = 1; i < opsInOrder.length; i++) {
      // create new inner plan for foreach add cloned copy of given
      // physical operator and a new project. Even if the udf in query
      // takes multiple input, only one project needs to be added because
      // input to this udf will be the INITIAL version of udf evaluated in
      // map.
      PhysicalPlan newPlan = new PhysicalPlan();
      PhysicalOperator newOp = opsInOrder[i].clone();
      newPlan.add(newOp);
      POProject proj = new POProject(createOperatorKey(cfe.getOperatorKey().getScope()), 1, i);
      proj.setResultType(DataType.BAG);
      newPlan.add(proj);
      newPlan.connect(proj, newOp);
      cfe.addInputPlan(newPlan, false);
    }
  }
コード例 #4
0
  public void testLimit() throws Exception {
    PhysicalPlan php = new PhysicalPlan();

    POLoad lC = GenPhyOp.topLoadOp();
    php.add(lC);

    POLimit op = new POLimit(new OperatorKey("", r.nextLong()), -1, null);

    php.add(op);
    php.connect(lC, op);

    POStore st = GenPhyOp.topStoreOp();
    php.addAsLeaf(st);
    run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC17.gld");
  }
コード例 #5
0
ファイル: TestMRCompiler.java プロジェクト: scr/pig
  @Test
  public void testSim4() throws Exception {
    PhysicalPlan php = new PhysicalPlan();

    PhysicalPlan ldGrpChain1 = GenPhyOp.loadedGrpChain();
    PhysicalPlan ldGrpChain2 = GenPhyOp.loadedGrpChain();

    php.merge(ldGrpChain1);
    php.merge(ldGrpChain2);

    POUnion un = GenPhyOp.topUnionOp();
    php.addAsLeaf(un);

    PhysicalPlan ldFil1 = GenPhyOp.loadedFilter();
    PhysicalPlan ldFil2 = GenPhyOp.loadedFilter();

    php.merge(ldFil1);
    php.connect(ldFil1.getLeaves().get(0), un);

    php.merge(ldFil2);
    php.connect(ldFil2.getLeaves().get(0), un);

    POStore st = GenPhyOp.topStoreOp();
    php.add(st);

    php.connect(un, st);
    run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC4.gld");
  }
コード例 #6
0
ファイル: TestMRCompiler.java プロジェクト: scr/pig
  @Test
  public void testSpl2() throws Exception {
    PhysicalPlan php = new PhysicalPlan();

    POLoad lA = GenPhyOp.topLoadOp();
    POSplit spl = GenPhyOp.topSplitOp();
    php.add(lA);
    php.add(spl);
    php.connect(lA, spl);

    POFilter fl1 = GenPhyOp.topFilterOp();
    POFilter fl2 = GenPhyOp.topFilterOp();
    php.add(fl1);
    php.add(fl2);
    php.connect(spl, fl1);
    php.connect(spl, fl2);

    POLocalRearrange lr1 = GenPhyOp.topLocalRearrangeOp();
    POLocalRearrange lr2 = GenPhyOp.topLocalRearrangeOp();
    php.add(lr1);
    php.add(lr2);
    php.connect(fl1, lr1);
    php.connect(fl2, lr2);

    POGlobalRearrange gr = GenPhyOp.topGlobalRearrangeOp();
    php.addAsLeaf(gr);

    POPackage pk = GenPhyOp.topPackageOp();
    php.addAsLeaf(pk);

    POSplit sp2 = GenPhyOp.topSplitOp();
    php.addAsLeaf(sp2);

    POFilter fl3 = GenPhyOp.topFilterOp();
    POFilter fl4 = GenPhyOp.topFilterOp();
    php.add(fl3);
    php.add(fl4);
    php.connect(sp2, fl3);
    php.connect(sp2, fl4);

    POUnion un = GenPhyOp.topUnionOp();
    php.addAsLeaf(un);

    POStore st = GenPhyOp.topStoreOp();
    php.addAsLeaf(st);
    run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC13.gld");
  }
コード例 #7
0
ファイル: CombinerOptimizerUtil.java プロジェクト: scr/pig
 /**
  * Recursively clone op and its predecessors from pplan and add them to newplan
  *
  * @param op
  * @param pplan
  * @param newplan
  * @return
  * @throws CloneNotSupportedException
  * @throws PlanException
  */
 private static PhysicalOperator addPredecessorsToPlan(
     PhysicalOperator op, PhysicalPlan pplan, PhysicalPlan newplan)
     throws CloneNotSupportedException, PlanException {
   PhysicalOperator newOp = op.clone();
   newplan.add(newOp);
   if (pplan.getPredecessors(op) == null || pplan.getPredecessors(op).size() == 0) {
     return newOp;
   }
   for (PhysicalOperator pred : pplan.getPredecessors(op)) {
     PhysicalOperator newPred = addPredecessorsToPlan(pred, pplan, newplan);
     newplan.connect(newPred, newOp);
   }
   return newOp;
 }
コード例 #8
0
ファイル: CombinerOptimizerUtil.java プロジェクト: scr/pig
 /**
  * Create a new foreach with same scope,alias as given foreach add an inner plan that projects the
  * group column, which is going to be the first input
  *
  * @param foreach source foreach
  * @param keyType type for group-by key
  * @return new POForeach
  */
 private static POForEach createForEachWithGrpProj(POForEach foreach, byte keyType) {
   String scope = foreach.getOperatorKey().scope;
   POForEach newFE = new POForEach(createOperatorKey(scope), new ArrayList<PhysicalPlan>());
   newFE.addOriginalLocation(foreach.getAlias(), foreach.getOriginalLocations());
   newFE.setResultType(foreach.getResultType());
   // create plan that projects the group column
   PhysicalPlan grpProjPlan = new PhysicalPlan();
   // group by column is the first column
   POProject proj = new POProject(createOperatorKey(scope), 1, 0);
   proj.setResultType(keyType);
   grpProjPlan.add(proj);
   newFE.addInputPlan(grpProjPlan, false);
   return newFE;
 }
コード例 #9
0
ファイル: CombinerOptimizerUtil.java プロジェクト: scr/pig
 /**
  * @param op
  * @param index
  * @param plan
  * @throws PlanException
  */
 private static void setProjectInput(PhysicalOperator op, PhysicalPlan plan, int index)
     throws PlanException {
   String scope = op.getOperatorKey().scope;
   POProject proj =
       new POProject(
           new OperatorKey(scope, NodeIdGenerator.getGenerator().getNextNodeId(scope)),
           op.getRequestedParallelism(),
           index);
   proj.setResultType(DataType.BAG);
   // Remove old connections and elements from the plan
   plan.trimAbove(op);
   plan.add(proj);
   plan.connect(proj, op);
   List<PhysicalOperator> inputs = Lists.newArrayList();
   inputs.add(proj);
   op.setInputs(inputs);
 }
コード例 #10
0
ファイル: CombinerOptimizerUtil.java プロジェクト: scr/pig
  /**
   * create new Local rearrange by cloning existing rearrange and add plan for projecting the key
   *
   * @param rearrange
   * @return
   * @throws PlanException
   * @throws CloneNotSupportedException
   */
  private static POLocalRearrange getNewRearrange(POLocalRearrange rearrange)
      throws PlanException, CloneNotSupportedException {
    POLocalRearrange newRearrange = rearrange.clone();

    // Set the projection to be the key
    PhysicalPlan newPlan = new PhysicalPlan();
    String scope = newRearrange.getOperatorKey().scope;
    POProject proj =
        new POProject(
            new OperatorKey(scope, NodeIdGenerator.getGenerator().getNextNodeId(scope)), -1, 0);
    proj.setResultType(newRearrange.getKeyType());
    newPlan.add(proj);

    List<PhysicalPlan> plans = new ArrayList<PhysicalPlan>(1);
    plans.add(newPlan);
    newRearrange.setPlansFromCombiner(plans);

    return newRearrange;
  }
コード例 #11
0
  public void testSpl1() throws Exception {
    PhysicalPlan php = new PhysicalPlan();

    POLoad lA = GenPhyOp.topLoadOp();
    POSplit spl = GenPhyOp.topSplitOp();
    php.add(lA);
    php.add(spl);
    php.connect(lA, spl);

    POFilter fl1 = GenPhyOp.topFilterOp();
    POFilter fl2 = GenPhyOp.topFilterOp();
    php.add(fl1);
    php.add(fl2);
    php.connect(spl, fl1);
    php.connect(spl, fl2);

    POLocalRearrange lr1 = GenPhyOp.topLocalRearrangeOp();
    POLocalRearrange lr2 = GenPhyOp.topLocalRearrangeOp();
    php.add(lr1);
    php.add(lr2);
    php.connect(fl1, lr1);
    php.connect(fl2, lr2);

    POGlobalRearrange gr = GenPhyOp.topGlobalRearrangeOp();
    php.add(gr);
    php.connect(lr1, gr);
    php.connect(lr2, gr);

    POPackage pk = GenPhyOp.topPackageOp();
    php.add(pk);
    php.connect(gr, pk);

    POStore st = GenPhyOp.topStoreOp();
    php.add(st);
    php.connect(pk, st);
    run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC12.gld");
  }
コード例 #12
0
ファイル: TestMRCompiler.java プロジェクト: scr/pig
  @Test
  public void testSortUDF1() throws Exception {
    PhysicalPlan php = new PhysicalPlan();
    PhysicalPlan ldFil1 = GenPhyOp.loadedFilter();
    php.merge(ldFil1);

    // set up order by *
    String funcName = WeirdComparator.class.getName();
    POUserComparisonFunc comparator =
        new POUserComparisonFunc(
            new OperatorKey("", r.nextLong()), -1, null, new FuncSpec(funcName));
    POSort sort =
        new POSort(
            new OperatorKey("", r.nextLong()),
            -1,
            ldFil1.getLeaves(),
            null,
            new ArrayList<Boolean>(),
            comparator);
    sort.setRequestedParallelism(20);
    PhysicalPlan nesSortPlan = new PhysicalPlan();
    POProject topPrj = new POProject(new OperatorKey("", r.nextLong()));
    topPrj.setColumn(1);
    topPrj.setOverloaded(true);
    topPrj.setResultType(DataType.TUPLE);
    nesSortPlan.add(topPrj);

    POProject prjStar2 = new POProject(new OperatorKey("", r.nextLong()));
    prjStar2.setResultType(DataType.TUPLE);
    prjStar2.setStar(true);
    nesSortPlan.add(prjStar2);

    nesSortPlan.connect(topPrj, prjStar2);
    List<PhysicalPlan> nesSortPlanLst = new ArrayList<PhysicalPlan>();
    nesSortPlanLst.add(nesSortPlan);

    sort.setSortPlans(nesSortPlanLst);

    php.add(sort);
    php.connect(ldFil1.getLeaves().get(0), sort);
    // have a foreach which takes the sort output
    // and send it two two udfs
    List<String> udfs = new ArrayList<String>();
    udfs.add(COUNT.class.getName());
    udfs.add(SUM.class.getName());
    POForEach fe3 = GenPhyOp.topForEachOPWithUDF(udfs);
    php.add(fe3);
    php.connect(sort, fe3);

    // add a group above the foreach
    PhysicalPlan grpChain1 = GenPhyOp.grpChain();
    php.merge(grpChain1);
    php.connect(fe3, grpChain1.getRoots().get(0));

    udfs.clear();
    udfs.add(AVG.class.getName());
    POForEach fe4 = GenPhyOp.topForEachOPWithUDF(udfs);
    php.addAsLeaf(fe4);

    PhysicalPlan grpChain2 = GenPhyOp.grpChain();
    php.merge(grpChain2);
    php.connect(fe4, grpChain2.getRoots().get(0));

    udfs.clear();
    udfs.add(GFCross.class.getName() + "('1')");
    POForEach fe5 = GenPhyOp.topForEachOPWithUDF(udfs);
    php.addAsLeaf(fe5);

    POStore st = GenPhyOp.topStoreOp();
    php.addAsLeaf(st);
    run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC15.gld");
  }
コード例 #13
0
ファイル: TestMRCompiler.java プロジェクト: scr/pig
  @Test
  public void testSpl3() throws Exception {
    PhysicalPlan php = new PhysicalPlan();

    POLoad lA = GenPhyOp.topLoadOp();
    POSplit spl = GenPhyOp.topSplitOp();
    php.add(lA);
    php.add(spl);
    php.connect(lA, spl);

    POFilter fl1 = GenPhyOp.topFilterOp();
    fl1.setRequestedParallelism(10);
    POFilter fl2 = GenPhyOp.topFilterOp();
    fl2.setRequestedParallelism(20);
    php.add(fl1);
    php.add(fl2);
    php.connect(spl, fl1);
    php.connect(spl, fl2);

    POSplit sp11 = GenPhyOp.topSplitOp();
    POSplit sp21 = GenPhyOp.topSplitOp();
    php.add(sp11);
    php.add(sp21);
    php.connect(fl1, sp11);
    php.connect(fl2, sp21);

    POFilter fl11 = GenPhyOp.topFilterOp();
    fl11.setRequestedParallelism(10);
    POFilter fl21 = GenPhyOp.topFilterOp();
    fl21.setRequestedParallelism(20);
    POFilter fl22 = GenPhyOp.topFilterOp();
    fl22.setRequestedParallelism(30);
    php.add(fl11);
    php.add(fl21);
    php.add(fl22);
    php.connect(sp11, fl11);
    php.connect(sp21, fl21);
    php.connect(sp21, fl22);

    POLocalRearrange lr1 = GenPhyOp.topLocalRearrangeOp();
    lr1.setRequestedParallelism(40);
    POLocalRearrange lr21 = GenPhyOp.topLocalRearrangeOp();
    lr21.setRequestedParallelism(15);
    POLocalRearrange lr22 = GenPhyOp.topLocalRearrangeOp();
    lr22.setRequestedParallelism(35);
    php.add(lr1);
    php.add(lr21);
    php.add(lr22);
    php.connect(fl11, lr1);
    php.connect(fl21, lr21);
    php.connect(fl22, lr22);

    POGlobalRearrange gr = GenPhyOp.topGlobalRearrangeOp();
    php.addAsLeaf(gr);

    POPackage pk = GenPhyOp.topPackageOp();
    pk.setRequestedParallelism(25);
    php.addAsLeaf(pk);

    POSplit sp2 = GenPhyOp.topSplitOp();
    php.addAsLeaf(sp2);

    POFilter fl3 = GenPhyOp.topFilterOp();
    fl3.setRequestedParallelism(100);
    POFilter fl4 = GenPhyOp.topFilterOp();
    fl4.setRequestedParallelism(80);
    php.add(fl3);
    php.add(fl4);
    php.connect(sp2, fl3);
    php.connect(sp2, fl4);

    POUnion un = GenPhyOp.topUnionOp();
    php.addAsLeaf(un);

    POStore st = GenPhyOp.topStoreOp();
    php.addAsLeaf(st);
    run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC14.gld");
  }
コード例 #14
0
ファイル: TestMRCompiler.java プロジェクト: scr/pig
  @Test
  public void testRun2() throws Exception {
    PhysicalPlan php = new PhysicalPlan();

    PhysicalPlan part1 = new PhysicalPlan();
    POLoad lC = GenPhyOp.topLoadOp();
    POFilter fC = GenPhyOp.topFilterOp();
    POLocalRearrange lrC = GenPhyOp.topLocalRearrangeOp();
    POGlobalRearrange grC = GenPhyOp.topGlobalRearrangeOp();
    POPackage pkC = GenPhyOp.topPackageOp();
    part1.add(lC);
    part1.add(fC);
    part1.connect(lC, fC);
    part1.add(lrC);
    part1.connect(fC, lrC);
    part1.add(grC);
    part1.connect(lrC, grC);
    part1.add(pkC);
    part1.connect(grC, pkC);

    POPackage pkD = GenPhyOp.topPackageOp();
    POLocalRearrange lrD = GenPhyOp.topLocalRearrangeOp();
    POGlobalRearrange grD = GenPhyOp.topGlobalRearrangeOp();
    POLoad lD = GenPhyOp.topLoadOp();
    part1.add(lD);
    part1.add(lrD);
    part1.connect(lD, lrD);

    part1.add(grD);
    part1.connect(lrD, grD);
    part1.add(pkD);
    part1.connect(grD, pkD);
    part1.connect(pkD, grC);

    POLoad lA = GenPhyOp.topLoadOp();
    POLoad lB = GenPhyOp.topLoadOp();

    // POLoad lC = lA;
    POFilter fA = GenPhyOp.topFilterOp();

    POLocalRearrange lrA = GenPhyOp.topLocalRearrangeOp();
    POLocalRearrange lrB = GenPhyOp.topLocalRearrangeOp();

    POGlobalRearrange grAB = GenPhyOp.topGlobalRearrangeOp();

    POPackage pkAB = GenPhyOp.topPackageOp();

    POFilter fAB = GenPhyOp.topFilterOp();
    POUnion unABC = GenPhyOp.topUnionOp();

    php.add(lA);
    php.add(lB);

    php.add(fA);

    php.connect(lA, fA);

    php.add(lrA);
    php.add(lrB);

    php.connect(fA, lrA);
    php.connect(lB, lrB);

    php.add(grAB);
    php.connect(lrA, grAB);
    php.connect(lrB, grAB);

    php.add(pkAB);
    php.connect(grAB, pkAB);

    php.add(fAB);
    php.connect(pkAB, fAB);

    php.merge(part1);

    List<PhysicalOperator> leaves = new ArrayList<PhysicalOperator>();
    for (PhysicalOperator phyOp : php.getLeaves()) {
      leaves.add(phyOp);
    }

    php.add(unABC);
    for (PhysicalOperator physicalOperator : leaves) {
      php.connect(physicalOperator, unABC);
    }

    POStore st = GenPhyOp.topStoreOp();

    php.add(st);
    php.connect(unABC, st);
    run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC11.gld");
  }
コード例 #15
0
ファイル: CombinerOptimizerUtil.java プロジェクト: scr/pig
  /**
   * Algebraic functions and distinct in nested plan of a foreach are partially computed in the map
   * and combine phase. A new foreach statement with initial and intermediate forms of algebraic
   * functions are added to map and combine plans respectively.
   *
   * <p>If bag portion of group-by result is projected or a non algebraic expression/udf has bag as
   * input, combiner will not be used. This is because the use of combiner in such case is likely to
   * degrade performance as there will not be much reduction in data size in combine stage to offset
   * the cost of the additional number of times (de)serialization is done.
   *
   * <p>Major areas for enhancement: 1. use of combiner in cogroup 2. queries with order-by, limit
   * or sort in a nested foreach after group-by 3. case where group-by is followed by filter that
   * has algebraic expression
   */
  public static void addCombiner(
      PhysicalPlan mapPlan,
      PhysicalPlan reducePlan,
      PhysicalPlan combinePlan,
      CompilationMessageCollector messageCollector,
      boolean doMapAgg)
      throws VisitorException {

    // part one - check if this MR job represents a group-by + foreach. Find
    // the POLocalRearrange in the map. I'll need it later.
    List<PhysicalOperator> mapLeaves = mapPlan.getLeaves();
    if (mapLeaves == null || mapLeaves.size() != 1) {
      messageCollector.collect(
          "Expected map to have single leaf", MessageType.Warning, PigWarning.MULTI_LEAF_MAP);
      return;
    }
    PhysicalOperator mapLeaf = mapLeaves.get(0);
    if (!(mapLeaf instanceof POLocalRearrange)) {
      return;
    }
    POLocalRearrange rearrange = (POLocalRearrange) mapLeaf;

    List<PhysicalOperator> reduceRoots = reducePlan.getRoots();
    if (reduceRoots.size() != 1) {
      messageCollector.collect(
          "Expected reduce to have single root", MessageType.Warning, PigWarning.MULTI_ROOT_REDUCE);
      return;
    }

    // I expect that the first root should always be a POPackage. If not, I
    // don't know what's going on, so I'm out of here.
    PhysicalOperator root = reduceRoots.get(0);
    if (!(root instanceof POPackage)) {
      messageCollector.collect(
          "Expected reduce root to be a POPackage",
          MessageType.Warning,
          PigWarning.NON_PACKAGE_REDUCE_PLAN_ROOT);
      return;
    }
    POPackage pack = (POPackage) root;

    List<PhysicalOperator> packSuccessors = reducePlan.getSuccessors(root);
    if (packSuccessors == null || packSuccessors.size() != 1) {
      return;
    }
    PhysicalOperator successor = packSuccessors.get(0);

    if (successor instanceof POLimit) {
      // POLimit is acceptable, as long has it has a single foreach as
      // successor
      List<PhysicalOperator> limitSucs = reducePlan.getSuccessors(successor);
      if (limitSucs != null && limitSucs.size() == 1 && limitSucs.get(0) instanceof POForEach) {
        // the code below will now further examine the foreach
        successor = limitSucs.get(0);
      }
    }
    if (successor instanceof POForEach) {
      POForEach foreach = (POForEach) successor;
      List<PhysicalPlan> feInners = foreach.getInputPlans();

      // find algebraic operators and also check if the foreach statement
      // is suitable for combiner use
      List<Pair<PhysicalOperator, PhysicalPlan>> algebraicOps = findAlgebraicOps(feInners);
      if (algebraicOps == null || algebraicOps.size() == 0) {
        // the plan is not combinable or there is nothing to combine
        // we're done
        return;
      }
      if (combinePlan != null && combinePlan.getRoots().size() != 0) {
        messageCollector.collect(
            "Wasn't expecting to find anything already " + "in the combiner!",
            MessageType.Warning,
            PigWarning.NON_EMPTY_COMBINE_PLAN);
        return;
      }

      LOG.info("Choosing to move algebraic foreach to combiner");
      try {
        // replace PODistinct->Project[*] with distinct udf (which is Algebraic)
        for (Pair<PhysicalOperator, PhysicalPlan> op2plan : algebraicOps) {
          if (!(op2plan.first instanceof PODistinct)) {
            continue;
          }
          DistinctPatcher distinctPatcher = new DistinctPatcher(op2plan.second);
          distinctPatcher.visit();
          if (distinctPatcher.getDistinct() == null) {
            int errCode = 2073;
            String msg =
                "Problem with replacing distinct operator with distinct built-in function.";
            throw new PlanException(msg, errCode, PigException.BUG);
          }
          op2plan.first = distinctPatcher.getDistinct();
        }

        // create new map foreach
        POForEach mfe = createForEachWithGrpProj(foreach, rearrange.getKeyType());
        Map<PhysicalOperator, Integer> op2newpos = Maps.newHashMap();
        Integer pos = 1;
        // create plan for each algebraic udf and add as inner plan in map-foreach
        for (Pair<PhysicalOperator, PhysicalPlan> op2plan : algebraicOps) {
          PhysicalPlan udfPlan = createPlanWithPredecessors(op2plan.first, op2plan.second);
          mfe.addInputPlan(udfPlan, false);
          op2newpos.put(op2plan.first, pos++);
        }
        changeFunc(mfe, POUserFunc.INITIAL);

        // since we will only be creating SingleTupleBag as input to
        // the map foreach, we should flag the POProjects in the map
        // foreach inner plans to also use SingleTupleBag
        for (PhysicalPlan mpl : mfe.getInputPlans()) {
          try {
            new fixMapProjects(mpl).visit();
          } catch (VisitorException e) {
            int errCode = 2089;
            String msg = "Unable to flag project operator to use single tuple bag.";
            throw new PlanException(msg, errCode, PigException.BUG, e);
          }
        }

        // create new combine foreach
        POForEach cfe = createForEachWithGrpProj(foreach, rearrange.getKeyType());
        // add algebraic functions with appropriate projection
        addAlgebraicFuncToCombineFE(cfe, op2newpos);
        changeFunc(cfe, POUserFunc.INTERMEDIATE);

        // fix projection and function time for algebraic functions in reduce foreach
        for (Pair<PhysicalOperator, PhysicalPlan> op2plan : algebraicOps) {
          setProjectInput(op2plan.first, op2plan.second, op2newpos.get(op2plan.first));
          byte resultType = op2plan.first.getResultType();
          ((POUserFunc) op2plan.first).setAlgebraicFunction(POUserFunc.FINAL);
          op2plan.first.setResultType(resultType);
        }

        // we have modified the foreach inner plans - so set them again
        // for the foreach so that foreach can do any re-initialization
        // around them.
        // FIXME - this is a necessary evil right now because the leaves
        // are explicitly stored in the POForeach as a list rather than
        // computed each time at run time from the plans for
        // optimization. Do we want to have the Foreach compute the
        // leaves each time and have Java optimize it (will Java
        // optimize?)?
        mfe.setInputPlans(mfe.getInputPlans());
        cfe.setInputPlans(cfe.getInputPlans());
        foreach.setInputPlans(foreach.getInputPlans());

        // tell POCombinerPackage which fields need projected and which
        // placed in bags. First field is simple project rest need to go
        // into bags
        int numFields = algebraicOps.size() + 1; // algebraic funcs + group key
        boolean[] bags = new boolean[numFields];
        bags[0] = false;
        for (int i = 1; i < numFields; i++) {
          bags[i] = true;
        }

        // Use the POCombiner package in the combine plan
        // as it needs to act differently than the regular
        // package operator.
        CombinerPackager pkgr = new CombinerPackager(pack.getPkgr(), bags);
        POPackage combinePack = pack.clone();
        combinePack.setPkgr(pkgr);
        combinePack.setParentPlan(null);

        combinePlan.add(combinePack);
        combinePlan.add(cfe);
        combinePlan.connect(combinePack, cfe);

        // No need to connect projections in cfe to cp, because
        // PigCombiner directly attaches output from package to
        // root of remaining plan.

        POLocalRearrange mlr = getNewRearrange(rearrange);
        POPartialAgg mapAgg = null;
        if (doMapAgg) {
          mapAgg = createPartialAgg(cfe);
        }

        // A specialized local rearrange operator will replace
        // the normal local rearrange in the map plan. This behaves
        // like the regular local rearrange in the getNext()
        // as far as getting its input and constructing the
        // "key" out of the input. It then returns a tuple with
        // two fields - the key in the first position and the
        // "value" inside a bag in the second position. This output
        // format resembles the format out of a Package. This output
        // will feed to the map foreach which expects this format.
        // If the key field isn't in the project of the combiner or map foreach,
        // it is added to the end (This is required so that we can
        // set up the inner plan of the new Local Rearrange leaf in the map
        // and combine plan to contain just the project of the key).
        patchUpMap(mapPlan, getPreCombinerLR(rearrange), mfe, mapAgg, mlr);
        POLocalRearrange clr = getNewRearrange(rearrange);
        clr.setParentPlan(null);
        combinePlan.add(clr);
        combinePlan.connect(cfe, clr);

        // Change the package operator in the reduce plan to
        // be the POCombiner package, as it needs to act
        // differently than the regular package operator.
        pack.setPkgr(pkgr.clone());
      } catch (Exception e) {
        int errCode = 2018;
        String msg = "Internal error. Unable to introduce the combiner for optimization.";
        throw new OptimizerException(msg, errCode, PigException.BUG, e);
      }
    }
  }