/**
 * Changes the algebraic function type of the algebraic functions used in map and combine.
 * In map and combine the algebraic functions are expected to be the leaf of each input plan.
 *
 * <p>Every input plan of {@code fe} must have exactly one leaf. A {@link POProject} leaf is
 * skipped; a {@link POUserFunc} leaf has its algebraic function type set to {@code type}.
 *
 * @param fe the foreach whose input plans are inspected
 * @param type the value handed to {@code POUserFunc.setAlgebraicFunction}
 * @throws PlanException if a plan does not have exactly one leaf (error 2019), if the leaf is
 *     neither a project nor a UDF (error 2020), or if setting the function type fails
 *     (error 2075)
 */
private static void changeFunc(POForEach fe, byte type) throws PlanException {
    for (PhysicalPlan plan : fe.getInputPlans()) {
        List<PhysicalOperator> leaves = plan.getLeaves();
        if (leaves == null || leaves.size() != 1) {
            int errCode = 2019;
            // Report a null leaf list as zero leaves; dereferencing it here would mask
            // the intended PlanException with a NullPointerException.
            int leafCount = (leaves == null) ? 0 : leaves.size();
            String msg = "Expected to find plan with single leaf. Found " + leafCount
                    + " leaves.";
            throw new PlanException(msg, errCode, PigException.BUG);
        }

        PhysicalOperator leaf = leaves.get(0);
        if (leaf instanceof POProject) {
            // Plain projections carry no algebraic function to retarget.
            continue;
        }
        if (!(leaf instanceof POUserFunc)) {
            int errCode = 2020;
            String msg = "Expected to find plan with UDF or project leaf. Found "
                    + leaf.getClass().getSimpleName();
            throw new PlanException(msg, errCode, PigException.BUG);
        }

        POUserFunc func = (POUserFunc) leaf;
        try {
            func.setAlgebraicFunction(type);
        } catch (ExecException e) {
            int errCode = 2075;
            String msg = "Could not set algebraic function type.";
            throw new PlanException(msg, errCode, PigException.BUG, e);
        }
    }
}
@Override public void visitProject(POProject proj) throws VisitorException { // check if this project is preceded by PODistinct and // has the return type bag List<PhysicalOperator> preds = mPlan.getPredecessors(proj); if (preds == null) return; // this is a leaf project and so not interesting for patching PhysicalOperator pred = preds.get(0); if (preds.size() == 1 && pred instanceof PODistinct) { if (distinct != null) { // we should not already have been patched since the // Project-Distinct pair should occur only once int errCode = 2076; String msg = "Unexpected Project-Distinct pair while trying to set up plans for use with combiner."; throw new OptimizerException(msg, errCode, PigException.BUG); } // we have stick in the POUserfunc(org.apache.pig.builtin.Distinct)[DataBag] // in place of the Project-PODistinct pair PhysicalOperator distinctPredecessor = mPlan.getPredecessors(pred).get(0); POUserFunc func = null; try { String scope = proj.getOperatorKey().scope; List<PhysicalOperator> funcInput = new ArrayList<PhysicalOperator>(); FuncSpec fSpec = new FuncSpec(DISTINCT_UDF_CLASSNAME); funcInput.add(distinctPredecessor); // explicitly set distinctPredecessor's result type to // be tuple - this is relevant when distinctPredecessor is // originally a POForeach with return type BAG - we need to // set it to tuple so we get a stream of tuples. distinctPredecessor.setResultType(DataType.TUPLE); func = new POUserFunc( new OperatorKey(scope, NodeIdGenerator.getGenerator().getNextNodeId(scope)), -1, funcInput, fSpec); func.setResultType(DataType.BAG); mPlan.replace(proj, func); mPlan.remove(pred); // connect the the newly added "func" to // the predecessor to the earlier PODistinct mPlan.connect(distinctPredecessor, func); } catch (PlanException e) { int errCode = 2077; String msg = "Problem with reconfiguring plan to add distinct built-in function."; throw new OptimizerException(msg, errCode, PigException.BUG, e); } distinct = func; } }