/** * @param op * @param index * @param plan * @throws PlanException */ private static void setProjectInput(PhysicalOperator op, PhysicalPlan plan, int index) throws PlanException { String scope = op.getOperatorKey().scope; POProject proj = new POProject( new OperatorKey(scope, NodeIdGenerator.getGenerator().getNextNodeId(scope)), op.getRequestedParallelism(), index); proj.setResultType(DataType.BAG); // Remove old connections and elements from the plan plan.trimAbove(op); plan.add(proj); plan.connect(proj, op); List<PhysicalOperator> inputs = Lists.newArrayList(); inputs.add(proj); op.setInputs(inputs); }
/** * stolen from JobControlCompiler TODO: refactor it to share this * * @param physicalPlan * @param poLoad * @param jobConf * @return * @throws java.io.IOException */ private static JobConf configureLoader(PhysicalPlan physicalPlan, POLoad poLoad, JobConf jobConf) throws IOException { // 这部分似乎没用 Job job = new Job(jobConf); LoadFunc loadFunc = poLoad.getLoadFunc(); loadFunc.setLocation(poLoad.getLFile().getFileName(), job); // stolen from JobControlCompiler ArrayList<FileSpec> pigInputs = new ArrayList<FileSpec>(); // Store the inp filespecs pigInputs.add(poLoad.getLFile()); ArrayList<List<OperatorKey>> inpTargets = Lists.newArrayList(); ArrayList<String> inpSignatures = Lists.newArrayList(); ArrayList<Long> inpLimits = Lists.newArrayList(); // Store the target operators for tuples read // from this input List<PhysicalOperator> loadSuccessors = physicalPlan.getSuccessors(poLoad); List<OperatorKey> loadSuccessorsKeys = Lists.newArrayList(); if (loadSuccessors != null) { for (PhysicalOperator loadSuccessor : loadSuccessors) { loadSuccessorsKeys.add(loadSuccessor.getOperatorKey()); } } inpTargets.add(loadSuccessorsKeys); inpSignatures.add(poLoad.getSignature()); inpLimits.add(poLoad.getLimit()); jobConf.set("pig.inputs", ObjectSerializer.serialize(pigInputs)); jobConf.set("pig.inpTargets", ObjectSerializer.serialize(inpTargets)); jobConf.set("pig.inpSignatures", ObjectSerializer.serialize(inpSignatures)); jobConf.set("pig.inpLimits", ObjectSerializer.serialize(inpLimits)); return jobConf; }