/**
 * Prepare the map-reduce work of the task that owns the given reduce sink.
 *
 * <p>The first child of {@code op} becomes the reducer of the task found through the context of
 * {@code op}'s first parent. The task is added to the root tasks if it is not there yet, and the
 * current top operator is attached to the plan (via {@code setTaskPlan}) the first time it is
 * seen. On return the context's current task is that task, and its current top operator and
 * alias id are cleared.
 *
 * @param op the reduce sink operator encountered
 * @param opProcCtx processing context
 * @throws SemanticException declared for the {@code setTaskPlan} call made here
 */
public static void initPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx)
    throws SemanticException {
  Operator<? extends Serializable> reduceSideOp = op.getChildOperators().get(0);
  GenMapRedCtx parentCtx = opProcCtx.getMapCurrCtx().get(op.getParentOperators().get(0));
  Task<? extends Serializable> task = parentCtx.getCurrTask();
  MapredWork work = (MapredWork) task.getWork();
  Operator<? extends Serializable> topOp = opProcCtx.getCurrTopOp();

  // Remember which task evaluates the reducer, then wire the reducer into the work.
  opProcCtx.getOpTaskMap().put(reduceSideOp, task);
  work.setReducer(reduceSideOp);
  work.setNumReduceTasks(op.getConf().getNumReducers());

  List<Task<? extends Serializable>> roots = opProcCtx.getRootTasks();
  boolean alreadyRoot = roots.contains(task);
  if (!alreadyRoot) {
    roots.add(task);
  }

  // Only an exact JoinOperator reducer (not a subclass) switches tagging on.
  if (JoinOperator.class == reduceSideOp.getClass()) {
    work.setNeedsTagging(true);
  }

  assert topOp != null;
  List<Operator<? extends Serializable>> visited = opProcCtx.getSeenOps();
  String aliasId = opProcCtx.getCurrAliasId();
  if (!visited.contains(topOp)) {
    visited.add(topOp);
    setTaskPlan(aliasId, topOp, work, false, opProcCtx);
  }

  // Publish the task and reset the per-branch state held by the context.
  opProcCtx.setCurrTask(task);
  opProcCtx.setCurrTopOp(null);
  opProcCtx.setCurrAliasId(null);
}
/**
 * Prepare the map-reduce work of the current task for a reduce sink that follows a union.
 *
 * <p>The first child of {@code op} is installed as the reducer of the task found through the
 * context of {@code op}'s first parent, and the remaining setup is delegated to the
 * {@code initUnionPlan(opProcCtx, task, false)} overload.
 *
 * @param op the reduce sink operator encountered
 * @param opProcCtx processing context
 * @throws SemanticException declared for the delegated {@code initUnionPlan} call
 */
public static void initUnionPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx)
    throws SemanticException {
  Operator<? extends Serializable> reduceSideOp = op.getChildOperators().get(0);
  GenMapRedCtx parentCtx = opProcCtx.getMapCurrCtx().get(op.getParentOperators().get(0));
  Task<? extends Serializable> task = parentCtx.getCurrTask();
  MapredWork work = (MapredWork) task.getWork();

  // Remember which task evaluates the reducer, then wire the reducer into the work.
  opProcCtx.getOpTaskMap().put(reduceSideOp, task);
  work.setReducer(reduceSideOp);
  work.setNumReduceTasks(op.getConf().getNumReducers());

  // Only an exact JoinOperator reducer (not a subclass) switches tagging on.
  if (JoinOperator.class == reduceSideOp.getClass()) {
    work.setNeedsTagging(true);
  }

  initUnionPlan(opProcCtx, task, false);
}
/** * Initialize the current plan by adding it to root tasks. * * @param op the map join operator encountered * @param opProcCtx processing context * @param pos position of the parent */ public static void initMapJoinPlan( Operator<? extends Serializable> op, GenMRProcContext opProcCtx, boolean readInputMapJoin, boolean readInputUnion, boolean setReducer, int pos, boolean createLocalPlan) throws SemanticException { Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = opProcCtx.getMapCurrCtx(); assert (((pos == -1) && (readInputMapJoin)) || (pos != -1)); int parentPos = (pos == -1) ? 0 : pos; GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(parentPos)); Task<? extends Serializable> currTask = mapredCtx.getCurrTask(); MapredWork plan = (MapredWork) currTask.getWork(); HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap = opProcCtx.getOpTaskMap(); Operator<? extends Serializable> currTopOp = opProcCtx.getCurrTopOp(); // The mapjoin has already been encountered. Some context must be stored // about that if (readInputMapJoin) { AbstractMapJoinOperator<? extends MapJoinDesc> currMapJoinOp = opProcCtx.getCurrMapJoinOp(); assert currMapJoinOp != null; boolean local = ((pos == -1) || (pos == (currMapJoinOp.getConf()).getPosBigTable())) ? false : true; if (setReducer) { Operator<? extends Serializable> reducer = op.getChildOperators().get(0); plan.setReducer(reducer); opTaskMap.put(reducer, currTask); if (reducer.getClass() == JoinOperator.class) { plan.setNeedsTagging(true); } ReduceSinkDesc desc = (ReduceSinkDesc) op.getConf(); plan.setNumReduceTasks(desc.getNumReducers()); } else { opTaskMap.put(op, currTask); } if (!readInputUnion) { GenMRMapJoinCtx mjCtx = opProcCtx.getMapJoinCtx(currMapJoinOp); String taskTmpDir; TableDesc tt_desc; Operator<? 
extends Serializable> rootOp; if (mjCtx.getOldMapJoin() == null || setReducer) { taskTmpDir = mjCtx.getTaskTmpDir(); tt_desc = mjCtx.getTTDesc(); rootOp = mjCtx.getRootMapJoinOp(); } else { GenMRMapJoinCtx oldMjCtx = opProcCtx.getMapJoinCtx(mjCtx.getOldMapJoin()); taskTmpDir = oldMjCtx.getTaskTmpDir(); tt_desc = oldMjCtx.getTTDesc(); rootOp = oldMjCtx.getRootMapJoinOp(); } setTaskPlan(taskTmpDir, taskTmpDir, rootOp, plan, local, tt_desc); setupBucketMapJoinInfo(plan, currMapJoinOp, createLocalPlan); } else { initUnionPlan(opProcCtx, currTask, false); } opProcCtx.setCurrMapJoinOp(null); } else { MapJoinDesc desc = (MapJoinDesc) op.getConf(); // The map is overloaded to keep track of mapjoins also opTaskMap.put(op, currTask); List<Task<? extends Serializable>> rootTasks = opProcCtx.getRootTasks(); rootTasks.add(currTask); assert currTopOp != null; List<Operator<? extends Serializable>> seenOps = opProcCtx.getSeenOps(); String currAliasId = opProcCtx.getCurrAliasId(); seenOps.add(currTopOp); boolean local = (pos == desc.getPosBigTable()) ? false : true; setTaskPlan(currAliasId, currTopOp, plan, local, opProcCtx); setupBucketMapJoinInfo( plan, (AbstractMapJoinOperator<? extends MapJoinDesc>) op, createLocalPlan); } opProcCtx.setCurrTask(currTask); opProcCtx.setCurrTopOp(null); opProcCtx.setCurrAliasId(null); }