// check same filter exists already private boolean filterExists(ReduceSinkOperator target, ExprNodeDesc replaced) { Operator<?> operator = target.getParentOperators().get(0); for (; operator instanceof FilterOperator; operator = operator.getParentOperators().get(0)) { ExprNodeDesc predicate = ((FilterOperator) operator).getConf().getPredicate(); if (ExprNodeDescUtils.containsPredicate(predicate, replaced)) { return true; } } return false; }
// Remove the reduce sink operator // Use BucketizedHiveInputFormat so that one mapper processes exactly one file private void removeReduceSink( ReduceSinkOperator rsOp, TableScanOperator tsOp, FileSinkOperator fsOp) { Operator<? extends OperatorDesc> parRSOp = rsOp.getParentOperators().get(0); parRSOp.getChildOperators().set(0, fsOp); fsOp.getParentOperators().set(0, parRSOp); fsOp.getConf().setMultiFileSpray(false); fsOp.getConf().setTotalFiles(1); fsOp.getConf().setNumFiles(1); fsOp.getConf().setRemovedReduceSinkBucketSort(true); tsOp.setUseBucketizedHiveInputFormat(true); }
@Override public ParseContext transform(ParseContext pctx) throws SemanticException { pGraphContext = pctx; Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>(); opRules.put( new RuleRegExp( "R1", "(" + FilterOperator.getOperatorName() + "%" + ReduceSinkOperator.getOperatorName() + "%" + JoinOperator.getOperatorName() + "%)"), new JoinTransitive()); // The dispatcher fires the processor corresponding to the closest matching // rule and passes the context along TransitiveContext context = new TransitiveContext(); Dispatcher disp = new DefaultRuleDispatcher(null, opRules, context); GraphWalker ogw = new LevelOrderWalker(disp, 2); // Create a list of topop nodes List<Node> topNodes = new ArrayList<Node>(); topNodes.addAll(pGraphContext.getTopOps().values()); ogw.startWalking(topNodes, null); Map<ReduceSinkOperator, List<ExprNodeDesc>> newFilters = context.getNewfilters(); // insert new filter between RS and parent of RS for (Map.Entry<ReduceSinkOperator, List<ExprNodeDesc>> entry : newFilters.entrySet()) { ReduceSinkOperator reducer = entry.getKey(); Operator<?> parent = reducer.getParentOperators().get(0); List<ExprNodeDesc> exprs = entry.getValue(); if (parent instanceof FilterOperator) { exprs = ExprNodeDescUtils.split(((FilterOperator) parent).getConf().getPredicate(), exprs); ExprNodeDesc merged = ExprNodeDescUtils.mergePredicates(exprs); ((FilterOperator) parent).getConf().setPredicate(merged); } else { ExprNodeDesc merged = ExprNodeDescUtils.mergePredicates(exprs); RowSchema parentRS = parent.getSchema(); Operator<FilterDesc> newFilter = createFilter(reducer, parent, parentRS, merged); } } return pGraphContext; }
/** * Initialize the current plan by adding it to root tasks. * * @param op the reduce sink operator encountered * @param opProcCtx processing context */ public static void initPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx) throws SemanticException { Operator<? extends Serializable> reducer = op.getChildOperators().get(0); Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = opProcCtx.getMapCurrCtx(); GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0)); Task<? extends Serializable> currTask = mapredCtx.getCurrTask(); MapredWork plan = (MapredWork) currTask.getWork(); HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap = opProcCtx.getOpTaskMap(); Operator<? extends Serializable> currTopOp = opProcCtx.getCurrTopOp(); opTaskMap.put(reducer, currTask); plan.setReducer(reducer); ReduceSinkDesc desc = op.getConf(); plan.setNumReduceTasks(desc.getNumReducers()); List<Task<? extends Serializable>> rootTasks = opProcCtx.getRootTasks(); if (!rootTasks.contains(currTask)) { rootTasks.add(currTask); } if (reducer.getClass() == JoinOperator.class) { plan.setNeedsTagging(true); } assert currTopOp != null; List<Operator<? extends Serializable>> seenOps = opProcCtx.getSeenOps(); String currAliasId = opProcCtx.getCurrAliasId(); if (!seenOps.contains(currTopOp)) { seenOps.add(currTopOp); setTaskPlan(currAliasId, currTopOp, plan, false, opProcCtx); } currTopOp = null; currAliasId = null; opProcCtx.setCurrTask(currTask); opProcCtx.setCurrTopOp(currTopOp); opProcCtx.setCurrAliasId(currAliasId); }
/** * Initialize the current union plan. * * @param op the reduce sink operator encountered * @param opProcCtx processing context */ public static void initUnionPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx) throws SemanticException { Operator<? extends Serializable> reducer = op.getChildOperators().get(0); Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = opProcCtx.getMapCurrCtx(); GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0)); Task<? extends Serializable> currTask = mapredCtx.getCurrTask(); MapredWork plan = (MapredWork) currTask.getWork(); HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap = opProcCtx.getOpTaskMap(); opTaskMap.put(reducer, currTask); plan.setReducer(reducer); ReduceSinkDesc desc = op.getConf(); plan.setNumReduceTasks(desc.getNumReducers()); if (reducer.getClass() == JoinOperator.class) { plan.setNeedsTagging(true); } initUnionPlan(opProcCtx, currTask, false); }