@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();

  // process reduce sink added by hive.enforce.bucketing or hive.enforce.sorting
  opRules.put(
      new RuleRegExp(
          "R1",
          ReduceSinkOperator.getOperatorName() + "%"
              + SelectOperator.getOperatorName() + "%"
              + FileSinkOperator.getOperatorName() + "%"),
      getBucketSortReduceSinkProc(pctx));

  // The dispatcher fires the processor corresponding to the closest matching rule
  Dispatcher disp = new DefaultRuleDispatcher(getDefaultProc(), opRules, null);
  GraphWalker ogw = new DefaultGraphWalker(disp);

  // Create a list of top nodes
  ArrayList<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(pctx.getTopOps().values());
  ogw.startWalking(topNodes, null);

  return pctx;
}
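// A minimal illustration (not Hive API usage) of the operator-path pattern the
// "R1" rule above compiles to, assuming the standard operator abbreviations
// returned by getOperatorName() (ReduceSinkOperator -> "RS",
// SelectOperator -> "SEL", FileSinkOperator -> "FS"):
class RulePatternSketch {
  public static void main(String[] args) {
    String r1 = "RS" + "%" + "SEL" + "%" + "FS" + "%";
    // The walker matches this against the path of operator names from a top
    // operator down to the current node, firing the bucket-sort processor on
    // ReduceSink -> Select -> FileSink chains.
    System.out.println(r1); // RS%SEL%FS%
  }
}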
public static void setChildrenCollector(
    List<Operator<? extends OperatorDesc>> childOperators, OutputCollector out) {
  if (childOperators == null) {
    return;
  }
  for (Operator<? extends OperatorDesc> op : childOperators) {
    if (op.getName().equals(ReduceSinkOperator.getOperatorName())) {
      op.setOutputCollector(out);
    } else {
      setChildrenCollector(op.getChildOperators(), out);
    }
  }
}
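// The recursion above wires a single collector into the first ReduceSink found
// on each branch and does not descend past it. A toy, self-contained sketch of
// the same pattern (hypothetical Node type, not Hive's Operator):
class CollectorWiringSketch {
  static final class Node {
    final String name;
    final Node[] children;
    Object collector;

    Node(String name, Node... children) {
      this.name = name;
      this.children = children;
    }
  }

  static void wire(Node[] nodes, Object out) {
    if (nodes == null) {
      return;
    }
    for (Node n : nodes) {
      if ("RS".equals(n.name)) {
        n.collector = out; // assign and stop descending this branch
      } else {
        wire(n.children, out);
      }
    }
  }

  public static void main(String[] args) {
    Node rs = new Node("RS");
    Node sel = new Node("SEL", rs);
    wire(new Node[] {sel}, new Object());
    System.out.println(rs.collector != null); // true -- the collector reached the RS
  }
}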
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
  pGraphContext = pctx;

  // create the context for walking operators
  OpWalkerInfo opWalkerInfo = new OpWalkerInfo(pGraphContext);

  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
  opRules.put(
      new RuleRegExp("R1", FilterOperator.getOperatorName() + "%"),
      OpProcFactory.getFilterProc());
  opRules.put(
      new RuleRegExp("R2", PTFOperator.getOperatorName() + "%"),
      OpProcFactory.getPTFProc());
  opRules.put(
      new RuleRegExp("R3", CommonJoinOperator.getOperatorName() + "%"),
      OpProcFactory.getJoinProc());
  opRules.put(
      new RuleRegExp("R4", TableScanOperator.getOperatorName() + "%"),
      OpProcFactory.getTSProc());
  opRules.put(
      new RuleRegExp("R5", ScriptOperator.getOperatorName() + "%"),
      OpProcFactory.getSCRProc());
  opRules.put(
      new RuleRegExp("R6", LimitOperator.getOperatorName() + "%"),
      OpProcFactory.getLIMProc());
  opRules.put(
      new RuleRegExp("R7", UDTFOperator.getOperatorName() + "%"),
      OpProcFactory.getUDTFProc());
  opRules.put(
      new RuleRegExp("R8", LateralViewForwardOperator.getOperatorName() + "%"),
      OpProcFactory.getLVFProc());
  opRules.put(
      new RuleRegExp("R9", LateralViewJoinOperator.getOperatorName() + "%"),
      OpProcFactory.getLVJProc());
  opRules.put(
      new RuleRegExp("R10", ReduceSinkOperator.getOperatorName() + "%"),
      OpProcFactory.getRSProc());

  // The dispatcher fires the processor corresponding to the closest matching
  // rule and passes the context along
  Dispatcher disp = new DefaultRuleDispatcher(OpProcFactory.getDefaultProc(), opRules, opWalkerInfo);
  GraphWalker ogw = new DefaultGraphWalker(disp);

  // Create a list of top op nodes
  ArrayList<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(pGraphContext.getTopOps().values());
  ogw.startWalking(topNodes, null);

  if (LOG.isDebugEnabled()) {
    LOG.debug("After PPD:\n" + Operator.toString(pctx.getTopOps().values()));
  }
  return pGraphContext;
}
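// A hedged, schematic picture of what this pushdown pass does (plan shapes are
// illustrative, not actual EXPLAIN output): a predicate that references only
// one side of a join is walked down toward that side's scan.
//
//   before:  TS(a) --\
//                     JOIN(a.id = b.id) -> FIL(a.x > 5) -> SEL -> FS
//            TS(b) --/
//
//   after:   TS(a) -> FIL(a.x > 5) --\
//                                     JOIN(a.id = b.id) -> SEL -> FS
//            TS(b) ------------------/
//
// The filter rule (R1) collects candidate predicates, the intermediate rules
// (join, lateral view, script, ...) decide what may pass through each operator,
// and the table scan rule (R4) attaches whatever reached the scan.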
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
  pGraphContext = pctx;

  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
  opRules.put(
      new RuleRegExp(
          "R1",
          "(" + FilterOperator.getOperatorName() + "%"
              + ReduceSinkOperator.getOperatorName() + "%"
              + JoinOperator.getOperatorName() + "%)"),
      new JoinTransitive());

  // The dispatcher fires the processor corresponding to the closest matching
  // rule and passes the context along
  TransitiveContext context = new TransitiveContext();
  Dispatcher disp = new DefaultRuleDispatcher(null, opRules, context);
  GraphWalker ogw = new LevelOrderWalker(disp, 2);

  // Create a list of top op nodes
  List<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(pGraphContext.getTopOps().values());
  ogw.startWalking(topNodes, null);

  Map<ReduceSinkOperator, List<ExprNodeDesc>> newFilters = context.getNewfilters();

  // insert new filter between RS and parent of RS
  for (Map.Entry<ReduceSinkOperator, List<ExprNodeDesc>> entry : newFilters.entrySet()) {
    ReduceSinkOperator reducer = entry.getKey();
    Operator<?> parent = reducer.getParentOperators().get(0);

    List<ExprNodeDesc> exprs = entry.getValue();
    if (parent instanceof FilterOperator) {
      exprs = ExprNodeDescUtils.split(((FilterOperator) parent).getConf().getPredicate(), exprs);
      ExprNodeDesc merged = ExprNodeDescUtils.mergePredicates(exprs);
      ((FilterOperator) parent).getConf().setPredicate(merged);
    } else {
      ExprNodeDesc merged = ExprNodeDescUtils.mergePredicates(exprs);
      RowSchema parentRS = parent.getSchema();
      // createFilter wires the new filter between parent and reducer as a side
      // effect; the returned operator is not otherwise used here
      Operator<FilterDesc> newFilter = createFilter(reducer, parent, parentRS, merged);
    }
  }
  return pGraphContext;
}
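// The transitive step in miniature: an equi-join key pair plus a filter on one
// side implies the same filter on the other side, which is what the loop above
// installs between each RS and its parent. Toy, self-contained sketch (plain
// strings, not Hive ExprNodeDesc):
class TransitiveFilterSketch {
  public static void main(String[] args) {
    String leftKey = "a.id"; // join condition: a.id = b.id
    String rightKey = "b.id";
    String filteredColumn = "a.id"; // existing predicate: a.id > 10
    String predicate = "> 10";
    if (filteredColumn.equals(leftKey)) {
      // derived predicate for the other join branch, to be merged into an
      // existing FilterOperator or installed as a new one, as in the code above
      System.out.println(rightKey + " " + predicate); // b.id > 10
    }
  }
}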
public static void setChildrenCollector(
    List<Operator<? extends OperatorDesc>> childOperators, Map<String, OutputCollector> outMap) {
  if (childOperators == null) {
    return;
  }
  for (Operator<? extends OperatorDesc> op : childOperators) {
    if (op.getName().equals(ReduceSinkOperator.getOperatorName())) {
      ReduceSinkOperator rs = ((ReduceSinkOperator) op);
      if (outMap.containsKey(rs.getConf().getOutputName())) {
        LOG.info("Setting output collector: " + rs + " --> " + rs.getConf().getOutputName());
        rs.setOutputCollector(outMap.get(rs.getConf().getOutputName()));
      }
    } else {
      setChildrenCollector(op.getChildOperators(), outMap);
    }
  }
}
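// Unlike the single-collector variant earlier, this overload appears to target
// engines where each ReduceSink writes to a named output edge (hence the map
// keyed by rs.getConf().getOutputName()); an RS whose output name is absent
// from the map is simply left unwired rather than given a default collector.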
/**
 * If two reduce sink operators share the same partition/sort columns and order, they can be
 * merged. This should happen after map join optimization because map join optimization will
 * remove reduce sink operators.
 *
 * <p>This optimizer removes/replaces the child RS (not the parent), which is the safer way for
 * DefaultGraphWalker.
 */
public class ReduceSinkDeDuplication extends Transform {

  private static final String RS = ReduceSinkOperator.getOperatorName();
  private static final String GBY = GroupByOperator.getOperatorName();
  private static final String JOIN = JoinOperator.getOperatorName();

  protected ParseContext pGraphContext;

  @Override
  public ParseContext transform(ParseContext pctx) throws SemanticException {
    pGraphContext = pctx;

    // generate pruned column list for all relevant operators
    ReduceSinkDeduplicateProcCtx cppCtx = new ReduceSinkDeduplicateProcCtx(pGraphContext);

    // for auto-converted map joins, it is not safe to dedup here (TODO)
    boolean mergeJoins =
        !pctx.getConf().getBoolVar(HIVECONVERTJOIN)
            && !pctx.getConf().getBoolVar(HIVECONVERTJOINNOCONDITIONALTASK)
            && !pctx.getConf().getBoolVar(ConfVars.HIVE_CONVERT_JOIN_BUCKET_MAPJOIN_TEZ)
            && !pctx.getConf().getBoolVar(ConfVars.HIVEDYNAMICPARTITIONHASHJOIN);

    // If multiple rules can be matched with the same cost, the last rule will be chosen
    // as the processor; see DefaultRuleDispatcher#dispatch()
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(
        new RuleRegExp("R1", RS + "%.*%" + RS + "%"),
        ReduceSinkDeduplicateProcFactory.getReducerReducerProc());
    opRules.put(
        new RuleRegExp("R2", RS + "%" + GBY + "%.*%" + RS + "%"),
        ReduceSinkDeduplicateProcFactory.getGroupbyReducerProc());
    if (mergeJoins) {
      opRules.put(
          new RuleRegExp("R3", JOIN + "%.*%" + RS + "%"),
          ReduceSinkDeduplicateProcFactory.getJoinReducerProc());
    }
    // TODO RS+JOIN

    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    Dispatcher disp =
        new DefaultRuleDispatcher(
            ReduceSinkDeduplicateProcFactory.getDefaultProc(), opRules, cppCtx);
    GraphWalker ogw = new DefaultGraphWalker(disp);

    // Create a list of top op nodes
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pGraphContext.getTopOps().values());
    ogw.startWalking(topNodes, null);

    return pGraphContext;
  }

  protected class ReduceSinkDeduplicateProcCtx extends AbstractCorrelationProcCtx {
    public ReduceSinkDeduplicateProcCtx(ParseContext pctx) {
      super(pctx);
    }
  }

  static class ReduceSinkDeduplicateProcFactory {

    public static NodeProcessor getReducerReducerProc() {
      return new ReducerReducerProc();
    }

    public static NodeProcessor getGroupbyReducerProc() {
      return new GroupbyReducerProc();
    }

    public static NodeProcessor getJoinReducerProc() {
      return new JoinReducerProc();
    }

    public static NodeProcessor getDefaultProc() {
      return new DefaultProc();
    }
  }

  /*
   * do nothing.
   */
  static class DefaultProc implements NodeProcessor {
    @Override
    public Object process(
        Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs)
        throws SemanticException {
      return null;
    }
  }

  public abstract static class AbsctractReducerReducerProc implements NodeProcessor {
    @Override
    public Object process(
        Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs)
        throws SemanticException {
      ReduceSinkDeduplicateProcCtx dedupCtx = (ReduceSinkDeduplicateProcCtx) procCtx;
      if (dedupCtx.hasBeenRemoved((Operator<?>) nd)) {
        return false;
      }
      ReduceSinkOperator cRS = (ReduceSinkOperator) nd;
      Operator<?> child = CorrelationUtilities.getSingleChild(cRS);
      if (child instanceof JoinOperator) {
        return false; // not supported
      }
      if (child instanceof GroupByOperator) {
        GroupByOperator cGBY = (GroupByOperator) child;
        if (!CorrelationUtilities.hasGroupingSet(cRS) && !cGBY.getConf().isGroupingSetsPresent()) {
          return process(cRS, cGBY, dedupCtx);
        }
        return false;
      }
      if (child instanceof SelectOperator) {
        return process(cRS, dedupCtx);
      }
      return false;
    }

    protected abstract Object process(ReduceSinkOperator cRS, ReduceSinkDeduplicateProcCtx dedupCtx)
        throws SemanticException;

    protected abstract Object process(
        ReduceSinkOperator cRS, GroupByOperator cGBY, ReduceSinkDeduplicateProcCtx dedupCtx)
        throws SemanticException;

    // For the JOIN-RS case, it is generally not possible to merge if the child has
    // fewer key/partition columns than the parents.
    protected boolean merge(ReduceSinkOperator cRS, JoinOperator pJoin, int minReducer)
        throws SemanticException {
      List<Operator<?>> parents = pJoin.getParentOperators();
      ReduceSinkOperator[] pRSs = parents.toArray(new ReduceSinkOperator[parents.size()]);
      ReduceSinkDesc cRSc = cRS.getConf();
      ReduceSinkDesc pRS0c = pRSs[0].getConf();
      if (cRSc.getKeyCols().size() < pRS0c.getKeyCols().size()) {
        return false;
      }
      if (cRSc.getPartitionCols().size() != pRS0c.getPartitionCols().size()) {
        return false;
      }
      Integer moveReducerNumTo = checkNumReducer(cRSc.getNumReducers(), pRS0c.getNumReducers());
      if (moveReducerNumTo == null
          || moveReducerNumTo > 0 && cRSc.getNumReducers() < minReducer) {
        return false;
      }
      Integer moveRSOrderTo = checkOrder(cRSc.getOrder(), pRS0c.getOrder());
      if (moveRSOrderTo == null) {
        return false;
      }
      boolean[] sorted = CorrelationUtilities.getSortedTags(pJoin);
      int cKeySize = cRSc.getKeyCols().size();
      for (int i = 0; i < cKeySize; i++) {
        ExprNodeDesc cexpr = cRSc.getKeyCols().get(i);
        ExprNodeDesc[] pexprs = new ExprNodeDesc[pRSs.length];
        for (int tag = 0; tag < pRSs.length; tag++) {
          pexprs[tag] = pRSs[tag].getConf().getKeyCols().get(i);
        }
        int found = CorrelationUtilities.indexOf(cexpr, pexprs, cRS, pRSs, sorted);
        if (found != i) {
          return false;
        }
      }
      int cPartSize = cRSc.getPartitionCols().size();
      for (int i = 0; i < cPartSize; i++) {
        ExprNodeDesc cexpr = cRSc.getPartitionCols().get(i);
        ExprNodeDesc[] pexprs = new ExprNodeDesc[pRSs.length];
        for (int tag = 0; tag < pRSs.length; tag++) {
          pexprs[tag] = pRSs[tag].getConf().getPartitionCols().get(i);
        }
        int found = CorrelationUtilities.indexOf(cexpr, pexprs, cRS, pRSs, sorted);
        if (found != i) {
          return false;
        }
      }
      if (moveReducerNumTo > 0) {
        for (ReduceSinkOperator pRS : pRSs) {
          pRS.getConf().setNumReducers(cRS.getConf().getNumReducers());
        }
      }
      return true;
    }
    /**
     * The current RSDedup removes/replaces the child RS. For key columns, sorting order, and the
     * number of reducers, copy the more specific parts of the child RS's configuration to the
     * parent RS. For partitioning columns, if both the child RS and the parent RS have been
     * assigned partitioning columns, choose the more general ones. If the parent RS has not been
     * assigned any partitioning column, use the partitioning columns (if they exist) of the
     * child RS.
     */
    protected boolean merge(ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minReducer)
        throws SemanticException {
      int[] result = checkStatus(cRS, pRS, minReducer);
      if (result == null) {
        return false;
      }
      if (result[0] > 0) {
        // The sorting columns of the child RS are more specific than
        // those of the parent RS. Assign the sorting columns of the child RS
        // to the parent RS.
        List<ExprNodeDesc> childKCs = cRS.getConf().getKeyCols();
        pRS.getConf().setKeyCols(ExprNodeDescUtils.backtrack(childKCs, cRS, pRS));
      }
      if (result[1] < 0) {
        // The partitioning columns of the parent RS are more specific than
        // those of the child RS.
        List<ExprNodeDesc> childPCs = cRS.getConf().getPartitionCols();
        if (childPCs != null && !childPCs.isEmpty()) {
          // If the partitioning columns of the child RS are assigned,
          // assign them to the parent RS.
          pRS.getConf().setPartitionCols(ExprNodeDescUtils.backtrack(childPCs, cRS, pRS));
        }
      } else if (result[1] > 0) {
        // The partitioning columns of the child RS are more specific than
        // those of the parent RS.
        List<ExprNodeDesc> parentPCs = pRS.getConf().getPartitionCols();
        if (parentPCs == null || parentPCs.isEmpty()) {
          // If the partitioning columns of the parent RS are not assigned,
          // assign the partitioning columns of the child RS to the parent RS.
          ArrayList<ExprNodeDesc> childPCs = cRS.getConf().getPartitionCols();
          pRS.getConf().setPartitionCols(ExprNodeDescUtils.backtrack(childPCs, cRS, pRS));
        }
      }
      if (result[2] > 0) {
        // The sorting order of the child RS is more specific than
        // that of the parent RS. Assign the sorting order of the child RS
        // to the parent RS.
        if (result[0] <= 0) {
          // The sorting columns of the parent RS are more specific than those of the
          // child RS, but the sorting order of the child RS is more specific than
          // that of the parent RS.
          throw new SemanticException(
              "Sorting columns and order don't match. "
                  + "Try set "
                  + HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION
                  + "=false;");
        }
        pRS.getConf().setOrder(cRS.getConf().getOrder());
      }
      if (result[3] > 0) {
        // The number of reducers of the child RS is more specific than
        // that of the parent RS. Assign the number of reducers of the child RS
        // to the parent RS.
        pRS.getConf().setNumReducers(cRS.getConf().getNumReducers());
      }
      if (result[4] > 0) {
        // This case happens only when the pRS key is empty, in which case we can use
        // the number of distribution keys and the key serialization info from the cRS.
        pRS.getConf().setNumDistributionKeys(cRS.getConf().getNumDistributionKeys());
        List<FieldSchema> fields =
            PlanUtils.getFieldSchemasFromColumnList(pRS.getConf().getKeyCols(), "reducesinkkey");
        TableDesc keyTable = PlanUtils.getReduceKeyTableDesc(fields, pRS.getConf().getOrder());
        ArrayList<String> outputKeyCols = Lists.newArrayList();
        for (int i = 0; i < fields.size(); i++) {
          outputKeyCols.add(fields.get(i).getName());
        }
        pRS.getConf().setOutputKeyColumnNames(outputKeyCols);
        pRS.getConf().setKeySerializeInfo(keyTable);
      }
      return true;
    }
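    // Worked example (hedged, illustrative): if pRS has keys (a) with order "+"
    // and cRS has keys (a, b) with order "++", checkStatus below returns 1 for
    // both the key-column and order directions (indices 0 and 2): the child is
    // more specific, so merge() backtracks (a, b) through cRS and assigns the
    // keys and order to pRS, which then shuffles and sorts on (a, b); the child
    // RS becomes redundant and is removed/replaced.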
    /**
     * Returns merge directions between two RSs for the criteria (ordering, number of reducers,
     * reducer keys, partition keys). Returns null if any of the categories is not mergeable.
     *
     * <p>The value at each index can be -1, 0, or 1: 0 means the two configurations in the
     * category are the same; -1 means the configuration of the parent RS is more specific than
     * that of the child RS; 1 means the configuration of the child RS is more specific than
     * that of the parent RS.
     */
    private int[] checkStatus(ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minReducer)
        throws SemanticException {
      ReduceSinkDesc cConf = cRS.getConf();
      ReduceSinkDesc pConf = pRS.getConf();
      Integer moveRSOrderTo = checkOrder(cConf.getOrder(), pConf.getOrder());
      if (moveRSOrderTo == null) {
        return null;
      }
      Integer moveReducerNumTo = checkNumReducer(cConf.getNumReducers(), pConf.getNumReducers());
      if (moveReducerNumTo == null
          || moveReducerNumTo > 0 && cConf.getNumReducers() < minReducer) {
        return null;
      }
      List<ExprNodeDesc> ckeys = cConf.getKeyCols();
      List<ExprNodeDesc> pkeys = pConf.getKeyCols();
      Integer moveKeyColTo = checkExprs(ckeys, pkeys, cRS, pRS);
      if (moveKeyColTo == null) {
        return null;
      }
      List<ExprNodeDesc> cpars = cConf.getPartitionCols();
      List<ExprNodeDesc> ppars = pConf.getPartitionCols();
      Integer movePartitionColTo = checkExprs(cpars, ppars, cRS, pRS);
      if (movePartitionColTo == null) {
        return null;
      }
      Integer moveNumDistKeyTo =
          checkNumDistributionKey(cConf.getNumDistributionKeys(), pConf.getNumDistributionKeys());
      return new int[] {
        moveKeyColTo, movePartitionColTo, moveRSOrderTo, moveReducerNumTo, moveNumDistKeyTo
      };
    }

    private Integer checkNumDistributionKey(int cnd, int pnd) {
      // The number of distribution keys of the cRS is chosen only when numDistKeys of the pRS
      // is 0 or less. In all other cases, distribution of the keys is based on
      // the pRS, which is more generic than the cRS.
      // Examples:
      // case 1: if the pRS sort key is (a, b) and the cRS sort key is (a, b, c), with 2 and 3
      // distribution keys respectively, then after the merge the sort keys will
      // be (a, b, c) while the number of distribution keys will be 2.
      // case 2: if the pRS sort key is empty with 0 distribution keys, and the cRS sort key
      // is (a, b) with 2 distribution keys, then after the merge the new sort key will be
      // (a, b) and the number of distribution keys will be 2.
      if (pnd <= 0) {
        return 1;
      }
      return 0;
    }
    /**
     * The overlapping part of the keys should be the same between parent and child. And if the
     * child has more keys than the parent, the non-overlapping part of the keys should be
     * backtrackable to the parent.
     */
    private Integer checkExprs(
        List<ExprNodeDesc> ckeys,
        List<ExprNodeDesc> pkeys,
        ReduceSinkOperator cRS,
        ReduceSinkOperator pRS)
        throws SemanticException {
      Integer moveKeyColTo = 0;
      if (ckeys == null || ckeys.isEmpty()) {
        if (pkeys != null && !pkeys.isEmpty()) {
          moveKeyColTo = -1;
        }
      } else {
        if (pkeys == null || pkeys.isEmpty()) {
          for (ExprNodeDesc ckey : ckeys) {
            if (ExprNodeDescUtils.backtrack(ckey, cRS, pRS) == null) {
              // cKey is not present in the parent
              return null;
            }
          }
          moveKeyColTo = 1;
        } else {
          moveKeyColTo = sameKeys(ckeys, pkeys, cRS, pRS);
        }
      }
      return moveKeyColTo;
    }

    // backtrack the key exprs of the child to the parent and compare them with the parent's
    protected Integer sameKeys(
        List<ExprNodeDesc> cexprs, List<ExprNodeDesc> pexprs, Operator<?> child, Operator<?> parent)
        throws SemanticException {
      int common = Math.min(cexprs.size(), pexprs.size());
      int limit = Math.max(cexprs.size(), pexprs.size());
      int i = 0;
      for (; i < common; i++) {
        ExprNodeDesc pexpr = pexprs.get(i);
        ExprNodeDesc cexpr = ExprNodeDescUtils.backtrack(cexprs.get(i), child, parent);
        if (cexpr == null || !pexpr.isSame(cexpr)) {
          return null;
        }
      }
      for (; i < limit; i++) {
        if (cexprs.size() > pexprs.size()) {
          if (ExprNodeDescUtils.backtrack(cexprs.get(i), child, parent) == null) {
            // cKey is not present in the parent
            return null;
          }
        }
      }
      return Integer.valueOf(cexprs.size()).compareTo(pexprs.size());
    }

    // the order of overlapping keys should be exactly the same
    protected Integer checkOrder(String corder, String porder) {
      if (corder == null || corder.trim().equals("")) {
        if (porder == null || porder.trim().equals("")) {
          return 0;
        }
        return -1;
      }
      if (porder == null || porder.trim().equals("")) {
        return 1;
      }
      corder = corder.trim();
      porder = porder.trim();
      int target = Math.min(corder.length(), porder.length());
      if (!corder.substring(0, target).equals(porder.substring(0, target))) {
        return null;
      }
      return Integer.valueOf(corder.length()).compareTo(porder.length());
    }
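    // Illustrative cases for checkOrder ("+" ascending, "-" descending, one char per key):
    //   checkOrder("++", "+")  -> 1    child order more specific; copied to the parent on merge
    //   checkOrder("+", "++")  -> -1   parent order more specific; kept as-is
    //   checkOrder("+-", "++") -> null conflicting directions; the RSs cannot be merged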
    /**
     * If the number of reducers for an RS is -1, the RS can have any number of reducers. This is
     * generally true except for order-by or forced bucketing cases. If both reducer counts are
     * not -1, the numbers should be the same.
     */
    protected Integer checkNumReducer(int creduce, int preduce) {
      if (creduce < 0) {
        if (preduce < 0) {
          return 0;
        }
        return -1;
      }
      if (preduce < 0) {
        return 1;
      }
      if (creduce != preduce) {
        return null;
      }
      return 0;
    }
  }

  static class GroupbyReducerProc extends AbsctractReducerReducerProc {

    // pRS-pGBY-cRS
    @Override
    public Object process(ReduceSinkOperator cRS, ReduceSinkDeduplicateProcCtx dedupCtx)
        throws SemanticException {
      GroupByOperator pGBY =
          CorrelationUtilities.findPossibleParent(
              cRS, GroupByOperator.class, dedupCtx.trustScript());
      if (pGBY == null) {
        return false;
      }
      ReduceSinkOperator pRS =
          CorrelationUtilities.findPossibleParent(
              pGBY, ReduceSinkOperator.class, dedupCtx.trustScript());
      if (pRS != null && merge(cRS, pRS, dedupCtx.minReducer())) {
        CorrelationUtilities.replaceReduceSinkWithSelectOperator(cRS, dedupCtx.getPctx(), dedupCtx);
        pRS.getConf().setDeduplicated(true);
        return true;
      }
      return false;
    }

    // pRS-pGBY-cRS-cGBY
    @Override
    public Object process(
        ReduceSinkOperator cRS, GroupByOperator cGBY, ReduceSinkDeduplicateProcCtx dedupCtx)
        throws SemanticException {
      Operator<?> start = CorrelationUtilities.getStartForGroupBy(cRS, dedupCtx);
      GroupByOperator pGBY =
          CorrelationUtilities.findPossibleParent(
              start, GroupByOperator.class, dedupCtx.trustScript());
      if (pGBY == null) {
        return false;
      }
      ReduceSinkOperator pRS = CorrelationUtilities.getSingleParent(pGBY, ReduceSinkOperator.class);
      if (pRS != null && merge(cRS, pRS, dedupCtx.minReducer())) {
        CorrelationUtilities.removeReduceSinkForGroupBy(cRS, cGBY, dedupCtx.getPctx(), dedupCtx);
        pRS.getConf().setDeduplicated(true);
        return true;
      }
      return false;
    }
  }

  static class JoinReducerProc extends AbsctractReducerReducerProc {

    // pRS-pJOIN-cRS
    @Override
    public Object process(ReduceSinkOperator cRS, ReduceSinkDeduplicateProcCtx dedupCtx)
        throws SemanticException {
      JoinOperator pJoin =
          CorrelationUtilities.findPossibleParent(cRS, JoinOperator.class, dedupCtx.trustScript());
      if (pJoin != null && merge(cRS, pJoin, dedupCtx.minReducer())) {
        pJoin.getConf().setFixedAsSorted(true);
        CorrelationUtilities.replaceReduceSinkWithSelectOperator(cRS, dedupCtx.getPctx(), dedupCtx);
        ReduceSinkOperator pRS =
            CorrelationUtilities.findPossibleParent(
                pJoin, ReduceSinkOperator.class, dedupCtx.trustScript());
        if (pRS != null) {
          pRS.getConf().setDeduplicated(true);
        }
        return true;
      }
      return false;
    }

    // pRS-pJOIN-cRS-cGBY
    @Override
    public Object process(
        ReduceSinkOperator cRS, GroupByOperator cGBY, ReduceSinkDeduplicateProcCtx dedupCtx)
        throws SemanticException {
      Operator<?> start = CorrelationUtilities.getStartForGroupBy(cRS, dedupCtx);
      JoinOperator pJoin =
          CorrelationUtilities.findPossibleParent(
              start, JoinOperator.class, dedupCtx.trustScript());
      if (pJoin != null && merge(cRS, pJoin, dedupCtx.minReducer())) {
        pJoin.getConf().setFixedAsSorted(true);
        CorrelationUtilities.removeReduceSinkForGroupBy(cRS, cGBY, dedupCtx.getPctx(), dedupCtx);
        ReduceSinkOperator pRS =
            CorrelationUtilities.findPossibleParent(
                pJoin, ReduceSinkOperator.class, dedupCtx.trustScript());
        if (pRS != null) {
          pRS.getConf().setDeduplicated(true);
        }
        return true;
      }
      return false;
    }
  }

  static class ReducerReducerProc extends AbsctractReducerReducerProc {

    // pRS-cRS
    @Override
    public Object process(ReduceSinkOperator cRS, ReduceSinkDeduplicateProcCtx dedupCtx)
        throws SemanticException {
      ReduceSinkOperator pRS =
          CorrelationUtilities.findPossibleParent(
              cRS, ReduceSinkOperator.class, dedupCtx.trustScript());
      if (pRS != null && merge(cRS, pRS, dedupCtx.minReducer())) {
        CorrelationUtilities.replaceReduceSinkWithSelectOperator(cRS, dedupCtx.getPctx(), dedupCtx);
        pRS.getConf().setDeduplicated(true);
        return true;
      }
      return false;
    }

    // pRS-cRS-cGBY
    @Override
    public Object process(
        ReduceSinkOperator cRS, GroupByOperator cGBY, ReduceSinkDeduplicateProcCtx dedupCtx)
        throws SemanticException {
      Operator<?> start = CorrelationUtilities.getStartForGroupBy(cRS, dedupCtx);
      ReduceSinkOperator pRS =
          CorrelationUtilities.findPossibleParent(
              start, ReduceSinkOperator.class, dedupCtx.trustScript());
      if (pRS != null && merge(cRS, pRS, dedupCtx.minReducer())) {
        if (dedupCtx.getPctx().getConf().getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)) {
          return false;
        }
        CorrelationUtilities.removeReduceSinkForGroupBy(cRS, cGBY, dedupCtx.getPctx(), dedupCtx);
        pRS.getConf().setDeduplicated(true);
        return true;
      }
      return false;
    }
  }
}
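// What the deduplication buys, schematically (hedged; plan shapes are illustrative,
// not actual EXPLAIN output). For a query whose child RS repeats the parent's key,
// e.g. SELECT key, sum(value) FROM src GROUP BY key ORDER BY key:
//   before:  TS -> RS(key) -> GBY -> RS(key) -> ...   (two shuffle stages)
//   after:   TS -> RS(key) -> GBY -> SEL -> ...       (child RS replaced, one shuffle saved)
//
// And a self-contained toy sketch of the merge-direction convention used
// throughout the class above (-1: parent more specific, 0: same, 1: child more
// specific), applied to reducer counts exactly as checkNumReducer does;
// hypothetical demo class, not part of Hive:
class MergeDirectionSketch {
  static Integer checkNumReducer(int creduce, int preduce) {
    if (creduce < 0) {
      return (preduce < 0) ? 0 : -1; // child unconstrained
    }
    if (preduce < 0) {
      return 1; // parent unconstrained; child's count wins on merge
    }
    return (creduce != preduce) ? null : 0; // fixed counts must match
  }

  public static void main(String[] args) {
    System.out.println(checkNumReducer(-1, -1)); // 0
    System.out.println(checkNumReducer(4, -1));  // 1  -> parent adopts 4 reducers
    System.out.println(checkNumReducer(4, 8));   // null -> not mergeable
  }
}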