@Override public Object process( Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { ReduceSinkDeduplicateProcCtx dedupCtx = (ReduceSinkDeduplicateProcCtx) procCtx; if (dedupCtx.hasBeenRemoved((Operator<?>) nd)) { return false; } ReduceSinkOperator cRS = (ReduceSinkOperator) nd; Operator<?> child = CorrelationUtilities.getSingleChild(cRS); if (child instanceof JoinOperator) { return false; // not supported } if (child instanceof GroupByOperator) { GroupByOperator cGBY = (GroupByOperator) child; if (!CorrelationUtilities.hasGroupingSet(cRS) && !cGBY.getConf().isGroupingSetsPresent()) { return process(cRS, cGBY, dedupCtx); } return false; } if (child instanceof SelectOperator) { return process(cRS, dedupCtx); } return false; }
// pRS-cRS @Override public Object process(ReduceSinkOperator cRS, ReduceSinkDeduplicateProcCtx dedupCtx) throws SemanticException { ReduceSinkOperator pRS = CorrelationUtilities.findPossibleParent( cRS, ReduceSinkOperator.class, dedupCtx.trustScript()); if (pRS != null && merge(cRS, pRS, dedupCtx.minReducer())) { CorrelationUtilities.replaceReduceSinkWithSelectOperator(cRS, dedupCtx.getPctx(), dedupCtx); pRS.getConf().setDeduplicated(true); return true; } return false; }
// pRS-cRS-cGBY @Override public Object process( ReduceSinkOperator cRS, GroupByOperator cGBY, ReduceSinkDeduplicateProcCtx dedupCtx) throws SemanticException { Operator<?> start = CorrelationUtilities.getStartForGroupBy(cRS, dedupCtx); ReduceSinkOperator pRS = CorrelationUtilities.findPossibleParent( start, ReduceSinkOperator.class, dedupCtx.trustScript()); if (pRS != null && merge(cRS, pRS, dedupCtx.minReducer())) { if (dedupCtx.getPctx().getConf().getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)) { return false; } CorrelationUtilities.removeReduceSinkForGroupBy(cRS, cGBY, dedupCtx.getPctx(), dedupCtx); pRS.getConf().setDeduplicated(true); return true; } return false; }
// pRS-pJOIN-cRS-cGBY @Override public Object process( ReduceSinkOperator cRS, GroupByOperator cGBY, ReduceSinkDeduplicateProcCtx dedupCtx) throws SemanticException { Operator<?> start = CorrelationUtilities.getStartForGroupBy(cRS, dedupCtx); JoinOperator pJoin = CorrelationUtilities.findPossibleParent( start, JoinOperator.class, dedupCtx.trustScript()); if (pJoin != null && merge(cRS, pJoin, dedupCtx.minReducer())) { pJoin.getConf().setFixedAsSorted(true); CorrelationUtilities.removeReduceSinkForGroupBy(cRS, cGBY, dedupCtx.getPctx(), dedupCtx); ReduceSinkOperator pRS = CorrelationUtilities.findPossibleParent( pJoin, ReduceSinkOperator.class, dedupCtx.trustScript()); if (pRS != null) { pRS.getConf().setDeduplicated(true); } return true; } return false; }