@Override public ParseContext transform(ParseContext pctx) throws SemanticException { pGraphContext = pctx; // create a the context for walking operators OpWalkerInfo opWalkerInfo = new OpWalkerInfo(pGraphContext); Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>(); opRules.put( new RuleRegExp("R1", FilterOperator.getOperatorName() + "%"), OpProcFactory.getFilterProc()); opRules.put( new RuleRegExp("R2", PTFOperator.getOperatorName() + "%"), OpProcFactory.getPTFProc()); opRules.put( new RuleRegExp("R3", CommonJoinOperator.getOperatorName() + "%"), OpProcFactory.getJoinProc()); opRules.put( new RuleRegExp("R4", TableScanOperator.getOperatorName() + "%"), OpProcFactory.getTSProc()); opRules.put( new RuleRegExp("R5", ScriptOperator.getOperatorName() + "%"), OpProcFactory.getSCRProc()); opRules.put( new RuleRegExp("R6", LimitOperator.getOperatorName() + "%"), OpProcFactory.getLIMProc()); opRules.put( new RuleRegExp("R7", UDTFOperator.getOperatorName() + "%"), OpProcFactory.getUDTFProc()); opRules.put( new RuleRegExp("R8", LateralViewForwardOperator.getOperatorName() + "%"), OpProcFactory.getLVFProc()); opRules.put( new RuleRegExp("R9", LateralViewJoinOperator.getOperatorName() + "%"), OpProcFactory.getLVJProc()); opRules.put( new RuleRegExp("R10", ReduceSinkOperator.getOperatorName() + "%"), OpProcFactory.getRSProc()); // The dispatcher fires the processor corresponding to the closest matching // rule and passes the context along Dispatcher disp = new DefaultRuleDispatcher(OpProcFactory.getDefaultProc(), opRules, opWalkerInfo); GraphWalker ogw = new DefaultGraphWalker(disp); // Create a list of topop nodes ArrayList<Node> topNodes = new ArrayList<Node>(); topNodes.addAll(pGraphContext.getTopOps().values()); ogw.startWalking(topNodes, null); if (LOG.isDebugEnabled()) { LOG.debug("After PPD:\n" + Operator.toString(pctx.getTopOps().values())); } return pGraphContext; }
@Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException { LateralViewJoinOperator op = (LateralViewJoinOperator) nd; ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx; List<String> cols = cppCtx.genColLists(op); if (cols == null) { return null; } Map<String, ExprNodeDesc> colExprMap = op.getColumnExprMap(); // As columns go down the DAG, the LVJ will transform internal column // names from something like 'key' to '_col0'. Because of this, we need // to undo this transformation using the column expression map as the // column names propagate up the DAG. // this is SEL(*) cols + UDTF cols List<String> outputCols = op.getConf().getOutputInternalColNames(); // cause we cannot prune columns from UDTF branch currently, extract // columns from SEL(*) branch only and append all columns from UDTF branch to it int numSelColumns = op.getConf().getNumSelColumns(); List<String> colsAfterReplacement = new ArrayList<String>(); ArrayList<String> newColNames = new ArrayList<String>(); for (String col : cols) { int index = outputCols.indexOf(col); // colExprMap.size() == size of cols from SEL(*) branch if (index >= 0 && index < numSelColumns) { ExprNodeDesc transformed = colExprMap.get(col); Utilities.mergeUniqElems(colsAfterReplacement, transformed.getCols()); newColNames.add(col); } } // update number of columns from sel(*) op.getConf().setNumSelColumns(newColNames.size()); // add all UDTF columns // following SEL will do CP for columns from UDTF, not adding SEL in here newColNames.addAll(outputCols.subList(numSelColumns, outputCols.size())); op.getConf().setOutputInternalColNames(newColNames); pruneOperator(ctx, op, newColNames); cppCtx.getPrunedColLists().put(op, colsAfterReplacement); return null; }