/* * add any input columns referenced in WindowFn args or expressions. */ private ArrayList<String> prunedColumnsList( List<String> prunedCols, WindowTableFunctionDef tDef) { // we create a copy of prunedCols to create a list of pruned columns for PTFOperator ArrayList<String> mergedColList = new ArrayList<String>(prunedCols); if (tDef.getWindowFunctions() != null) { for (WindowFunctionDef wDef : tDef.getWindowFunctions()) { if (wDef.getArgs() == null) { continue; } for (PTFExpressionDef arg : wDef.getArgs()) { ExprNodeDesc exprNode = arg.getExprNode(); Utilities.mergeUniqElems(mergedColList, exprNode.getCols()); } } } if (tDef.getPartition() != null) { for (PTFExpressionDef col : tDef.getPartition().getExpressions()) { ExprNodeDesc exprNode = col.getExprNode(); Utilities.mergeUniqElems(mergedColList, exprNode.getCols()); } } if (tDef.getOrder() != null) { for (PTFExpressionDef col : tDef.getOrder().getExpressions()) { ExprNodeDesc exprNode = col.getExprNode(); Utilities.mergeUniqElems(mergedColList, exprNode.getCols()); } } return mergedColList; }
/**
 * Column pruning for a group-by operator.
 *
 * Records, as the operator's pruned column list, every column referenced by
 * its key expressions and by the parameters of its aggregations. When the
 * descriptor reports a grouping set position (>= 0) and the output column at
 * that position is not in the list returned by genColLists for this
 * operator, that column is removed from the output column names and, if a
 * schema is present, from the schema signature.
 *
 * @return always null
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
    Object... nodeOutputs) throws SemanticException {
  GroupByOperator op = (GroupByOperator) nd;
  ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
  List<String> neededCols = new ArrayList<String>();
  GroupByDesc conf = op.getConf();

  // columns referenced by the grouping keys
  for (ExprNodeDesc keyExpr : conf.getKeys()) {
    neededCols = Utilities.mergeUniqElems(neededCols, keyExpr.getCols());
  }

  // columns referenced by the aggregation parameters
  for (AggregationDesc aggregation : conf.getAggregators()) {
    for (ExprNodeDesc parameter : aggregation.getParameters()) {
      neededCols = Utilities.mergeUniqElems(neededCols, parameter.getCols());
    }
  }

  int groupingSetPos = conf.getGroupingSetPosition();
  if (groupingSetPos >= 0) {
    List<String> downstreamCols = cppCtx.genColLists(op);
    String groupingColumn = conf.getOutputColumnNames().get(groupingSetPos);
    boolean groupingColumnUnused = !downstreamCols.contains(groupingColumn);
    if (groupingColumnUnused) {
      // positional removal: groupingSetPos is an int index into both lists
      conf.getOutputColumnNames().remove(groupingSetPos);
      if (op.getSchema() != null) {
        op.getSchema().getSignature().remove(groupingSetPos);
      }
    }
  }

  cppCtx.getPrunedColLists().put(op, neededCols);
  return null;
}
@Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException { ReduceSinkOperator op = (ReduceSinkOperator) nd; ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx; RowResolver resolver = cppCtx.getOpToParseCtxMap().get(op).getRowResolver(); ReduceSinkDesc conf = op.getConf(); List<String> colLists = new ArrayList<String>(); ArrayList<ExprNodeDesc> keys = conf.getKeyCols(); LOG.debug("Reduce Sink Operator " + op.getIdentifier() + " key:" + keys); for (ExprNodeDesc key : keys) { colLists = Utilities.mergeUniqElems(colLists, key.getCols()); } assert op.getNumChild() == 1; Operator<? extends OperatorDesc> child = op.getChildOperators().get(0); List<String> childCols; if (child instanceof CommonJoinOperator) { childCols = cppCtx.getJoinPrunedColLists().get(child).get((byte) conf.getTag()); } else { childCols = cppCtx.getPrunedColList(child); } List<ExprNodeDesc> valCols = conf.getValueCols(); List<String> valColNames = conf.getOutputValueColumnNames(); if (childCols != null) { boolean[] flags = new boolean[valCols.size()]; for (String childCol : childCols) { int index = valColNames.indexOf(Utilities.removeValueTag(childCol)); if (index < 0) { continue; } flags[index] = true; colLists = Utilities.mergeUniqElems(colLists, valCols.get(index).getCols()); } Collections.sort(colLists); pruneReduceSinkOperator(flags, op, cppCtx); cppCtx.getPrunedColLists().put(op, colLists); return null; } // Reduce Sink contains the columns needed - no need to aggregate from // children for (ExprNodeDesc val : valCols) { colLists = Utilities.mergeUniqElems(colLists, val.getCols()); } cppCtx.getPrunedColLists().put(op, colLists); return null; }
/**
 * Returns the columns referenced by the MatchPath evaluator: the columns of
 * every expression node in resultExprInfo.resultExprNodes followed by those
 * of every expression node in symInfo.symbolExprsDecs, merged through
 * Utilities.mergeUniqElems.
 */
public List<String> getReferencedColumns() throws SemanticException {
  MatchPath mp = (MatchPath) evaluator;
  List<String> referenced = new ArrayList<String>();

  // columns used by the result expressions
  for (ExprNodeDesc resultExpr : mp.resultExprInfo.resultExprNodes) {
    Utilities.mergeUniqElems(referenced, resultExpr.getCols());
  }

  // columns used by the symbol expressions
  for (ExprNodeDesc symbolExpr : mp.symInfo.symbolExprsDecs) {
    Utilities.mergeUniqElems(referenced, symbolExpr.getCols());
  }

  return referenced;
}
@Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException { LateralViewJoinOperator op = (LateralViewJoinOperator) nd; ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx; List<String> cols = cppCtx.genColLists(op); if (cols == null) { return null; } Map<String, ExprNodeDesc> colExprMap = op.getColumnExprMap(); // As columns go down the DAG, the LVJ will transform internal column // names from something like 'key' to '_col0'. Because of this, we need // to undo this transformation using the column expression map as the // column names propagate up the DAG. // this is SEL(*) cols + UDTF cols List<String> outputCols = op.getConf().getOutputInternalColNames(); // cause we cannot prune columns from UDTF branch currently, extract // columns from SEL(*) branch only and append all columns from UDTF branch to it int numSelColumns = op.getConf().getNumSelColumns(); List<String> colsAfterReplacement = new ArrayList<String>(); ArrayList<String> newColNames = new ArrayList<String>(); for (String col : cols) { int index = outputCols.indexOf(col); // colExprMap.size() == size of cols from SEL(*) branch if (index >= 0 && index < numSelColumns) { ExprNodeDesc transformed = colExprMap.get(col); Utilities.mergeUniqElems(colsAfterReplacement, transformed.getCols()); newColNames.add(col); } } // update number of columns from sel(*) op.getConf().setNumSelColumns(newColNames.size()); // add all UDTF columns // following SEL will do CP for columns from UDTF, not adding SEL in here newColNames.addAll(outputCols.subList(numSelColumns, outputCols.size())); op.getConf().setOutputInternalColNames(newColNames); pruneOperator(ctx, op, newColNames); cppCtx.getPrunedColLists().put(op, colsAfterReplacement); return null; }
@Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException { FilterOperator op = (FilterOperator) nd; ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx; ExprNodeDesc condn = op.getConf().getPredicate(); // get list of columns used in the filter List<String> cl = condn.getCols(); // merge it with the downstream col list List<String> filterOpPrunedColLists = Utilities.mergeUniqElems(cppCtx.genColLists(op), cl); List<String> filterOpPrunedColListsOrderPreserved = preserveColumnOrder(op, filterOpPrunedColLists); cppCtx.getPrunedColLists().put(op, filterOpPrunedColListsOrderPreserved); pruneOperator(cppCtx, op, cppCtx.getPrunedColLists().get(op)); return null; }
@Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException { super.process(nd, stack, ctx, nodeOutputs); LateralViewForwardOperator op = (LateralViewForwardOperator) nd; ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx; // get the SEL(*) branch Operator<?> select = op.getChildOperators().get(LateralViewJoinOperator.SELECT_TAG); // these are from ColumnPrunerSelectProc List<String> cols = cppCtx.getPrunedColList(select); RowResolver rr = cppCtx.getOpToParseCtxMap().get(op).getRowResolver(); if (rr.getColumnInfos().size() != cols.size()) { ArrayList<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>(); ArrayList<String> outputColNames = new ArrayList<String>(); for (String col : cols) { // revert output cols of SEL(*) to ExprNodeColumnDesc String[] tabcol = rr.reverseLookup(col); ColumnInfo colInfo = rr.get(tabcol[0], tabcol[1]); ExprNodeColumnDesc colExpr = new ExprNodeColumnDesc(colInfo); colList.add(colExpr); outputColNames.add(col); } // replace SEL(*) to SEL(exprs) ((SelectDesc) select.getConf()).setSelStarNoCompute(false); ((SelectDesc) select.getConf()).setColList(colList); ((SelectDesc) select.getConf()).setOutputColumnNames(outputColNames); pruneOperator(ctx, select, outputColNames); Operator<?> udtfPath = op.getChildOperators().get(LateralViewJoinOperator.UDTF_TAG); List<String> lvFCols = new ArrayList<String>(cppCtx.getPrunedColLists().get(udtfPath)); lvFCols = Utilities.mergeUniqElems(lvFCols, outputColNames); pruneOperator(ctx, op, lvFCols); } return null; }
private static void pruneJoinOperator( NodeProcessorCtx ctx, CommonJoinOperator op, JoinDesc conf, Map<String, ExprNodeDesc> columnExprMap, Map<Byte, List<Integer>> retainMap, boolean mapJoin) throws SemanticException { ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx; List<Operator<? extends OperatorDesc>> childOperators = op.getChildOperators(); LOG.info("JOIN " + op.getIdentifier() + " oldExprs: " + conf.getExprs()); List<String> childColLists = cppCtx.genColLists(op); if (childColLists == null) { return; } Map<Byte, List<String>> prunedColLists = new HashMap<Byte, List<String>>(); for (byte tag : conf.getTagOrder()) { prunedColLists.put(tag, new ArrayList<String>()); } // add the columns in join filters Set<Map.Entry<Byte, List<ExprNodeDesc>>> filters = conf.getFilters().entrySet(); Iterator<Map.Entry<Byte, List<ExprNodeDesc>>> iter = filters.iterator(); while (iter.hasNext()) { Map.Entry<Byte, List<ExprNodeDesc>> entry = iter.next(); Byte tag = entry.getKey(); for (ExprNodeDesc desc : entry.getValue()) { List<String> cols = prunedColLists.get(tag); cols = Utilities.mergeUniqElems(cols, desc.getCols()); prunedColLists.put(tag, cols); } } RowResolver joinRR = cppCtx.getOpToParseCtxMap().get(op).getRowResolver(); RowResolver newJoinRR = new RowResolver(); ArrayList<String> outputCols = new ArrayList<String>(); ArrayList<ColumnInfo> rs = new ArrayList<ColumnInfo>(); Map<String, ExprNodeDesc> newColExprMap = new HashMap<String, ExprNodeDesc>(); for (int i = 0; i < conf.getOutputColumnNames().size(); i++) { String internalName = conf.getOutputColumnNames().get(i); ExprNodeDesc desc = columnExprMap.get(internalName); Byte tag = conf.getReversedExprs().get(internalName); if (!childColLists.contains(internalName)) { int index = conf.getExprs().get(tag).indexOf(desc); if (index < 0) { continue; } conf.getExprs().get(tag).remove(desc); if (retainMap != null) { retainMap.get(tag).remove(index); } } else { List<String> prunedRSList = prunedColLists.get(tag); if 
(prunedRSList == null) { prunedRSList = new ArrayList<String>(); prunedColLists.put(tag, prunedRSList); } prunedRSList = Utilities.mergeUniqElems(prunedRSList, desc.getCols()); outputCols.add(internalName); newColExprMap.put(internalName, desc); } } if (mapJoin) { // regenerate the valueTableDesc List<TableDesc> valueTableDescs = new ArrayList<TableDesc>(); for (int pos = 0; pos < op.getParentOperators().size(); pos++) { List<ExprNodeDesc> valueCols = conf.getExprs().get(Byte.valueOf((byte) pos)); StringBuilder keyOrder = new StringBuilder(); for (int i = 0; i < valueCols.size(); i++) { keyOrder.append("+"); } TableDesc valueTableDesc = PlanUtils.getMapJoinValueTableDesc( PlanUtils.getFieldSchemasFromColumnList(valueCols, "mapjoinvalue")); valueTableDescs.add(valueTableDesc); } ((MapJoinDesc) conf).setValueTblDescs(valueTableDescs); Set<Map.Entry<Byte, List<ExprNodeDesc>>> exprs = ((MapJoinDesc) conf).getKeys().entrySet(); Iterator<Map.Entry<Byte, List<ExprNodeDesc>>> iters = exprs.iterator(); while (iters.hasNext()) { Map.Entry<Byte, List<ExprNodeDesc>> entry = iters.next(); List<ExprNodeDesc> lists = entry.getValue(); for (int j = 0; j < lists.size(); j++) { ExprNodeDesc desc = lists.get(j); Byte tag = entry.getKey(); List<String> cols = prunedColLists.get(tag); cols = Utilities.mergeUniqElems(cols, desc.getCols()); prunedColLists.put(tag, cols); } } } for (Operator<? 
extends OperatorDesc> child : childOperators) { if (child instanceof ReduceSinkOperator) { boolean[] flags = getPruneReduceSinkOpRetainFlags(childColLists, (ReduceSinkOperator) child); pruneReduceSinkOperator(flags, (ReduceSinkOperator) child, cppCtx); } } for (int i = 0; i < outputCols.size(); i++) { String internalName = outputCols.get(i); String[] nm = joinRR.reverseLookup(internalName); ColumnInfo col = joinRR.get(nm[0], nm[1]); newJoinRR.put(nm[0], nm[1], col); rs.add(col); } LOG.info("JOIN " + op.getIdentifier() + " newExprs: " + conf.getExprs()); op.setColumnExprMap(newColExprMap); conf.setOutputColumnNames(outputCols); op.getSchema().setSignature(rs); cppCtx.getOpToParseCtxMap().get(op).setRowResolver(newJoinRR); cppCtx.getJoinPrunedColLists().put(op, prunedColLists); }