/*
  * add any input columns referenced in WindowFn args or expressions.
  */
 private ArrayList<String> prunedColumnsList(
     List<String> prunedCols, WindowTableFunctionDef tDef) {
   // we create a copy of prunedCols to create a list of pruned columns for PTFOperator
   ArrayList<String> mergedColList = new ArrayList<String>(prunedCols);
   if (tDef.getWindowFunctions() != null) {
     for (WindowFunctionDef wDef : tDef.getWindowFunctions()) {
       if (wDef.getArgs() == null) {
         continue;
       }
       for (PTFExpressionDef arg : wDef.getArgs()) {
         ExprNodeDesc exprNode = arg.getExprNode();
         Utilities.mergeUniqElems(mergedColList, exprNode.getCols());
       }
     }
   }
   if (tDef.getPartition() != null) {
     for (PTFExpressionDef col : tDef.getPartition().getExpressions()) {
       ExprNodeDesc exprNode = col.getExprNode();
       Utilities.mergeUniqElems(mergedColList, exprNode.getCols());
     }
   }
   if (tDef.getOrder() != null) {
     for (PTFExpressionDef col : tDef.getOrder().getExpressions()) {
       ExprNodeDesc exprNode = col.getExprNode();
       Utilities.mergeUniqElems(mergedColList, exprNode.getCols());
     }
   }
   return mergedColList;
 }
    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
        throws SemanticException {
      GroupByOperator op = (GroupByOperator) nd;
      ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
      List<String> colLists = new ArrayList<String>();
      GroupByDesc conf = op.getConf();
      ArrayList<ExprNodeDesc> keys = conf.getKeys();
      for (ExprNodeDesc key : keys) {
        colLists = Utilities.mergeUniqElems(colLists, key.getCols());
      }

      ArrayList<AggregationDesc> aggrs = conf.getAggregators();
      for (AggregationDesc aggr : aggrs) {
        ArrayList<ExprNodeDesc> params = aggr.getParameters();
        for (ExprNodeDesc param : params) {
          colLists = Utilities.mergeUniqElems(colLists, param.getCols());
        }
      }
      int groupingSetPosition = conf.getGroupingSetPosition();
      if (groupingSetPosition >= 0) {
        List<String> cols = cppCtx.genColLists(op);
        String groupingColumn = conf.getOutputColumnNames().get(groupingSetPosition);
        if (!cols.contains(groupingColumn)) {
          conf.getOutputColumnNames().remove(groupingSetPosition);
          if (op.getSchema() != null) {
            op.getSchema().getSignature().remove(groupingSetPosition);
          }
        }
      }

      cppCtx.getPrunedColLists().put(op, colLists);
      return null;
    }
    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
        throws SemanticException {
      ReduceSinkOperator op = (ReduceSinkOperator) nd;
      ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
      RowResolver resolver = cppCtx.getOpToParseCtxMap().get(op).getRowResolver();
      ReduceSinkDesc conf = op.getConf();

      List<String> colLists = new ArrayList<String>();
      ArrayList<ExprNodeDesc> keys = conf.getKeyCols();
      LOG.debug("Reduce Sink Operator " + op.getIdentifier() + " key:" + keys);
      for (ExprNodeDesc key : keys) {
        colLists = Utilities.mergeUniqElems(colLists, key.getCols());
      }

      assert op.getNumChild() == 1;

      Operator<? extends OperatorDesc> child = op.getChildOperators().get(0);

      List<String> childCols;
      if (child instanceof CommonJoinOperator) {
        childCols = cppCtx.getJoinPrunedColLists().get(child).get((byte) conf.getTag());
      } else {
        childCols = cppCtx.getPrunedColList(child);
      }
      List<ExprNodeDesc> valCols = conf.getValueCols();
      List<String> valColNames = conf.getOutputValueColumnNames();

      if (childCols != null) {
        boolean[] flags = new boolean[valCols.size()];

        for (String childCol : childCols) {
          int index = valColNames.indexOf(Utilities.removeValueTag(childCol));
          if (index < 0) {
            continue;
          }
          flags[index] = true;
          colLists = Utilities.mergeUniqElems(colLists, valCols.get(index).getCols());
        }

        Collections.sort(colLists);
        pruneReduceSinkOperator(flags, op, cppCtx);
        cppCtx.getPrunedColLists().put(op, colLists);
        return null;
      }

      // Reduce Sink contains the columns needed - no need to aggregate from
      // children
      for (ExprNodeDesc val : valCols) {
        colLists = Utilities.mergeUniqElems(colLists, val.getCols());
      }

      cppCtx.getPrunedColLists().put(op, colLists);
      return null;
    }
Beispiel #4
0
 public List<String> getReferencedColumns() throws SemanticException {
   MatchPath matchPath = (MatchPath) evaluator;
   List<String> columns = new ArrayList<String>();
   for (ExprNodeDesc exprNode : matchPath.resultExprInfo.resultExprNodes) {
     Utilities.mergeUniqElems(columns, exprNode.getCols());
   }
   for (ExprNodeDesc exprNode : matchPath.symInfo.symbolExprsDecs) {
     Utilities.mergeUniqElems(columns, exprNode.getCols());
   }
   return columns;
 }
    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
        throws SemanticException {
      LateralViewJoinOperator op = (LateralViewJoinOperator) nd;
      ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
      List<String> cols = cppCtx.genColLists(op);
      if (cols == null) {
        return null;
      }

      Map<String, ExprNodeDesc> colExprMap = op.getColumnExprMap();
      // As columns go down the DAG, the LVJ will transform internal column
      // names from something like 'key' to '_col0'. Because of this, we need
      // to undo this transformation using the column expression map as the
      // column names propagate up the DAG.

      // this is SEL(*) cols + UDTF cols
      List<String> outputCols = op.getConf().getOutputInternalColNames();

      // cause we cannot prune columns from UDTF branch currently, extract
      // columns from SEL(*) branch only and append all columns from UDTF branch to it
      int numSelColumns = op.getConf().getNumSelColumns();

      List<String> colsAfterReplacement = new ArrayList<String>();
      ArrayList<String> newColNames = new ArrayList<String>();
      for (String col : cols) {
        int index = outputCols.indexOf(col);
        // colExprMap.size() == size of cols from SEL(*) branch
        if (index >= 0 && index < numSelColumns) {
          ExprNodeDesc transformed = colExprMap.get(col);
          Utilities.mergeUniqElems(colsAfterReplacement, transformed.getCols());
          newColNames.add(col);
        }
      }
      // update number of columns from sel(*)
      op.getConf().setNumSelColumns(newColNames.size());

      // add all UDTF columns
      // following SEL will do CP for columns from UDTF, not adding SEL in here
      newColNames.addAll(outputCols.subList(numSelColumns, outputCols.size()));
      op.getConf().setOutputInternalColNames(newColNames);
      pruneOperator(ctx, op, newColNames);
      cppCtx.getPrunedColLists().put(op, colsAfterReplacement);
      return null;
    }
    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
        throws SemanticException {
      FilterOperator op = (FilterOperator) nd;
      ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
      ExprNodeDesc condn = op.getConf().getPredicate();
      // get list of columns used in the filter
      List<String> cl = condn.getCols();
      // merge it with the downstream col list
      List<String> filterOpPrunedColLists = Utilities.mergeUniqElems(cppCtx.genColLists(op), cl);
      List<String> filterOpPrunedColListsOrderPreserved =
          preserveColumnOrder(op, filterOpPrunedColLists);
      cppCtx.getPrunedColLists().put(op, filterOpPrunedColListsOrderPreserved);

      pruneOperator(cppCtx, op, cppCtx.getPrunedColLists().get(op));

      return null;
    }
    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
        throws SemanticException {
      super.process(nd, stack, ctx, nodeOutputs);
      LateralViewForwardOperator op = (LateralViewForwardOperator) nd;
      ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;

      // get the SEL(*) branch
      Operator<?> select = op.getChildOperators().get(LateralViewJoinOperator.SELECT_TAG);

      // these are from ColumnPrunerSelectProc
      List<String> cols = cppCtx.getPrunedColList(select);
      RowResolver rr = cppCtx.getOpToParseCtxMap().get(op).getRowResolver();
      if (rr.getColumnInfos().size() != cols.size()) {
        ArrayList<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
        ArrayList<String> outputColNames = new ArrayList<String>();
        for (String col : cols) {
          // revert output cols of SEL(*) to ExprNodeColumnDesc
          String[] tabcol = rr.reverseLookup(col);
          ColumnInfo colInfo = rr.get(tabcol[0], tabcol[1]);
          ExprNodeColumnDesc colExpr = new ExprNodeColumnDesc(colInfo);
          colList.add(colExpr);
          outputColNames.add(col);
        }
        // replace SEL(*) to SEL(exprs)
        ((SelectDesc) select.getConf()).setSelStarNoCompute(false);
        ((SelectDesc) select.getConf()).setColList(colList);
        ((SelectDesc) select.getConf()).setOutputColumnNames(outputColNames);
        pruneOperator(ctx, select, outputColNames);

        Operator<?> udtfPath = op.getChildOperators().get(LateralViewJoinOperator.UDTF_TAG);
        List<String> lvFCols = new ArrayList<String>(cppCtx.getPrunedColLists().get(udtfPath));
        lvFCols = Utilities.mergeUniqElems(lvFCols, outputColNames);
        pruneOperator(ctx, op, lvFCols);
      }
      return null;
    }
  private static void pruneJoinOperator(
      NodeProcessorCtx ctx,
      CommonJoinOperator op,
      JoinDesc conf,
      Map<String, ExprNodeDesc> columnExprMap,
      Map<Byte, List<Integer>> retainMap,
      boolean mapJoin)
      throws SemanticException {
    ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
    List<Operator<? extends OperatorDesc>> childOperators = op.getChildOperators();

    LOG.info("JOIN " + op.getIdentifier() + " oldExprs: " + conf.getExprs());
    List<String> childColLists = cppCtx.genColLists(op);
    if (childColLists == null) {
      return;
    }

    Map<Byte, List<String>> prunedColLists = new HashMap<Byte, List<String>>();
    for (byte tag : conf.getTagOrder()) {
      prunedColLists.put(tag, new ArrayList<String>());
    }

    // add the columns in join filters
    Set<Map.Entry<Byte, List<ExprNodeDesc>>> filters = conf.getFilters().entrySet();
    Iterator<Map.Entry<Byte, List<ExprNodeDesc>>> iter = filters.iterator();
    while (iter.hasNext()) {
      Map.Entry<Byte, List<ExprNodeDesc>> entry = iter.next();
      Byte tag = entry.getKey();
      for (ExprNodeDesc desc : entry.getValue()) {
        List<String> cols = prunedColLists.get(tag);
        cols = Utilities.mergeUniqElems(cols, desc.getCols());
        prunedColLists.put(tag, cols);
      }
    }

    RowResolver joinRR = cppCtx.getOpToParseCtxMap().get(op).getRowResolver();
    RowResolver newJoinRR = new RowResolver();
    ArrayList<String> outputCols = new ArrayList<String>();
    ArrayList<ColumnInfo> rs = new ArrayList<ColumnInfo>();
    Map<String, ExprNodeDesc> newColExprMap = new HashMap<String, ExprNodeDesc>();

    for (int i = 0; i < conf.getOutputColumnNames().size(); i++) {
      String internalName = conf.getOutputColumnNames().get(i);
      ExprNodeDesc desc = columnExprMap.get(internalName);
      Byte tag = conf.getReversedExprs().get(internalName);
      if (!childColLists.contains(internalName)) {
        int index = conf.getExprs().get(tag).indexOf(desc);
        if (index < 0) {
          continue;
        }
        conf.getExprs().get(tag).remove(desc);
        if (retainMap != null) {
          retainMap.get(tag).remove(index);
        }
      } else {
        List<String> prunedRSList = prunedColLists.get(tag);
        if (prunedRSList == null) {
          prunedRSList = new ArrayList<String>();
          prunedColLists.put(tag, prunedRSList);
        }
        prunedRSList = Utilities.mergeUniqElems(prunedRSList, desc.getCols());
        outputCols.add(internalName);
        newColExprMap.put(internalName, desc);
      }
    }

    if (mapJoin) {
      // regenerate the valueTableDesc
      List<TableDesc> valueTableDescs = new ArrayList<TableDesc>();
      for (int pos = 0; pos < op.getParentOperators().size(); pos++) {
        List<ExprNodeDesc> valueCols = conf.getExprs().get(Byte.valueOf((byte) pos));
        StringBuilder keyOrder = new StringBuilder();
        for (int i = 0; i < valueCols.size(); i++) {
          keyOrder.append("+");
        }

        TableDesc valueTableDesc =
            PlanUtils.getMapJoinValueTableDesc(
                PlanUtils.getFieldSchemasFromColumnList(valueCols, "mapjoinvalue"));

        valueTableDescs.add(valueTableDesc);
      }
      ((MapJoinDesc) conf).setValueTblDescs(valueTableDescs);

      Set<Map.Entry<Byte, List<ExprNodeDesc>>> exprs = ((MapJoinDesc) conf).getKeys().entrySet();
      Iterator<Map.Entry<Byte, List<ExprNodeDesc>>> iters = exprs.iterator();
      while (iters.hasNext()) {
        Map.Entry<Byte, List<ExprNodeDesc>> entry = iters.next();
        List<ExprNodeDesc> lists = entry.getValue();
        for (int j = 0; j < lists.size(); j++) {
          ExprNodeDesc desc = lists.get(j);
          Byte tag = entry.getKey();
          List<String> cols = prunedColLists.get(tag);
          cols = Utilities.mergeUniqElems(cols, desc.getCols());
          prunedColLists.put(tag, cols);
        }
      }
    }

    for (Operator<? extends OperatorDesc> child : childOperators) {
      if (child instanceof ReduceSinkOperator) {
        boolean[] flags =
            getPruneReduceSinkOpRetainFlags(childColLists, (ReduceSinkOperator) child);
        pruneReduceSinkOperator(flags, (ReduceSinkOperator) child, cppCtx);
      }
    }

    for (int i = 0; i < outputCols.size(); i++) {
      String internalName = outputCols.get(i);
      String[] nm = joinRR.reverseLookup(internalName);
      ColumnInfo col = joinRR.get(nm[0], nm[1]);
      newJoinRR.put(nm[0], nm[1], col);
      rs.add(col);
    }

    LOG.info("JOIN " + op.getIdentifier() + " newExprs: " + conf.getExprs());
    op.setColumnExprMap(newColExprMap);
    conf.setOutputColumnNames(outputCols);
    op.getSchema().setSignature(rs);
    cppCtx.getOpToParseCtxMap().get(op).setRowResolver(newJoinRR);
    cppCtx.getJoinPrunedColLists().put(op, prunedColLists);
  }