Exemplo n.º 1
0
  @Override
  public ParseContext transform(ParseContext pctx) throws SemanticException {
    pGraphContext = pctx;

    // create a the context for walking operators
    OpWalkerInfo opWalkerInfo = new OpWalkerInfo(pGraphContext);

    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(
        new RuleRegExp("R1", FilterOperator.getOperatorName() + "%"),
        OpProcFactory.getFilterProc());
    opRules.put(
        new RuleRegExp("R2", PTFOperator.getOperatorName() + "%"), OpProcFactory.getPTFProc());
    opRules.put(
        new RuleRegExp("R3", CommonJoinOperator.getOperatorName() + "%"),
        OpProcFactory.getJoinProc());
    opRules.put(
        new RuleRegExp("R4", TableScanOperator.getOperatorName() + "%"), OpProcFactory.getTSProc());
    opRules.put(
        new RuleRegExp("R5", ScriptOperator.getOperatorName() + "%"), OpProcFactory.getSCRProc());
    opRules.put(
        new RuleRegExp("R6", LimitOperator.getOperatorName() + "%"), OpProcFactory.getLIMProc());
    opRules.put(
        new RuleRegExp("R7", UDTFOperator.getOperatorName() + "%"), OpProcFactory.getUDTFProc());
    opRules.put(
        new RuleRegExp("R8", LateralViewForwardOperator.getOperatorName() + "%"),
        OpProcFactory.getLVFProc());
    opRules.put(
        new RuleRegExp("R9", LateralViewJoinOperator.getOperatorName() + "%"),
        OpProcFactory.getLVJProc());
    opRules.put(
        new RuleRegExp("R10", ReduceSinkOperator.getOperatorName() + "%"),
        OpProcFactory.getRSProc());

    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    Dispatcher disp =
        new DefaultRuleDispatcher(OpProcFactory.getDefaultProc(), opRules, opWalkerInfo);
    GraphWalker ogw = new DefaultGraphWalker(disp);

    // Create a list of topop nodes
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pGraphContext.getTopOps().values());
    ogw.startWalking(topNodes, null);

    if (LOG.isDebugEnabled()) {
      LOG.debug("After PPD:\n" + Operator.toString(pctx.getTopOps().values()));
    }
    return pGraphContext;
  }
    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
        throws SemanticException {
      LateralViewJoinOperator op = (LateralViewJoinOperator) nd;
      ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
      List<String> cols = cppCtx.genColLists(op);
      if (cols == null) {
        return null;
      }

      Map<String, ExprNodeDesc> colExprMap = op.getColumnExprMap();
      // As columns go down the DAG, the LVJ will transform internal column
      // names from something like 'key' to '_col0'. Because of this, we need
      // to undo this transformation using the column expression map as the
      // column names propagate up the DAG.

      // this is SEL(*) cols + UDTF cols
      List<String> outputCols = op.getConf().getOutputInternalColNames();

      // cause we cannot prune columns from UDTF branch currently, extract
      // columns from SEL(*) branch only and append all columns from UDTF branch to it
      int numSelColumns = op.getConf().getNumSelColumns();

      List<String> colsAfterReplacement = new ArrayList<String>();
      ArrayList<String> newColNames = new ArrayList<String>();
      for (String col : cols) {
        int index = outputCols.indexOf(col);
        // colExprMap.size() == size of cols from SEL(*) branch
        if (index >= 0 && index < numSelColumns) {
          ExprNodeDesc transformed = colExprMap.get(col);
          Utilities.mergeUniqElems(colsAfterReplacement, transformed.getCols());
          newColNames.add(col);
        }
      }
      // update number of columns from sel(*)
      op.getConf().setNumSelColumns(newColNames.size());

      // add all UDTF columns
      // following SEL will do CP for columns from UDTF, not adding SEL in here
      newColNames.addAll(outputCols.subList(numSelColumns, outputCols.size()));
      op.getConf().setOutputInternalColNames(newColNames);
      pruneOperator(ctx, op, newColNames);
      cppCtx.getPrunedColLists().put(op, colsAfterReplacement);
      return null;
    }