コード例 #1
0
 @Override
 public Object process(
     Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs)
     throws SemanticException {
   ReduceSinkDeduplicateProcCtx dedupCtx = (ReduceSinkDeduplicateProcCtx) procCtx;
   if (dedupCtx.hasBeenRemoved((Operator<?>) nd)) {
     return false;
   }
   ReduceSinkOperator cRS = (ReduceSinkOperator) nd;
   Operator<?> child = CorrelationUtilities.getSingleChild(cRS);
   if (child instanceof JoinOperator) {
     return false; // not supported
   }
   if (child instanceof GroupByOperator) {
     GroupByOperator cGBY = (GroupByOperator) child;
     if (!CorrelationUtilities.hasGroupingSet(cRS) && !cGBY.getConf().isGroupingSetsPresent()) {
       return process(cRS, cGBY, dedupCtx);
     }
     return false;
   }
   if (child instanceof SelectOperator) {
     return process(cRS, dedupCtx);
   }
   return false;
 }
コード例 #2
0
    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
        throws SemanticException {
      GroupByOperator op = (GroupByOperator) nd;
      ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
      List<String> colLists = new ArrayList<String>();
      GroupByDesc conf = op.getConf();
      ArrayList<ExprNodeDesc> keys = conf.getKeys();
      for (ExprNodeDesc key : keys) {
        colLists = Utilities.mergeUniqElems(colLists, key.getCols());
      }

      ArrayList<AggregationDesc> aggrs = conf.getAggregators();
      for (AggregationDesc aggr : aggrs) {
        ArrayList<ExprNodeDesc> params = aggr.getParameters();
        for (ExprNodeDesc param : params) {
          colLists = Utilities.mergeUniqElems(colLists, param.getCols());
        }
      }
      int groupingSetPosition = conf.getGroupingSetPosition();
      if (groupingSetPosition >= 0) {
        List<String> cols = cppCtx.genColLists(op);
        String groupingColumn = conf.getOutputColumnNames().get(groupingSetPosition);
        if (!cols.contains(groupingColumn)) {
          conf.getOutputColumnNames().remove(groupingSetPosition);
          if (op.getSchema() != null) {
            op.getSchema().getSignature().remove(groupingSetPosition);
          }
        }
      }

      cppCtx.getPrunedColLists().put(op, colLists);
      return null;
    }
コード例 #3
0
/**
 * If two reducer sink operators share the same partition/sort columns and order, they can be
 * merged. This should happen after map join optimization because map join optimization will remove
 * reduce sink operators.
 *
 * <p>This optimizer removes/replaces child-RS (not parent) which is safer way for
 * DefaultGraphWalker.
 */
public class ReduceSinkDeDuplication extends Transform {

  private static final String RS = ReduceSinkOperator.getOperatorName();
  private static final String GBY = GroupByOperator.getOperatorName();
  private static final String JOIN = JoinOperator.getOperatorName();

  protected ParseContext pGraphContext;

  @Override
  public ParseContext transform(ParseContext pctx) throws SemanticException {
    pGraphContext = pctx;

    // generate pruned column list for all relevant operators
    ReduceSinkDeduplicateProcCtx cppCtx = new ReduceSinkDeduplicateProcCtx(pGraphContext);

    // for auto convert map-joins, it not safe to dedup in here (todo)
    boolean mergeJoins =
        !pctx.getConf().getBoolVar(HIVECONVERTJOIN)
            && !pctx.getConf().getBoolVar(HIVECONVERTJOINNOCONDITIONALTASK)
            && !pctx.getConf().getBoolVar(ConfVars.HIVE_CONVERT_JOIN_BUCKET_MAPJOIN_TEZ)
            && !pctx.getConf().getBoolVar(ConfVars.HIVEDYNAMICPARTITIONHASHJOIN);

    // If multiple rules can be matched with same cost, last rule will be choosen as a processor
    // see DefaultRuleDispatcher#dispatch()
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(
        new RuleRegExp("R1", RS + "%.*%" + RS + "%"),
        ReduceSinkDeduplicateProcFactory.getReducerReducerProc());
    opRules.put(
        new RuleRegExp("R2", RS + "%" + GBY + "%.*%" + RS + "%"),
        ReduceSinkDeduplicateProcFactory.getGroupbyReducerProc());
    if (mergeJoins) {
      opRules.put(
          new RuleRegExp("R3", JOIN + "%.*%" + RS + "%"),
          ReduceSinkDeduplicateProcFactory.getJoinReducerProc());
    }
    // TODO RS+JOIN

    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    Dispatcher disp =
        new DefaultRuleDispatcher(
            ReduceSinkDeduplicateProcFactory.getDefaultProc(), opRules, cppCtx);
    GraphWalker ogw = new DefaultGraphWalker(disp);

    // Create a list of topop nodes
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pGraphContext.getTopOps().values());
    ogw.startWalking(topNodes, null);
    return pGraphContext;
  }

  protected class ReduceSinkDeduplicateProcCtx extends AbstractCorrelationProcCtx {

    public ReduceSinkDeduplicateProcCtx(ParseContext pctx) {
      super(pctx);
    }
  }

  static class ReduceSinkDeduplicateProcFactory {

    public static NodeProcessor getReducerReducerProc() {
      return new ReducerReducerProc();
    }

    public static NodeProcessor getGroupbyReducerProc() {
      return new GroupbyReducerProc();
    }

    public static NodeProcessor getJoinReducerProc() {
      return new JoinReducerProc();
    }

    public static NodeProcessor getDefaultProc() {
      return new DefaultProc();
    }
  }

  /*
   * do nothing.
   */
  static class DefaultProc implements NodeProcessor {
    @Override
    public Object process(
        Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs)
        throws SemanticException {
      return null;
    }
  }

  public abstract static class AbsctractReducerReducerProc implements NodeProcessor {

    @Override
    public Object process(
        Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs)
        throws SemanticException {
      ReduceSinkDeduplicateProcCtx dedupCtx = (ReduceSinkDeduplicateProcCtx) procCtx;
      if (dedupCtx.hasBeenRemoved((Operator<?>) nd)) {
        return false;
      }
      ReduceSinkOperator cRS = (ReduceSinkOperator) nd;
      Operator<?> child = CorrelationUtilities.getSingleChild(cRS);
      if (child instanceof JoinOperator) {
        return false; // not supported
      }
      if (child instanceof GroupByOperator) {
        GroupByOperator cGBY = (GroupByOperator) child;
        if (!CorrelationUtilities.hasGroupingSet(cRS) && !cGBY.getConf().isGroupingSetsPresent()) {
          return process(cRS, cGBY, dedupCtx);
        }
        return false;
      }
      if (child instanceof SelectOperator) {
        return process(cRS, dedupCtx);
      }
      return false;
    }

    protected abstract Object process(ReduceSinkOperator cRS, ReduceSinkDeduplicateProcCtx dedupCtx)
        throws SemanticException;

    protected abstract Object process(
        ReduceSinkOperator cRS, GroupByOperator cGBY, ReduceSinkDeduplicateProcCtx dedupCtx)
        throws SemanticException;

    // for JOIN-RS case, it's not possible generally to merge if child has
    // less key/partition columns than parents
    protected boolean merge(ReduceSinkOperator cRS, JoinOperator pJoin, int minReducer)
        throws SemanticException {
      List<Operator<?>> parents = pJoin.getParentOperators();
      ReduceSinkOperator[] pRSs = parents.toArray(new ReduceSinkOperator[parents.size()]);
      ReduceSinkDesc cRSc = cRS.getConf();
      ReduceSinkDesc pRS0c = pRSs[0].getConf();
      if (cRSc.getKeyCols().size() < pRS0c.getKeyCols().size()) {
        return false;
      }
      if (cRSc.getPartitionCols().size() != pRS0c.getPartitionCols().size()) {
        return false;
      }
      Integer moveReducerNumTo = checkNumReducer(cRSc.getNumReducers(), pRS0c.getNumReducers());
      if (moveReducerNumTo == null || moveReducerNumTo > 0 && cRSc.getNumReducers() < minReducer) {
        return false;
      }

      Integer moveRSOrderTo = checkOrder(cRSc.getOrder(), pRS0c.getOrder());
      if (moveRSOrderTo == null) {
        return false;
      }

      boolean[] sorted = CorrelationUtilities.getSortedTags(pJoin);

      int cKeySize = cRSc.getKeyCols().size();
      for (int i = 0; i < cKeySize; i++) {
        ExprNodeDesc cexpr = cRSc.getKeyCols().get(i);
        ExprNodeDesc[] pexprs = new ExprNodeDesc[pRSs.length];
        for (int tag = 0; tag < pRSs.length; tag++) {
          pexprs[tag] = pRSs[tag].getConf().getKeyCols().get(i);
        }
        int found = CorrelationUtilities.indexOf(cexpr, pexprs, cRS, pRSs, sorted);
        if (found != i) {
          return false;
        }
      }
      int cPartSize = cRSc.getPartitionCols().size();
      for (int i = 0; i < cPartSize; i++) {
        ExprNodeDesc cexpr = cRSc.getPartitionCols().get(i);
        ExprNodeDesc[] pexprs = new ExprNodeDesc[pRSs.length];
        for (int tag = 0; tag < pRSs.length; tag++) {
          pexprs[tag] = pRSs[tag].getConf().getPartitionCols().get(i);
        }
        int found = CorrelationUtilities.indexOf(cexpr, pexprs, cRS, pRSs, sorted);
        if (found != i) {
          return false;
        }
      }

      if (moveReducerNumTo > 0) {
        for (ReduceSinkOperator pRS : pRSs) {
          pRS.getConf().setNumReducers(cRS.getConf().getNumReducers());
        }
      }
      return true;
    }

    /**
     * Current RSDedup remove/replace child RS. For key columns, sorting order, and the number of
     * reducers, copy more specific part of configurations of child RS to that of parent RS. For
     * partitioning columns, if both child RS and parent RS have been assigned partitioning columns,
     * we will choose the more general partitioning columns. If parent RS has not been assigned any
     * partitioning column, we will use partitioning columns (if exist) of child RS.
     */
    protected boolean merge(ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minReducer)
        throws SemanticException {
      int[] result = checkStatus(cRS, pRS, minReducer);
      if (result == null) {
        return false;
      }

      if (result[0] > 0) {
        // The sorting columns of the child RS are more specific than
        // those of the parent RS. Assign sorting columns of the child RS
        // to the parent RS.
        List<ExprNodeDesc> childKCs = cRS.getConf().getKeyCols();
        pRS.getConf().setKeyCols(ExprNodeDescUtils.backtrack(childKCs, cRS, pRS));
      }

      if (result[1] < 0) {
        // The partitioning columns of the parent RS are more specific than
        // those of the child RS.
        List<ExprNodeDesc> childPCs = cRS.getConf().getPartitionCols();
        if (childPCs != null && !childPCs.isEmpty()) {
          // If partitioning columns of the child RS are assigned,
          // assign these to the partitioning columns of the parent RS.
          pRS.getConf().setPartitionCols(ExprNodeDescUtils.backtrack(childPCs, cRS, pRS));
        }
      } else if (result[1] > 0) {
        // The partitioning columns of the child RS are more specific than
        // those of the parent RS.
        List<ExprNodeDesc> parentPCs = pRS.getConf().getPartitionCols();
        if (parentPCs == null || parentPCs.isEmpty()) {
          // If partitioning columns of the parent RS are not assigned,
          // assign partitioning columns of the child RS to the parent RS.
          ArrayList<ExprNodeDesc> childPCs = cRS.getConf().getPartitionCols();
          pRS.getConf().setPartitionCols(ExprNodeDescUtils.backtrack(childPCs, cRS, pRS));
        }
      }

      if (result[2] > 0) {
        // The sorting order of the child RS is more specific than
        // that of the parent RS. Assign the sorting order of the child RS
        // to the parent RS.
        if (result[0] <= 0) {
          // Sorting columns of the parent RS are more specific than those of the
          // child RS but Sorting order of the child RS is more specific than
          // that of the parent RS.
          throw new SemanticException(
              "Sorting columns and order don't match. "
                  + "Try set "
                  + HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION
                  + "=false;");
        }
        pRS.getConf().setOrder(cRS.getConf().getOrder());
      }

      if (result[3] > 0) {
        // The number of reducers of the child RS is more specific than
        // that of the parent RS. Assign the number of reducers of the child RS
        // to the parent RS.
        pRS.getConf().setNumReducers(cRS.getConf().getNumReducers());
      }

      if (result[4] > 0) {
        // This case happens only when pRS key is empty in which case we can use
        // number of distribution keys and key serialization info from cRS
        pRS.getConf().setNumDistributionKeys(cRS.getConf().getNumDistributionKeys());
        List<FieldSchema> fields =
            PlanUtils.getFieldSchemasFromColumnList(pRS.getConf().getKeyCols(), "reducesinkkey");
        TableDesc keyTable = PlanUtils.getReduceKeyTableDesc(fields, pRS.getConf().getOrder());
        ArrayList<String> outputKeyCols = Lists.newArrayList();
        for (int i = 0; i < fields.size(); i++) {
          outputKeyCols.add(fields.get(i).getName());
        }
        pRS.getConf().setOutputKeyColumnNames(outputKeyCols);
        pRS.getConf().setKeySerializeInfo(keyTable);
      }
      return true;
    }

    /**
     * Returns merge directions between two RSs for criterias (ordering, number of reducers, reducer
     * keys, partition keys). Returns null if any of categories is not mergeable.
     *
     * <p>Values for each index can be -1, 0, 1 1. 0 means two configuration in the category is the
     * same 2. for -1, configuration of parent RS is more specific than child RS 3. for 1,
     * configuration of child RS is more specific than parent RS
     */
    private int[] checkStatus(ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minReducer)
        throws SemanticException {
      ReduceSinkDesc cConf = cRS.getConf();
      ReduceSinkDesc pConf = pRS.getConf();
      Integer moveRSOrderTo = checkOrder(cConf.getOrder(), pConf.getOrder());
      if (moveRSOrderTo == null) {
        return null;
      }
      Integer moveReducerNumTo = checkNumReducer(cConf.getNumReducers(), pConf.getNumReducers());
      if (moveReducerNumTo == null || moveReducerNumTo > 0 && cConf.getNumReducers() < minReducer) {
        return null;
      }
      List<ExprNodeDesc> ckeys = cConf.getKeyCols();
      List<ExprNodeDesc> pkeys = pConf.getKeyCols();
      Integer moveKeyColTo = checkExprs(ckeys, pkeys, cRS, pRS);
      if (moveKeyColTo == null) {
        return null;
      }
      List<ExprNodeDesc> cpars = cConf.getPartitionCols();
      List<ExprNodeDesc> ppars = pConf.getPartitionCols();
      Integer movePartitionColTo = checkExprs(cpars, ppars, cRS, pRS);
      if (movePartitionColTo == null) {
        return null;
      }
      Integer moveNumDistKeyTo =
          checkNumDistributionKey(cConf.getNumDistributionKeys(), pConf.getNumDistributionKeys());
      return new int[] {
        moveKeyColTo, movePartitionColTo, moveRSOrderTo, moveReducerNumTo, moveNumDistKeyTo
      };
    }

    private Integer checkNumDistributionKey(int cnd, int pnd) {
      // number of distribution keys of cRS is chosen only when numDistKeys of pRS
      // is 0 or less. In all other cases, distribution of the keys is based on
      // the pRS which is more generic than cRS.
      // Examples:
      // case 1: if pRS sort key is (a, b) and cRS sort key is (a, b, c) and number of
      // distribution keys are 2 and 3 resp. then after merge the sort keys will
      // be (a, b, c) while the number of distribution keys will be 2.
      // case 2: if pRS sort key is empty and number of distribution keys is 0
      // and if cRS sort key is (a, b) and number of distribution keys is 2 then
      // after merge new sort key will be (a, b) and number of distribution keys
      // will be 2.
      if (pnd <= 0) {
        return 1;
      }
      return 0;
    }

    /**
     * Overlapping part of keys should be the same between parent and child. And if child has more
     * keys than parent, non-overlapping part of keys should be backtrackable to parent.
     */
    private Integer checkExprs(
        List<ExprNodeDesc> ckeys,
        List<ExprNodeDesc> pkeys,
        ReduceSinkOperator cRS,
        ReduceSinkOperator pRS)
        throws SemanticException {
      Integer moveKeyColTo = 0;
      if (ckeys == null || ckeys.isEmpty()) {
        if (pkeys != null && !pkeys.isEmpty()) {
          moveKeyColTo = -1;
        }
      } else {
        if (pkeys == null || pkeys.isEmpty()) {
          for (ExprNodeDesc ckey : ckeys) {
            if (ExprNodeDescUtils.backtrack(ckey, cRS, pRS) == null) {
              // cKey is not present in parent
              return null;
            }
          }
          moveKeyColTo = 1;
        } else {
          moveKeyColTo = sameKeys(ckeys, pkeys, cRS, pRS);
        }
      }
      return moveKeyColTo;
    }

    // backtrack key exprs of child to parent and compare it with parent's
    protected Integer sameKeys(
        List<ExprNodeDesc> cexprs, List<ExprNodeDesc> pexprs, Operator<?> child, Operator<?> parent)
        throws SemanticException {
      int common = Math.min(cexprs.size(), pexprs.size());
      int limit = Math.max(cexprs.size(), pexprs.size());
      int i = 0;
      for (; i < common; i++) {
        ExprNodeDesc pexpr = pexprs.get(i);
        ExprNodeDesc cexpr = ExprNodeDescUtils.backtrack(cexprs.get(i), child, parent);
        if (cexpr == null || !pexpr.isSame(cexpr)) {
          return null;
        }
      }
      for (; i < limit; i++) {
        if (cexprs.size() > pexprs.size()) {
          if (ExprNodeDescUtils.backtrack(cexprs.get(i), child, parent) == null) {
            // cKey is not present in parent
            return null;
          }
        }
      }
      return Integer.valueOf(cexprs.size()).compareTo(pexprs.size());
    }

    // order of overlapping keys should be exactly the same
    protected Integer checkOrder(String corder, String porder) {
      if (corder == null || corder.trim().equals("")) {
        if (porder == null || porder.trim().equals("")) {
          return 0;
        }
        return -1;
      }
      if (porder == null || porder.trim().equals("")) {
        return 1;
      }
      corder = corder.trim();
      porder = porder.trim();
      int target = Math.min(corder.length(), porder.length());
      if (!corder.substring(0, target).equals(porder.substring(0, target))) {
        return null;
      }
      return Integer.valueOf(corder.length()).compareTo(porder.length());
    }

    /**
     * If number of reducers for RS is -1, the RS can have any number of reducers. It's generally
     * true except for order-by or forced bucketing cases. if both of num-reducers are not -1, those
     * number should be the same.
     */
    protected Integer checkNumReducer(int creduce, int preduce) {
      if (creduce < 0) {
        if (preduce < 0) {
          return 0;
        }
        return -1;
      }
      if (preduce < 0) {
        return 1;
      }
      if (creduce != preduce) {
        return null;
      }
      return 0;
    }
  }

  static class GroupbyReducerProc extends AbsctractReducerReducerProc {

    // pRS-pGBY-cRS
    @Override
    public Object process(ReduceSinkOperator cRS, ReduceSinkDeduplicateProcCtx dedupCtx)
        throws SemanticException {
      GroupByOperator pGBY =
          CorrelationUtilities.findPossibleParent(
              cRS, GroupByOperator.class, dedupCtx.trustScript());
      if (pGBY == null) {
        return false;
      }
      ReduceSinkOperator pRS =
          CorrelationUtilities.findPossibleParent(
              pGBY, ReduceSinkOperator.class, dedupCtx.trustScript());
      if (pRS != null && merge(cRS, pRS, dedupCtx.minReducer())) {
        CorrelationUtilities.replaceReduceSinkWithSelectOperator(cRS, dedupCtx.getPctx(), dedupCtx);
        pRS.getConf().setDeduplicated(true);
        return true;
      }
      return false;
    }

    // pRS-pGBY-cRS-cGBY
    @Override
    public Object process(
        ReduceSinkOperator cRS, GroupByOperator cGBY, ReduceSinkDeduplicateProcCtx dedupCtx)
        throws SemanticException {
      Operator<?> start = CorrelationUtilities.getStartForGroupBy(cRS, dedupCtx);
      GroupByOperator pGBY =
          CorrelationUtilities.findPossibleParent(
              start, GroupByOperator.class, dedupCtx.trustScript());
      if (pGBY == null) {
        return false;
      }
      ReduceSinkOperator pRS = CorrelationUtilities.getSingleParent(pGBY, ReduceSinkOperator.class);
      if (pRS != null && merge(cRS, pRS, dedupCtx.minReducer())) {
        CorrelationUtilities.removeReduceSinkForGroupBy(cRS, cGBY, dedupCtx.getPctx(), dedupCtx);
        pRS.getConf().setDeduplicated(true);
        return true;
      }
      return false;
    }
  }

  static class JoinReducerProc extends AbsctractReducerReducerProc {

    // pRS-pJOIN-cRS
    @Override
    public Object process(ReduceSinkOperator cRS, ReduceSinkDeduplicateProcCtx dedupCtx)
        throws SemanticException {
      JoinOperator pJoin =
          CorrelationUtilities.findPossibleParent(cRS, JoinOperator.class, dedupCtx.trustScript());
      if (pJoin != null && merge(cRS, pJoin, dedupCtx.minReducer())) {
        pJoin.getConf().setFixedAsSorted(true);
        CorrelationUtilities.replaceReduceSinkWithSelectOperator(cRS, dedupCtx.getPctx(), dedupCtx);
        ReduceSinkOperator pRS =
            CorrelationUtilities.findPossibleParent(
                pJoin, ReduceSinkOperator.class, dedupCtx.trustScript());
        if (pRS != null) {
          pRS.getConf().setDeduplicated(true);
        }
        return true;
      }
      return false;
    }

    // pRS-pJOIN-cRS-cGBY
    @Override
    public Object process(
        ReduceSinkOperator cRS, GroupByOperator cGBY, ReduceSinkDeduplicateProcCtx dedupCtx)
        throws SemanticException {
      Operator<?> start = CorrelationUtilities.getStartForGroupBy(cRS, dedupCtx);
      JoinOperator pJoin =
          CorrelationUtilities.findPossibleParent(
              start, JoinOperator.class, dedupCtx.trustScript());
      if (pJoin != null && merge(cRS, pJoin, dedupCtx.minReducer())) {
        pJoin.getConf().setFixedAsSorted(true);
        CorrelationUtilities.removeReduceSinkForGroupBy(cRS, cGBY, dedupCtx.getPctx(), dedupCtx);
        ReduceSinkOperator pRS =
            CorrelationUtilities.findPossibleParent(
                pJoin, ReduceSinkOperator.class, dedupCtx.trustScript());
        if (pRS != null) {
          pRS.getConf().setDeduplicated(true);
        }
        return true;
      }
      return false;
    }
  }

  static class ReducerReducerProc extends AbsctractReducerReducerProc {

    // pRS-cRS
    @Override
    public Object process(ReduceSinkOperator cRS, ReduceSinkDeduplicateProcCtx dedupCtx)
        throws SemanticException {
      ReduceSinkOperator pRS =
          CorrelationUtilities.findPossibleParent(
              cRS, ReduceSinkOperator.class, dedupCtx.trustScript());
      if (pRS != null && merge(cRS, pRS, dedupCtx.minReducer())) {
        CorrelationUtilities.replaceReduceSinkWithSelectOperator(cRS, dedupCtx.getPctx(), dedupCtx);
        pRS.getConf().setDeduplicated(true);
        return true;
      }
      return false;
    }

    // pRS-cRS-cGBY
    @Override
    public Object process(
        ReduceSinkOperator cRS, GroupByOperator cGBY, ReduceSinkDeduplicateProcCtx dedupCtx)
        throws SemanticException {
      Operator<?> start = CorrelationUtilities.getStartForGroupBy(cRS, dedupCtx);
      ReduceSinkOperator pRS =
          CorrelationUtilities.findPossibleParent(
              start, ReduceSinkOperator.class, dedupCtx.trustScript());
      if (pRS != null && merge(cRS, pRS, dedupCtx.minReducer())) {
        if (dedupCtx.getPctx().getConf().getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)) {
          return false;
        }
        CorrelationUtilities.removeReduceSinkForGroupBy(cRS, cGBY, dedupCtx.getPctx(), dedupCtx);
        pRS.getConf().setDeduplicated(true);
        return true;
      }
      return false;
    }
  }
}
コード例 #4
0
 public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
     throws SemanticException {
   RewriteCanApplyCtx canApplyCtx = (RewriteCanApplyCtx) ctx;
   for (Node node : stack) {
     // For table scan operator,
     // check ReferencedColumns to make sure that only the index column is
     // selected for the following operators.
     if (node instanceof TableScanOperator) {
       TableScanOperator ts = (TableScanOperator) node;
       canApplyCtx.setTableScanOperator(ts);
       List<String> selectColumns = ts.getConf().getReferencedColumns();
       if (selectColumns == null || selectColumns.size() != 1) {
         canApplyCtx.setSelClauseColsFetchException(true);
         return null;
       } else {
         canApplyCtx.setIndexKey(selectColumns.get(0));
       }
     } else if (node instanceof SelectOperator) {
       // For select operators in the stack, we just add them
       if (canApplyCtx.getSelectOperators() == null) {
         canApplyCtx.setSelectOperators(new ArrayList<SelectOperator>());
       }
       canApplyCtx.getSelectOperators().add((SelectOperator) node);
     } else if (node instanceof GroupByOperator) {
       if (canApplyCtx.getGroupByOperators() == null) {
         canApplyCtx.setGroupByOperators(new ArrayList<GroupByOperator>());
       }
       // According to the pre-order,
       // the first GroupbyOperator is the one before RS
       // and the second one is the one after RS
       GroupByOperator operator = (GroupByOperator) node;
       canApplyCtx.getGroupByOperators().add(operator);
       if (!canApplyCtx.isQueryHasGroupBy()) {
         canApplyCtx.setQueryHasGroupBy(true);
         GroupByDesc conf = operator.getConf();
         List<AggregationDesc> aggrList = conf.getAggregators();
         if (aggrList == null
             || aggrList.size() != 1
             || !("count".equals(aggrList.get(0).getGenericUDAFName()))) {
           // In the current implementation, we make sure that only count is
           // in the function
           canApplyCtx.setAggFuncIsNotCount(true);
           return null;
         } else {
           List<ExprNodeDesc> para = aggrList.get(0).getParameters();
           if (para == null || para.size() == 0 || para.size() > 1) {
             canApplyCtx.setAggParameterException(true);
             return null;
           } else {
             ExprNodeDesc expr =
                 ExprNodeDescUtils.backtrack(
                     para.get(0), operator, (Operator<OperatorDesc>) stack.get(0));
             if (!(expr instanceof ExprNodeColumnDesc)) {
               canApplyCtx.setAggParameterException(true);
               return null;
             }
           }
         }
       }
     }
   }
   return null;
 }