@Override
 public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
     throws SemanticException {
   JoinOperator op = (JoinOperator) nd;
   pruneJoinOperator(ctx, op, op.getConf(), op.getColumnExprMap(), null, false);
   return null;
 }
Example #2
0
  /**
   * Find all big tables from STREAMTABLE hints.
   *
   * @param joinCtx The join context
   * @return Set of all big tables
   */
  private Set<String> getBigTables(ParseContext joinCtx) {
    Set<String> bigTables = new HashSet<String>();

    for (JoinOperator joinOp : joinCtx.getJoinOps()) {
      if (joinOp.getConf().getStreamAliases() != null) {
        bigTables.addAll(joinOp.getConf().getStreamAliases());
      }
    }

    return bigTables;
  }
Example #3
0
    /*
     * Get the list of table scan operators for this join. A interface supportSkewJoinOptimization
     * has been provided. Currently, it is only enabled for simple filters and selects.
     */
    private boolean getTableScanOpsForJoin(JoinOperator op, List<TableScanOperator> tsOps) {

      for (Operator<? extends OperatorDesc> parent : op.getParentOperators()) {
        if (!getTableScanOps(parent, tsOps)) {
          return false;
        }
      }
      return true;
    }
 // pRS-pJOIN-cRS
 @Override
 public Object process(ReduceSinkOperator cRS, ReduceSinkDeduplicateProcCtx dedupCtx)
     throws SemanticException {
   JoinOperator pJoin =
       CorrelationUtilities.findPossibleParent(cRS, JoinOperator.class, dedupCtx.trustScript());
   if (pJoin != null && merge(cRS, pJoin, dedupCtx.minReducer())) {
     pJoin.getConf().setFixedAsSorted(true);
     CorrelationUtilities.replaceReduceSinkWithSelectOperator(cRS, dedupCtx.getPctx(), dedupCtx);
     ReduceSinkOperator pRS =
         CorrelationUtilities.findPossibleParent(
             pJoin, ReduceSinkOperator.class, dedupCtx.trustScript());
     if (pRS != null) {
       pRS.getConf().setDeduplicated(true);
     }
     return true;
   }
   return false;
 }
    // for JOIN-RS case, it's not possible generally to merge if child has
    // less key/partition columns than parents
    protected boolean merge(ReduceSinkOperator cRS, JoinOperator pJoin, int minReducer)
        throws SemanticException {
      List<Operator<?>> parents = pJoin.getParentOperators();
      ReduceSinkOperator[] pRSs = parents.toArray(new ReduceSinkOperator[parents.size()]);
      ReduceSinkDesc cRSc = cRS.getConf();
      ReduceSinkDesc pRS0c = pRSs[0].getConf();
      if (cRSc.getKeyCols().size() < pRS0c.getKeyCols().size()) {
        return false;
      }
      if (cRSc.getPartitionCols().size() != pRS0c.getPartitionCols().size()) {
        return false;
      }
      Integer moveReducerNumTo = checkNumReducer(cRSc.getNumReducers(), pRS0c.getNumReducers());
      if (moveReducerNumTo == null || moveReducerNumTo > 0 && cRSc.getNumReducers() < minReducer) {
        return false;
      }

      Integer moveRSOrderTo = checkOrder(cRSc.getOrder(), pRS0c.getOrder());
      if (moveRSOrderTo == null) {
        return false;
      }

      boolean[] sorted = CorrelationUtilities.getSortedTags(pJoin);

      int cKeySize = cRSc.getKeyCols().size();
      for (int i = 0; i < cKeySize; i++) {
        ExprNodeDesc cexpr = cRSc.getKeyCols().get(i);
        ExprNodeDesc[] pexprs = new ExprNodeDesc[pRSs.length];
        for (int tag = 0; tag < pRSs.length; tag++) {
          pexprs[tag] = pRSs[tag].getConf().getKeyCols().get(i);
        }
        int found = CorrelationUtilities.indexOf(cexpr, pexprs, cRS, pRSs, sorted);
        if (found != i) {
          return false;
        }
      }
      int cPartSize = cRSc.getPartitionCols().size();
      for (int i = 0; i < cPartSize; i++) {
        ExprNodeDesc cexpr = cRSc.getPartitionCols().get(i);
        ExprNodeDesc[] pexprs = new ExprNodeDesc[pRSs.length];
        for (int tag = 0; tag < pRSs.length; tag++) {
          pexprs[tag] = pRSs[tag].getConf().getPartitionCols().get(i);
        }
        int found = CorrelationUtilities.indexOf(cexpr, pexprs, cRS, pRSs, sorted);
        if (found != i) {
          return false;
        }
      }

      if (moveReducerNumTo > 0) {
        for (ReduceSinkOperator pRS : pRSs) {
          pRS.getConf().setNumReducers(cRS.getConf().getNumReducers());
        }
      }
      return true;
    }
 // pRS-pJOIN-cRS-cGBY
 @Override
 public Object process(
     ReduceSinkOperator cRS, GroupByOperator cGBY, ReduceSinkDeduplicateProcCtx dedupCtx)
     throws SemanticException {
   Operator<?> start = CorrelationUtilities.getStartForGroupBy(cRS, dedupCtx);
   JoinOperator pJoin =
       CorrelationUtilities.findPossibleParent(
           start, JoinOperator.class, dedupCtx.trustScript());
   if (pJoin != null && merge(cRS, pJoin, dedupCtx.minReducer())) {
     pJoin.getConf().setFixedAsSorted(true);
     CorrelationUtilities.removeReduceSinkForGroupBy(cRS, cGBY, dedupCtx.getPctx(), dedupCtx);
     ReduceSinkOperator pRS =
         CorrelationUtilities.findPossibleParent(
             pJoin, ReduceSinkOperator.class, dedupCtx.trustScript());
     if (pRS != null) {
       pRS.getConf().setDeduplicated(true);
     }
     return true;
   }
   return false;
 }
Example #7
0
  /**
   * Reorder the tables in a join operator appropriately (by reordering the tags of the reduces
   * sinks).
   *
   * @param joinOp The join operator to be processed
   * @param bigTables Set of all big tables
   */
  private void reorder(JoinOperator joinOp, Set<String> bigTables) {
    int count = joinOp.getParentOperators().size();

    // Find the biggest reduce sink
    int biggestPos = count - 1;
    int biggestSize = getOutputSize(joinOp.getParentOperators().get(biggestPos), bigTables);
    for (int i = 0; i < count - 1; i++) {
      int currSize = getOutputSize(joinOp.getParentOperators().get(i), bigTables);
      if (currSize > biggestSize) {
        biggestSize = currSize;
        biggestPos = i;
      }
    }

    // Reorder tags if need be
    if (biggestPos != (count - 1)) {
      Byte[] tagOrder = joinOp.getConf().getTagOrder();
      Byte temp = tagOrder[biggestPos];
      tagOrder[biggestPos] = tagOrder[count - 1];
      tagOrder[count - 1] = temp;

      // Update tags of reduce sinks
      ((ReduceSinkOperator) joinOp.getParentOperators().get(biggestPos))
          .getConf()
          .setTag(count - 1);
      ((ReduceSinkOperator) joinOp.getParentOperators().get(count - 1))
          .getConf()
          .setTag(biggestPos);
    }
  }
  @Override
  public ParseContext transform(ParseContext pctx) throws SemanticException {
    pGraphContext = pctx;

    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(
        new RuleRegExp(
            "R1",
            "("
                + FilterOperator.getOperatorName()
                + "%"
                + ReduceSinkOperator.getOperatorName()
                + "%"
                + JoinOperator.getOperatorName()
                + "%)"),
        new JoinTransitive());

    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    TransitiveContext context = new TransitiveContext();
    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, context);
    GraphWalker ogw = new LevelOrderWalker(disp, 2);

    // Create a list of topop nodes
    List<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pGraphContext.getTopOps().values());
    ogw.startWalking(topNodes, null);

    Map<ReduceSinkOperator, List<ExprNodeDesc>> newFilters = context.getNewfilters();

    // insert new filter between RS and parent of RS
    for (Map.Entry<ReduceSinkOperator, List<ExprNodeDesc>> entry : newFilters.entrySet()) {
      ReduceSinkOperator reducer = entry.getKey();
      Operator<?> parent = reducer.getParentOperators().get(0);

      List<ExprNodeDesc> exprs = entry.getValue();
      if (parent instanceof FilterOperator) {
        exprs = ExprNodeDescUtils.split(((FilterOperator) parent).getConf().getPredicate(), exprs);
        ExprNodeDesc merged = ExprNodeDescUtils.mergePredicates(exprs);
        ((FilterOperator) parent).getConf().setPredicate(merged);
      } else {
        ExprNodeDesc merged = ExprNodeDescUtils.mergePredicates(exprs);
        RowSchema parentRS = parent.getSchema();
        Operator<FilterDesc> newFilter = createFilter(reducer, parent, parentRS, merged);
      }
    }

    return pGraphContext;
  }
Example #9
0
    @Override
    public Object process(
        Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs)
        throws SemanticException {
      // We should be having a tree which looks like this
      //  TS -> * -> RS -
      //                  \
      //                   -> JOIN -> ..
      //                  /
      //  TS -> * -> RS -
      //
      // We are in the join operator now.

      SkewJoinOptProcCtx ctx = (SkewJoinOptProcCtx) procCtx;
      parseContext = ctx.getpGraphContext();

      JoinOperator joinOp = (JoinOperator) nd;
      // This join has already been processed
      if (ctx.getDoneJoins().contains(joinOp)) {
        return null;
      }

      ctx.getDoneJoins().add(joinOp);

      Operator<? extends OperatorDesc> currOp = joinOp;
      boolean processSelect = false;

      // Is there a select following
      // Clone the select also. It is useful for a follow-on optimization where the union
      // followed by a select star is completely removed.
      if ((joinOp.getChildOperators().size() == 1)
          && (joinOp.getChildOperators().get(0) instanceof SelectOperator)) {
        currOp = joinOp.getChildOperators().get(0);
        processSelect = true;
      }

      List<TableScanOperator> tableScanOpsForJoin = new ArrayList<TableScanOperator>();
      if (!getTableScanOpsForJoin(joinOp, tableScanOpsForJoin)) {
        return null;
      }

      if ((tableScanOpsForJoin == null) || (tableScanOpsForJoin.isEmpty())) {
        return null;
      }

      // Get the skewed values in all the tables
      Map<List<ExprNodeDesc>, List<List<String>>> skewedValues =
          getSkewedValues(joinOp, tableScanOpsForJoin);

      // If there are no skewed values, nothing needs to be done
      if (skewedValues == null || skewedValues.size() == 0) {
        return null;
      }

      // After this optimization, the tree should be like:
      //  TS -> (FIL "skewed rows") * -> RS -
      //                                     \
      //                                       ->   JOIN
      //                                     /           \
      //  TS -> (FIL "skewed rows") * -> RS -             \
      //                                                   \
      //                                                     ->  UNION -> ..
      //                                                   /
      //  TS -> (FIL "no skewed rows") * -> RS -          /
      //                                        \        /
      //                                         -> JOIN
      //                                        /
      //  TS -> (FIL "no skewed rows") * -> RS -
      //

      // Create a clone of the operator
      Operator<? extends OperatorDesc> currOpClone;
      try {
        currOpClone = currOp.clone();
        insertRowResolvers(currOp, currOpClone, ctx);
      } catch (CloneNotSupportedException e) {
        LOG.debug("Operator tree could not be cloned");
        return null;
      }

      JoinOperator joinOpClone;
      if (processSelect) {
        joinOpClone = (JoinOperator) (currOpClone.getParentOperators().get(0));
      } else {
        joinOpClone = (JoinOperator) currOpClone;
      }
      joinOpClone.getConf().cloneQBJoinTreeProps(joinOp.getConf());
      parseContext.getJoinOps().add(joinOpClone);

      List<TableScanOperator> tableScanCloneOpsForJoin = new ArrayList<TableScanOperator>();
      if (!getTableScanOpsForJoin(joinOpClone, tableScanCloneOpsForJoin)) {
        LOG.debug("Operator tree not properly cloned!");
        return null;
      }

      // Put the filter "skewed column = skewed keys" in op
      // and "skewed columns != skewed keys" in selectOpClone
      insertSkewFilter(tableScanOpsForJoin, skewedValues, true);

      insertSkewFilter(tableScanCloneOpsForJoin, skewedValues, false);

      // Update the topOps appropriately
      Map<String, Operator<? extends OperatorDesc>> topOps = getTopOps(joinOpClone);
      Map<String, Operator<? extends OperatorDesc>> origTopOps = parseContext.getTopOps();

      for (Entry<String, Operator<? extends OperatorDesc>> topOp : topOps.entrySet()) {
        TableScanOperator tso = (TableScanOperator) topOp.getValue();
        String tabAlias = tso.getConf().getAlias();
        int initCnt = 1;
        String newAlias = "subquery" + initCnt + ":" + tabAlias;
        while (origTopOps.containsKey(newAlias)) {
          initCnt++;
          newAlias = "subquery" + initCnt + ":" + tabAlias;
        }

        parseContext.getTopOps().put(newAlias, tso);
        setUpAlias(joinOp, joinOpClone, tabAlias, newAlias, tso);
      }

      // Now do a union of the select operators: selectOp and selectOpClone
      // Store the operator that follows the select after the join, we will be
      // adding this as a child to the Union later
      List<Operator<? extends OperatorDesc>> finalOps = currOp.getChildOperators();
      currOp.setChildOperators(null);
      currOpClone.setChildOperators(null);

      // Make the union operator
      List<Operator<? extends OperatorDesc>> oplist =
          new ArrayList<Operator<? extends OperatorDesc>>();
      oplist.add(currOp);
      oplist.add(currOpClone);
      Operator<? extends OperatorDesc> unionOp =
          OperatorFactory.getAndMakeChild(
              new UnionDesc(), new RowSchema(currOp.getSchema().getSignature()), oplist);

      // Introduce a select after the union
      List<Operator<? extends OperatorDesc>> unionList =
          new ArrayList<Operator<? extends OperatorDesc>>();
      unionList.add(unionOp);

      Operator<? extends OperatorDesc> selectUnionOp =
          OperatorFactory.getAndMakeChild(
              new SelectDesc(true), new RowSchema(unionOp.getSchema().getSignature()), unionList);

      // add the finalOp after the union
      selectUnionOp.setChildOperators(finalOps);
      // replace the original selectOp in the parents with selectUnionOp
      for (Operator<? extends OperatorDesc> finalOp : finalOps) {
        finalOp.replaceParent(currOp, selectUnionOp);
      }
      return null;
    }
Example #10
0
 /** Set alias in the cloned join tree */
 private static void setUpAlias(
     JoinOperator origin,
     JoinOperator cloned,
     String origAlias,
     String newAlias,
     Operator<? extends OperatorDesc> topOp) {
   cloned.getConf().getAliasToOpInfo().remove(origAlias);
   cloned.getConf().getAliasToOpInfo().put(newAlias, topOp);
   if (origin.getConf().getLeftAlias().equals(origAlias)) {
     cloned.getConf().setLeftAlias(null);
     cloned.getConf().setLeftAlias(newAlias);
   }
   replaceAlias(
       origin.getConf().getLeftAliases(),
       cloned.getConf().getLeftAliases(),
       origAlias,
       newAlias);
   replaceAlias(
       origin.getConf().getRightAliases(),
       cloned.getConf().getRightAliases(),
       origAlias,
       newAlias);
   replaceAlias(
       origin.getConf().getBaseSrc(), cloned.getConf().getBaseSrc(), origAlias, newAlias);
   replaceAlias(
       origin.getConf().getMapAliases(), cloned.getConf().getMapAliases(), origAlias, newAlias);
   replaceAlias(
       origin.getConf().getStreamAliases(),
       cloned.getConf().getStreamAliases(),
       origAlias,
       newAlias);
 }
/**
 * If two reducer sink operators share the same partition/sort columns and order, they can be
 * merged. This should happen after map join optimization because map join optimization will remove
 * reduce sink operators.
 *
 * <p>This optimizer removes/replaces child-RS (not parent) which is safer way for
 * DefaultGraphWalker.
 */
public class ReduceSinkDeDuplication extends Transform {

  private static final String RS = ReduceSinkOperator.getOperatorName();
  private static final String GBY = GroupByOperator.getOperatorName();
  private static final String JOIN = JoinOperator.getOperatorName();

  protected ParseContext pGraphContext;

  @Override
  public ParseContext transform(ParseContext pctx) throws SemanticException {
    pGraphContext = pctx;

    // generate pruned column list for all relevant operators
    ReduceSinkDeduplicateProcCtx cppCtx = new ReduceSinkDeduplicateProcCtx(pGraphContext);

    // for auto convert map-joins, it not safe to dedup in here (todo)
    boolean mergeJoins =
        !pctx.getConf().getBoolVar(HIVECONVERTJOIN)
            && !pctx.getConf().getBoolVar(HIVECONVERTJOINNOCONDITIONALTASK)
            && !pctx.getConf().getBoolVar(ConfVars.HIVE_CONVERT_JOIN_BUCKET_MAPJOIN_TEZ)
            && !pctx.getConf().getBoolVar(ConfVars.HIVEDYNAMICPARTITIONHASHJOIN);

    // If multiple rules can be matched with same cost, last rule will be choosen as a processor
    // see DefaultRuleDispatcher#dispatch()
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(
        new RuleRegExp("R1", RS + "%.*%" + RS + "%"),
        ReduceSinkDeduplicateProcFactory.getReducerReducerProc());
    opRules.put(
        new RuleRegExp("R2", RS + "%" + GBY + "%.*%" + RS + "%"),
        ReduceSinkDeduplicateProcFactory.getGroupbyReducerProc());
    if (mergeJoins) {
      opRules.put(
          new RuleRegExp("R3", JOIN + "%.*%" + RS + "%"),
          ReduceSinkDeduplicateProcFactory.getJoinReducerProc());
    }
    // TODO RS+JOIN

    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    Dispatcher disp =
        new DefaultRuleDispatcher(
            ReduceSinkDeduplicateProcFactory.getDefaultProc(), opRules, cppCtx);
    GraphWalker ogw = new DefaultGraphWalker(disp);

    // Create a list of topop nodes
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pGraphContext.getTopOps().values());
    ogw.startWalking(topNodes, null);
    return pGraphContext;
  }

  protected class ReduceSinkDeduplicateProcCtx extends AbstractCorrelationProcCtx {

    public ReduceSinkDeduplicateProcCtx(ParseContext pctx) {
      super(pctx);
    }
  }

  static class ReduceSinkDeduplicateProcFactory {

    public static NodeProcessor getReducerReducerProc() {
      return new ReducerReducerProc();
    }

    public static NodeProcessor getGroupbyReducerProc() {
      return new GroupbyReducerProc();
    }

    public static NodeProcessor getJoinReducerProc() {
      return new JoinReducerProc();
    }

    public static NodeProcessor getDefaultProc() {
      return new DefaultProc();
    }
  }

  /*
   * do nothing.
   */
  static class DefaultProc implements NodeProcessor {
    @Override
    public Object process(
        Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs)
        throws SemanticException {
      return null;
    }
  }

  public abstract static class AbsctractReducerReducerProc implements NodeProcessor {

    @Override
    public Object process(
        Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs)
        throws SemanticException {
      ReduceSinkDeduplicateProcCtx dedupCtx = (ReduceSinkDeduplicateProcCtx) procCtx;
      if (dedupCtx.hasBeenRemoved((Operator<?>) nd)) {
        return false;
      }
      ReduceSinkOperator cRS = (ReduceSinkOperator) nd;
      Operator<?> child = CorrelationUtilities.getSingleChild(cRS);
      if (child instanceof JoinOperator) {
        return false; // not supported
      }
      if (child instanceof GroupByOperator) {
        GroupByOperator cGBY = (GroupByOperator) child;
        if (!CorrelationUtilities.hasGroupingSet(cRS) && !cGBY.getConf().isGroupingSetsPresent()) {
          return process(cRS, cGBY, dedupCtx);
        }
        return false;
      }
      if (child instanceof SelectOperator) {
        return process(cRS, dedupCtx);
      }
      return false;
    }

    protected abstract Object process(ReduceSinkOperator cRS, ReduceSinkDeduplicateProcCtx dedupCtx)
        throws SemanticException;

    protected abstract Object process(
        ReduceSinkOperator cRS, GroupByOperator cGBY, ReduceSinkDeduplicateProcCtx dedupCtx)
        throws SemanticException;

    // for JOIN-RS case, it's not possible generally to merge if child has
    // less key/partition columns than parents
    protected boolean merge(ReduceSinkOperator cRS, JoinOperator pJoin, int minReducer)
        throws SemanticException {
      List<Operator<?>> parents = pJoin.getParentOperators();
      ReduceSinkOperator[] pRSs = parents.toArray(new ReduceSinkOperator[parents.size()]);
      ReduceSinkDesc cRSc = cRS.getConf();
      ReduceSinkDesc pRS0c = pRSs[0].getConf();
      if (cRSc.getKeyCols().size() < pRS0c.getKeyCols().size()) {
        return false;
      }
      if (cRSc.getPartitionCols().size() != pRS0c.getPartitionCols().size()) {
        return false;
      }
      Integer moveReducerNumTo = checkNumReducer(cRSc.getNumReducers(), pRS0c.getNumReducers());
      if (moveReducerNumTo == null || moveReducerNumTo > 0 && cRSc.getNumReducers() < minReducer) {
        return false;
      }

      Integer moveRSOrderTo = checkOrder(cRSc.getOrder(), pRS0c.getOrder());
      if (moveRSOrderTo == null) {
        return false;
      }

      boolean[] sorted = CorrelationUtilities.getSortedTags(pJoin);

      int cKeySize = cRSc.getKeyCols().size();
      for (int i = 0; i < cKeySize; i++) {
        ExprNodeDesc cexpr = cRSc.getKeyCols().get(i);
        ExprNodeDesc[] pexprs = new ExprNodeDesc[pRSs.length];
        for (int tag = 0; tag < pRSs.length; tag++) {
          pexprs[tag] = pRSs[tag].getConf().getKeyCols().get(i);
        }
        int found = CorrelationUtilities.indexOf(cexpr, pexprs, cRS, pRSs, sorted);
        if (found != i) {
          return false;
        }
      }
      int cPartSize = cRSc.getPartitionCols().size();
      for (int i = 0; i < cPartSize; i++) {
        ExprNodeDesc cexpr = cRSc.getPartitionCols().get(i);
        ExprNodeDesc[] pexprs = new ExprNodeDesc[pRSs.length];
        for (int tag = 0; tag < pRSs.length; tag++) {
          pexprs[tag] = pRSs[tag].getConf().getPartitionCols().get(i);
        }
        int found = CorrelationUtilities.indexOf(cexpr, pexprs, cRS, pRSs, sorted);
        if (found != i) {
          return false;
        }
      }

      if (moveReducerNumTo > 0) {
        for (ReduceSinkOperator pRS : pRSs) {
          pRS.getConf().setNumReducers(cRS.getConf().getNumReducers());
        }
      }
      return true;
    }

    /**
     * Current RSDedup remove/replace child RS. For key columns, sorting order, and the number of
     * reducers, copy more specific part of configurations of child RS to that of parent RS. For
     * partitioning columns, if both child RS and parent RS have been assigned partitioning columns,
     * we will choose the more general partitioning columns. If parent RS has not been assigned any
     * partitioning column, we will use partitioning columns (if exist) of child RS.
     */
    protected boolean merge(ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minReducer)
        throws SemanticException {
      int[] result = checkStatus(cRS, pRS, minReducer);
      if (result == null) {
        return false;
      }

      if (result[0] > 0) {
        // The sorting columns of the child RS are more specific than
        // those of the parent RS. Assign sorting columns of the child RS
        // to the parent RS.
        List<ExprNodeDesc> childKCs = cRS.getConf().getKeyCols();
        pRS.getConf().setKeyCols(ExprNodeDescUtils.backtrack(childKCs, cRS, pRS));
      }

      if (result[1] < 0) {
        // The partitioning columns of the parent RS are more specific than
        // those of the child RS.
        List<ExprNodeDesc> childPCs = cRS.getConf().getPartitionCols();
        if (childPCs != null && !childPCs.isEmpty()) {
          // If partitioning columns of the child RS are assigned,
          // assign these to the partitioning columns of the parent RS.
          pRS.getConf().setPartitionCols(ExprNodeDescUtils.backtrack(childPCs, cRS, pRS));
        }
      } else if (result[1] > 0) {
        // The partitioning columns of the child RS are more specific than
        // those of the parent RS.
        List<ExprNodeDesc> parentPCs = pRS.getConf().getPartitionCols();
        if (parentPCs == null || parentPCs.isEmpty()) {
          // If partitioning columns of the parent RS are not assigned,
          // assign partitioning columns of the child RS to the parent RS.
          ArrayList<ExprNodeDesc> childPCs = cRS.getConf().getPartitionCols();
          pRS.getConf().setPartitionCols(ExprNodeDescUtils.backtrack(childPCs, cRS, pRS));
        }
      }

      if (result[2] > 0) {
        // The sorting order of the child RS is more specific than
        // that of the parent RS. Assign the sorting order of the child RS
        // to the parent RS.
        if (result[0] <= 0) {
          // Sorting columns of the parent RS are more specific than those of the
          // child RS but Sorting order of the child RS is more specific than
          // that of the parent RS.
          throw new SemanticException(
              "Sorting columns and order don't match. "
                  + "Try set "
                  + HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION
                  + "=false;");
        }
        pRS.getConf().setOrder(cRS.getConf().getOrder());
      }

      if (result[3] > 0) {
        // The number of reducers of the child RS is more specific than
        // that of the parent RS. Assign the number of reducers of the child RS
        // to the parent RS.
        pRS.getConf().setNumReducers(cRS.getConf().getNumReducers());
      }

      if (result[4] > 0) {
        // This case happens only when pRS key is empty in which case we can use
        // number of distribution keys and key serialization info from cRS
        pRS.getConf().setNumDistributionKeys(cRS.getConf().getNumDistributionKeys());
        List<FieldSchema> fields =
            PlanUtils.getFieldSchemasFromColumnList(pRS.getConf().getKeyCols(), "reducesinkkey");
        TableDesc keyTable = PlanUtils.getReduceKeyTableDesc(fields, pRS.getConf().getOrder());
        ArrayList<String> outputKeyCols = Lists.newArrayList();
        for (int i = 0; i < fields.size(); i++) {
          outputKeyCols.add(fields.get(i).getName());
        }
        pRS.getConf().setOutputKeyColumnNames(outputKeyCols);
        pRS.getConf().setKeySerializeInfo(keyTable);
      }
      return true;
    }

    /**
     * Returns merge directions between two RSs for criterias (ordering, number of reducers, reducer
     * keys, partition keys). Returns null if any of categories is not mergeable.
     *
     * <p>Values for each index can be -1, 0, 1 1. 0 means two configuration in the category is the
     * same 2. for -1, configuration of parent RS is more specific than child RS 3. for 1,
     * configuration of child RS is more specific than parent RS
     */
    private int[] checkStatus(ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minReducer)
        throws SemanticException {
      ReduceSinkDesc cConf = cRS.getConf();
      ReduceSinkDesc pConf = pRS.getConf();
      Integer moveRSOrderTo = checkOrder(cConf.getOrder(), pConf.getOrder());
      if (moveRSOrderTo == null) {
        return null;
      }
      Integer moveReducerNumTo = checkNumReducer(cConf.getNumReducers(), pConf.getNumReducers());
      if (moveReducerNumTo == null || moveReducerNumTo > 0 && cConf.getNumReducers() < minReducer) {
        return null;
      }
      List<ExprNodeDesc> ckeys = cConf.getKeyCols();
      List<ExprNodeDesc> pkeys = pConf.getKeyCols();
      Integer moveKeyColTo = checkExprs(ckeys, pkeys, cRS, pRS);
      if (moveKeyColTo == null) {
        return null;
      }
      List<ExprNodeDesc> cpars = cConf.getPartitionCols();
      List<ExprNodeDesc> ppars = pConf.getPartitionCols();
      Integer movePartitionColTo = checkExprs(cpars, ppars, cRS, pRS);
      if (movePartitionColTo == null) {
        return null;
      }
      Integer moveNumDistKeyTo =
          checkNumDistributionKey(cConf.getNumDistributionKeys(), pConf.getNumDistributionKeys());
      return new int[] {
        moveKeyColTo, movePartitionColTo, moveRSOrderTo, moveReducerNumTo, moveNumDistKeyTo
      };
    }

    private Integer checkNumDistributionKey(int cnd, int pnd) {
      // number of distribution keys of cRS is chosen only when numDistKeys of pRS
      // is 0 or less. In all other cases, distribution of the keys is based on
      // the pRS which is more generic than cRS.
      // Examples:
      // case 1: if pRS sort key is (a, b) and cRS sort key is (a, b, c) and number of
      // distribution keys are 2 and 3 resp. then after merge the sort keys will
      // be (a, b, c) while the number of distribution keys will be 2.
      // case 2: if pRS sort key is empty and number of distribution keys is 0
      // and if cRS sort key is (a, b) and number of distribution keys is 2 then
      // after merge new sort key will be (a, b) and number of distribution keys
      // will be 2.
      if (pnd <= 0) {
        return 1;
      }
      return 0;
    }

    /**
     * Overlapping part of keys should be the same between parent and child. And if child has more
     * keys than parent, non-overlapping part of keys should be backtrackable to parent.
     */
    private Integer checkExprs(
        List<ExprNodeDesc> ckeys,
        List<ExprNodeDesc> pkeys,
        ReduceSinkOperator cRS,
        ReduceSinkOperator pRS)
        throws SemanticException {
      Integer moveKeyColTo = 0;
      if (ckeys == null || ckeys.isEmpty()) {
        if (pkeys != null && !pkeys.isEmpty()) {
          moveKeyColTo = -1;
        }
      } else {
        if (pkeys == null || pkeys.isEmpty()) {
          for (ExprNodeDesc ckey : ckeys) {
            if (ExprNodeDescUtils.backtrack(ckey, cRS, pRS) == null) {
              // cKey is not present in parent
              return null;
            }
          }
          moveKeyColTo = 1;
        } else {
          moveKeyColTo = sameKeys(ckeys, pkeys, cRS, pRS);
        }
      }
      return moveKeyColTo;
    }

    // backtrack key exprs of child to parent and compare it with parent's
    protected Integer sameKeys(
        List<ExprNodeDesc> cexprs, List<ExprNodeDesc> pexprs, Operator<?> child, Operator<?> parent)
        throws SemanticException {
      int common = Math.min(cexprs.size(), pexprs.size());
      int limit = Math.max(cexprs.size(), pexprs.size());
      int i = 0;
      for (; i < common; i++) {
        ExprNodeDesc pexpr = pexprs.get(i);
        ExprNodeDesc cexpr = ExprNodeDescUtils.backtrack(cexprs.get(i), child, parent);
        if (cexpr == null || !pexpr.isSame(cexpr)) {
          return null;
        }
      }
      for (; i < limit; i++) {
        if (cexprs.size() > pexprs.size()) {
          if (ExprNodeDescUtils.backtrack(cexprs.get(i), child, parent) == null) {
            // cKey is not present in parent
            return null;
          }
        }
      }
      return Integer.valueOf(cexprs.size()).compareTo(pexprs.size());
    }

    // order of overlapping keys should be exactly the same
    protected Integer checkOrder(String corder, String porder) {
      if (corder == null || corder.trim().equals("")) {
        if (porder == null || porder.trim().equals("")) {
          return 0;
        }
        return -1;
      }
      if (porder == null || porder.trim().equals("")) {
        return 1;
      }
      corder = corder.trim();
      porder = porder.trim();
      int target = Math.min(corder.length(), porder.length());
      if (!corder.substring(0, target).equals(porder.substring(0, target))) {
        return null;
      }
      return Integer.valueOf(corder.length()).compareTo(porder.length());
    }

    /**
     * If number of reducers for RS is -1, the RS can have any number of reducers. It's generally
     * true except for order-by or forced bucketing cases. if both of num-reducers are not -1, those
     * number should be the same.
     */
    protected Integer checkNumReducer(int creduce, int preduce) {
      if (creduce < 0) {
        if (preduce < 0) {
          return 0;
        }
        return -1;
      }
      if (preduce < 0) {
        return 1;
      }
      if (creduce != preduce) {
        return null;
      }
      return 0;
    }
  }

  static class GroupbyReducerProc extends AbsctractReducerReducerProc {

    // pRS-pGBY-cRS
    @Override
    public Object process(ReduceSinkOperator cRS, ReduceSinkDeduplicateProcCtx dedupCtx)
        throws SemanticException {
      GroupByOperator pGBY =
          CorrelationUtilities.findPossibleParent(
              cRS, GroupByOperator.class, dedupCtx.trustScript());
      if (pGBY == null) {
        return false;
      }
      ReduceSinkOperator pRS =
          CorrelationUtilities.findPossibleParent(
              pGBY, ReduceSinkOperator.class, dedupCtx.trustScript());
      if (pRS != null && merge(cRS, pRS, dedupCtx.minReducer())) {
        CorrelationUtilities.replaceReduceSinkWithSelectOperator(cRS, dedupCtx.getPctx(), dedupCtx);
        pRS.getConf().setDeduplicated(true);
        return true;
      }
      return false;
    }

    // pRS-pGBY-cRS-cGBY
    @Override
    public Object process(
        ReduceSinkOperator cRS, GroupByOperator cGBY, ReduceSinkDeduplicateProcCtx dedupCtx)
        throws SemanticException {
      Operator<?> start = CorrelationUtilities.getStartForGroupBy(cRS, dedupCtx);
      GroupByOperator pGBY =
          CorrelationUtilities.findPossibleParent(
              start, GroupByOperator.class, dedupCtx.trustScript());
      if (pGBY == null) {
        return false;
      }
      ReduceSinkOperator pRS = CorrelationUtilities.getSingleParent(pGBY, ReduceSinkOperator.class);
      if (pRS != null && merge(cRS, pRS, dedupCtx.minReducer())) {
        CorrelationUtilities.removeReduceSinkForGroupBy(cRS, cGBY, dedupCtx.getPctx(), dedupCtx);
        pRS.getConf().setDeduplicated(true);
        return true;
      }
      return false;
    }
  }

  static class JoinReducerProc extends AbsctractReducerReducerProc {

    // pRS-pJOIN-cRS
    @Override
    public Object process(ReduceSinkOperator cRS, ReduceSinkDeduplicateProcCtx dedupCtx)
        throws SemanticException {
      JoinOperator pJoin =
          CorrelationUtilities.findPossibleParent(cRS, JoinOperator.class, dedupCtx.trustScript());
      if (pJoin != null && merge(cRS, pJoin, dedupCtx.minReducer())) {
        pJoin.getConf().setFixedAsSorted(true);
        CorrelationUtilities.replaceReduceSinkWithSelectOperator(cRS, dedupCtx.getPctx(), dedupCtx);
        ReduceSinkOperator pRS =
            CorrelationUtilities.findPossibleParent(
                pJoin, ReduceSinkOperator.class, dedupCtx.trustScript());
        if (pRS != null) {
          pRS.getConf().setDeduplicated(true);
        }
        return true;
      }
      return false;
    }

    // pRS-pJOIN-cRS-cGBY
    @Override
    public Object process(
        ReduceSinkOperator cRS, GroupByOperator cGBY, ReduceSinkDeduplicateProcCtx dedupCtx)
        throws SemanticException {
      Operator<?> start = CorrelationUtilities.getStartForGroupBy(cRS, dedupCtx);
      JoinOperator pJoin =
          CorrelationUtilities.findPossibleParent(
              start, JoinOperator.class, dedupCtx.trustScript());
      if (pJoin != null && merge(cRS, pJoin, dedupCtx.minReducer())) {
        pJoin.getConf().setFixedAsSorted(true);
        CorrelationUtilities.removeReduceSinkForGroupBy(cRS, cGBY, dedupCtx.getPctx(), dedupCtx);
        ReduceSinkOperator pRS =
            CorrelationUtilities.findPossibleParent(
                pJoin, ReduceSinkOperator.class, dedupCtx.trustScript());
        if (pRS != null) {
          pRS.getConf().setDeduplicated(true);
        }
        return true;
      }
      return false;
    }
  }

  static class ReducerReducerProc extends AbsctractReducerReducerProc {

    // pRS-cRS
    @Override
    public Object process(ReduceSinkOperator cRS, ReduceSinkDeduplicateProcCtx dedupCtx)
        throws SemanticException {
      ReduceSinkOperator pRS =
          CorrelationUtilities.findPossibleParent(
              cRS, ReduceSinkOperator.class, dedupCtx.trustScript());
      if (pRS != null && merge(cRS, pRS, dedupCtx.minReducer())) {
        CorrelationUtilities.replaceReduceSinkWithSelectOperator(cRS, dedupCtx.getPctx(), dedupCtx);
        pRS.getConf().setDeduplicated(true);
        return true;
      }
      return false;
    }

    // pRS-cRS-cGBY
    @Override
    public Object process(
        ReduceSinkOperator cRS, GroupByOperator cGBY, ReduceSinkDeduplicateProcCtx dedupCtx)
        throws SemanticException {
      Operator<?> start = CorrelationUtilities.getStartForGroupBy(cRS, dedupCtx);
      ReduceSinkOperator pRS =
          CorrelationUtilities.findPossibleParent(
              start, ReduceSinkOperator.class, dedupCtx.trustScript());
      if (pRS != null && merge(cRS, pRS, dedupCtx.minReducer())) {
        if (dedupCtx.getPctx().getConf().getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)) {
          return false;
        }
        CorrelationUtilities.removeReduceSinkForGroupBy(cRS, cGBY, dedupCtx.getPctx(), dedupCtx);
        pRS.getConf().setDeduplicated(true);
        return true;
      }
      return false;
    }
  }
}