/**
  * Returns merge directions between two RSs for criterias (ordering, number of reducers, reducer
  * keys, partition keys). Returns null if any of categories is not mergeable.
  *
  * <p>Values for each index can be -1, 0, 1 1. 0 means two configuration in the category is the
  * same 2. for -1, configuration of parent RS is more specific than child RS 3. for 1,
  * configuration of child RS is more specific than parent RS
  */
 private int[] checkStatus(ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minReducer)
     throws SemanticException {
   ReduceSinkDesc cConf = cRS.getConf();
   ReduceSinkDesc pConf = pRS.getConf();
   Integer moveRSOrderTo = checkOrder(cConf.getOrder(), pConf.getOrder());
   if (moveRSOrderTo == null) {
     return null;
   }
   Integer moveReducerNumTo = checkNumReducer(cConf.getNumReducers(), pConf.getNumReducers());
   if (moveReducerNumTo == null || moveReducerNumTo > 0 && cConf.getNumReducers() < minReducer) {
     return null;
   }
   List<ExprNodeDesc> ckeys = cConf.getKeyCols();
   List<ExprNodeDesc> pkeys = pConf.getKeyCols();
   Integer moveKeyColTo = checkExprs(ckeys, pkeys, cRS, pRS);
   if (moveKeyColTo == null) {
     return null;
   }
   List<ExprNodeDesc> cpars = cConf.getPartitionCols();
   List<ExprNodeDesc> ppars = pConf.getPartitionCols();
   Integer movePartitionColTo = checkExprs(cpars, ppars, cRS, pRS);
   if (movePartitionColTo == null) {
     return null;
   }
   Integer moveNumDistKeyTo =
       checkNumDistributionKey(cConf.getNumDistributionKeys(), pConf.getNumDistributionKeys());
   return new int[] {
     moveKeyColTo, movePartitionColTo, moveRSOrderTo, moveReducerNumTo, moveNumDistKeyTo
   };
 }
    // for JOIN-RS case, it's not possible generally to merge if child has
    // less key/partition columns than parents
    protected boolean merge(ReduceSinkOperator cRS, JoinOperator pJoin, int minReducer)
        throws SemanticException {
      List<Operator<?>> parents = pJoin.getParentOperators();
      ReduceSinkOperator[] pRSs = parents.toArray(new ReduceSinkOperator[parents.size()]);
      ReduceSinkDesc cRSc = cRS.getConf();
      ReduceSinkDesc pRS0c = pRSs[0].getConf();
      if (cRSc.getKeyCols().size() < pRS0c.getKeyCols().size()) {
        return false;
      }
      if (cRSc.getPartitionCols().size() != pRS0c.getPartitionCols().size()) {
        return false;
      }
      Integer moveReducerNumTo = checkNumReducer(cRSc.getNumReducers(), pRS0c.getNumReducers());
      if (moveReducerNumTo == null || moveReducerNumTo > 0 && cRSc.getNumReducers() < minReducer) {
        return false;
      }

      Integer moveRSOrderTo = checkOrder(cRSc.getOrder(), pRS0c.getOrder());
      if (moveRSOrderTo == null) {
        return false;
      }

      boolean[] sorted = CorrelationUtilities.getSortedTags(pJoin);

      int cKeySize = cRSc.getKeyCols().size();
      for (int i = 0; i < cKeySize; i++) {
        ExprNodeDesc cexpr = cRSc.getKeyCols().get(i);
        ExprNodeDesc[] pexprs = new ExprNodeDesc[pRSs.length];
        for (int tag = 0; tag < pRSs.length; tag++) {
          pexprs[tag] = pRSs[tag].getConf().getKeyCols().get(i);
        }
        int found = CorrelationUtilities.indexOf(cexpr, pexprs, cRS, pRSs, sorted);
        if (found != i) {
          return false;
        }
      }
      int cPartSize = cRSc.getPartitionCols().size();
      for (int i = 0; i < cPartSize; i++) {
        ExprNodeDesc cexpr = cRSc.getPartitionCols().get(i);
        ExprNodeDesc[] pexprs = new ExprNodeDesc[pRSs.length];
        for (int tag = 0; tag < pRSs.length; tag++) {
          pexprs[tag] = pRSs[tag].getConf().getPartitionCols().get(i);
        }
        int found = CorrelationUtilities.indexOf(cexpr, pexprs, cRS, pRSs, sorted);
        if (found != i) {
          return false;
        }
      }

      if (moveReducerNumTo > 0) {
        for (ReduceSinkOperator pRS : pRSs) {
          pRS.getConf().setNumReducers(cRS.getConf().getNumReducers());
        }
      }
      return true;
    }
  private static void pruneReduceSinkOperator(
      boolean[] retainFlags, ReduceSinkOperator reduce, ColumnPrunerProcCtx cppCtx)
      throws SemanticException {
    ReduceSinkDesc reduceConf = reduce.getConf();
    Map<String, ExprNodeDesc> oldMap = reduce.getColumnExprMap();
    LOG.info("RS " + reduce.getIdentifier() + " oldColExprMap: " + oldMap);
    RowResolver oldRR = cppCtx.getOpToParseCtxMap().get(reduce).getRowResolver();
    ArrayList<ColumnInfo> old_signature = oldRR.getRowSchema().getSignature();
    ArrayList<ColumnInfo> signature = new ArrayList<ColumnInfo>(old_signature);

    List<String> valueColNames = reduceConf.getOutputValueColumnNames();
    ArrayList<String> newValueColNames = new ArrayList<String>();

    List<ExprNodeDesc> keyExprs = reduceConf.getKeyCols();
    List<ExprNodeDesc> valueExprs = reduceConf.getValueCols();
    ArrayList<ExprNodeDesc> newValueExprs = new ArrayList<ExprNodeDesc>();

    for (int i = 0; i < retainFlags.length; i++) {
      String outputCol = valueColNames.get(i);
      ExprNodeDesc outputColExpr = valueExprs.get(i);
      if (!retainFlags[i]) {
        String[] nm = oldRR.reverseLookup(outputCol);
        if (nm == null) {
          outputCol = Utilities.ReduceField.VALUE.toString() + "." + outputCol;
          nm = oldRR.reverseLookup(outputCol);
        }

        // In case there are multiple columns referenced to the same column name, we won't
        // do row resolve once more because the ColumnInfo in row resolver is already removed
        if (nm == null) {
          continue;
        }

        // Only remove information of a column if it is not a key,
        // i.e. this column is not appearing in keyExprs of the RS
        if (ExprNodeDescUtils.indexOf(outputColExpr, keyExprs) == -1) {
          ColumnInfo colInfo = oldRR.getFieldMap(nm[0]).remove(nm[1]);
          oldRR.getInvRslvMap().remove(colInfo.getInternalName());
          oldMap.remove(outputCol);
          signature.remove(colInfo);
        }

      } else {
        newValueColNames.add(outputCol);
        newValueExprs.add(outputColExpr);
      }
    }

    oldRR.getRowSchema().setSignature(signature);
    reduce.getSchema().setSignature(signature);
    reduceConf.setOutputValueColumnNames(newValueColNames);
    reduceConf.setValueCols(newValueExprs);
    TableDesc newValueTable =
        PlanUtils.getReduceValueTableDesc(
            PlanUtils.getFieldSchemasFromColumnList(
                reduceConf.getValueCols(), newValueColNames, 0, ""));
    reduceConf.setValueSerializeInfo(newValueTable);
    LOG.info("RS " + reduce.getIdentifier() + " newColExprMap: " + oldMap);
  }
    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
        throws SemanticException {
      ReduceSinkOperator op = (ReduceSinkOperator) nd;
      ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
      RowResolver resolver = cppCtx.getOpToParseCtxMap().get(op).getRowResolver();
      ReduceSinkDesc conf = op.getConf();

      List<String> colLists = new ArrayList<String>();
      ArrayList<ExprNodeDesc> keys = conf.getKeyCols();
      LOG.debug("Reduce Sink Operator " + op.getIdentifier() + " key:" + keys);
      for (ExprNodeDesc key : keys) {
        colLists = Utilities.mergeUniqElems(colLists, key.getCols());
      }

      assert op.getNumChild() == 1;

      Operator<? extends OperatorDesc> child = op.getChildOperators().get(0);

      List<String> childCols;
      if (child instanceof CommonJoinOperator) {
        childCols = cppCtx.getJoinPrunedColLists().get(child).get((byte) conf.getTag());
      } else {
        childCols = cppCtx.getPrunedColList(child);
      }
      List<ExprNodeDesc> valCols = conf.getValueCols();
      List<String> valColNames = conf.getOutputValueColumnNames();

      if (childCols != null) {
        boolean[] flags = new boolean[valCols.size()];

        for (String childCol : childCols) {
          int index = valColNames.indexOf(Utilities.removeValueTag(childCol));
          if (index < 0) {
            continue;
          }
          flags[index] = true;
          colLists = Utilities.mergeUniqElems(colLists, valCols.get(index).getCols());
        }

        Collections.sort(colLists);
        pruneReduceSinkOperator(flags, op, cppCtx);
        cppCtx.getPrunedColLists().put(op, colLists);
        return null;
      }

      // Reduce Sink contains the columns needed - no need to aggregate from
      // children
      for (ExprNodeDesc val : valCols) {
        colLists = Utilities.mergeUniqElems(colLists, val.getCols());
      }

      cppCtx.getPrunedColLists().put(op, colLists);
      return null;
    }
Пример #5
0
    /**
     * Returns the skewed values in all the tables which are going to be scanned. If the join is on
     * columns c1, c2 and c3 on tables T1 and T2, T1 is skewed on c1 and c4 with the skew values
     * ((1,2),(3,4)), whereas T2 is skewed on c1, c2 with skew values ((5,6),(7,8)), the resulting
     * map would be: <(c1) -> ((1), (3)), (c1,c2) -> ((5,6),(7,8))>
     *
     * @param op The join operator being optimized
     * @param tableScanOpsForJoin table scan operators which are parents of the join operator
     * @return map<join keys intersection skewedkeys, list of skewed values>.
     */
    private Map<List<ExprNodeDesc>, List<List<String>>> getSkewedValues(
        Operator<? extends OperatorDesc> op, List<TableScanOperator> tableScanOpsForJoin) {

      Map<List<ExprNodeDesc>, List<List<String>>> skewDataReturn =
          new HashMap<List<ExprNodeDesc>, List<List<String>>>();

      Map<List<ExprNodeDescEqualityWrapper>, List<List<String>>> skewData =
          new HashMap<List<ExprNodeDescEqualityWrapper>, List<List<String>>>();

      // The join keys are available in the reduceSinkOperators before join
      for (Operator<? extends OperatorDesc> reduceSinkOp : op.getParentOperators()) {
        ReduceSinkDesc rsDesc = ((ReduceSinkOperator) reduceSinkOp).getConf();

        if (rsDesc.getKeyCols() != null) {
          Table table = null;
          // Find the skew information corresponding to the table
          List<String> skewedColumns = null;
          List<List<String>> skewedValueList = null;

          // The join columns which are also skewed
          List<ExprNodeDescEqualityWrapper> joinKeysSkewedCols =
              new ArrayList<ExprNodeDescEqualityWrapper>();

          // skewed Keys which intersect with join keys
          List<Integer> positionSkewedKeys = new ArrayList<Integer>();

          // Update the joinKeys appropriately.
          for (ExprNodeDesc keyColDesc : rsDesc.getKeyCols()) {
            ExprNodeColumnDesc keyCol = null;

            // If the key column is not a column, then dont apply this optimization.
            // This will be fixed as part of https://issues.apache.org/jira/browse/HIVE-3445
            // for type conversion UDFs.
            if (keyColDesc instanceof ExprNodeColumnDesc) {
              keyCol = (ExprNodeColumnDesc) keyColDesc;
              if (table == null) {
                table = getTable(parseContext, reduceSinkOp, tableScanOpsForJoin);
                skewedColumns = table == null ? null : table.getSkewedColNames();
                // No skew on the table to take care of
                if ((skewedColumns == null) || (skewedColumns.isEmpty())) {
                  continue;
                }

                skewedValueList = table == null ? null : table.getSkewedColValues();
              }
              int pos = skewedColumns.indexOf(keyCol.getColumn());
              if ((pos >= 0) && (!positionSkewedKeys.contains(pos))) {
                positionSkewedKeys.add(pos);
                ExprNodeColumnDesc keyColClone = (ExprNodeColumnDesc) keyCol.clone();
                keyColClone.setTabAlias(null);
                joinKeysSkewedCols.add(new ExprNodeDescEqualityWrapper(keyColClone));
              }
            }
          }

          // If the skew keys match the join keys, then add it to the list
          if ((skewedColumns != null) && (!skewedColumns.isEmpty())) {
            if (!joinKeysSkewedCols.isEmpty()) {
              // If the join keys matches the skewed keys, use the table skewed keys
              List<List<String>> skewedJoinValues;
              if (skewedColumns.size() == positionSkewedKeys.size()) {
                skewedJoinValues = skewedValueList;
              } else {
                skewedJoinValues = getSkewedJoinValues(skewedValueList, positionSkewedKeys);
              }

              List<List<String>> oldSkewedJoinValues = skewData.get(joinKeysSkewedCols);
              if (oldSkewedJoinValues == null) {
                oldSkewedJoinValues = new ArrayList<List<String>>();
              }
              for (List<String> skewValue : skewedJoinValues) {
                if (!oldSkewedJoinValues.contains(skewValue)) {
                  oldSkewedJoinValues.add(skewValue);
                }
              }

              skewData.put(joinKeysSkewedCols, oldSkewedJoinValues);
            }
          }
        }
      }

      // convert skewData to contain ExprNodeDesc in the keys
      for (Map.Entry<List<ExprNodeDescEqualityWrapper>, List<List<String>>> mapEntry :
          skewData.entrySet()) {
        List<ExprNodeDesc> skewedKeyJoinCols = new ArrayList<ExprNodeDesc>();
        for (ExprNodeDescEqualityWrapper key : mapEntry.getKey()) {
          skewedKeyJoinCols.add(key.getExprNodeDesc());
        }
        skewDataReturn.put(skewedKeyJoinCols, mapEntry.getValue());
      }

      return skewDataReturn;
    }