Java ReduceSinkDesc.getKeyCols примеры использования

Язык программирования: Java

Пространство имен/Пакет: org.apache.hadoop.hive.ql.plan

Класс/Тип: ReduceSinkDesc

Метод/Функция: getKeyCols

Примеров на hotexamples.com: 5

Java ReduceSinkDesc.getKeyCols - 5 примеров найдено. Это лучшие примеры Java кода для org.apache.hadoop.hive.ql.plan.ReduceSinkDesc.getKeyCols, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

getNumReducers(6)

getKeyCols(5)

getValueCols(3)

getOrder(2)

getOutputValueColumnNames(2)

getPartitionCols(2)

getNumDistributionKeys(1)

getTag(1)

setBucketCols(1)

setNumBuckets(1)

setOutputValueColumnNames(1)

setValueCols(1)

setValueSerializeInfo(1)

Пример #1

Показать файл

Файл: ReduceSinkDeDuplication.java Проект: phoenixhadoop/hive

 /**
  * Returns merge directions between two RSs for criterias (ordering, number of reducers, reducer
  * keys, partition keys). Returns null if any of categories is not mergeable.
  *
  * <p>Values for each index can be -1, 0, 1 1. 0 means two configuration in the category is the
  * same 2. for -1, configuration of parent RS is more specific than child RS 3. for 1,
  * configuration of child RS is more specific than parent RS
  */
 private int[] checkStatus(ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minReducer)
     throws SemanticException {
   ReduceSinkDesc cConf = cRS.getConf();
   ReduceSinkDesc pConf = pRS.getConf();
   Integer moveRSOrderTo = checkOrder(cConf.getOrder(), pConf.getOrder());
   if (moveRSOrderTo == null) {
     return null;
   }
   Integer moveReducerNumTo = checkNumReducer(cConf.getNumReducers(), pConf.getNumReducers());
   if (moveReducerNumTo == null || moveReducerNumTo > 0 && cConf.getNumReducers() < minReducer) {
     return null;
   }
   List<ExprNodeDesc> ckeys = cConf.getKeyCols();
   List<ExprNodeDesc> pkeys = pConf.getKeyCols();
   Integer moveKeyColTo = checkExprs(ckeys, pkeys, cRS, pRS);
   if (moveKeyColTo == null) {
     return null;
   }
   List<ExprNodeDesc> cpars = cConf.getPartitionCols();
   List<ExprNodeDesc> ppars = pConf.getPartitionCols();
   Integer movePartitionColTo = checkExprs(cpars, ppars, cRS, pRS);
   if (movePartitionColTo == null) {
     return null;
   }
   Integer moveNumDistKeyTo =
       checkNumDistributionKey(cConf.getNumDistributionKeys(), pConf.getNumDistributionKeys());
   return new int[] {
     moveKeyColTo, movePartitionColTo, moveRSOrderTo, moveReducerNumTo, moveNumDistKeyTo
   };
 }

Пример #2

Показать файл

Файл: ReduceSinkDeDuplication.java Проект: phoenixhadoop/hive

    // for JOIN-RS case, it's not possible generally to merge if child has
    // less key/partition columns than parents
    protected boolean merge(ReduceSinkOperator cRS, JoinOperator pJoin, int minReducer)
        throws SemanticException {
      List<Operator<?>> parents = pJoin.getParentOperators();
      ReduceSinkOperator[] pRSs = parents.toArray(new ReduceSinkOperator[parents.size()]);
      ReduceSinkDesc cRSc = cRS.getConf();
      ReduceSinkDesc pRS0c = pRSs[0].getConf();
      if (cRSc.getKeyCols().size() < pRS0c.getKeyCols().size()) {
        return false;
      }
      if (cRSc.getPartitionCols().size() != pRS0c.getPartitionCols().size()) {
        return false;
      }
      Integer moveReducerNumTo = checkNumReducer(cRSc.getNumReducers(), pRS0c.getNumReducers());
      if (moveReducerNumTo == null || moveReducerNumTo > 0 && cRSc.getNumReducers() < minReducer) {
        return false;
      }

      Integer moveRSOrderTo = checkOrder(cRSc.getOrder(), pRS0c.getOrder());
      if (moveRSOrderTo == null) {
        return false;
      }

      boolean[] sorted = CorrelationUtilities.getSortedTags(pJoin);

      int cKeySize = cRSc.getKeyCols().size();
      for (int i = 0; i < cKeySize; i++) {
        ExprNodeDesc cexpr = cRSc.getKeyCols().get(i);
        ExprNodeDesc[] pexprs = new ExprNodeDesc[pRSs.length];
        for (int tag = 0; tag < pRSs.length; tag++) {
          pexprs[tag] = pRSs[tag].getConf().getKeyCols().get(i);
        }
        int found = CorrelationUtilities.indexOf(cexpr, pexprs, cRS, pRSs, sorted);
        if (found != i) {
          return false;
        }
      }
      int cPartSize = cRSc.getPartitionCols().size();
      for (int i = 0; i < cPartSize; i++) {
        ExprNodeDesc cexpr = cRSc.getPartitionCols().get(i);
        ExprNodeDesc[] pexprs = new ExprNodeDesc[pRSs.length];
        for (int tag = 0; tag < pRSs.length; tag++) {
          pexprs[tag] = pRSs[tag].getConf().getPartitionCols().get(i);
        }
        int found = CorrelationUtilities.indexOf(cexpr, pexprs, cRS, pRSs, sorted);
        if (found != i) {
          return false;
        }
      }

      if (moveReducerNumTo > 0) {
        for (ReduceSinkOperator pRS : pRSs) {
          pRS.getConf().setNumReducers(cRS.getConf().getNumReducers());
        }
      }
      return true;
    }

Пример #3

Показать файл

Файл: ColumnPrunerProcFactory.java Проект: cschenyuan/hive-hack

  private static void pruneReduceSinkOperator(
      boolean[] retainFlags, ReduceSinkOperator reduce, ColumnPrunerProcCtx cppCtx)
      throws SemanticException {
    ReduceSinkDesc reduceConf = reduce.getConf();
    Map<String, ExprNodeDesc> oldMap = reduce.getColumnExprMap();
    LOG.info("RS " + reduce.getIdentifier() + " oldColExprMap: " + oldMap);
    RowResolver oldRR = cppCtx.getOpToParseCtxMap().get(reduce).getRowResolver();
    ArrayList<ColumnInfo> old_signature = oldRR.getRowSchema().getSignature();
    ArrayList<ColumnInfo> signature = new ArrayList<ColumnInfo>(old_signature);

    List<String> valueColNames = reduceConf.getOutputValueColumnNames();
    ArrayList<String> newValueColNames = new ArrayList<String>();

    List<ExprNodeDesc> keyExprs = reduceConf.getKeyCols();
    List<ExprNodeDesc> valueExprs = reduceConf.getValueCols();
    ArrayList<ExprNodeDesc> newValueExprs = new ArrayList<ExprNodeDesc>();

    for (int i = 0; i < retainFlags.length; i++) {
      String outputCol = valueColNames.get(i);
      ExprNodeDesc outputColExpr = valueExprs.get(i);
      if (!retainFlags[i]) {
        String[] nm = oldRR.reverseLookup(outputCol);
        if (nm == null) {
          outputCol = Utilities.ReduceField.VALUE.toString() + "." + outputCol;
          nm = oldRR.reverseLookup(outputCol);
        }

        // In case there are multiple columns referenced to the same column name, we won't
        // do row resolve once more because the ColumnInfo in row resolver is already removed
        if (nm == null) {
          continue;
        }

        // Only remove information of a column if it is not a key,
        // i.e. this column is not appearing in keyExprs of the RS
        if (ExprNodeDescUtils.indexOf(outputColExpr, keyExprs) == -1) {
          ColumnInfo colInfo = oldRR.getFieldMap(nm[0]).remove(nm[1]);
          oldRR.getInvRslvMap().remove(colInfo.getInternalName());
          oldMap.remove(outputCol);
          signature.remove(colInfo);
        }

      } else {
        newValueColNames.add(outputCol);
        newValueExprs.add(outputColExpr);
      }
    }

    oldRR.getRowSchema().setSignature(signature);
    reduce.getSchema().setSignature(signature);
    reduceConf.setOutputValueColumnNames(newValueColNames);
    reduceConf.setValueCols(newValueExprs);
    TableDesc newValueTable =
        PlanUtils.getReduceValueTableDesc(
            PlanUtils.getFieldSchemasFromColumnList(
                reduceConf.getValueCols(), newValueColNames, 0, ""));
    reduceConf.setValueSerializeInfo(newValueTable);
    LOG.info("RS " + reduce.getIdentifier() + " newColExprMap: " + oldMap);
  }

Пример #4

Показать файл

Файл: ColumnPrunerProcFactory.java Проект: cschenyuan/hive-hack

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
        throws SemanticException {
      ReduceSinkOperator op = (ReduceSinkOperator) nd;
      ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
      RowResolver resolver = cppCtx.getOpToParseCtxMap().get(op).getRowResolver();
      ReduceSinkDesc conf = op.getConf();

      List<String> colLists = new ArrayList<String>();
      ArrayList<ExprNodeDesc> keys = conf.getKeyCols();
      LOG.debug("Reduce Sink Operator " + op.getIdentifier() + " key:" + keys);
      for (ExprNodeDesc key : keys) {
        colLists = Utilities.mergeUniqElems(colLists, key.getCols());
      }

      assert op.getNumChild() == 1;

      Operator<? extends OperatorDesc> child = op.getChildOperators().get(0);

      List<String> childCols;
      if (child instanceof CommonJoinOperator) {
        childCols = cppCtx.getJoinPrunedColLists().get(child).get((byte) conf.getTag());
      } else {
        childCols = cppCtx.getPrunedColList(child);
      }
      List<ExprNodeDesc> valCols = conf.getValueCols();
      List<String> valColNames = conf.getOutputValueColumnNames();

      if (childCols != null) {
        boolean[] flags = new boolean[valCols.size()];

        for (String childCol : childCols) {
          int index = valColNames.indexOf(Utilities.removeValueTag(childCol));
          if (index < 0) {
            continue;
          }
          flags[index] = true;
          colLists = Utilities.mergeUniqElems(colLists, valCols.get(index).getCols());
        }

        Collections.sort(colLists);
        pruneReduceSinkOperator(flags, op, cppCtx);
        cppCtx.getPrunedColLists().put(op, colLists);
        return null;
      }

      // Reduce Sink contains the columns needed - no need to aggregate from
      // children
      for (ExprNodeDesc val : valCols) {
        colLists = Utilities.mergeUniqElems(colLists, val.getCols());
      }

      cppCtx.getPrunedColLists().put(op, colLists);
      return null;
    }

Пример #5

Показать файл

Файл: SkewJoinOptimizer.java Проект: Jude7/hive

    /**
     * Returns the skewed values in all the tables which are going to be scanned. If the join is on
     * columns c1, c2 and c3 on tables T1 and T2, T1 is skewed on c1 and c4 with the skew values
     * ((1,2),(3,4)), whereas T2 is skewed on c1, c2 with skew values ((5,6),(7,8)), the resulting
     * map would be: <(c1) -> ((1), (3)), (c1,c2) -> ((5,6),(7,8))>
     *
     * @param op The join operator being optimized
     * @param tableScanOpsForJoin table scan operators which are parents of the join operator
     * @return map<join keys intersection skewedkeys, list of skewed values>.
     */
    private Map<List<ExprNodeDesc>, List<List<String>>> getSkewedValues(
        Operator<? extends OperatorDesc> op, List<TableScanOperator> tableScanOpsForJoin) {

      Map<List<ExprNodeDesc>, List<List<String>>> skewDataReturn =
          new HashMap<List<ExprNodeDesc>, List<List<String>>>();

      Map<List<ExprNodeDescEqualityWrapper>, List<List<String>>> skewData =
          new HashMap<List<ExprNodeDescEqualityWrapper>, List<List<String>>>();

      // The join keys are available in the reduceSinkOperators before join
      for (Operator<? extends OperatorDesc> reduceSinkOp : op.getParentOperators()) {
        ReduceSinkDesc rsDesc = ((ReduceSinkOperator) reduceSinkOp).getConf();

        if (rsDesc.getKeyCols() != null) {
          Table table = null;
          // Find the skew information corresponding to the table
          List<String> skewedColumns = null;
          List<List<String>> skewedValueList = null;

          // The join columns which are also skewed
          List<ExprNodeDescEqualityWrapper> joinKeysSkewedCols =
              new ArrayList<ExprNodeDescEqualityWrapper>();

          // skewed Keys which intersect with join keys
          List<Integer> positionSkewedKeys = new ArrayList<Integer>();

          // Update the joinKeys appropriately.
          for (ExprNodeDesc keyColDesc : rsDesc.getKeyCols()) {
            ExprNodeColumnDesc keyCol = null;

            // If the key column is not a column, then dont apply this optimization.
            // This will be fixed as part of https://issues.apache.org/jira/browse/HIVE-3445
            // for type conversion UDFs.
            if (keyColDesc instanceof ExprNodeColumnDesc) {
              keyCol = (ExprNodeColumnDesc) keyColDesc;
              if (table == null) {
                table = getTable(parseContext, reduceSinkOp, tableScanOpsForJoin);
                skewedColumns = table == null ? null : table.getSkewedColNames();
                // No skew on the table to take care of
                if ((skewedColumns == null) || (skewedColumns.isEmpty())) {
                  continue;
                }

                skewedValueList = table == null ? null : table.getSkewedColValues();
              }
              int pos = skewedColumns.indexOf(keyCol.getColumn());
              if ((pos >= 0) && (!positionSkewedKeys.contains(pos))) {
                positionSkewedKeys.add(pos);
                ExprNodeColumnDesc keyColClone = (ExprNodeColumnDesc) keyCol.clone();
                keyColClone.setTabAlias(null);
                joinKeysSkewedCols.add(new ExprNodeDescEqualityWrapper(keyColClone));
              }
            }
          }

          // If the skew keys match the join keys, then add it to the list
          if ((skewedColumns != null) && (!skewedColumns.isEmpty())) {
            if (!joinKeysSkewedCols.isEmpty()) {
              // If the join keys matches the skewed keys, use the table skewed keys
              List<List<String>> skewedJoinValues;
              if (skewedColumns.size() == positionSkewedKeys.size()) {
                skewedJoinValues = skewedValueList;
              } else {
                skewedJoinValues = getSkewedJoinValues(skewedValueList, positionSkewedKeys);
              }

              List<List<String>> oldSkewedJoinValues = skewData.get(joinKeysSkewedCols);
              if (oldSkewedJoinValues == null) {
                oldSkewedJoinValues = new ArrayList<List<String>>();
              }
              for (List<String> skewValue : skewedJoinValues) {
                if (!oldSkewedJoinValues.contains(skewValue)) {
                  oldSkewedJoinValues.add(skewValue);
                }
              }

              skewData.put(joinKeysSkewedCols, oldSkewedJoinValues);
            }
          }
        }
      }

      // convert skewData to contain ExprNodeDesc in the keys
      for (Map.Entry<List<ExprNodeDescEqualityWrapper>, List<List<String>>> mapEntry :
          skewData.entrySet()) {
        List<ExprNodeDesc> skewedKeyJoinCols = new ArrayList<ExprNodeDesc>();
        for (ExprNodeDescEqualityWrapper key : mapEntry.getKey()) {
          skewedKeyJoinCols.add(key.getExprNodeDesc());
        }
        skewDataReturn.put(skewedKeyJoinCols, mapEntry.getValue());
      }

      return skewDataReturn;
    }