// Return true if the table is bucketed/sorted by the specified positions
    // The number of buckets, the sort order should also match along with the
    // columns which are bucketed/sorted
    private boolean checkTable(
        Table table,
        List<Integer> bucketPositionsDest,
        List<Integer> sortPositionsDest,
        List<Integer> sortOrderDest,
        int numBucketsDest) {
      // The bucketing and sorting positions should exactly match
      int numBuckets = table.getNumBuckets();
      if (numBucketsDest != numBuckets) {
        return false;
      }

      List<Integer> tableBucketPositions =
          getBucketPositions(table.getBucketCols(), table.getCols());
      ObjectPair<List<Integer>, List<Integer>> tableSortPositionsOrder =
          getSortPositionsOrder(table.getSortCols(), table.getCols());
      return bucketPositionsDest.equals(tableBucketPositions)
          && sortPositionsDest.equals(tableSortPositionsOrder.getFirst())
          && sortOrderDest.equals(tableSortPositionsOrder.getSecond());
    }
    @Override
    public Object process(
        Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs)
        throws SemanticException {

      // If the reduce sink has not been introduced due to bucketing/sorting, ignore it
      FileSinkOperator fsOp = (FileSinkOperator) nd;
      ReduceSinkOperator rsOp =
          (ReduceSinkOperator) fsOp.getParentOperators().get(0).getParentOperators().get(0);

      List<ReduceSinkOperator> rsOps =
          pGraphContext.getReduceSinkOperatorsAddedByEnforceBucketingSorting();
      // nothing to do
      if ((rsOps != null) && (!rsOps.contains(rsOp))) {
        return null;
      }

      // Don't do this optimization with updates or deletes
      if (pGraphContext.getContext().getAcidOperation() == AcidUtils.Operation.UPDATE
          || pGraphContext.getContext().getAcidOperation() == AcidUtils.Operation.DELETE) {
        return null;
      }

      if (stack.get(0) instanceof TableScanOperator) {
        TableScanOperator tso = ((TableScanOperator) stack.get(0));
        if (SemanticAnalyzer.isAcidTable(tso.getConf().getTableMetadata())) {
          /*ACID tables have complex directory layout and require merging of delta files
           * on read thus we should not try to read bucket files directly*/
          return null;
        }
      }
      // Support for dynamic partitions can be added later
      if (fsOp.getConf().getDynPartCtx() != null) {
        return null;
      }

      // No conversion is possible for the reduce keys
      for (ExprNodeDesc keyCol : rsOp.getConf().getKeyCols()) {
        if (!(keyCol instanceof ExprNodeColumnDesc)) {
          return null;
        }
      }

      Table destTable = fsOp.getConf().getTable();
      if (destTable == null) {
        return null;
      }
      int numBucketsDestination = destTable.getNumBuckets();

      // Get the positions for sorted and bucketed columns
      // For sorted columns, also get the order (ascending/descending) - that should
      // also match for this to be converted to a map-only job.
      // Get the positions for sorted and bucketed columns
      // For sorted columns, also get the order (ascending/descending) - that should
      // also match for this to be converted to a map-only job.
      List<Integer> bucketPositions =
          getBucketPositions(destTable.getBucketCols(), destTable.getCols());
      ObjectPair<List<Integer>, List<Integer>> sortOrderPositions =
          getSortPositionsOrder(destTable.getSortCols(), destTable.getCols());
      List<Integer> sortPositions = sortOrderPositions.getFirst();
      List<Integer> sortOrder = sortOrderPositions.getSecond();
      boolean useBucketSortPositions = true;

      // Only selects and filters are allowed
      Operator<? extends OperatorDesc> op = rsOp;
      // TableScan will also be followed by a Select Operator. Find the expressions for the
      // bucketed/sorted columns for the destination table
      List<ExprNodeColumnDesc> sourceTableBucketCols = new ArrayList<ExprNodeColumnDesc>();
      List<ExprNodeColumnDesc> sourceTableSortCols = new ArrayList<ExprNodeColumnDesc>();
      op = op.getParentOperators().get(0);

      while (true) {
        if (!(op instanceof TableScanOperator)
            && !(op instanceof FilterOperator)
            && !(op instanceof SelectOperator)
            && !(op instanceof SMBMapJoinOperator)) {
          return null;
        }

        if (op instanceof SMBMapJoinOperator) {
          // Bucketing and sorting keys should exactly match
          if (!(bucketPositions.equals(sortPositions))) {
            return null;
          }
          SMBMapJoinOperator smbOp = (SMBMapJoinOperator) op;
          SMBJoinDesc smbJoinDesc = smbOp.getConf();
          int posBigTable = smbJoinDesc.getPosBigTable();

          // join keys dont match the bucketing keys
          List<ExprNodeDesc> keysBigTable = smbJoinDesc.getKeys().get((byte) posBigTable);
          if (keysBigTable.size() != bucketPositions.size()) {
            return null;
          }

          if (!validateSMBJoinKeys(
              smbJoinDesc, sourceTableBucketCols, sourceTableSortCols, sortOrder)) {
            return null;
          }

          sourceTableBucketCols.clear();
          sourceTableSortCols.clear();
          useBucketSortPositions = false;

          for (ExprNodeDesc keyBigTable : keysBigTable) {
            if (!(keyBigTable instanceof ExprNodeColumnDesc)) {
              return null;
            }
            sourceTableBucketCols.add((ExprNodeColumnDesc) keyBigTable);
            sourceTableSortCols.add((ExprNodeColumnDesc) keyBigTable);
          }

          // since it is a sort-merge join, only follow the big table
          op = op.getParentOperators().get(posBigTable);
        } else {
          // nothing to be done for filters - the output schema does not change.
          if (op instanceof TableScanOperator) {
            assert !useBucketSortPositions;
            TableScanOperator ts = (TableScanOperator) op;
            Table srcTable = ts.getConf().getTableMetadata();

            // Find the positions of the bucketed columns in the table corresponding
            // to the select list.
            // Consider the following scenario:
            // T1(key, value1, value2) bucketed/sorted by key into 2 buckets
            // T2(dummy, key, value1, value2) bucketed/sorted by key into 2 buckets
            // A query like: insert overwrite table T2 select 1, key, value1, value2 from T1
            // should be optimized.

            // Start with the destination: T2, bucketed/sorted position is [1]
            // At the source T1, the column corresponding to that position is [key], which
            // maps to column [0] of T1, which is also bucketed/sorted into the same
            // number of buckets
            List<Integer> newBucketPositions = new ArrayList<Integer>();
            for (int pos = 0; pos < bucketPositions.size(); pos++) {
              ExprNodeColumnDesc col = sourceTableBucketCols.get(pos);
              String colName = col.getColumn();
              int bucketPos = findColumnPosition(srcTable.getCols(), colName);
              if (bucketPos < 0) {
                return null;
              }
              newBucketPositions.add(bucketPos);
            }

            // Find the positions/order of the sorted columns in the table corresponding
            // to the select list.
            List<Integer> newSortPositions = new ArrayList<Integer>();
            for (int pos = 0; pos < sortPositions.size(); pos++) {
              ExprNodeColumnDesc col = sourceTableSortCols.get(pos);
              String colName = col.getColumn();
              int sortPos = findColumnPosition(srcTable.getCols(), colName);
              if (sortPos < 0) {
                return null;
              }
              newSortPositions.add(sortPos);
            }

            if (srcTable.isPartitioned()) {
              PrunedPartitionList prunedParts =
                  pGraphContext.getPrunedPartitions(srcTable.getTableName(), ts);
              List<Partition> partitions = prunedParts.getNotDeniedPartns();

              // Support for dynamic partitions can be added later
              // The following is not optimized:
              // insert overwrite table T1(ds='1', hr) select key, value, hr from T2 where ds = '1';
              // where T1 and T2 are bucketed by the same keys and partitioned by ds. hr
              if ((partitions == null) || (partitions.isEmpty()) || (partitions.size() > 1)) {
                return null;
              }
              for (Partition partition : partitions) {
                if (!checkPartition(
                    partition,
                    newBucketPositions,
                    newSortPositions,
                    sortOrder,
                    numBucketsDestination)) {
                  return null;
                }
              }

              removeReduceSink(
                  rsOp, (TableScanOperator) op, fsOp, partitions.get(0).getSortedPaths());
              return null;
            } else {
              if (!checkTable(
                  srcTable,
                  newBucketPositions,
                  newSortPositions,
                  sortOrder,
                  numBucketsDestination)) {
                return null;
              }

              removeReduceSink(rsOp, (TableScanOperator) op, fsOp, srcTable.getSortedPaths());
              return null;
            }
          }
          // None of the operators is changing the positions
          else if (op instanceof SelectOperator) {
            SelectOperator selectOp = (SelectOperator) op;
            SelectDesc selectDesc = selectOp.getConf();

            // Iterate backwards, from the destination table to the top of the tree
            // Based on the output column names, get the new columns.
            if (!useBucketSortPositions) {
              bucketPositions.clear();
              sortPositions.clear();
              List<String> outputColumnNames = selectDesc.getOutputColumnNames();

              for (ExprNodeColumnDesc col : sourceTableBucketCols) {
                String colName = col.getColumn();
                int colPos = outputColumnNames.indexOf(colName);
                if (colPos < 0) {
                  return null;
                }
                bucketPositions.add(colPos);
              }

              for (ExprNodeColumnDesc col : sourceTableSortCols) {
                String colName = col.getColumn();
                int colPos = outputColumnNames.indexOf(colName);
                if (colPos < 0) {
                  return null;
                }
                sortPositions.add(colPos);
              }
            }

            // There may be multiple selects - chose the one closest to the table
            sourceTableBucketCols.clear();
            sourceTableSortCols.clear();

            // Only columns can be selected for both sorted and bucketed positions
            for (int pos : bucketPositions) {
              ExprNodeDesc selectColList = selectDesc.getColList().get(pos);
              if (!(selectColList instanceof ExprNodeColumnDesc)) {
                return null;
              }
              sourceTableBucketCols.add((ExprNodeColumnDesc) selectColList);
            }

            for (int pos : sortPositions) {
              ExprNodeDesc selectColList = selectDesc.getColList().get(pos);
              if (!(selectColList instanceof ExprNodeColumnDesc)) {
                return null;
              }
              sourceTableSortCols.add((ExprNodeColumnDesc) selectColList);
            }

            useBucketSortPositions = false;
          }
          op = op.getParentOperators().get(0);
        }
      }
    }
コード例 #3
0
    @Override
    public Object process(
        Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs)
        throws SemanticException {

      // introduce RS and EX before FS. If the operator tree already contains
      // RS then ReduceSinkDeDuplication optimization should merge them
      FileSinkOperator fsOp = (FileSinkOperator) nd;

      LOG.info("Sorted dynamic partitioning optimization kicked in..");

      // if not dynamic partitioning then bail out
      if (fsOp.getConf().getDynPartCtx() == null) {
        LOG.debug(
            "Bailing out of sort dynamic partition optimization as dynamic partitioning context is null");
        return null;
      }

      // if list bucketing then bail out
      ListBucketingCtx lbCtx = fsOp.getConf().getLbCtx();
      if (lbCtx != null
          && !lbCtx.getSkewedColNames().isEmpty()
          && !lbCtx.getSkewedColValues().isEmpty()) {
        LOG.debug(
            "Bailing out of sort dynamic partition optimization as list bucketing is enabled");
        return null;
      }

      Table destTable = fsOp.getConf().getTable();
      if (destTable == null) {
        LOG.debug(
            "Bailing out of sort dynamic partition optimization as destination table is null");
        return null;
      }

      // unlink connection between FS and its parent
      Operator<? extends OperatorDesc> fsParent = fsOp.getParentOperators().get(0);
      // if all dp columns got constant folded then disable this optimization
      if (allStaticPartitions(fsParent, fsOp.getConf().getDynPartCtx())) {
        LOG.debug(
            "Bailing out of sorted dynamic partition optimizer as all dynamic partition"
                + " columns got constant folded (static partitioning)");
        return null;
      }

      // if RS is inserted by enforce bucketing or sorting, we need to remove it
      // since ReduceSinkDeDuplication will not merge them to single RS.
      // RS inserted by enforce bucketing/sorting will have bucketing column in
      // reduce sink key whereas RS inserted by this optimization will have
      // partition columns followed by bucket number followed by sort columns in
      // the reduce sink key. Since both key columns are not prefix subset
      // ReduceSinkDeDuplication will not merge them together resulting in 2 MR jobs.
      // To avoid that we will remove the RS (and EX) inserted by enforce bucketing/sorting.
      if (!removeRSInsertedByEnforceBucketing(fsOp)) {
        LOG.debug(
            "Bailing out of sort dynamic partition optimization as some partition columns "
                + "got constant folded.");
        return null;
      }

      // unlink connection between FS and its parent
      fsParent = fsOp.getParentOperators().get(0);
      fsParent.getChildOperators().clear();

      DynamicPartitionCtx dpCtx = fsOp.getConf().getDynPartCtx();
      int numBuckets = destTable.getNumBuckets();

      // if enforce bucketing/sorting is disabled numBuckets will not be set.
      // set the number of buckets here to ensure creation of empty buckets
      dpCtx.setNumBuckets(numBuckets);

      // Get the positions for partition, bucket and sort columns
      List<Integer> bucketPositions =
          getBucketPositions(destTable.getBucketCols(), destTable.getCols());
      List<Integer> sortPositions = null;
      List<Integer> sortOrder = null;
      ArrayList<ExprNodeDesc> bucketColumns;
      if (fsOp.getConf().getWriteType() == AcidUtils.Operation.UPDATE
          || fsOp.getConf().getWriteType() == AcidUtils.Operation.DELETE) {
        // When doing updates and deletes we always want to sort on the rowid because the ACID
        // reader will expect this sort order when doing reads.  So
        // ignore whatever comes from the table and enforce this sort order instead.
        sortPositions = Arrays.asList(0);
        sortOrder = Arrays.asList(1); // 1 means asc, could really use enum here in the thrift if
        bucketColumns =
            new ArrayList<>(); // Bucketing column is already present in ROW__ID, which is specially
                               // handled in ReduceSink
      } else {
        if (!destTable.getSortCols().isEmpty()) {
          // Sort columns specified by table
          sortPositions = getSortPositions(destTable.getSortCols(), destTable.getCols());
          sortOrder = getSortOrders(destTable.getSortCols(), destTable.getCols());
        } else {
          // Infer sort columns from operator tree
          sortPositions = Lists.newArrayList();
          sortOrder = Lists.newArrayList();
          inferSortPositions(fsParent, sortPositions, sortOrder);
        }
        List<ColumnInfo> colInfos = fsParent.getSchema().getSignature();
        bucketColumns = getPositionsToExprNodes(bucketPositions, colInfos);
      }
      List<Integer> sortNullOrder = new ArrayList<Integer>();
      for (int order : sortOrder) {
        sortNullOrder.add(order == 1 ? 0 : 1); // for asc, nulls first; for desc, nulls last
      }
      LOG.debug("Got sort order");
      for (int i : sortPositions) LOG.debug("sort position " + i);
      for (int i : sortOrder) LOG.debug("sort order " + i);
      for (int i : sortNullOrder) LOG.debug("sort null order " + i);
      List<Integer> partitionPositions = getPartitionPositions(dpCtx, fsParent.getSchema());

      // update file sink descriptor
      fsOp.getConf().setMultiFileSpray(false);
      fsOp.getConf().setNumFiles(1);
      fsOp.getConf().setTotalFiles(1);

      ArrayList<ColumnInfo> parentCols = Lists.newArrayList(fsParent.getSchema().getSignature());
      ArrayList<ExprNodeDesc> allRSCols = Lists.newArrayList();
      for (ColumnInfo ci : parentCols) {
        allRSCols.add(new ExprNodeColumnDesc(ci));
      }

      // Create ReduceSink operator
      ReduceSinkOperator rsOp =
          getReduceSinkOp(
              partitionPositions,
              sortPositions,
              sortOrder,
              sortNullOrder,
              allRSCols,
              bucketColumns,
              numBuckets,
              fsParent,
              fsOp.getConf().getWriteType());

      List<ExprNodeDesc> descs = new ArrayList<ExprNodeDesc>(allRSCols.size());
      List<String> colNames = new ArrayList<String>();
      String colName;
      for (int i = 0; i < allRSCols.size(); i++) {
        ExprNodeDesc col = allRSCols.get(i);
        colName = col.getExprString();
        colNames.add(colName);
        if (partitionPositions.contains(i) || sortPositions.contains(i)) {
          descs.add(
              new ExprNodeColumnDesc(
                  col.getTypeInfo(), ReduceField.KEY.toString() + "." + colName, null, false));
        } else {
          descs.add(
              new ExprNodeColumnDesc(
                  col.getTypeInfo(), ReduceField.VALUE.toString() + "." + colName, null, false));
        }
      }
      RowSchema selRS = new RowSchema(fsParent.getSchema());
      if (!bucketColumns.isEmpty()
          || fsOp.getConf().getWriteType() == Operation.DELETE
          || fsOp.getConf().getWriteType() == Operation.UPDATE) {
        descs.add(
            new ExprNodeColumnDesc(
                TypeInfoFactory.stringTypeInfo,
                ReduceField.KEY.toString() + ".'" + BUCKET_NUMBER_COL_NAME + "'",
                null,
                false));
        colNames.add("'" + BUCKET_NUMBER_COL_NAME + "'");
        ColumnInfo ci =
            new ColumnInfo(
                BUCKET_NUMBER_COL_NAME,
                TypeInfoFactory.stringTypeInfo,
                selRS.getSignature().get(0).getTabAlias(),
                true,
                true);
        selRS.getSignature().add(ci);
        fsParent.getSchema().getSignature().add(ci);
      }
      // Create SelectDesc
      SelectDesc selConf = new SelectDesc(descs, colNames);

      // Create Select Operator
      SelectOperator selOp = (SelectOperator) OperatorFactory.getAndMakeChild(selConf, selRS, rsOp);

      // link SEL to FS
      fsOp.getParentOperators().clear();
      fsOp.getParentOperators().add(selOp);
      selOp.getChildOperators().add(fsOp);

      // Set if partition sorted or partition bucket sorted
      fsOp.getConf().setDpSortState(FileSinkDesc.DPSortState.PARTITION_SORTED);
      if (bucketColumns.size() > 0
          || fsOp.getConf().getWriteType() == Operation.DELETE
          || fsOp.getConf().getWriteType() == Operation.UPDATE) {
        fsOp.getConf().setDpSortState(FileSinkDesc.DPSortState.PARTITION_BUCKET_SORTED);
      }

      // update partition column info in FS descriptor
      fsOp.getConf().setPartitionCols(rsOp.getConf().getPartitionCols());

      LOG.info(
          "Inserted "
              + rsOp.getOperatorId()
              + " and "
              + selOp.getOperatorId()
              + " as parent of "
              + fsOp.getOperatorId()
              + " and child of "
              + fsParent.getOperatorId());

      parseCtx.setReduceSinkAddedBySortedDynPartition(true);
      return null;
    }