/**
 * Returns true if every dynamic partition column of the given operator traces back to a constant
 * expression, i.e. all partition values were constant folded (static partitioning).
 */
private boolean allStaticPartitions(
     Operator<? extends OperatorDesc> op, final DynamicPartitionCtx dynPartCtx) {
   int numDpCols = dynPartCtx.getNumDPCols();
   int numCols = op.getSchema().getColumnNames().size();
   List<String> dpCols = op.getSchema().getColumnNames().subList(numCols - numDpCols, numCols);
   if (op.getColumnExprMap() == null) {
     // find first operator upstream with valid (non-null) column expression map
     for (Operator<? extends OperatorDesc> parent : op.getParentOperators()) {
       if (parent.getColumnExprMap() != null) {
         op = parent;
         break;
       }
     }
   }
   if (op.getColumnExprMap() != null) {
     for (String dpCol : dpCols) {
       ExprNodeDesc end = ExprNodeDescUtils.findConstantExprOrigin(dpCol, op);
       if (!(end instanceof ExprNodeConstantDesc)) {
         return false;
       }
     }
   } else {
     return false;
   }
   return true;
 }
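
  // Hedged sketch (assumption, not the actual ExprNodeDescUtils.findConstantExprOrigin
  // implementation): illustrates how a dynamic partition column can be traced back through each
  // operator's column expression map to see whether it originates from a constant, i.e. whether
  // it was constant folded. The helper name and the single-parent walk are illustrative only.
  private static ExprNodeDesc traceColumnOriginSketch(
      String colName, Operator<? extends OperatorDesc> op) {
    ExprNodeDesc expr = op.getColumnExprMap() == null ? null : op.getColumnExprMap().get(colName);
    while (expr instanceof ExprNodeColumnDesc && !op.getParentOperators().isEmpty()) {
      // follow the column reference into the parent operator's expression map
      op = op.getParentOperators().get(0);
      Map<String, ExprNodeDesc> parentMap = op.getColumnExprMap();
      String parentCol = ((ExprNodeColumnDesc) expr).getColumn();
      if (parentMap == null || parentMap.get(parentCol) == null) {
        break;
      }
      expr = parentMap.get(parentCol);
    }
    // an ExprNodeConstantDesc result here means the partition column was folded to a constant
    return expr;
  }
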
    private List<Integer> getPartitionPositions(DynamicPartitionCtx dpCtx, RowSchema schema) {
      int numPartCols = dpCtx.getNumDPCols();
      int numCols = schema.getSignature().size();
      List<Integer> partPos = Lists.newArrayList();

       // partition columns are always at the end of the schema
      for (int i = numCols - numPartCols; i < numCols; i++) {
        partPos.add(i);
      }
      return partPos;
    }
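
     // Illustration (assumed values): with a 5-column row schema and dpCtx.getNumDPCols() == 2,
     // the two dynamic partition columns occupy the trailing positions, so
     // getPartitionPositions(dpCtx, schema) returns [3, 4].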
  /**
   * Create a Map-only merge job with the following operators:
   *
   *          MR job J0:
   *          ...
   *          |
   *          v
   *          FileSinkOperator_1 (fsInput)
   *          |
   *          v
   *          Merge job J1:
   *          |
   *          v
   *          TableScan (using CombineHiveInputFormat) (tsMerge)
   *          |
   *          v
   *          FileSinkOperator (fsMerge)
   *
   * <p>Here the pathToPartitionInfo & pathToAlias will remain the same, which means the paths
   * do not contain the dynamic partitions (their parent). So after the dynamic partitions are
   * created (after the first job finishes and before the moveTask or ConditionalTask starts), we
   * need to change the pathToPartitionInfo & pathToAlias to include the dynamic partition
   * directories.
   *
   * @param fsInput The FileSink operator.
   * @param ctx The MR processing context.
   * @param finalName The final destination path the merge job should output.
   */
  private void createMap4Merge(FileSinkOperator fsInput, GenMRProcContext ctx, String finalName) {

    //
    // 1. create the operator tree
    //
    ParseContext parseCtx = ctx.getParseCtx();
    FileSinkDesc fsInputDesc = fsInput.getConf();

    // Create a TableScan operator
    RowSchema inputRS = fsInput.getSchema();
    Operator<? extends Serializable> tsMerge = OperatorFactory.get(TableScanDesc.class, inputRS);

    // Create a FileSink operator
    TableDesc ts = (TableDesc) fsInputDesc.getTableInfo().clone();
    FileSinkDesc fsOutputDesc =
        new FileSinkDesc(
            finalName, ts, parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSRESULT));
    FileSinkOperator fsOutput =
        (FileSinkOperator) OperatorFactory.getAndMakeChild(fsOutputDesc, inputRS, tsMerge);

    // If the input FileSinkOperator is dynamic-partition enabled, the tsMerge input schema
    // needs to include the partition columns, and fsOutput should have
    // a DynamicPartitionCtx to indicate that it needs to be dynamically partitioned.
    DynamicPartitionCtx dpCtx = fsInputDesc.getDynPartCtx();
    if (dpCtx != null && dpCtx.getNumDPCols() > 0) {
      // adding DP ColumnInfo to the RowSchema signature
      ArrayList<ColumnInfo> signature = inputRS.getSignature();
      String tblAlias = fsInputDesc.getTableInfo().getTableName();
      LinkedHashMap<String, String> colMap = new LinkedHashMap<String, String>();
      StringBuilder partCols = new StringBuilder();
      for (String dpCol : dpCtx.getDPColNames()) {
        ColumnInfo colInfo =
            new ColumnInfo(
                dpCol,
                TypeInfoFactory.stringTypeInfo, // all partition column types should be string
                tblAlias,
                true); // partition column is virtual column
        signature.add(colInfo);
        colMap.put(dpCol, dpCol); // input and output have the same column name
        partCols.append(dpCol).append('/');
      }
      partCols.setLength(partCols.length() - 1); // remove the last '/'
      inputRS.setSignature(signature);

      // create another DynamicPartitionCtx, which has a different input-to-DP column mapping
      DynamicPartitionCtx dpCtx2 = new DynamicPartitionCtx(dpCtx);
      dpCtx2.setInputToDPCols(colMap);
      fsOutputDesc.setDynPartCtx(dpCtx2);

      // update the FileSinkOperator to include partition columns
      fsInputDesc
          .getTableInfo()
          .getProperties()
          .setProperty(
              org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS,
              partCols.toString()); // list of dynamic partition column names
    } else {
      // non-partitioned table
      fsInputDesc
          .getTableInfo()
          .getProperties()
          .remove(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS);
    }

    //
    // 2. Constructing a conditional task consisting of a move task and a map reduce task
    //
    MapRedTask currTask = (MapRedTask) ctx.getCurrTask();
    MoveWork dummyMv =
        new MoveWork(
            null,
            null,
            null,
            new LoadFileDesc(fsInputDesc.getDirName(), finalName, true, null, null),
            false);
    MapredWork cplan = createMergeTask(ctx.getConf(), tsMerge, fsInputDesc);
    // use CombineHiveInputFormat for map-only merging
    cplan.setInputformat("org.apache.hadoop.hive.ql.io.CombineHiveInputFormat");
    // NOTE: we should gather stats in MR1 rather than MR2 at merge job since we don't
    // know if merge MR2 will be triggered at execution time
    ConditionalTask cndTsk =
        createCondTask(ctx.getConf(), ctx.getCurrTask(), dummyMv, cplan, fsInputDesc.getDirName());

    // keep the dynamic partition context in conditional task resolver context
    ConditionalResolverMergeFilesCtx mrCtx =
        (ConditionalResolverMergeFilesCtx) cndTsk.getResolverCtx();
    mrCtx.setDPCtx(fsInputDesc.getDynPartCtx());

    //
    // 3. add the moveTask as the children of the conditional task
    //
    LinkMoveTask(ctx, fsOutput, cndTsk);
  }
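
  // Hedged sketch (assumption): roughly what a ConditionalResolverMergeFiles-style decision could
  // look like at execution time. The resolver inspects the files MR1 left in the staging
  // directory and only schedules the map-only merge job when the files are small on average;
  // otherwise it goes straight to the move task. The helper name, the threshold variable and the
  // exact policy are illustrative, not the actual Hive resolver code.
  private Task<? extends Serializable> resolveMergeSketch(
      HiveConf conf,
      Path stagingDir,
      Task<? extends Serializable> moveTask,
      Task<? extends Serializable> mergeTask)
      throws IOException {
    FileSystem fs = stagingDir.getFileSystem(conf);
    long totalSize = 0;
    int numFiles = 0;
    for (FileStatus status : fs.listStatus(stagingDir)) {
      totalSize += status.getLen();
      numFiles++;
    }
    long avgSize = numFiles == 0 ? 0 : totalSize / numFiles;
    // hive.merge.smallfiles.avgsize (ConfVars.HIVEMERGEMAPFILESAVGSIZE) -- assumed threshold knob
    long avgSizeThreshold = conf.getLongVar(HiveConf.ConfVars.HIVEMERGEMAPFILESAVGSIZE);
    // merging only pays off when there is more than one file and the files are small on average
    return (numFiles > 1 && avgSize < avgSizeThreshold) ? mergeTask : moveTask;
  }
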
  /**
   * Create a Map-only merge job using CombineHiveInputFormat for all partitions, with the
   * following operators:
   *
   *          MR job J0:
   *          ...
   *          |
   *          v
   *          FileSinkOperator_1 (fsInput)
   *          |
   *          v
   *          Merge job J1:
   *          |
   *          v
   *          TableScan (using CombineHiveInputFormat) (tsMerge)
   *          |
   *          v
   *          FileSinkOperator (fsMerge)
   *
   * <p>Here the pathToPartitionInfo & pathToAlias will remain the same, which means the paths
   * do not contain the dynamic partitions (their parent). So after the dynamic partitions are
   * created (after the first job finishes and before the moveTask or ConditionalTask starts), we
   * need to change the pathToPartitionInfo & pathToAlias to include the dynamic partition
   * directories.
   *
   * @param fsInput The FileSink operator.
   * @param ctx The MR processing context.
   * @param finalName The final destination path the merge job should output.
   * @throws SemanticException
   */
  private void createMRWorkForMergingFiles(
      FileSinkOperator fsInput, GenMRProcContext ctx, String finalName) throws SemanticException {

    //
    // 1. create the operator tree
    //
    HiveConf conf = ctx.getParseCtx().getConf();
    FileSinkDesc fsInputDesc = fsInput.getConf();

    // Create a TableScan operator
    RowSchema inputRS = fsInput.getSchema();
    Operator<? extends OperatorDesc> tsMerge = OperatorFactory.get(TableScanDesc.class, inputRS);

    // Create a FileSink operator
    TableDesc ts = (TableDesc) fsInputDesc.getTableInfo().clone();
    FileSinkDesc fsOutputDesc =
        new FileSinkDesc(finalName, ts, conf.getBoolVar(ConfVars.COMPRESSRESULT));
    FileSinkOperator fsOutput =
        (FileSinkOperator) OperatorFactory.getAndMakeChild(fsOutputDesc, inputRS, tsMerge);

    // If the input FileSinkOperator is dynamic-partition enabled, the tsMerge input schema
    // needs to include the partition columns, and fsOutput should have
    // a DynamicPartitionCtx to indicate that it needs to be dynamically partitioned.
    DynamicPartitionCtx dpCtx = fsInputDesc.getDynPartCtx();
    if (dpCtx != null && dpCtx.getNumDPCols() > 0) {
      // adding DP ColumnInfo to the RowSchema signature
      ArrayList<ColumnInfo> signature = inputRS.getSignature();
      String tblAlias = fsInputDesc.getTableInfo().getTableName();
      LinkedHashMap<String, String> colMap = new LinkedHashMap<String, String>();
      StringBuilder partCols = new StringBuilder();
      for (String dpCol : dpCtx.getDPColNames()) {
        ColumnInfo colInfo =
            new ColumnInfo(
                dpCol,
                TypeInfoFactory.stringTypeInfo, // all partition column types should be string
                tblAlias,
                true); // partition column is virtual column
        signature.add(colInfo);
        colMap.put(dpCol, dpCol); // input and output have the same column name
        partCols.append(dpCol).append('/');
      }
      partCols.setLength(partCols.length() - 1); // remove the last '/'
      inputRS.setSignature(signature);

      // create another DynamicPartitionCtx, which has a different input-to-DP column mapping
      DynamicPartitionCtx dpCtx2 = new DynamicPartitionCtx(dpCtx);
      dpCtx2.setInputToDPCols(colMap);
      fsOutputDesc.setDynPartCtx(dpCtx2);

      // update the FileSinkOperator to include partition columns
      fsInputDesc
          .getTableInfo()
          .getProperties()
          .setProperty(
              org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS,
              partCols.toString()); // list of dynamic partition column names
    } else {
      // non-partitioned table
      fsInputDesc
          .getTableInfo()
          .getProperties()
          .remove(
              org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS);
    }

    //
    // 2. Constructing a conditional task consisting of a move task and a map reduce task
    //
    MoveWork dummyMv =
        new MoveWork(
            null,
            null,
            null,
            new LoadFileDesc(fsInputDesc.getFinalDirName(), finalName, true, null, null),
            false);
    MapredWork cplan;

    if (conf.getBoolVar(ConfVars.HIVEMERGERCFILEBLOCKLEVEL)
        && fsInputDesc.getTableInfo().getInputFileFormatClass().equals(RCFileInputFormat.class)) {

      // Check if InputFormatClass is valid
      String inputFormatClass = conf.getVar(ConfVars.HIVEMERGERCFILEINPUTFORMATBLOCKLEVEL);
      try {
        Class c = (Class<? extends InputFormat>) Class.forName(inputFormatClass);

        LOG.info("RCFile format- Using block level merge");
        cplan =
            createBlockMergeTask(
                fsInputDesc,
                finalName,
                dpCtx != null && dpCtx.getNumDPCols() > 0,
                RCFileMergeMapper.class,
                RCFileInputFormat.class,
                RCFileBlockMergeInputFormat.class);
      } catch (ClassNotFoundException e) {
        String msg = "Illegal input format class: " + inputFormatClass;
        throw new SemanticException(msg);
      }

    } else if (conf.getBoolVar(ConfVars.HIVEMERGEORCBLOCKLEVEL)
        && fsInputDesc.getTableInfo().getInputFileFormatClass().equals(OrcInputFormat.class)) {

      // Check if InputFormatClass is valid
      String inputFormatClass = conf.getVar(ConfVars.HIVEMERGEORCINPUTFORMATBLOCKLEVEL);
      try {
        Class c = (Class<? extends InputFormat>) Class.forName(inputFormatClass);

        LOG.info("ORCFile format- Using block level merge");
        cplan =
            createBlockMergeTask(
                fsInputDesc,
                finalName,
                dpCtx != null && dpCtx.getNumDPCols() > 0,
                OrcMergeMapper.class,
                OrcInputFormat.class,
                OrcBlockMergeInputFormat.class);
      } catch (ClassNotFoundException e) {
        String msg = "Illegal input format class: " + inputFormatClass;
        throw new SemanticException(msg);
      }

    } else {
      cplan = createMRWorkForMergingFiles(conf, tsMerge, fsInputDesc);
      // use CombineHiveInputFormat for map-only merging
    }
    cplan.setInputformat("org.apache.hadoop.hive.ql.io.CombineHiveInputFormat");
    // NOTE: we should gather stats in MR1 rather than MR2 at merge job since we don't
    // know if merge MR2 will be triggered at execution time
    ConditionalTask cndTsk =
        createCondTask(conf, ctx.getCurrTask(), dummyMv, cplan, fsInputDesc.getFinalDirName());

    // keep the dynamic partition context in conditional task resolver context
    ConditionalResolverMergeFilesCtx mrCtx =
        (ConditionalResolverMergeFilesCtx) cndTsk.getResolverCtx();
    mrCtx.setDPCtx(fsInputDesc.getDynPartCtx());
    mrCtx.setLbCtx(fsInputDesc.getLbCtx());

    //
    // 3. add the moveTask as the children of the conditional task
    //
    linkMoveTask(ctx, fsOutput, cndTsk);
  }
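
  // Hedged sketch (assumption): roughly how a createCondTask-style helper could wire the dummy
  // MoveWork and the merge plan into a ConditionalTask resolved by ConditionalResolverMergeFiles.
  // The resolver context carries the staging directory so the choice between "move only" and
  // "merge then move" can be made at run time. This is an illustrative sketch, not the actual
  // Hive implementation of createCondTask.
  private ConditionalTask createCondTaskSketch(
      HiveConf conf,
      Task<? extends Serializable> currTask,
      MoveWork dummyMoveWork,
      MapredWork mergeWork,
      String inputDir) {
    // the conditional work holds every alternative plan
    List<Serializable> listWorks = new ArrayList<Serializable>();
    listWorks.add(dummyMoveWork);
    listWorks.add(mergeWork);
    ConditionalWork condWork = new ConditionalWork(listWorks);

    ConditionalTask cndTsk = (ConditionalTask) TaskFactory.get(condWork, conf);
    List<Task<? extends Serializable>> listTasks = new ArrayList<Task<? extends Serializable>>();
    listTasks.add(TaskFactory.get(dummyMoveWork, conf));
    listTasks.add(TaskFactory.get(mergeWork, conf));
    cndTsk.setListTasks(listTasks);

    // the resolver inspects inputDir at execution time and picks move-only vs. merge + move
    cndTsk.setResolver(new ConditionalResolverMergeFiles());
    cndTsk.setResolverCtx(new ConditionalResolverMergeFilesCtx(listTasks, inputDir));

    // make the conditional task depend on the job that produced the staging directory
    currTask.addDependentTask(cndTsk);
    return cndTsk;
  }
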
    @Override
    public Object process(
        Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs)
        throws SemanticException {

      // introduce RS and EX before FS. If the operator tree already contains
      // RS then ReduceSinkDeDuplication optimization should merge them
      FileSinkOperator fsOp = (FileSinkOperator) nd;

      LOG.info("Sorted dynamic partitioning optimization kicked in..");

      // if not dynamic partitioning then bail out
      if (fsOp.getConf().getDynPartCtx() == null) {
        LOG.debug(
            "Bailing out of sort dynamic partition optimization as dynamic partitioning context is null");
        return null;
      }

      // if list bucketing then bail out
      ListBucketingCtx lbCtx = fsOp.getConf().getLbCtx();
      if (lbCtx != null
          && !lbCtx.getSkewedColNames().isEmpty()
          && !lbCtx.getSkewedColValues().isEmpty()) {
        LOG.debug(
            "Bailing out of sort dynamic partition optimization as list bucketing is enabled");
        return null;
      }

      Table destTable = fsOp.getConf().getTable();
      if (destTable == null) {
        LOG.debug(
            "Bailing out of sort dynamic partition optimization as destination table is null");
        return null;
      }

      // unlink connection between FS and its parent
      Operator<? extends OperatorDesc> fsParent = fsOp.getParentOperators().get(0);
      // if all dp columns got constant folded then disable this optimization
      if (allStaticPartitions(fsParent, fsOp.getConf().getDynPartCtx())) {
        LOG.debug(
            "Bailing out of sorted dynamic partition optimizer as all dynamic partition"
                + " columns got constant folded (static partitioning)");
        return null;
      }

      // if RS is inserted by enforce bucketing or sorting, we need to remove it
      // since ReduceSinkDeDuplication will not merge them to single RS.
      // RS inserted by enforce bucketing/sorting will have bucketing column in
      // reduce sink key whereas RS inserted by this optimization will have
      // partition columns followed by bucket number followed by sort columns in
      // the reduce sink key. Since both key columns are not prefix subset
      // ReduceSinkDeDuplication will not merge them together resulting in 2 MR jobs.
      // To avoid that we will remove the RS (and EX) inserted by enforce bucketing/sorting.
      if (!removeRSInsertedByEnforceBucketing(fsOp)) {
        LOG.debug(
            "Bailing out of sort dynamic partition optimization as some partition columns "
                + "got constant folded.");
        return null;
      }

      // unlink connection between FS and its parent
      fsParent = fsOp.getParentOperators().get(0);
      fsParent.getChildOperators().clear();

      DynamicPartitionCtx dpCtx = fsOp.getConf().getDynPartCtx();
      int numBuckets = destTable.getNumBuckets();

      // if enforce bucketing/sorting is disabled numBuckets will not be set.
      // set the number of buckets here to ensure creation of empty buckets
      dpCtx.setNumBuckets(numBuckets);

      // Get the positions for partition, bucket and sort columns
      List<Integer> bucketPositions =
          getBucketPositions(destTable.getBucketCols(), destTable.getCols());
      List<Integer> sortPositions = null;
      List<Integer> sortOrder = null;
      ArrayList<ExprNodeDesc> bucketColumns;
      if (fsOp.getConf().getWriteType() == AcidUtils.Operation.UPDATE
          || fsOp.getConf().getWriteType() == AcidUtils.Operation.DELETE) {
        // When doing updates and deletes we always want to sort on the rowid because the ACID
        // reader will expect this sort order when doing reads.  So
        // ignore whatever comes from the table and enforce this sort order instead.
        sortPositions = Arrays.asList(0);
        sortOrder = Arrays.asList(1); // 1 means ascending; an enum in the thrift definition would be clearer
        bucketColumns =
            new ArrayList<>(); // Bucketing column is already present in ROW__ID, which is specially
                               // handled in ReduceSink
      } else {
        if (!destTable.getSortCols().isEmpty()) {
          // Sort columns specified by table
          sortPositions = getSortPositions(destTable.getSortCols(), destTable.getCols());
          sortOrder = getSortOrders(destTable.getSortCols(), destTable.getCols());
        } else {
          // Infer sort columns from operator tree
          sortPositions = Lists.newArrayList();
          sortOrder = Lists.newArrayList();
          inferSortPositions(fsParent, sortPositions, sortOrder);
        }
        List<ColumnInfo> colInfos = fsParent.getSchema().getSignature();
        bucketColumns = getPositionsToExprNodes(bucketPositions, colInfos);
      }
      List<Integer> sortNullOrder = new ArrayList<Integer>();
      for (int order : sortOrder) {
        sortNullOrder.add(order == 1 ? 0 : 1); // for asc, nulls first; for desc, nulls last
      }
      LOG.debug("Got sort order");
      for (int i : sortPositions) LOG.debug("sort position " + i);
      for (int i : sortOrder) LOG.debug("sort order " + i);
      for (int i : sortNullOrder) LOG.debug("sort null order " + i);
      List<Integer> partitionPositions = getPartitionPositions(dpCtx, fsParent.getSchema());

      // update file sink descriptor
      fsOp.getConf().setMultiFileSpray(false);
      fsOp.getConf().setNumFiles(1);
      fsOp.getConf().setTotalFiles(1);

      ArrayList<ColumnInfo> parentCols = Lists.newArrayList(fsParent.getSchema().getSignature());
      ArrayList<ExprNodeDesc> allRSCols = Lists.newArrayList();
      for (ColumnInfo ci : parentCols) {
        allRSCols.add(new ExprNodeColumnDesc(ci));
      }

      // Create ReduceSink operator
      ReduceSinkOperator rsOp =
          getReduceSinkOp(
              partitionPositions,
              sortPositions,
              sortOrder,
              sortNullOrder,
              allRSCols,
              bucketColumns,
              numBuckets,
              fsParent,
              fsOp.getConf().getWriteType());

      List<ExprNodeDesc> descs = new ArrayList<ExprNodeDesc>(allRSCols.size());
      List<String> colNames = new ArrayList<String>();
      String colName;
      for (int i = 0; i < allRSCols.size(); i++) {
        ExprNodeDesc col = allRSCols.get(i);
        colName = col.getExprString();
        colNames.add(colName);
        if (partitionPositions.contains(i) || sortPositions.contains(i)) {
          descs.add(
              new ExprNodeColumnDesc(
                  col.getTypeInfo(), ReduceField.KEY.toString() + "." + colName, null, false));
        } else {
          descs.add(
              new ExprNodeColumnDesc(
                  col.getTypeInfo(), ReduceField.VALUE.toString() + "." + colName, null, false));
        }
      }
      RowSchema selRS = new RowSchema(fsParent.getSchema());
      if (!bucketColumns.isEmpty()
          || fsOp.getConf().getWriteType() == Operation.DELETE
          || fsOp.getConf().getWriteType() == Operation.UPDATE) {
        descs.add(
            new ExprNodeColumnDesc(
                TypeInfoFactory.stringTypeInfo,
                ReduceField.KEY.toString() + ".'" + BUCKET_NUMBER_COL_NAME + "'",
                null,
                false));
        colNames.add("'" + BUCKET_NUMBER_COL_NAME + "'");
        ColumnInfo ci =
            new ColumnInfo(
                BUCKET_NUMBER_COL_NAME,
                TypeInfoFactory.stringTypeInfo,
                selRS.getSignature().get(0).getTabAlias(),
                true,
                true);
        selRS.getSignature().add(ci);
        fsParent.getSchema().getSignature().add(ci);
      }
      // Create SelectDesc
      SelectDesc selConf = new SelectDesc(descs, colNames);

      // Create Select Operator
      SelectOperator selOp = (SelectOperator) OperatorFactory.getAndMakeChild(selConf, selRS, rsOp);

      // link SEL to FS
      fsOp.getParentOperators().clear();
      fsOp.getParentOperators().add(selOp);
      selOp.getChildOperators().add(fsOp);

      // Set if partition sorted or partition bucket sorted
      fsOp.getConf().setDpSortState(FileSinkDesc.DPSortState.PARTITION_SORTED);
      if (bucketColumns.size() > 0
          || fsOp.getConf().getWriteType() == Operation.DELETE
          || fsOp.getConf().getWriteType() == Operation.UPDATE) {
        fsOp.getConf().setDpSortState(FileSinkDesc.DPSortState.PARTITION_BUCKET_SORTED);
      }

      // update partition column info in FS descriptor
      fsOp.getConf().setPartitionCols(rsOp.getConf().getPartitionCols());

      LOG.info(
          "Inserted "
              + rsOp.getOperatorId()
              + " and "
              + selOp.getOperatorId()
              + " as parent of "
              + fsOp.getOperatorId()
              + " and child of "
              + fsParent.getOperatorId());

      parseCtx.setReduceSinkAddedBySortedDynPartition(true);
      return null;
    }
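
    // Hedged sketch (assumption, simplified from what a getReduceSinkOp-style helper would
    // build): the reduce sink key is laid out as dynamic partition columns first, then (for
    // bucketed targets) the synthetic bucket number, then the sort columns, so that rows for the
    // same partition/bucket reach the FileSinkOperator contiguously and already sorted. The
    // helper name is illustrative; bucket-number handling is elided here.
    private ArrayList<ExprNodeDesc> buildKeyColsSketch(
        List<Integer> partitionPositions,
        List<Integer> sortPositions,
        List<ExprNodeDesc> allCols) {
      ArrayList<ExprNodeDesc> keyCols = new ArrayList<ExprNodeDesc>();
      for (int pos : partitionPositions) {
        keyCols.add(allCols.get(pos).clone()); // partition columns lead the key
      }
      // a synthetic bucket-number column would be appended here for bucketed targets
      for (int pos : sortPositions) {
        keyCols.add(allCols.get(pos).clone()); // sort columns follow
      }
      return keyCols;
    }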