/**
   * Rewires the plan for a partial scan analyze command so that a partial-scan task runs first
   * and the stats task runs as its dependent.
   *
   * <p>The current task is removed from the root tasks, the newly created partial-scan task is
   * added in its place, and the stats task ends up with the partial-scan task as its only parent.
   *
   * @param op table scan operator; its conf supplies the stats aggregation prefix
   * @param ctx MapReduce processing context whose root tasks are updated
   * @param parseCtx parse context; its configuration is used to create and initialize the task
   * @param currTask task that is dropped from the root tasks
   * @param statsWork stats work that gets flagged as part of a partial scan analyze command
   * @param statsTask stats task that is made dependent on the partial-scan task
   * @throws SemanticException declared for failures propagated from the calls made here; this
   *     method does not throw it directly
   */
  private void handlePartialScanCommand(
      TableScanOperator op,
      GenMRProcContext ctx,
      ParseContext parseCtx,
      Task<? extends Serializable> currTask,
      StatsWork statsWork,
      Task<StatsWork> statsTask)
      throws SemanticException {
    // The helper receives the prefix in a mutable buffer; the final key is read back from it.
    StringBuffer keyBuffer = new StringBuffer(op.getConf().getStatsAggPrefix());
    List<Path> scanPaths = GenMapRedUtils.getInputPathsForPartialScan(op, keyBuffer);
    String aggKey = keyBuffer.toString();

    // Work description for the partial scan itself.
    PartialScanWork partialScanWork = new PartialScanWork(scanPaths);
    partialScanWork.setMapperCannotSpanPartns(true);
    partialScanWork.setAggKey(aggKey);

    // Let the stats work know it belongs to a partial scan analyze command.
    statsWork.setPartialScanAnalyzeCommand(true);

    // Create and initialize the task that executes the partial scan.
    DriverContext freshDriverContext = new DriverContext();
    Task<PartialScanWork> partialScanTask = TaskFactory.get(partialScanWork, parseCtx.getConf());
    partialScanTask.initialize(parseCtx.getConf(), null, freshDriverContext);
    partialScanTask.setWork(partialScanWork);

    // The partial-scan task takes over the root position from the current task.
    ctx.getRootTasks().remove(currTask);
    ctx.getRootTasks().add(partialScanTask);
    partialScanTask.addDependentTask(statsTask);

    // Replace whatever parents the stats task had with the partial-scan task alone.
    List<Task<? extends Serializable>> soleParent = new ArrayList<Task<? extends Serializable>>();
    soleParent.add(partialScanTask);
    statsTask.setParentTasks(soleParent);
  }
// Example no. 2
// 0
 /**
  * Registers {@code dependent} as a child of this task and, when the dependency is new, makes
  * sure this task is listed among the dependent's parents. Missing child or parent lists are
  * created on demand.
  *
  * @param dependent the task that should depend on this one
  * @return true if the dependency was newly added, false if {@code dependent} was already a child
  */
 public boolean addDependentTask(Task<? extends Serializable> dependent) {
   if (getChildTasks() == null) {
     setChildTasks(new ArrayList<Task<? extends Serializable>>());
   }

   // Already a child: leave both sides of the relationship untouched.
   if (getChildTasks().contains(dependent)) {
     return false;
   }
   getChildTasks().add(dependent);

   // Mirror the new link on the dependent's side.
   if (dependent.getParentTasks() == null) {
     dependent.setParentTasks(new ArrayList<Task<? extends Serializable>>());
   }
   if (!dependent.getParentTasks().contains(this)) {
     dependent.getParentTasks().add(this);
   }
   return true;
 }
// Example no. 3
// 0
  /**
   * Handles a table scan operator while generating a Tez plan for an analyze command.
   *
   * <p>Nothing is done unless the query is an analyze command. For ORC input a stats task that
   * needs no job replaces the current task among the root tasks. For any other input format a
   * stats task is attached as a dependent of the current task, the noscan and partialscan
   * variants are applied, and a map work with stats gathering enabled is created.
   *
   * @param nd the node being visited; cast to {@code TableScanOperator}
   * @param stack the operator stack supplied by the walker (not used here)
   * @param procContext the processing context; cast to {@code GenTezProcContext}
   * @param nodeOutputs outputs of previously processed nodes (not used here)
   * @return {@code true} when an analyze command was handled, {@code null} otherwise
   * @throws SemanticException declared for failures propagated from the calls made here
   */
  @SuppressWarnings("unchecked")
  @Override
  public Object process(
      Node nd, Stack<Node> stack, NodeProcessorCtx procContext, Object... nodeOutputs)
      throws SemanticException {
    GenTezProcContext context = (GenTezProcContext) procContext;
    TableScanOperator tableScan = (TableScanOperator) nd;
    ParseContext parseContext = context.parseContext;
    Class<? extends InputFormat> inputFormat =
        tableScan.getConf().getTableMetadata().getInputFormatClass();

    // Only analyze commands are handled by this processor.
    if (!parseContext.getQueryProperties().isAnalyzeCommand()) {
      return null;
    }

    assert tableScan.getChildOperators() == null || tableScan.getChildOperators().isEmpty();

    // The scan must be registered as a top operator under some alias.
    String matchedAlias = null;
    for (String candidate : parseContext.getTopOps().keySet()) {
      if (parseContext.getTopOps().get(candidate) == tableScan) {
        matchedAlias = candidate;
      }
    }
    assert matchedAlias != null;

    TezWork tezWork = context.currentTask.getWork();

    if (inputFormat.equals(OrcInputFormat.class)) {
      // For ORC, all the following statements are the same
      // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS
      // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan;
      // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
      // No Tez job sits above this task.
      StatsNoJobWork noJobWork =
          new StatsNoJobWork(tableScan.getConf().getTableMetadata().getTableSpec());
      noJobWork.setStatsReliable(
          parseContext.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));

      // When partitions were specified, hand the pruned partition list to the work.
      Set<Partition> orcPartitions = GenMapRedUtils.getConfirmedPartitionsForScan(tableScan);
      if (!orcPartitions.isEmpty()) {
        Table orcTable = tableScan.getConf().getTableMetadata();
        List<String> orcPartCols = GenMapRedUtils.getPartitionColumns(tableScan);
        noJobWork.setPrunedPartitionList(
            new PrunedPartitionList(orcTable, orcPartitions, orcPartCols, false));
      }

      // The stats-only task becomes a root task in place of the current task.
      Task<StatsNoJobWork> noJobTask = TaskFactory.get(noJobWork, parseContext.getConf());
      noJobTask.setParentTasks(null);
      context.rootTasks.remove(context.currentTask);
      context.rootTasks.add(noJobTask);
      return true;
    }

    // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS;
    // Plan: a simple TezTask (just the TableScanOperator) followed by a StatsTask.
    StatsWork statsWork = new StatsWork(tableScan.getConf().getTableMetadata().getTableSpec());
    statsWork.setAggKey(tableScan.getConf().getStatsAggPrefix());
    statsWork.setStatsTmpDir(tableScan.getConf().getTmpStatsDir());
    statsWork.setSourceTask(context.currentTask);
    statsWork.setStatsReliable(
        parseContext.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
    Task<StatsWork> statsTask = TaskFactory.get(statsWork, parseContext.getConf());
    context.currentTask.addDependentTask(statsTask);

    // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
    // Plan: the StatsTask alone, promoted to a root task.
    if (parseContext.getQueryProperties().isNoScanAnalyzeCommand()) {
      statsTask.setParentTasks(null);
      statsWork.setNoScanAnalyzeCommand(true);
      context.rootTasks.remove(context.currentTask);
      context.rootTasks.add(statsTask);
    }

    // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan;
    if (parseContext.getQueryProperties().isPartialScanAnalyzeCommand()) {
      handlePartialScanCommand(tableScan, parseContext, statsWork, context, statsTask);
    }

    // NOTE: the new partition predicate pushdown API should be used here to obtain the pruned
    // partition list that is passed on as the last parameter.
    Set<Partition> scanPartitions = GenMapRedUtils.getConfirmedPartitionsForScan(tableScan);
    PrunedPartitionList prunedPartitions =
        scanPartitions.isEmpty()
            ? null
            : new PrunedPartitionList(
                tableScan.getConf().getTableMetadata(),
                scanPartitions,
                GenMapRedUtils.getPartitionColumns(tableScan),
                false);

    MapWork mapWork = utils.createMapWork(context, tableScan, tezWork, prunedPartitions);
    mapWork.setGatheringStats(true);

    return true;
  }
  /**
   * Table scan encountered while generating a MapReduce plan.
   *
   * <p>A new MapReduce task is created and recorded in the context as the current task, with the
   * scan as the current top operator. The alias under which the scan is registered as a top
   * operator is then looked up and stored. For analyze commands the stats-related tasks are set
   * up as well: a stats task that needs no job for ORC input, otherwise a stats task dependent
   * on the MapReduce task, with the noscan and partialscan variants applied.
   *
   * @param nd the node being visited; cast to {@code TableScanOperator}
   * @param stack the operator stack supplied by the walker (not used here)
   * @param opProcCtx the processing context; cast to {@code GenMRProcContext}
   * @param nodeOutputs outputs of previously processed nodes (not used here)
   * @return {@code true} once the alias of the scan has been found and processed; {@code null}
   *     if no top operator matches, which is asserted never to happen
   * @throws SemanticException declared for failures propagated from the calls made here
   */
  public Object process(
      Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs)
      throws SemanticException {
    TableScanOperator op = (TableScanOperator) nd;
    GenMRProcContext ctx = (GenMRProcContext) opProcCtx;
    ParseContext parseCtx = ctx.getParseCtx();
    Class<? extends InputFormat> inputFormat =
        op.getConf().getTableMetadata().getInputFormatClass();
    Map<Operator<? extends OperatorDesc>, GenMapRedCtx> opToCtx = ctx.getMapCurrCtx();

    // Start from a dummy MapReduce task with this scan as its top operator.
    MapredWork mrWork = GenMapRedUtils.getMapRedWork(parseCtx);
    MapRedTask mrTask = (MapRedTask) TaskFactory.get(mrWork, parseCtx.getConf());
    Operator<? extends OperatorDesc> topOp = op;
    ctx.setCurrTask(mrTask);
    ctx.setCurrTopOp(topOp);

    for (String alias : parseCtx.getTopOps().keySet()) {
      Operator<? extends OperatorDesc> candidate = parseCtx.getTopOps().get(alias);
      if (candidate != op) {
        continue;
      }

      ctx.setCurrAliasId(alias);
      opToCtx.put(op, new GenMapRedCtx(mrTask, alias));

      // Ordinary queries need nothing beyond the bookkeeping above.
      if (!parseCtx.getQueryProperties().isAnalyzeCommand()) {
        return true;
      }

      boolean isPartialScan = parseCtx.getQueryProperties().isPartialScanAnalyzeCommand();
      boolean isNoScan = parseCtx.getQueryProperties().isNoScanAnalyzeCommand();

      if (inputFormat.equals(OrcInputFormat.class)) {
        // For ORC, all the following statements are the same
        // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS
        // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan;
        // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
        // No MR or Tez job sits above this task.
        StatsNoJobWork noJobWork =
            new StatsNoJobWork(op.getConf().getTableMetadata().getTableSpec());
        noJobWork.setStatsReliable(
            parseCtx.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));

        // When partitions were specified, hand the pruned partition list to the work.
        Set<Partition> orcPartitions = GenMapRedUtils.getConfirmedPartitionsForScan(op);
        if (!orcPartitions.isEmpty()) {
          Table orcTable = op.getConf().getTableMetadata();
          List<String> orcPartCols = GenMapRedUtils.getPartitionColumns(op);
          noJobWork.setPrunedPartitionList(
              new PrunedPartitionList(orcTable, orcPartitions, orcPartCols, false));
        }

        // The stats-only task becomes the current task and the sole root task.
        Task<StatsNoJobWork> noJobTask = TaskFactory.get(noJobWork, parseCtx.getConf());
        ctx.setCurrTask(noJobTask);
        ctx.setCurrTopOp(null);
        ctx.getRootTasks().clear();
        ctx.getRootTasks().add(noJobTask);
        return true;
      }

      // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS;
      // Plan: a simple MapRedTask (just the TableScanOperator) followed by a StatsTask.
      StatsWork statsWork = new StatsWork(op.getConf().getTableMetadata().getTableSpec());
      statsWork.setAggKey(op.getConf().getStatsAggPrefix());
      statsWork.setSourceTask(mrTask);
      statsWork.setStatsReliable(
          parseCtx.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
      Task<StatsWork> statsTask = TaskFactory.get(statsWork, parseCtx.getConf());
      mrTask.addDependentTask(statsTask);
      if (!ctx.getRootTasks().contains(mrTask)) {
        ctx.getRootTasks().add(mrTask);
      }

      // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
      // Plan: the StatsTask alone, promoted to a root task.
      if (isNoScan) {
        statsTask.setParentTasks(null);
        statsWork.setNoScanAnalyzeCommand(true);
        ctx.getRootTasks().remove(mrTask);
        ctx.getRootTasks().add(statsTask);
      }

      // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan;
      if (isPartialScan) {
        handlePartialScanCommand(op, ctx, parseCtx, mrTask, statsWork, statsTask);
      }

      mrWork.getMapWork().setGatheringStats(true);
      if (mrWork.getReduceWork() != null) {
        mrWork.getReduceWork().setGatheringStats(true);
      }

      // NOTE: the new partition predicate pushdown API should be used here to obtain the pruned
      // partition list that is passed to setTaskPlan as the last parameter.
      Set<Partition> scanPartitions = GenMapRedUtils.getConfirmedPartitionsForScan(op);
      if (scanPartitions.isEmpty()) {
        // Non-partitioned table.
        GenMapRedUtils.setTaskPlan(alias, topOp, mrTask, false, ctx);
      } else {
        Table scanTable = op.getConf().getTableMetadata();
        List<String> scanPartCols = GenMapRedUtils.getPartitionColumns(op);
        PrunedPartitionList prunedPartitions =
            new PrunedPartitionList(scanTable, scanPartitions, scanPartCols, false);
        GenMapRedUtils.setTaskPlan(alias, topOp, mrTask, false, ctx, prunedPartitions);
      }

      return true;
    }

    // Every table scan is expected to be registered as a top operator.
    assert false;
    return null;
  }