/**
 * Handles the partial scan command.
 *
 * <p>It is composed of a PartialScanTask followed by a StatsTask.
 */
private void handlePartialScanCommand(
    TableScanOperator tableScan,
    ParseContext parseContext,
    StatsWork statsWork,
    GenTezProcContext context,
    Task<StatsWork> statsTask)
    throws SemanticException {
  String aggregationKey = tableScan.getConf().getStatsAggPrefix();
  StringBuilder aggregationKeyBuffer = new StringBuilder(aggregationKey);
  List<Path> inputPaths =
      GenMapRedUtils.getInputPathsForPartialScan(tableScan, aggregationKeyBuffer);
  aggregationKey = aggregationKeyBuffer.toString();

  // scan work
  PartialScanWork scanWork = new PartialScanWork(inputPaths);
  scanWork.setMapperCannotSpanPartns(true);
  scanWork.setAggKey(aggregationKey);
  scanWork.setStatsTmpDir(tableScan.getConf().getTmpStatsDir(), parseContext.getConf());

  // stats work
  statsWork.setPartialScanAnalyzeCommand(true);

  // partial scan task
  DriverContext driverCxt = new DriverContext();
  Task<PartialScanWork> partialScanTask = TaskFactory.get(scanWork, parseContext.getConf());
  partialScanTask.initialize(parseContext.getConf(), null, driverCxt);
  partialScanTask.setWork(scanWork);
  statsWork.setSourceTask(partialScanTask);

  // task dependency: the partial scan task replaces the current task as a root task,
  // and the stats task runs after it
  context.rootTasks.remove(context.currentTask);
  context.rootTasks.add(partialScanTask);
  partialScanTask.addDependentTask(statsTask);
}
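/**
 * Processes a TableScanOperator that is the root of an ANALYZE TABLE ... COMPUTE STATISTICS
 * command compiled for Tez.
 *
 * <p>For ORC input the statistics can be gathered without running a job, so the current task is
 * replaced by a StatsNoJobWork task. For other formats a StatsTask is chained after the current
 * TezTask, and the noscan / partialscan variants adjust the root tasks accordingly.
 */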
@SuppressWarnings("unchecked")
@Override
public Object process(
    Node nd, Stack<Node> stack, NodeProcessorCtx procContext, Object... nodeOutputs)
    throws SemanticException {
  GenTezProcContext context = (GenTezProcContext) procContext;
  TableScanOperator tableScan = (TableScanOperator) nd;
  ParseContext parseContext = context.parseContext;
  Class<? extends InputFormat> inputFormat =
      tableScan.getConf().getTableMetadata().getInputFormatClass();

  if (parseContext.getQueryProperties().isAnalyzeCommand()) {
    assert tableScan.getChildOperators() == null || tableScan.getChildOperators().size() == 0;

    String alias = null;
    for (String a : parseContext.getTopOps().keySet()) {
      if (tableScan == parseContext.getTopOps().get(a)) {
        alias = a;
      }
    }
    assert alias != null;

    TezWork tezWork = context.currentTask.getWork();
    if (inputFormat.equals(OrcInputFormat.class)) {
      // For ORC, all the following statements are the same:
      //   ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS;
      //   ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan;
      //   ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
      // There will not be any Tez job above this task.
      StatsNoJobWork snjWork =
          new StatsNoJobWork(tableScan.getConf().getTableMetadata().getTableSpec());
      snjWork.setStatsReliable(
          parseContext.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));

      // If a partition is specified, get the pruned partition list
      Set<Partition> confirmedParts = GenMapRedUtils.getConfirmedPartitionsForScan(tableScan);
      if (confirmedParts.size() > 0) {
        Table source = tableScan.getConf().getTableMetadata();
        List<String> partCols = GenMapRedUtils.getPartitionColumns(tableScan);
        PrunedPartitionList partList =
            new PrunedPartitionList(source, confirmedParts, partCols, false);
        snjWork.setPrunedPartitionList(partList);
      }

      Task<StatsNoJobWork> snjTask = TaskFactory.get(snjWork, parseContext.getConf());
      snjTask.setParentTasks(null);
      context.rootTasks.remove(context.currentTask);
      context.rootTasks.add(snjTask);
      return true;
    } else {
      // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS;
      // The plan consists of a simple TezTask followed by a StatsTask.
      // The Tez task is just a simple TableScanOperator.
      StatsWork statsWork = new StatsWork(tableScan.getConf().getTableMetadata().getTableSpec());
      statsWork.setAggKey(tableScan.getConf().getStatsAggPrefix());
      statsWork.setStatsTmpDir(tableScan.getConf().getTmpStatsDir());
      statsWork.setSourceTask(context.currentTask);
      statsWork.setStatsReliable(
          parseContext.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
      Task<StatsWork> statsTask = TaskFactory.get(statsWork, parseContext.getConf());
      context.currentTask.addDependentTask(statsTask);

      // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
      // The plan consists of a StatsTask only.
      if (parseContext.getQueryProperties().isNoScanAnalyzeCommand()) {
        statsTask.setParentTasks(null);
        statsWork.setNoScanAnalyzeCommand(true);
        context.rootTasks.remove(context.currentTask);
        context.rootTasks.add(statsTask);
      }

      // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan;
      if (parseContext.getQueryProperties().isPartialScanAnalyzeCommand()) {
        handlePartialScanCommand(tableScan, parseContext, statsWork, context, statsTask);
      }

      // NOTE: here we should use the new partition predicate pushdown API to get a pruned
      // partition list, and pass it to createMapWork as the last parameter
      Set<Partition> confirmedPartns = GenMapRedUtils.getConfirmedPartitionsForScan(tableScan);
      PrunedPartitionList partitions = null;
      if (confirmedPartns.size() > 0) {
        Table source = tableScan.getConf().getTableMetadata();
        List<String> partCols = GenMapRedUtils.getPartitionColumns(tableScan);
        partitions = new PrunedPartitionList(source, confirmedPartns, partCols, false);
      }

      MapWork w = utils.createMapWork(context, tableScan, tezWork, partitions);
      w.setGatheringStats(true);
      return true;
    }
  }

  return null;
}
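// The processor below is the MapReduce counterpart of the Tez logic above: it handles the
// TableScanOperator of an ANALYZE command while generating an MR plan, using GenMRProcContext
// and MapRedTask in place of GenTezProcContext and TezTask.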
/**
 * Table Scan encountered.
 *
 * @param nd the table scan operator encountered
 * @param opProcCtx context
 */
public Object process(
    Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs)
    throws SemanticException {
  TableScanOperator op = (TableScanOperator) nd;
  GenMRProcContext ctx = (GenMRProcContext) opProcCtx;
  ParseContext parseCtx = ctx.getParseCtx();
  Class<? extends InputFormat> inputFormat =
      op.getConf().getTableMetadata().getInputFormatClass();
  Map<Operator<? extends OperatorDesc>, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx();

  // create a dummy MapReduce task
  MapredWork currWork = GenMapRedUtils.getMapRedWork(parseCtx);
  MapRedTask currTask = (MapRedTask) TaskFactory.get(currWork, parseCtx.getConf());
  Operator<? extends OperatorDesc> currTopOp = op;
  ctx.setCurrTask(currTask);
  ctx.setCurrTopOp(currTopOp);

  for (String alias : parseCtx.getTopOps().keySet()) {
    Operator<? extends OperatorDesc> currOp = parseCtx.getTopOps().get(alias);
    if (currOp == op) {
      String currAliasId = alias;
      ctx.setCurrAliasId(currAliasId);
      mapCurrCtx.put(op, new GenMapRedCtx(currTask, currAliasId));

      if (parseCtx.getQueryProperties().isAnalyzeCommand()) {
        boolean partialScan = parseCtx.getQueryProperties().isPartialScanAnalyzeCommand();
        boolean noScan = parseCtx.getQueryProperties().isNoScanAnalyzeCommand();

        if (inputFormat.equals(OrcInputFormat.class)) {
          // For ORC, all the following statements are the same:
          //   ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS;
          //   ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan;
          //   ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
          // There will not be any MR or Tez job above this task.
          StatsNoJobWork snjWork =
              new StatsNoJobWork(op.getConf().getTableMetadata().getTableSpec());
          snjWork.setStatsReliable(
              parseCtx.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));

          // If a partition is specified, get the pruned partition list
          Set<Partition> confirmedParts = GenMapRedUtils.getConfirmedPartitionsForScan(op);
          if (confirmedParts.size() > 0) {
            Table source = op.getConf().getTableMetadata();
            List<String> partCols = GenMapRedUtils.getPartitionColumns(op);
            PrunedPartitionList partList =
                new PrunedPartitionList(source, confirmedParts, partCols, false);
            snjWork.setPrunedPartitionList(partList);
          }

          Task<StatsNoJobWork> snjTask = TaskFactory.get(snjWork, parseCtx.getConf());
          ctx.setCurrTask(snjTask);
          ctx.setCurrTopOp(null);
          ctx.getRootTasks().clear();
          ctx.getRootTasks().add(snjTask);
        } else {
          // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS;
          // The plan consists of a simple MapRedTask followed by a StatsTask.
          // The MR task is just a simple TableScanOperator.
          StatsWork statsWork = new StatsWork(op.getConf().getTableMetadata().getTableSpec());
          statsWork.setAggKey(op.getConf().getStatsAggPrefix());
          statsWork.setSourceTask(currTask);
          statsWork.setStatsReliable(
              parseCtx.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
          Task<StatsWork> statsTask = TaskFactory.get(statsWork, parseCtx.getConf());
          currTask.addDependentTask(statsTask);
          if (!ctx.getRootTasks().contains(currTask)) {
            ctx.getRootTasks().add(currTask);
          }

          // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
          // The plan consists of a StatsTask only.
          if (noScan) {
            statsTask.setParentTasks(null);
            statsWork.setNoScanAnalyzeCommand(true);
            ctx.getRootTasks().remove(currTask);
            ctx.getRootTasks().add(statsTask);
          }

          // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan;
          if (partialScan) {
            handlePartialScanCommand(op, ctx, parseCtx, currTask, statsWork, statsTask);
          }

          currWork.getMapWork().setGatheringStats(true);
          if (currWork.getReduceWork() != null) {
            currWork.getReduceWork().setGatheringStats(true);
          }

          // NOTE: here we should use the new partition predicate pushdown API to get a pruned
          // partition list, and pass it to setTaskPlan as the last parameter
          Set<Partition> confirmedPartns = GenMapRedUtils.getConfirmedPartitionsForScan(op);
          if (confirmedPartns.size() > 0) {
            Table source = op.getConf().getTableMetadata();
            List<String> partCols = GenMapRedUtils.getPartitionColumns(op);
            PrunedPartitionList partList =
                new PrunedPartitionList(source, confirmedPartns, partCols, false);
            GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, currTask, false, ctx, partList);
          } else {
            // non-partitioned table
            GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, currTask, false, ctx);
          }
        }
      }
      return true;
    }
  }

  assert false;
  return null;
}