/** * Add the StatsTask as a dependent task of the MoveTask because StatsTask will change the * Table/Partition metadata. For atomicity, we should not change it before the data is actually * there done by MoveTask. * * @param nd the FileSinkOperator whose results are taken care of by the MoveTask. * @param mvTask The MoveTask that moves the FileSinkOperator's results. * @param currTask The MapRedTask that the FileSinkOperator belongs to. * @param hconf HiveConf */ private void addStatsTask( FileSinkOperator nd, MoveTask mvTask, Task<? extends Serializable> currTask, HiveConf hconf) { MoveWork mvWork = ((MoveTask) mvTask).getWork(); StatsWork statsWork = null; if (mvWork.getLoadTableWork() != null) { statsWork = new StatsWork(mvWork.getLoadTableWork()); } else if (mvWork.getLoadFileWork() != null) { statsWork = new StatsWork(mvWork.getLoadFileWork()); } assert statsWork != null : "Error when genereting StatsTask"; statsWork.setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE)); MapredWork mrWork = (MapredWork) currTask.getWork(); // AggKey in StatsWork is used for stats aggregation while StatsAggPrefix // in FileSinkDesc is used for stats publishing. They should be consistent. statsWork.setAggKey(((FileSinkOperator) nd).getConf().getStatsAggPrefix()); Task<? extends Serializable> statsTask = TaskFactory.get(statsWork, hconf); // mark the MapredWork and FileSinkOperator for gathering stats nd.getConf().setGatherStats(true); mrWork.setGatheringStats(true); nd.getConf().setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE)); nd.getConf() .setMaxStatsKeyPrefixLength(hconf.getIntVar(ConfVars.HIVE_STATS_KEY_PREFIX_MAX_LENGTH)); // mrWork.addDestinationTable(nd.getConf().getTableInfo().getTableName()); // subscribe feeds from the MoveTask so that MoveTask can forward the list // of dynamic partition list to the StatsTask mvTask.addDependentTask(statsTask); statsTask.subscribeFeed(mvTask); }
private Task<? extends Serializable> findMoveTask( List<Task<? extends Serializable>> mvTasks, FileSinkOperator fsOp) { // find the move task for (Task<? extends Serializable> mvTsk : mvTasks) { MoveWork mvWork = (MoveWork) mvTsk.getWork(); String srcDir = null; if (mvWork.getLoadFileWork() != null) { srcDir = mvWork.getLoadFileWork().getSourceDir(); } else if (mvWork.getLoadTableWork() != null) { srcDir = mvWork.getLoadTableWork().getSourceDir(); } if ((srcDir != null) && (srcDir.equalsIgnoreCase(fsOp.getConf().getDirName()))) { return mvTsk; } } return null; }
private Task<MoveWork> findMoveTask(List<Task<MoveWork>> mvTasks, FileSinkOperator fsOp) { // find the move task for (Task<MoveWork> mvTsk : mvTasks) { MoveWork mvWork = mvTsk.getWork(); String srcDir = null; if (mvWork.getLoadFileWork() != null) { srcDir = mvWork.getLoadFileWork().getSourceDir(); } else if (mvWork.getLoadTableWork() != null) { srcDir = mvWork.getLoadTableWork().getSourceDir(); } String fsOpDirName = fsOp.getConf().getFinalDirName(); if ((srcDir != null) && (srcDir.equalsIgnoreCase(fsOpDirName))) { return mvTsk; } } return null; }
/** * Add the StatsTask as a dependent task of the MoveTask because StatsTask will change the * Table/Partition metadata. For atomicity, we should not change it before the data is actually * there done by MoveTask. * * @param nd the FileSinkOperator whose results are taken care of by the MoveTask. * @param mvTask The MoveTask that moves the FileSinkOperator's results. * @param currTask The MapRedTask that the FileSinkOperator belongs to. * @param hconf HiveConf */ private void addStatsTask( FileSinkOperator nd, MoveTask mvTask, Task<? extends Serializable> currTask, HiveConf hconf) { MoveWork mvWork = ((MoveTask) mvTask).getWork(); StatsWork statsWork = new StatsWork(mvWork.getLoadTableWork()); MapredWork mrWork = (MapredWork) currTask.getWork(); // AggKey in StatsWork is used for stats aggregation while StatsAggPrefix // in FileSinkDesc is used for stats publishing. They should be consistent. statsWork.setAggKey(((FileSinkOperator) nd).getConf().getStatsAggPrefix()); Task<? extends Serializable> statsTask = TaskFactory.get(statsWork, hconf); // mark the MapredWork and FileSinkOperator for gathering stats nd.getConf().setGatherStats(true); mrWork.setGatheringStats(true); // mrWork.addDestinationTable(nd.getConf().getTableInfo().getTableName()); // subscribe feeds from the MoveTask so that MoveTask can forward the list // of dynamic partition list to the StatsTask mvTask.addDependentTask(statsTask); statsTask.subscribeFeed(mvTask); }