// push the feed to its subscribers
protected void pushFeed(FeedType feedType, Object feedValue) {
  if (feedSubscribers != null) {
    for (Task<? extends Serializable> s : feedSubscribers) {
      s.receiveFeed(feedType, feedValue);
    }
  }
}
/**
 * Add the StatsTask as a dependent task of the MoveTask because the StatsTask will change the
 * Table/Partition metadata. For atomicity, we should not change the metadata before the data is
 * actually put in place by the MoveTask.
 *
 * @param nd the FileSinkOperator whose results are taken care of by the MoveTask.
 * @param mvTask the MoveTask that moves the FileSinkOperator's results.
 * @param currTask the MapRedTask that the FileSinkOperator belongs to.
 * @param hconf HiveConf
 */
private void addStatsTask(
    FileSinkOperator nd, MoveTask mvTask, Task<? extends Serializable> currTask, HiveConf hconf) {
  MoveWork mvWork = mvTask.getWork();
  StatsWork statsWork = null;
  if (mvWork.getLoadTableWork() != null) {
    statsWork = new StatsWork(mvWork.getLoadTableWork());
  } else if (mvWork.getLoadFileWork() != null) {
    statsWork = new StatsWork(mvWork.getLoadFileWork());
  }
  assert statsWork != null : "Error when generating StatsTask";

  statsWork.setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE));
  MapredWork mrWork = (MapredWork) currTask.getWork();

  // AggKey in StatsWork is used for stats aggregation while StatsAggPrefix
  // in FileSinkDesc is used for stats publishing. They should be consistent.
  statsWork.setAggKey(nd.getConf().getStatsAggPrefix());
  Task<? extends Serializable> statsTask = TaskFactory.get(statsWork, hconf);

  // mark the MapredWork and FileSinkOperator for gathering stats
  nd.getConf().setGatherStats(true);
  mrWork.setGatheringStats(true);
  nd.getConf().setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE));
  nd.getConf()
      .setMaxStatsKeyPrefixLength(hconf.getIntVar(ConfVars.HIVE_STATS_KEY_PREFIX_MAX_LENGTH));
  // mrWork.addDestinationTable(nd.getConf().getTableInfo().getTableName());

  // subscribe to feeds from the MoveTask so that the MoveTask can forward the list
  // of dynamic partitions to the StatsTask
  mvTask.addDependentTask(statsTask);
  statsTask.subscribeFeed(mvTask);
}
@Override
public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs)
    throws SemanticException {
  Task<? extends Serializable> task = (Task<? extends Serializable>) nd;

  if (!task.isMapRedTask()
      || task instanceof ConditionalTask
      || ((MapredWork) task.getWork()).getReducer() == null) {
    return null;
  }

  SkewJoinProcCtx skewJoinProcContext =
      new SkewJoinProcCtx(task, physicalContext.getParseContext());

  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
  opRules.put(
      new RuleRegExp("R1", CommonJoinOperator.getOperatorName() + "%"),
      SkewJoinProcFactory.getJoinProc());

  // The dispatcher fires the processor corresponding to the closest
  // matching rule and passes the context along
  Dispatcher disp =
      new DefaultRuleDispatcher(
          SkewJoinProcFactory.getDefaultProc(), opRules, skewJoinProcContext);
  GraphWalker ogw = new DefaultGraphWalker(disp);

  // iterate over the reducer operator tree
  ArrayList<Node> topNodes = new ArrayList<Node>();
  topNodes.add(((MapredWork) task.getWork()).getReducer());
  ogw.startWalking(topNodes, null);
  return null;
}
/**
 * handle partial scan command.
 *
 * <p>It is composed of PartialScanTask followed by StatsTask.
 */
private void handlePartialScanCommand(
    TableScanOperator tableScan,
    ParseContext parseContext,
    StatsWork statsWork,
    GenTezProcContext context,
    Task<StatsWork> statsTask)
    throws SemanticException {
  String aggregationKey = tableScan.getConf().getStatsAggPrefix();
  StringBuilder aggregationKeyBuffer = new StringBuilder(aggregationKey);
  List<Path> inputPaths =
      GenMapRedUtils.getInputPathsForPartialScan(tableScan, aggregationKeyBuffer);
  aggregationKey = aggregationKeyBuffer.toString();

  // scan work
  PartialScanWork scanWork = new PartialScanWork(inputPaths);
  scanWork.setMapperCannotSpanPartns(true);
  scanWork.setAggKey(aggregationKey);
  scanWork.setStatsTmpDir(tableScan.getConf().getTmpStatsDir(), parseContext.getConf());

  // stats work
  statsWork.setPartialScanAnalyzeCommand(true);

  // partial scan task
  DriverContext driverCxt = new DriverContext();
  Task<PartialScanWork> partialScanTask = TaskFactory.get(scanWork, parseContext.getConf());
  partialScanTask.initialize(parseContext.getConf(), null, driverCxt);
  partialScanTask.setWork(scanWork);
  statsWork.setSourceTask(partialScanTask);

  // task dependency
  context.rootTasks.remove(context.currentTask);
  context.rootTasks.add(partialScanTask);
  partialScanTask.addDependentTask(statsTask);
}
/**
 * handle partial scan command. It is composed of PartialScanTask followed by StatsTask.
 *
 * @param op
 * @param ctx
 * @param parseCtx
 * @param currTask
 * @param statsWork
 * @param statsTask
 * @throws SemanticException
 */
private void handlePartialScanCommand(
    TableScanOperator op,
    GenMRProcContext ctx,
    ParseContext parseCtx,
    Task<? extends Serializable> currTask,
    StatsWork statsWork,
    Task<StatsWork> statsTask)
    throws SemanticException {
  String aggregationKey = op.getConf().getStatsAggPrefix();
  StringBuffer aggregationKeyBuffer = new StringBuffer(aggregationKey);
  List<Path> inputPaths = GenMapRedUtils.getInputPathsForPartialScan(op, aggregationKeyBuffer);
  aggregationKey = aggregationKeyBuffer.toString();

  // scan work
  PartialScanWork scanWork = new PartialScanWork(inputPaths);
  scanWork.setMapperCannotSpanPartns(true);
  scanWork.setAggKey(aggregationKey);

  // stats work
  statsWork.setPartialScanAnalyzeCommand(true);

  // partial scan task
  DriverContext driverCxt = new DriverContext();
  Task<PartialScanWork> psTask = TaskFactory.get(scanWork, parseCtx.getConf());
  psTask.initialize(parseCtx.getConf(), null, driverCxt);
  psTask.setWork(scanWork);

  // task dependency
  ctx.getRootTasks().remove(currTask);
  ctx.getRootTasks().add(psTask);
  psTask.addDependentTask(statsTask);

  List<Task<? extends Serializable>> parentTasks = new ArrayList<Task<? extends Serializable>>();
  parentTasks.add(psTask);
  statsTask.setParentTasks(parentTasks);
}
/*
 * Multiple file sink descriptors are linked.
 * Use the task created by the first linked file descriptor
 */
private void processLinkedFileDesc(GenMRProcContext ctx, Task<? extends Serializable> childTask)
    throws SemanticException {
  Operator<? extends OperatorDesc> currTopOp = ctx.getCurrTopOp();
  String currAliasId = ctx.getCurrAliasId();
  List<Operator<? extends OperatorDesc>> seenOps = ctx.getSeenOps();
  List<Task<? extends Serializable>> rootTasks = ctx.getRootTasks();
  Task<? extends Serializable> currTask = ctx.getCurrTask();

  if (currTopOp != null) {
    if (!seenOps.contains(currTopOp)) {
      seenOps.add(currTopOp);
      GenMapRedUtils.setTaskPlan(
          currAliasId, currTopOp, (MapredWork) currTask.getWork(), false, ctx);
    }

    if (!rootTasks.contains(currTask)
        && (currTask.getParentTasks() == null || currTask.getParentTasks().isEmpty())) {
      rootTasks.add(currTask);
    }
  }

  if (childTask != null) {
    currTask.addDependentTask(childTask);
  }
}
/**
 * Subscribe to the feed of a publisher. To prevent cycles, a task can only subscribe to one of
 * its ancestors. A feed is a generic form of execution-time feedback, a (type, value) pair sent
 * from one task to another. One example is the list of dynamic partitions, which is only
 * available at execution time: the MoveTask may pass it to the StatsTask, because after the
 * MoveTask runs the list is lost (the MoveTask moves the partitions into the table's destination
 * directory, where they are mixed with old partitions).
 *
 * @param publisher the feed provider.
 */
public void subscribeFeed(Task<? extends Serializable> publisher) {
  if (publisher != this && publisher.ancestorOrSelf(this)) {
    if (publisher.getFeedSubscribers() == null) {
      publisher.setFeedSubscribers(new LinkedList<Task<? extends Serializable>>());
    }
    publisher.getFeedSubscribers().add(this);
  }
}
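/*
 * Illustrative sketch (not part of the Hive sources above): how a publisher task and a subscriber
 * task might use the feed mechanism together with pushFeed()/receiveFeed(). mvTask and statsTask
 * mirror the variables in addStatsTask() above; the FeedType constant DYNAMIC_PARTITIONS and the
 * dynamicPartitionList variable are assumptions made for the example.
 */
// During plan generation: the StatsTask subscribes to its ancestor MoveTask.
// subscribeFeed() only registers the subscription because mvTask.ancestorOrSelf(statsTask) is
// true, i.e. statsTask is reachable from mvTask through dependent tasks.
mvTask.addDependentTask(statsTask);
statsTask.subscribeFeed(mvTask);

// At execution time, inside the MoveTask: once the dynamic partitions are known, forward them to
// every subscriber (the StatsTask here) before the information is lost.
pushFeed(FeedType.DYNAMIC_PARTITIONS, dynamicPartitionList);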
public boolean addRootIfPossible(Task<? extends Serializable> task) {
  if (task.getParentTasks() == null || task.getParentTasks().isEmpty()) {
    if (!rootTasks.contains(task)) {
      return rootTasks.add(task);
    }
  }
  return false;
}
/**
 * Remove the dependent task.
 *
 * @param dependent the task to remove
 */
public void removeDependentTask(Task<? extends Serializable> dependent) {
  if ((getChildTasks() != null) && (getChildTasks().contains(dependent))) {
    getChildTasks().remove(dependent);
    if ((dependent.getParentTasks() != null) && (dependent.getParentTasks().contains(this))) {
      dependent.getParentTasks().remove(this);
    }
  }
}
/**
 * Compile the query and extract metadata.
 *
 * @param sqlOperationConf
 * @throws HiveSQLException
 */
public void prepare(HiveConf sqlOperationConf) throws HiveSQLException {
  setState(OperationState.RUNNING);
  try {
    driver = new Driver(sqlOperationConf, getParentSession().getUserName());

    // set the operation handle information in Driver, so that thrift API users
    // can use the operation handle they receive, to lookup query information in
    // Yarn ATS
    String guid64 =
        Base64.encodeBase64URLSafeString(
                getHandle().getHandleIdentifier().toTHandleIdentifier().getGuid())
            .trim();
    driver.setOperationId(guid64);

    // In Hive server mode, we are not able to retry in the FetchTask
    // case, when calling fetch queries since execute() has returned.
    // For now, we disable the test attempts.
    driver.setTryCount(Integer.MAX_VALUE);

    response = driver.compileAndRespond(statement);
    if (0 != response.getResponseCode()) {
      throw toSQLException("Error while compiling statement", response);
    }

    mResultSchema = driver.getSchema();

    // hasResultSet should be true only if the query has a FetchTask
    // "explain" is an exception for now
    if (driver.getPlan().getFetchTask() != null) {
      // Schema has to be set
      if (mResultSchema == null || !mResultSchema.isSetFieldSchemas()) {
        throw new HiveSQLException(
            "Error compiling query: Schema and FieldSchema "
                + "should be set when query plan has a FetchTask");
      }
      resultSchema = new TableSchema(mResultSchema);
      setHasResultSet(true);
    } else {
      setHasResultSet(false);
    }

    // Set hasResultSet true if the plan has ExplainTask
    // TODO explain should use a FetchTask for reading
    for (Task<? extends Serializable> task : driver.getPlan().getRootTasks()) {
      if (task.getClass() == ExplainTask.class) {
        resultSchema = new TableSchema(mResultSchema);
        setHasResultSet(true);
        break;
      }
    }
  } catch (HiveSQLException e) {
    setState(OperationState.ERROR);
    throw e;
  } catch (Throwable e) {
    setState(OperationState.ERROR);
    throw new HiveSQLException("Error running query: " + e.toString(), e);
  }
}
/**
 * File Sink Operator encountered.
 *
 * @param nd the file sink operator encountered
 * @param opProcCtx context
 */
public Object process(
    Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs)
    throws SemanticException {
  GenMRProcContext ctx = (GenMRProcContext) opProcCtx;
  ParseContext parseCtx = ctx.getParseCtx();
  boolean chDir = false;
  Task<? extends Serializable> currTask = ctx.getCurrTask();
  FileSinkOperator fsOp = (FileSinkOperator) nd;
  boolean isInsertTable = // is INSERT OVERWRITE TABLE
      fsOp.getConf().getTableInfo().getTableName() != null
          && parseCtx.getQB().getParseInfo().isInsertToTable();
  HiveConf hconf = parseCtx.getConf();

  // Has the user enabled merging of files for map-only jobs or for all jobs
  if ((ctx.getMvTask() != null) && (!ctx.getMvTask().isEmpty())) {
    List<Task<? extends Serializable>> mvTasks = ctx.getMvTask();

    // In case of unions or map-joins, it is possible that the file has
    // already been seen.
    // So, no need to attempt to merge the files again.
    if ((ctx.getSeenFileSinkOps() == null) || (!ctx.getSeenFileSinkOps().contains(nd))) {

      // no need of merging if the move is to a local file system
      MoveTask mvTask = (MoveTask) findMoveTask(mvTasks, fsOp);

      if (isInsertTable && hconf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
        addStatsTask(fsOp, mvTask, currTask, parseCtx.getConf());
      }

      if ((mvTask != null) && !mvTask.isLocal()) {
        // There are separate configuration parameters to control whether to
        // merge for a map-only job or for a map-reduce job
        MapredWork currWork = (MapredWork) currTask.getWork();
        boolean mergeMapOnly =
            hconf.getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPFILES)
                && currWork.getReducer() == null;
        boolean mergeMapRed =
            hconf.getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPREDFILES)
                && currWork.getReducer() != null;
        if (mergeMapOnly || mergeMapRed) {
          chDir = true;
        }
      }
    }
  }

  String finalName = processFS(nd, stack, opProcCtx, chDir);

  // need to merge the files in the destination table/partitions
  if (chDir && (finalName != null)) {
    createMergeJob((FileSinkOperator) nd, ctx, finalName);
  }

  return null;
}
private void LinkMoveTask(
    GenMRProcContext ctx, FileSinkOperator newOutput, ConditionalTask cndTsk) {
  List<Task<? extends Serializable>> mvTasks = ctx.getMvTask();
  Task<? extends Serializable> mvTask = findMoveTask(mvTasks, newOutput);

  if (mvTask != null) {
    for (Task<? extends Serializable> tsk : cndTsk.getListTasks()) {
      tsk.addDependentTask(mvTask);
    }
  }
}
public boolean isRunnable() {
  boolean isrunnable = true;
  if (parentTasks != null) {
    for (Task<? extends Serializable> parent : parentTasks) {
      if (!parent.done()) {
        isrunnable = false;
        break;
      }
    }
  }
  return isrunnable;
}
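/*
 * Illustrative sketch (not part of the Hive sources above): a simplified scheduling loop of the
 * kind a driver could build on isRunnable(), launching every task whose parents have all reached
 * done(). The plan variable, the launch() helper and the synchronous completion model are
 * assumptions made for the example; java.util.Queue/LinkedList/Set/HashSet are assumed imported.
 */
Queue<Task<? extends Serializable>> candidates =
    new LinkedList<Task<? extends Serializable>>(plan.getRootTasks());
Set<Task<? extends Serializable>> launched = new HashSet<Task<? extends Serializable>>();
while (!candidates.isEmpty()) {
  Task<? extends Serializable> tsk = candidates.poll();
  if (launched.contains(tsk)) {
    continue; // a task reachable from several parents is only run once
  }
  if (!tsk.isRunnable()) {
    candidates.add(tsk); // some parent is not done() yet; re-check it later
    continue;
  }
  launch(tsk); // hypothetical helper: runs the task synchronously and marks it done
  launched.add(tsk);
  if (tsk.getChildTasks() != null) {
    candidates.addAll(tsk.getChildTasks()); // children may now have become runnable
  }
}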
/**
 * Follows the task tree down from task and makes all leaves parents of mvTask
 *
 * @param ctx
 * @param mvTask
 * @param task
 */
private void linkMoveTask(
    GenMRProcContext ctx, Task<MoveWork> mvTask, Task<? extends Serializable> task) {

  if (task.getDependentTasks() == null || task.getDependentTasks().isEmpty()) {
    // If it's a leaf, add the move task as a child
    addDependentMoveTasks(ctx, mvTask, task);
  } else {
    // Otherwise, for each child run this method recursively
    for (Task<? extends Serializable> childTask : task.getDependentTasks()) {
      linkMoveTask(ctx, mvTask, childTask);
    }
  }
}
public Task<? extends Serializable> getAndInitBackupTask() {
  if (backupTask != null) {
    // first set back the backup task with its children task.
    if (backupChildrenTasks != null) {
      for (Task<? extends Serializable> backupChild : backupChildrenTasks) {
        backupChild.getParentTasks().add(backupTask);
      }
    }

    // recursively remove task from its children tasks if this task doesn't have any parent task
    this.removeFromChildrenTasks();
  }
  return backupTask;
}
// return true if desc is this task itself or is reachable from it through dependent tasks,
// i.e. this task is an ancestor of (or the same as) desc
private boolean ancestorOrSelf(Task<? extends Serializable> desc) {
  if (this == desc) {
    return true;
  }
  List<Task<? extends Serializable>> deps = getDependentTasks();
  if (deps != null) {
    for (Task<? extends Serializable> d : deps) {
      if (d.ancestorOrSelf(desc)) {
        return true;
      }
    }
  }
  return false;
}
/**
 * Construct a conditional task given the current leaf task, the MoveWork and the MapredWork.
 *
 * @param conf HiveConf
 * @param currTask current leaf task
 * @param mvWork MoveWork for the move task
 * @param mergeWork MapredWork for the merge task.
 * @param inputPath the input directory of the merge/move task
 * @return The conditional task
 */
private ConditionalTask createCondTask(
    HiveConf conf,
    Task<? extends Serializable> currTask,
    MoveWork mvWork,
    MapredWork mergeWork,
    String inputPath) {

  // There are 3 options for this ConditionalTask:
  // 1) Merge the partitions
  // 2) Move the partitions (i.e. don't merge the partitions)
  // 3) Merge some partitions and move other partitions (i.e. merge some partitions and don't
  //    merge others); in this case the merge is done first followed by the move to prevent
  //    conflicts.
  Task<? extends Serializable> mergeOnlyMergeTask = TaskFactory.get(mergeWork, conf);
  Task<? extends Serializable> moveOnlyMoveTask = TaskFactory.get(mvWork, conf);
  Task<? extends Serializable> mergeAndMoveMergeTask = TaskFactory.get(mergeWork, conf);
  Task<? extends Serializable> mergeAndMoveMoveTask = TaskFactory.get(mvWork, conf);

  // NOTE! It is necessary that the merge task is the parent of the move task, and not
  // the other way around, for the proper execution of the execute method of
  // ConditionalTask
  mergeAndMoveMergeTask.addDependentTask(mergeAndMoveMoveTask);

  List<Serializable> listWorks = new ArrayList<Serializable>();
  listWorks.add(mvWork);
  listWorks.add(mergeWork);

  ConditionalWork cndWork = new ConditionalWork(listWorks);

  List<Task<? extends Serializable>> listTasks = new ArrayList<Task<? extends Serializable>>();
  listTasks.add(moveOnlyMoveTask);
  listTasks.add(mergeOnlyMergeTask);
  listTasks.add(mergeAndMoveMergeTask);

  ConditionalTask cndTsk = (ConditionalTask) TaskFactory.get(cndWork, conf);
  cndTsk.setListTasks(listTasks);

  // create resolver
  cndTsk.setResolver(new ConditionalResolverMergeFiles());
  ConditionalResolverMergeFilesCtx mrCtx =
      new ConditionalResolverMergeFilesCtx(listTasks, inputPath);
  cndTsk.setResolverCtx(mrCtx);

  // make the conditional task as the child of the current leaf task
  currTask.addDependentTask(cndTsk);

  return cndTsk;
}
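/*
 * Illustrative sketch (not part of the Hive sources above): the task graph createCondTask()
 * leaves behind, restated in code. The resolver later picks one entry of listTasks at run time;
 * the ASCII layout is only a reading aid and the variable names match the method above.
 *
 *   currTask ──> cndTsk ──(resolver picks one)──> moveOnlyMoveTask
 *                                                 mergeOnlyMergeTask
 *                                                 mergeAndMoveMergeTask ──> mergeAndMoveMoveTask
 */
ConditionalTask cndTsk = createCondTask(conf, currTask, mvWork, mergeWork, inputPath);
assert currTask.getChildTasks().contains(cndTsk);

// In the merge+move branch the move must not start before the merge finishes, otherwise it
// would ship half-merged files; that ordering shows up as a child task on the merge task.
Task<? extends Serializable> mergeAndMoveMerge = cndTsk.getListTasks().get(2);
assert mergeAndMoveMerge.getChildTasks() != null && !mergeAndMoveMerge.getChildTasks().isEmpty();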
/** Initialization when invoked from QL. */
@Override
public void initialize(HiveConf conf, QueryPlan queryPlan, DriverContext driverContext) {
  super.initialize(conf, queryPlan, driverContext);

  job = new JobConf(conf, ExecDriver.class);

  // NOTE: initialize is only called if it is in non-local mode.
  // In case it's in non-local mode, we need to move the SessionState files
  // and jars to jobConf.
  // In case it's in local mode, MapRedTask will set the jobConf.
  //
  // "tmpfiles" and "tmpjars" are set by the method ExecDriver.execute(),
  // which will be called by both local and NON-local mode.
  String addedFiles = Utilities.getResourceFiles(job, SessionState.ResourceType.FILE);
  if (StringUtils.isNotBlank(addedFiles)) {
    HiveConf.setVar(job, ConfVars.HIVEADDEDFILES, addedFiles);
  }
  String addedJars = Utilities.getResourceFiles(job, SessionState.ResourceType.JAR);
  if (StringUtils.isNotBlank(addedJars)) {
    HiveConf.setVar(job, ConfVars.HIVEADDEDJARS, addedJars);
  }
  String addedArchives = Utilities.getResourceFiles(job, SessionState.ResourceType.ARCHIVE);
  if (StringUtils.isNotBlank(addedArchives)) {
    HiveConf.setVar(job, ConfVars.HIVEADDEDARCHIVES, addedArchives);
  }
  conf.stripHiddenConfigurations(job);
  this.jobExecHelper = new HadoopJobExecHelper(job, console, this, this);
}
@Override
public void initialize(
    HiveConf conf, QueryPlan queryPlan, DriverContext ctx, CompilationOpContext opContext) {
  super.initialize(conf, queryPlan, ctx, opContext);
  work.initializeForFetch(opContext);

  try {
    // Create a file system handle
    JobConf job = new JobConf(conf);

    Operator<?> source = work.getSource();
    if (source instanceof TableScanOperator) {
      TableScanOperator ts = (TableScanOperator) source;
      // push down projections
      ColumnProjectionUtils.appendReadColumns(
          job, ts.getNeededColumnIDs(), ts.getNeededColumns());
      // push down filters
      HiveInputFormat.pushFilters(job, ts);
    }
    sink = work.getSink();
    fetch = new FetchOperator(work, job, source, getVirtualColumns(source));
    source.initialize(conf, new ObjectInspector[] {fetch.getOutputObjectInspector()});
    totalRows = 0;
    ExecMapper.setDone(false);
  } catch (Exception e) {
    // Bail out ungracefully - we should never hit
    // this here - but would have hit it in SemanticAnalyzer
    LOG.error(StringUtils.stringifyException(e));
    throw new RuntimeException(e);
  }
}
/**
 * Construct a conditional task given the current leaf task, the MoveWork and the MapredWork.
 *
 * @param conf HiveConf
 * @param currTask current leaf task
 * @param mvWork MoveWork for the move task
 * @param mergeWork MapredWork for the merge task.
 * @param inputPath the input directory of the merge/move task
 * @return The conditional task
 */
private ConditionalTask createCondTask(
    HiveConf conf,
    Task<? extends Serializable> currTask,
    MoveWork mvWork,
    MapredWork mergeWork,
    String inputPath) {

  Task<? extends Serializable> mergeTask = TaskFactory.get(mergeWork, conf);
  Task<? extends Serializable> moveTask = TaskFactory.get(mvWork, conf);
  List<Serializable> listWorks = new ArrayList<Serializable>();
  listWorks.add(mvWork);
  listWorks.add(mergeWork);
  ConditionalWork cndWork = new ConditionalWork(listWorks);

  List<Task<? extends Serializable>> listTasks = new ArrayList<Task<? extends Serializable>>();
  listTasks.add(moveTask);
  listTasks.add(mergeTask);

  ConditionalTask cndTsk = (ConditionalTask) TaskFactory.get(cndWork, conf);
  cndTsk.setListTasks(listTasks);

  // create resolver
  cndTsk.setResolver(new ConditionalResolverMergeFiles());
  ConditionalResolverMergeFilesCtx mrCtx =
      new ConditionalResolverMergeFilesCtx(listTasks, inputPath);
  cndTsk.setResolverCtx(mrCtx);

  // make the conditional task as the child of the current leaf task
  currTask.addDependentTask(cndTsk);

  return cndTsk;
}
/*
 * It is an idempotent function to add various intermediate files as the source
 * for the union. The plan has already been created.
 */
public static void initUnionPlan(
    GenMRProcContext opProcCtx, Task<? extends Serializable> currTask, boolean local) {
  MapredWork plan = (MapredWork) currTask.getWork();
  UnionOperator currUnionOp = opProcCtx.getCurrUnionOp();
  assert currUnionOp != null;
  GenMRUnionCtx uCtx = opProcCtx.getUnionTask(currUnionOp);
  assert uCtx != null;

  List<String> taskTmpDirLst = uCtx.getTaskTmpDir();
  List<TableDesc> tt_descLst = uCtx.getTTDesc();
  assert !taskTmpDirLst.isEmpty() && !tt_descLst.isEmpty();
  assert taskTmpDirLst.size() == tt_descLst.size();
  int size = taskTmpDirLst.size();
  assert local == false;

  for (int pos = 0; pos < size; pos++) {
    String taskTmpDir = taskTmpDirLst.get(pos);
    TableDesc tt_desc = tt_descLst.get(pos);
    if (plan.getPathToAliases().get(taskTmpDir) == null) {
      plan.getPathToAliases().put(taskTmpDir, new ArrayList<String>());
      plan.getPathToAliases().get(taskTmpDir).add(taskTmpDir);
      plan.getPathToPartitionInfo().put(taskTmpDir, new PartitionDesc(tt_desc, null));
      plan.getAliasToWork().put(taskTmpDir, currUnionOp);
    }
  }
}
@Override
public void shutdown() {
  super.shutdown();
  if (executor != null) {
    executor.destroy();
    executor = null;
  }
}
/**
 * Add a dependent task on the current task. Returns whether the dependency already existed or is
 * a new one.
 *
 * @return true if the task got added, false if it already existed
 */
public boolean addDependentTask(Task<? extends Serializable> dependent) {
  boolean ret = false;
  if (getChildTasks() == null) {
    setChildTasks(new ArrayList<Task<? extends Serializable>>());
  }
  if (!getChildTasks().contains(dependent)) {
    ret = true;
    getChildTasks().add(dependent);
    if (dependent.getParentTasks() == null) {
      dependent.setParentTasks(new ArrayList<Task<? extends Serializable>>());
    }
    if (!dependent.getParentTasks().contains(this)) {
      dependent.getParentTasks().add(this);
    }
  }
  return ret;
}
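/*
 * Illustrative sketch (not part of the Hive sources above): the invariant that addDependentTask()
 * and removeDependentTask() maintain on the child/parent lists. The work objects passed to
 * TaskFactory.get() (mergeWork, mvWork) and the conf variable are assumed to be in scope.
 */
Task<? extends Serializable> parent = TaskFactory.get(mergeWork, conf);
Task<? extends Serializable> child = TaskFactory.get(mvWork, conf);

assert parent.addDependentTask(child);  // new edge: both sides are wired
assert parent.getChildTasks().contains(child);
assert child.getParentTasks().contains(parent);

assert !parent.addDependentTask(child); // adding the same edge again is a no-op

parent.removeDependentTask(child);      // undoes both sides of the edge
assert !parent.getChildTasks().contains(child);
assert child.getParentTasks().isEmpty();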
private Task<? extends Serializable> findMoveTask(
    List<Task<? extends Serializable>> mvTasks, FileSinkOperator fsOp) {
  // find the move task
  for (Task<? extends Serializable> mvTsk : mvTasks) {
    MoveWork mvWork = (MoveWork) mvTsk.getWork();
    String srcDir = null;
    if (mvWork.getLoadFileWork() != null) {
      srcDir = mvWork.getLoadFileWork().getSourceDir();
    } else if (mvWork.getLoadTableWork() != null) {
      srcDir = mvWork.getLoadTableWork().getSourceDir();
    }

    if ((srcDir != null) && (srcDir.equalsIgnoreCase(fsOp.getConf().getDirName()))) {
      return mvTsk;
    }
  }
  return null;
}
public void removeFromChildrenTasks() {
  List<Task<? extends Serializable>> childrenTasks = this.getChildTasks();
  if (childrenTasks == null) {
    return;
  }

  for (Task<? extends Serializable> childTsk : childrenTasks) {
    // remove this task from its children tasks
    childTsk.getParentTasks().remove(this);

    // recursively remove non-parent task from its children
    List<Task<? extends Serializable>> siblingTasks = childTsk.getParentTasks();
    if (siblingTasks == null || siblingTasks.size() == 0) {
      childTsk.removeFromChildrenTasks();
    }
  }
}
private Task<MoveWork> findMoveTask(List<Task<MoveWork>> mvTasks, FileSinkOperator fsOp) {
  // find the move task
  for (Task<MoveWork> mvTsk : mvTasks) {
    MoveWork mvWork = mvTsk.getWork();
    String srcDir = null;
    if (mvWork.getLoadFileWork() != null) {
      srcDir = mvWork.getLoadFileWork().getSourceDir();
    } else if (mvWork.getLoadTableWork() != null) {
      srcDir = mvWork.getLoadTableWork().getSourceDir();
    }

    String fsOpDirName = fsOp.getConf().getFinalDirName();
    if ((srcDir != null) && (srcDir.equalsIgnoreCase(fsOpDirName))) {
      return mvTsk;
    }
  }
  return null;
}
public synchronized <T extends Serializable> void updateStatus(Task<T> tTask) {
  this.taskState = tTask.getTaskState();
  if (externalHandle == null && tTask.getExternalHandle() != null) {
    this.externalHandle = tTask.getExternalHandle();
  }
  setStatusMessage(tTask.getStatusMessage());
  switch (taskState) {
    case RUNNING:
      if (beginTime == null) {
        beginTime = System.currentTimeMillis();
      }
      break;
    case FINISHED:
      if (endTime == null) {
        endTime = System.currentTimeMillis();
      }
      break;
  }
}
/**
 * Initialize the current plan by adding it to root tasks.
 *
 * @param op the reduce sink operator encountered
 * @param opProcCtx processing context
 */
public static void initPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx)
    throws SemanticException {
  Operator<? extends Serializable> reducer = op.getChildOperators().get(0);
  Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = opProcCtx.getMapCurrCtx();
  GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0));
  Task<? extends Serializable> currTask = mapredCtx.getCurrTask();
  MapredWork plan = (MapredWork) currTask.getWork();
  HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap =
      opProcCtx.getOpTaskMap();
  Operator<? extends Serializable> currTopOp = opProcCtx.getCurrTopOp();

  opTaskMap.put(reducer, currTask);
  plan.setReducer(reducer);
  ReduceSinkDesc desc = op.getConf();

  plan.setNumReduceTasks(desc.getNumReducers());

  List<Task<? extends Serializable>> rootTasks = opProcCtx.getRootTasks();

  if (!rootTasks.contains(currTask)) {
    rootTasks.add(currTask);
  }
  if (reducer.getClass() == JoinOperator.class) {
    plan.setNeedsTagging(true);
  }

  assert currTopOp != null;
  List<Operator<? extends Serializable>> seenOps = opProcCtx.getSeenOps();
  String currAliasId = opProcCtx.getCurrAliasId();

  if (!seenOps.contains(currTopOp)) {
    seenOps.add(currTopOp);
    setTaskPlan(currAliasId, currTopOp, plan, false, opProcCtx);
  }

  currTopOp = null;
  currAliasId = null;

  opProcCtx.setCurrTask(currTask);
  opProcCtx.setCurrTopOp(currTopOp);
  opProcCtx.setCurrAliasId(currAliasId);
}
/**
 * If HIVE_MULTI_INSERT_MOVE_TASKS_SHARE_DEPENDENCIES is set, adds the
 * dependencyTaskForMultiInsert in ctx as a dependent of parentTask; then, if mvTask is a table
 * load, adds mvTask as a dependent of dependencyTaskForMultiInsert, otherwise adds mvTask as a
 * dependent of parentTask. If the setting is off, simply adds mvTask as a dependent of
 * parentTask.
 *
 * @param ctx
 * @param mvTask
 * @param parentTask
 */
private void addDependentMoveTasks(
    GenMRProcContext ctx, Task<MoveWork> mvTask, Task<? extends Serializable> parentTask) {

  if (mvTask != null) {
    if (ctx.getConf().getBoolVar(ConfVars.HIVE_MULTI_INSERT_MOVE_TASKS_SHARE_DEPENDENCIES)) {
      DependencyCollectionTask dependencyTask = ctx.getDependencyTaskForMultiInsert();
      parentTask.addDependentTask(dependencyTask);
      if (mvTask.getWork().getLoadTableWork() != null) {
        // Moving tables/partitions depend on the dependencyTask
        dependencyTask.addDependentTask(mvTask);
      } else {
        // Moving files depends on the parentTask (we still want the dependencyTask to depend
        // on the parentTask)
        parentTask.addDependentTask(mvTask);
      }
    } else {
      parentTask.addDependentTask(mvTask);
    }
  }
}
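/*
 * Illustrative sketch (not part of the Hive sources above): the graph this produces for a
 * multi-insert query with two branches when HIVE_MULTI_INSERT_MOVE_TASKS_SHARE_DEPENDENCIES is
 * set. Both branches funnel through the single shared DependencyCollectionTask, so neither table
 * load starts until both branch tasks have finished. branchTask1/branchTask2, loadMove1/loadMove2
 * and dep are names made up for the example.
 *
 *   branchTask1 ──┐                 ┌──> loadMove1
 *                 ├──> dep (shared) ┤
 *   branchTask2 ──┘                 └──> loadMove2
 */
DependencyCollectionTask dep = ctx.getDependencyTaskForMultiInsert();
branchTask1.addDependentTask(dep);
branchTask2.addDependentTask(dep);
dep.addDependentTask(loadMove1); // table/partition loads hang off the shared dependency task
dep.addDependentTask(loadMove2);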
@Override
public void initialize(
    HiveConf conf,
    QueryPlan queryPlan,
    DriverContext driverContext,
    CompilationOpContext opContext) {
  super.initialize(conf, queryPlan, driverContext, opContext);
  job = new JobConf(conf, ExecDriver.class);
  execContext = new ExecMapperContext(job);
  // we don't use the HadoopJobExecHooks for local tasks
  this.jobExecHelper = new HadoopJobExecHelper(job, console, this, null);
}