@Override public void onSuccess(TaskRunner2Result result) { knownTasks.remove(taskWrapper.getRequestId()); taskWrapper.setIsInPreemptableQueue(false); taskWrapper.maybeUnregisterForFinishedStateNotifications(); taskWrapper.getTaskRunnerCallable().getCallback().onSuccess(result); updatePreemptionListAndNotify(result.getEndReason()); }
// Errors are handled on the way over. FAIL/SUCCESS is informed via regular heartbeats. Killed // via a kill message when a task kill is requested by the daemon. @Override public void onSuccess(TaskRunner2Result result) { isCompleted.set(true); switch (result.getEndReason()) { // Only the KILLED case requires a message to be sent out to the AM. case SUCCESS: LOG.debug("Successfully finished {}", requestId); metrics.incrExecutorTotalSuccess(); break; case CONTAINER_STOP_REQUESTED: LOG.info("Received container stop request (AM preemption) for {}", requestId); break; case KILL_REQUESTED: LOG.info("Killed task {}", requestId); if (killtimerWatch.isRunning()) { killtimerWatch.stop(); long elapsed = killtimerWatch.elapsedMillis(); LOG.info("Time to die for task {}", elapsed); } metrics.incrPreemptionTimeLost(runtimeWatch.elapsedMillis()); metrics.incrExecutorTotalKilled(); break; case COMMUNICATION_FAILURE: LOG.info("Failed to run {} due to communication failure", requestId); metrics.incrExecutorTotalExecutionFailed(); break; case TASK_ERROR: LOG.info("Failed to run {} due to task error", requestId); metrics.incrExecutorTotalExecutionFailed(); break; } fragmentCompletionHanler.fragmentComplete(fragmentInfo); taskRunnerCallable.shutdown(); HistoryLogger.logFragmentEnd( request.getApplicationIdString(), request.getContainerIdString(), executionContext.getHostName(), request.getFragmentSpec().getDagName(), request.getFragmentSpec().getVertexName(), request.getFragmentSpec().getFragmentNumber(), request.getFragmentSpec().getAttemptNumber(), taskRunnerCallable.threadName, taskRunnerCallable.startTime, true); metrics.decrExecutorNumQueuedRequests(); }
@Override protected TaskRunner2Result callInternal() throws Exception { isStarted.set(true); this.startTime = System.currentTimeMillis(); this.threadName = Thread.currentThread().getName(); if (LOG.isDebugEnabled()) { LOG.debug("canFinish: " + taskSpec.getTaskAttemptID() + ": " + canFinish()); } // Unregister from the AMReporter, since the task is now running. this.amReporter.unregisterTask(request.getAmHost(), request.getAmPort()); synchronized (this) { if (!shouldRunTask) { LOG.info("Not starting task {} since it was killed earlier", taskSpec.getTaskAttemptID()); return new TaskRunner2Result(EndReason.KILL_REQUESTED, null, false); } } // TODO This executor seems unnecessary. Here and TezChild ExecutorService executorReal = Executors.newFixedThreadPool( 1, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("TezTaskRunner").build()); executor = MoreExecutors.listeningDecorator(executorReal); // TODO Consolidate this code with TezChild. runtimeWatch.start(); UserGroupInformation taskUgi = UserGroupInformation.createRemoteUser(request.getUser()); taskUgi.addCredentials(credentials); Map<String, ByteBuffer> serviceConsumerMetadata = new HashMap<>(); serviceConsumerMetadata.put( TezConstants.TEZ_SHUFFLE_HANDLER_SERVICE_ID, TezCommonUtils.convertJobTokenToBytes(jobToken)); Multimap<String, String> startedInputsMap = createStartedInputMap(request.getFragmentSpec()); UserGroupInformation taskOwner = UserGroupInformation.createRemoteUser(request.getTokenIdentifier()); final InetSocketAddress address = NetUtils.createSocketAddrForHost(request.getAmHost(), request.getAmPort()); SecurityUtil.setTokenService(jobToken, address); taskOwner.addToken(jobToken); umbilical = taskOwner.doAs( new PrivilegedExceptionAction<LlapTaskUmbilicalProtocol>() { @Override public LlapTaskUmbilicalProtocol run() throws Exception { return RPC.getProxy( LlapTaskUmbilicalProtocol.class, LlapTaskUmbilicalProtocol.versionID, address, conf); } }); taskReporter = new LlapTaskReporter( umbilical, confParams.amHeartbeatIntervalMsMax, confParams.amCounterHeartbeatInterval, confParams.amMaxEventsPerHeartbeat, new AtomicLong(0), request.getContainerIdString()); String attemptId = fragmentInfo.getFragmentIdentifierString(); IOContextMap.setThreadAttemptId(attemptId); try { synchronized (this) { if (shouldRunTask) { taskRunner = new TezTaskRunner2( conf, taskUgi, fragmentInfo.getLocalDirs(), taskSpec, request.getAppAttemptNumber(), serviceConsumerMetadata, envMap, startedInputsMap, taskReporter, executor, objectRegistry, pid, executionContext, memoryAvailable, false); } } if (taskRunner == null) { LOG.info("Not starting task {} since it was killed earlier", taskSpec.getTaskAttemptID()); return new TaskRunner2Result(EndReason.KILL_REQUESTED, null, false); } try { TaskRunner2Result result = taskRunner.run(); if (result.isContainerShutdownRequested()) { LOG.warn("Unexpected container shutdown requested while running task. Ignoring"); } isCompleted.set(true); return result; } finally { FileSystem.closeAllForUGI(taskUgi); LOG.info( "ExecutionTime for Container: " + request.getContainerIdString() + "=" + runtimeWatch.stop().elapsedMillis()); if (LOG.isDebugEnabled()) { LOG.debug( "canFinish post completion: " + taskSpec.getTaskAttemptID() + ": " + canFinish()); } } } finally { IOContextMap.clearThreadAttempt(attemptId); } }