예제 #1
0
    /**
     * Invoked when the task runner hands back a result, whatever its end reason.
     *
     * <p>Errors are handled on the way over. FAIL/SUCCESS is informed via regular heartbeats. Killed
     * via a kill message when a task kill is requested by the daemon.
     *
     * <p>Marks the callback as completed, logs the end reason and (when a metrics sink is present)
     * updates the matching counter, notifies the fragment completion handler, shuts the callable
     * down, writes the fragment-end history record and finally decrements the queued-request count.
     *
     * @param result the outcome reported by the task runner; its end reason selects the log line
     *     and the counter that is updated
     */
    @Override
    public void onSuccess(TaskRunner2Result result) {
      isCompleted.set(true);

      // metrics is treated as optional here, the same way onFailure guards it, so that a missing
      // metrics sink cannot abort the completion handling below with a NullPointerException.
      switch (result.getEndReason()) {
          // Only the KILLED case requires a message to be sent out to the AM.
        case SUCCESS:
          LOG.debug("Successfully finished {}", requestId);
          if (metrics != null) {
            metrics.incrExecutorTotalSuccess();
          }
          break;
        case CONTAINER_STOP_REQUESTED:
          LOG.info("Received container stop request (AM preemption) for {}", requestId);
          break;
        case KILL_REQUESTED:
          LOG.info("Killed task {}", requestId);
          if (killtimerWatch.isRunning()) {
            killtimerWatch.stop();
            long elapsed = killtimerWatch.elapsedMillis();
            // Log the task id and the elapsed time separately; the previous message put the
            // elapsed milliseconds where the task id was expected.
            LOG.info("Time to die for task {}: {} ms", requestId, elapsed);
          }
          if (metrics != null) {
            metrics.incrPreemptionTimeLost(runtimeWatch.elapsedMillis());
            metrics.incrExecutorTotalKilled();
          }
          break;
        case COMMUNICATION_FAILURE:
          LOG.info("Failed to run {} due to communication failure", requestId);
          if (metrics != null) {
            metrics.incrExecutorTotalExecutionFailed();
          }
          break;
        case TASK_ERROR:
          LOG.info("Failed to run {} due to task error", requestId);
          if (metrics != null) {
            metrics.incrExecutorTotalExecutionFailed();
          }
          break;
      }
      fragmentCompletionHanler.fragmentComplete(fragmentInfo);

      taskRunnerCallable.shutdown();
      // The trailing flag is true for every end reason handled above; onFailure passes false.
      HistoryLogger.logFragmentEnd(
          request.getApplicationIdString(),
          request.getContainerIdString(),
          executionContext.getHostName(),
          request.getFragmentSpec().getDagName(),
          request.getFragmentSpec().getVertexName(),
          request.getFragmentSpec().getFragmentNumber(),
          request.getFragmentSpec().getAttemptNumber(),
          taskRunnerCallable.threadName,
          taskRunnerCallable.startTime,
          true);
      if (metrics != null) {
        metrics.decrExecutorNumQueuedRequests();
      }
    }
예제 #2
0
 /**
  * Builds a single-line, human-readable identifier for a submitted work request.
  *
  * <p>The result has the form {@code AppId=..., containerId=..., Dag=..., Vertex=...,
  * FragmentNum=..., Attempt=...}, with each value read from the request or its fragment spec.
  *
  * @param request the work request to describe
  * @return the identifier string
  */
 public static String getTaskIdentifierString(SubmitWorkRequestProto request) {
   // Operands are evaluated left to right, so the getters run in the same order as the labels.
   return "AppId="
       + request.getApplicationIdString()
       + ", containerId="
       + request.getContainerIdString()
       + ", Dag="
       + request.getFragmentSpec().getDagName()
       + ", Vertex="
       + request.getFragmentSpec().getVertexName()
       + ", FragmentNum="
       + request.getFragmentSpec().getFragmentNumber()
       + ", Attempt="
       + request.getFragmentSpec().getAttemptNumber();
 }
예제 #3
0
 /**
  * Invoked when the task runner terminates with an exception instead of a result.
  *
  * <p>Logs the failure together with the task identifier, marks the callback as completed,
  * notifies the fragment completion handler, shuts the callable down, writes the fragment-end
  * history record with its final flag set to {@code false}, and decrements the queued-request
  * count when a metrics sink is present.
  *
  * @param t the exception that ended the task runner
  */
 @Override
 public void onFailure(Throwable t) {
   final String failureMessage =
       "TezTaskRunner execution failed for : " + getTaskIdentifierString(request);
   LOG.error(failureMessage, t);

   isCompleted.set(true);
   fragmentCompletionHanler.fragmentComplete(fragmentInfo);

   // TODO HIVE-10236 Report a fatal error over the umbilical
   taskRunnerCallable.shutdown();

   // Final argument of the history record; onSuccess passes true in the same position.
   final boolean fragmentEndFlag = false;
   HistoryLogger.logFragmentEnd(
       request.getApplicationIdString(), request.getContainerIdString(),
       executionContext.getHostName(),
       request.getFragmentSpec().getDagName(), request.getFragmentSpec().getVertexName(),
       request.getFragmentSpec().getFragmentNumber(), request.getFragmentSpec().getAttemptNumber(),
       taskRunnerCallable.threadName, taskRunnerCallable.startTime,
       fragmentEndFlag);

   // Metrics are optional; nothing more to do without them.
   if (metrics == null) {
     return;
   }
   metrics.decrExecutorNumQueuedRequests();
 }
예제 #4
0
   /**
    * Sets up and runs the fragment described by {@code request}, returning the task runner's
    * result.
    *
    * <p>Sequence, as visible in this method: mark the callable as started and record the start
    * time and thread name; unregister from the AM reporter; return a {@code KILL_REQUESTED} result
    * if {@code shouldRunTask} is already false; otherwise build a single-thread daemon executor,
    * the task UGI, the umbilical proxy to the AM and the task reporter; create the
    * {@code TezTaskRunner2} after re-checking {@code shouldRunTask} under the same lock; run it.
    *
    * @return the result of {@code taskRunner.run()}, or a {@code KILL_REQUESTED} result when
    *     {@code shouldRunTask} was false at the first check or no task runner exists after the
    *     second one
    * @throws Exception whatever the setup calls or {@code taskRunner.run()} throw; nothing is
    *     caught here
    */
   @Override
   protected TaskRunner2Result callInternal() throws Exception {
     isStarted.set(true);

     // Record when and on which thread this callable began executing.
     this.startTime = System.currentTimeMillis();
     this.threadName = Thread.currentThread().getName();
     // Guarded so the string concatenation and the canFinish() call are skipped unless debug
     // logging is on.
     if (LOG.isDebugEnabled()) {
       LOG.debug("canFinish: " + taskSpec.getTaskAttemptID() + ": " + canFinish());
     }

     // Unregister from the AMReporter, since the task is now running.
     this.amReporter.unregisterTask(request.getAmHost(), request.getAmPort());

     // First check of shouldRunTask, under this object's monitor, before any resources are
     // created. Presumably the kill path clears the flag under the same lock - confirm.
     synchronized (this) {
       if (!shouldRunTask) {
         LOG.info("Not starting task {} since it was killed earlier", taskSpec.getTaskAttemptID());
         return new TaskRunner2Result(EndReason.KILL_REQUESTED, null, false);
       }
     }

     // TODO This executor seems unnecessary. Here and TezChild
     // Single daemon thread named "TezTaskRunner", wrapped as a listening executor.
     ExecutorService executorReal =
         Executors.newFixedThreadPool(
             1, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("TezTaskRunner").build());
     executor = MoreExecutors.listeningDecorator(executorReal);

     // TODO Consolidate this code with TezChild.
     runtimeWatch.start();
     // UGI for the requesting user, carrying the credentials held by this callable; handed to the
     // task runner below.
     UserGroupInformation taskUgi = UserGroupInformation.createRemoteUser(request.getUser());
     taskUgi.addCredentials(credentials);

     // Job token bytes, keyed by the shuffle handler service id.
     Map<String, ByteBuffer> serviceConsumerMetadata = new HashMap<>();
     serviceConsumerMetadata.put(
         TezConstants.TEZ_SHUFFLE_HANDLER_SERVICE_ID,
         TezCommonUtils.convertJobTokenToBytes(jobToken));
     Multimap<String, String> startedInputsMap = createStartedInputMap(request.getFragmentSpec());

     // Separate UGI, named after the request's token identifier and carrying the job token; in
     // this method it is used only to create the umbilical proxy.
     UserGroupInformation taskOwner =
         UserGroupInformation.createRemoteUser(request.getTokenIdentifier());
     final InetSocketAddress address =
         NetUtils.createSocketAddrForHost(request.getAmHost(), request.getAmPort());
     // Set the job token's service to the AM address before attaching it to the owner UGI.
     SecurityUtil.setTokenService(jobToken, address);
     taskOwner.addToken(jobToken);
     // Create the umbilical RPC proxy to the AM while running as taskOwner.
     umbilical =
         taskOwner.doAs(
             new PrivilegedExceptionAction<LlapTaskUmbilicalProtocol>() {
               @Override
               public LlapTaskUmbilicalProtocol run() throws Exception {
                 return RPC.getProxy(
                     LlapTaskUmbilicalProtocol.class,
                     LlapTaskUmbilicalProtocol.versionID,
                     address,
                     conf);
               }
             });

     // Reporter over the umbilical, configured with the heartbeat settings from confParams.
     taskReporter =
         new LlapTaskReporter(
             umbilical,
             confParams.amHeartbeatIntervalMsMax,
             confParams.amCounterHeartbeatInterval,
             confParams.amMaxEventsPerHeartbeat,
             new AtomicLong(0),
             request.getContainerIdString());

     // Register the attempt id for this thread; the outer finally below clears it again.
     String attemptId = fragmentInfo.getFragmentIdentifierString();
     IOContextMap.setThreadAttemptId(attemptId);
     try {
       // Second check of shouldRunTask under the same monitor: the runner is only created if the
       // flag is still set.
       synchronized (this) {
         if (shouldRunTask) {
           taskRunner =
               new TezTaskRunner2(
                   conf,
                   taskUgi,
                   fragmentInfo.getLocalDirs(),
                   taskSpec,
                   request.getAppAttemptNumber(),
                   serviceConsumerMetadata,
                   envMap,
                   startedInputsMap,
                   taskReporter,
                   executor,
                   objectRegistry,
                   pid,
                   executionContext,
                   memoryAvailable,
                   false);
         }
       }
       // No runner was created above (assuming taskRunner was null on entry - TODO confirm).
       // NOTE(review): this return happens after runtimeWatch.start() but outside the inner
       // try/finally, so runtimeWatch is not stopped in this method on this path - confirm that
       // is intended.
       if (taskRunner == null) {
         LOG.info("Not starting task {} since it was killed earlier", taskSpec.getTaskAttemptID());
         return new TaskRunner2Result(EndReason.KILL_REQUESTED, null, false);
       }

       try {
         TaskRunner2Result result = taskRunner.run();
         // A container shutdown request in the result is only logged; the result is returned
         // unchanged.
         if (result.isContainerShutdownRequested()) {
           LOG.warn("Unexpected container shutdown requested while running task. Ignoring");
         }
         // Only reached when run() returned normally.
         isCompleted.set(true);
         return result;
       } finally {
         // Runs whether run() returned or threw: close the file systems for taskUgi, then stop
         // runtimeWatch and log its elapsed milliseconds.
         FileSystem.closeAllForUGI(taskUgi);
         LOG.info(
             "ExecutionTime for Container: "
                 + request.getContainerIdString()
                 + "="
                 + runtimeWatch.stop().elapsedMillis());
         if (LOG.isDebugEnabled()) {
           LOG.debug(
               "canFinish post completion: " + taskSpec.getTaskAttemptID() + ": " + canFinish());
         }
       }
     } finally {
       // Always undo the setThreadAttemptId call made before the try.
       IOContextMap.clearThreadAttempt(attemptId);
     }
   }