// TODO XXX Does this really need to be a YarnConfiguration ?
 protected static void initAndStartAppMaster(
     final DAGAppMaster appMaster, final Configuration conf, String jobUserName)
     throws IOException, InterruptedException {
   Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
   UserGroupInformation.setConfiguration(conf);
   UserGroupInformation appMasterUgi = UserGroupInformation.createRemoteUser(jobUserName);
   appMasterUgi.addCredentials(credentials);
   appMasterUgi.doAs(
       new PrivilegedExceptionAction<Object>() {
         @Override
         public Object run() throws Exception {
           appMaster.init(conf);
           appMaster.start();
           return null;
         }
       });
 }
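
The method above follows the standard Hadoop pattern for running a service as another user: build a remote-user UGI, copy the caller's credentials into it, and perform init/start inside doAs. A minimal sketch of the same idiom, with the helper class and method names being illustrative rather than part of Tez:

import java.security.PrivilegedExceptionAction;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;

// Hypothetical helper demonstrating the doAs-with-credentials idiom.
public final class RunAsUserSketch {

  public static void runAs(String userName, Configuration conf, final Runnable work)
      throws Exception {
    // Carry over whatever tokens and secrets the current process already holds.
    Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
    UserGroupInformation.setConfiguration(conf);

    UserGroupInformation ugi = UserGroupInformation.createRemoteUser(userName);
    ugi.addCredentials(credentials);

    // Everything inside run() executes with the remote user's identity and tokens.
    ugi.doAs(new PrivilegedExceptionAction<Void>() {
      @Override
      public Void run() throws Exception {
        work.run();
        return null;
      }
    });
  }
}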
Example #2
 /** Flushes the credentials accumulated so far into the target user's UGI. */
 @Override
 public synchronized void flush() {
   user.addCredentials(credentials);
 }
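
The flush() override above simply merges a buffered Credentials object into a UGI. A hedged, self-contained sketch of that pattern (all class and field names here are illustrative; only the Hadoop security API calls are real):

import org.apache.hadoop.io.Text;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.TokenIdentifier;

// Hypothetical sink that buffers tokens and pushes them to the target UGI on flush().
final class BufferedCredentialSink {
  private final UserGroupInformation user;
  private final Credentials credentials = new Credentials();

  BufferedCredentialSink(UserGroupInformation user) {
    this.user = user;
  }

  synchronized void add(Text alias, Token<? extends TokenIdentifier> token) {
    credentials.addToken(alias, token);
  }

  // Mirrors the override above: merge everything collected so far into the UGI.
  synchronized void flush() {
    user.addCredentials(credentials);
  }
}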
  /**
   * Main run function for the application master
   *
   * @throws YarnException
   * @throws IOException
   */
  @SuppressWarnings({"unchecked"})
  public void run() throws YarnException, IOException {
    LOG.info("Starting ApplicationMaster");
    try {
      publishApplicationAttemptEvent(
          timelineClient, appAttemptID.toString(), DSEvent.DS_APP_ATTEMPT_START);
    } catch (Exception e) {
      LOG.error("App Attempt start event coud not be pulished for " + appAttemptID.toString(), e);
    }

    Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
    DataOutputBuffer dob = new DataOutputBuffer();
    credentials.writeTokenStorageToStream(dob);
    // Now remove the AM->RM token so that containers cannot access it.
    Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
    LOG.info("Executing with tokens:");
    while (iter.hasNext()) {
      Token<?> token = iter.next();
      LOG.info(token);
      if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) {
        iter.remove();
      }
    }
    allTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());

    // Create appSubmitterUgi and add original tokens to it
    String appSubmitterUserName = System.getenv(ApplicationConstants.Environment.USER.name());
    appSubmitterUgi = UserGroupInformation.createRemoteUser(appSubmitterUserName);
    appSubmitterUgi.addCredentials(credentials);

    AMRMClientAsync.CallbackHandler allocListener = new RMCallbackHandler();
    amRMClient = AMRMClientAsync.createAMRMClientAsync(1000, allocListener);
    amRMClient.init(conf);
    amRMClient.start();

    containerListener = createNMCallbackHandler();
    nmClientAsync = new NMClientAsyncImpl(containerListener);
    nmClientAsync.init(conf);
    nmClientAsync.start();

    // Setup local RPC Server to accept status requests directly from clients
    // TODO need to setup a protocol for client to be able to communicate to
    // the RPC server
    // TODO use the rpc port info to register with the RM for the client to
    // send requests to this app master

    // Register self with ResourceManager
    // This will start heartbeating to the RM
    appMasterHostname = NetUtils.getHostname();
    RegisterApplicationMasterResponse response =
        amRMClient.registerApplicationMaster(
            appMasterHostname, appMasterRpcPort, appMasterTrackingUrl);
    // Dump out information about cluster capability as seen by the
    // resource manager
    int maxMem = response.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capabililty of resources in this cluster " + maxMem);

    int maxVCores = response.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max vcores capabililty of resources in this cluster " + maxVCores);

    // A resource ask cannot exceed the max.
    if (containerMemory > maxMem) {
      LOG.info(
          "Container memory specified above max threshold of cluster."
              + " Using max value: specified="
              + containerMemory
              + ", max="
              + maxMem);
      containerMemory = maxMem;
    }

    if (containerVirtualCores > maxVCores) {
      LOG.info(
          "Container virtual cores specified above max threshold of cluster."
              + " Using max value: specified="
              + containerVirtualCores
              + ", max="
              + maxVCores);
      containerVirtualCores = maxVCores;
    }

    List<Container> previousAMRunningContainers = response.getContainersFromPreviousAttempts();
    LOG.info(
        "Received "
            + previousAMRunningContainers.size()
            + " previous AM's running containers on AM registration.");
    numAllocatedContainers.addAndGet(previousAMRunningContainers.size());

    int numTotalContainersToRequest = numTotalContainers - previousAMRunningContainers.size();
    // Setup ask for containers from RM
    // Send request for containers to RM
    // Until we get our fully allocated quota, we keep on polling RM for
    // containers
    // Keep looping until all the containers are launched and shell script
    // executed on them ( regardless of success/failure).
    for (int i = 0; i < numTotalContainersToRequest; ++i) {
      ContainerRequest containerAsk = setupContainerAskForRM();
      amRMClient.addContainerRequest(containerAsk);
    }
    numRequestedContainers.set(numTotalContainersToRequest);
    try {
      publishApplicationAttemptEvent(
          timelineClient, appAttemptID.toString(), DSEvent.DS_APP_ATTEMPT_END);
    } catch (Exception e) {
      LOG.error("App Attempt start event coud not be pulished for " + appAttemptID.toString(), e);
    }
  }
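
The allTokens buffer assembled above (with the AM->RM token stripped out) is typically handed to each launched container through its ContainerLaunchContext, as in the DistributedShell example this method resembles. A hedged sketch of that handoff; the class name and shell command are placeholders:

import java.nio.ByteBuffer;
import java.util.Collections;

import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.client.api.async.NMClientAsync;
import org.apache.hadoop.yarn.util.Records;

// Hypothetical launcher showing how the token buffer reaches a container.
final class ContainerLauncherSketch {
  static void launch(NMClientAsync nmClientAsync, Container container, ByteBuffer allTokens) {
    ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class);
    // duplicate() gives each container its own read position over the same token bytes.
    ctx.setTokens(allTokens.duplicate());
    ctx.setCommands(Collections.singletonList("echo hello"));
    nmClientAsync.startContainerAsync(container, ctx);
  }
}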
Example #4
  @Override
  protected TaskRunner2Result callInternal() throws Exception {
    isStarted.set(true);

    this.startTime = System.currentTimeMillis();
    this.threadName = Thread.currentThread().getName();
    if (LOG.isDebugEnabled()) {
      LOG.debug("canFinish: " + taskSpec.getTaskAttemptID() + ": " + canFinish());
    }

    // Unregister from the AMReporter, since the task is now running.
    this.amReporter.unregisterTask(request.getAmHost(), request.getAmPort());

    synchronized (this) {
      if (!shouldRunTask) {
        LOG.info("Not starting task {} since it was killed earlier", taskSpec.getTaskAttemptID());
        return new TaskRunner2Result(EndReason.KILL_REQUESTED, null, false);
      }
    }

    // TODO This executor seems unnecessary. Here and TezChild
    ExecutorService executorReal =
        Executors.newFixedThreadPool(
            1, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("TezTaskRunner").build());
    executor = MoreExecutors.listeningDecorator(executorReal);

    // TODO Consolidate this code with TezChild.
    runtimeWatch.start();
    UserGroupInformation taskUgi = UserGroupInformation.createRemoteUser(request.getUser());
    taskUgi.addCredentials(credentials);

    Map<String, ByteBuffer> serviceConsumerMetadata = new HashMap<>();
    serviceConsumerMetadata.put(
        TezConstants.TEZ_SHUFFLE_HANDLER_SERVICE_ID,
        TezCommonUtils.convertJobTokenToBytes(jobToken));
    Multimap<String, String> startedInputsMap = createStartedInputMap(request.getFragmentSpec());

    UserGroupInformation taskOwner =
        UserGroupInformation.createRemoteUser(request.getTokenIdentifier());
    final InetSocketAddress address =
        NetUtils.createSocketAddrForHost(request.getAmHost(), request.getAmPort());
    SecurityUtil.setTokenService(jobToken, address);
    taskOwner.addToken(jobToken);
    umbilical =
        taskOwner.doAs(
            new PrivilegedExceptionAction<LlapTaskUmbilicalProtocol>() {
              @Override
              public LlapTaskUmbilicalProtocol run() throws Exception {
                return RPC.getProxy(
                    LlapTaskUmbilicalProtocol.class,
                    LlapTaskUmbilicalProtocol.versionID,
                    address,
                    conf);
              }
            });

    taskReporter =
        new LlapTaskReporter(
            umbilical,
            confParams.amHeartbeatIntervalMsMax,
            confParams.amCounterHeartbeatInterval,
            confParams.amMaxEventsPerHeartbeat,
            new AtomicLong(0),
            request.getContainerIdString());

    String attemptId = fragmentInfo.getFragmentIdentifierString();
    IOContextMap.setThreadAttemptId(attemptId);
    try {
      synchronized (this) {
        if (shouldRunTask) {
          taskRunner =
              new TezTaskRunner2(
                  conf,
                  taskUgi,
                  fragmentInfo.getLocalDirs(),
                  taskSpec,
                  request.getAppAttemptNumber(),
                  serviceConsumerMetadata,
                  envMap,
                  startedInputsMap,
                  taskReporter,
                  executor,
                  objectRegistry,
                  pid,
                  executionContext,
                  memoryAvailable,
                  false);
        }
      }
      if (taskRunner == null) {
        LOG.info("Not starting task {} since it was killed earlier", taskSpec.getTaskAttemptID());
        return new TaskRunner2Result(EndReason.KILL_REQUESTED, null, false);
      }

      try {
        TaskRunner2Result result = taskRunner.run();
        if (result.isContainerShutdownRequested()) {
          LOG.warn("Unexpected container shutdown requested while running task. Ignoring");
        }
        isCompleted.set(true);
        return result;
      } finally {
        FileSystem.closeAllForUGI(taskUgi);
        LOG.info(
            "ExecutionTime for Container: "
                + request.getContainerIdString()
                + "="
                + runtimeWatch.stop().elapsedMillis());
        if (LOG.isDebugEnabled()) {
          LOG.debug(
              "canFinish post completion: " + taskSpec.getTaskAttemptID() + ": " + canFinish());
        }
      }
    } finally {
      IOContextMap.clearThreadAttempt(attemptId);
    }
  }
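
The umbilical setup above follows a common Hadoop RPC pattern: bind the job token's service to the AM address, attach the token to a dedicated UGI, and create the proxy inside doAs so SASL authentication picks up that token. A minimal sketch of the idiom under those assumptions; the helper name and parameters are illustrative:

import java.net.InetSocketAddress;
import java.security.PrivilegedExceptionAction;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.TokenIdentifier;

// Hypothetical helper for creating a token-authenticated RPC proxy.
final class TokenScopedProxySketch {
  static <T> T connect(final Class<T> protocol, final long version, String host, int port,
      String tokenIdentifier, Token<? extends TokenIdentifier> jobToken, final Configuration conf)
      throws Exception {
    final InetSocketAddress address = NetUtils.createSocketAddrForHost(host, port);
    // The token's service field must match the address the proxy will dial.
    SecurityUtil.setTokenService(jobToken, address);

    UserGroupInformation owner = UserGroupInformation.createRemoteUser(tokenIdentifier);
    owner.addToken(jobToken);

    // Creating the proxy inside doAs lets SASL authenticate with the job token from the UGI.
    return owner.doAs(new PrivilegedExceptionAction<T>() {
      @Override
      public T run() throws Exception {
        return RPC.getProxy(protocol, version, address, conf);
      }
    });
  }
}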