// TODO XXX Does this really need to be a YarnConfiguration ? protected static void initAndStartAppMaster( final DAGAppMaster appMaster, final Configuration conf, String jobUserName) throws IOException, InterruptedException { Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials(); UserGroupInformation.setConfiguration(conf); UserGroupInformation appMasterUgi = UserGroupInformation.createRemoteUser(jobUserName); appMasterUgi.addCredentials(credentials); appMasterUgi.doAs( new PrivilegedExceptionAction<Object>() { @Override public Object run() throws Exception { appMaster.init(conf); appMaster.start(); return null; } }); }
/**
 * Adds {@code credentials} to {@code user}.
 *
 * <p>The method is {@code synchronized}, so the call runs while holding this object's monitor.
 */
@Override
public synchronized void flush() {
  user.addCredentials(credentials);
}
/**
 * Main run function for the application master.
 *
 * <p>In order, this method:
 *
 * <ol>
 *   <li>publishes the {@code DS_APP_ATTEMPT_START} event (failures are logged, not rethrown);
 *   <li>takes the current user's credentials, removes every AM-RM token from them, and only then
 *       serializes them into {@code allTokens};
 *   <li>creates {@code appSubmitterUgi} for the user named by the {@code USER} environment
 *       variable and adds the filtered credentials to it;
 *   <li>creates, initializes and starts the asynchronous RM and NM clients;
 *   <li>registers with the ResourceManager and caps {@code containerMemory} and
 *       {@code containerVirtualCores} at the maximums the RM reports;
 *   <li>counts the containers reported from previous attempts as already allocated and adds one
 *       container request for each container still missing;
 *   <li>publishes the {@code DS_APP_ATTEMPT_END} event (failures are logged, not rethrown).
 * </ol>
 *
 * @throws YarnException declared by this method; presumably raised by the RM client calls
 * @throws IOException if reading or serializing the credentials fails, or from the RM client
 */
@SuppressWarnings({"unchecked"})
public void run() throws YarnException, IOException {
  LOG.info("Starting ApplicationMaster");
  try {
    publishApplicationAttemptEvent(
        timelineClient, appAttemptID.toString(), DSEvent.DS_APP_ATTEMPT_START);
  } catch (Exception e) {
    LOG.error("App Attempt start event coud not be pulished for " + appAttemptID.toString(), e);
  }

  Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();

  // Remove the AM->RM token so that containers cannot access it.
  Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
  LOG.info("Executing with tokens:");
  while (iter.hasNext()) {
    Token<?> token = iter.next();
    LOG.info(token);
    if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) {
      iter.remove();
    }
  }

  // Serialize AFTER the AM->RM token has been stripped. Writing the buffer before the removal
  // loop (as this method previously did) left the AM->RM token inside allTokens, defeating the
  // removal above. NOTE(review): allTokens is presumably what is handed to launched
  // containers - confirm against the container launch code.
  DataOutputBuffer dob = new DataOutputBuffer();
  credentials.writeTokenStorageToStream(dob);
  allTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());

  // Create appSubmitterUgi and add the (filtered) tokens to it
  String appSubmitterUserName = System.getenv(ApplicationConstants.Environment.USER.name());
  appSubmitterUgi = UserGroupInformation.createRemoteUser(appSubmitterUserName);
  appSubmitterUgi.addCredentials(credentials);

  AMRMClientAsync.CallbackHandler allocListener = new RMCallbackHandler();
  amRMClient = AMRMClientAsync.createAMRMClientAsync(1000, allocListener);
  amRMClient.init(conf);
  amRMClient.start();

  containerListener = createNMCallbackHandler();
  nmClientAsync = new NMClientAsyncImpl(containerListener);
  nmClientAsync.init(conf);
  nmClientAsync.start();

  // Setup local RPC Server to accept status requests directly from clients
  // TODO need to setup a protocol for client to be able to communicate to
  // the RPC server
  // TODO use the rpc port info to register with the RM for the client to
  // send requests to this app master

  // Register self with ResourceManager
  // This will start heartbeating to the RM
  appMasterHostname = NetUtils.getHostname();
  RegisterApplicationMasterResponse response =
      amRMClient.registerApplicationMaster(
          appMasterHostname, appMasterRpcPort, appMasterTrackingUrl);

  // Dump out information about cluster capability as seen by the
  // resource manager
  int maxMem = response.getMaximumResourceCapability().getMemory();
  LOG.info("Max mem capabililty of resources in this cluster " + maxMem);

  int maxVCores = response.getMaximumResourceCapability().getVirtualCores();
  LOG.info("Max vcores capabililty of resources in this cluster " + maxVCores);

  // A resource ask cannot exceed the max.
  if (containerMemory > maxMem) {
    LOG.info(
        "Container memory specified above max threshold of cluster."
            + " Using max value."
            + ", specified="
            + containerMemory
            + ", max="
            + maxMem);
    containerMemory = maxMem;
  }

  if (containerVirtualCores > maxVCores) {
    LOG.info(
        "Container virtual cores specified above max threshold of cluster."
            + " Using max value."
            + ", specified="
            + containerVirtualCores
            + ", max="
            + maxVCores);
    containerVirtualCores = maxVCores;
  }

  // Containers reported from previous attempts count as already allocated, so only the
  // remainder is requested below.
  List<Container> previousAMRunningContainers = response.getContainersFromPreviousAttempts();
  LOG.info(
      "Received "
          + previousAMRunningContainers.size()
          + " previous AM's running containers on AM registration.");
  numAllocatedContainers.addAndGet(previousAMRunningContainers.size());

  int numTotalContainersToRequest = numTotalContainers - previousAMRunningContainers.size();

  // Setup ask for containers from RM
  // Send request for containers to RM
  // Until we get our fully allocated quota, we keep on polling RM for
  // containers
  // Keep looping until all the containers are launched and shell script
  // executed on them ( regardless of success/failure).
  for (int i = 0; i < numTotalContainersToRequest; ++i) {
    ContainerRequest containerAsk = setupContainerAskForRM();
    amRMClient.addContainerRequest(containerAsk);
  }
  numRequestedContainers.set(numTotalContainersToRequest);

  try {
    publishApplicationAttemptEvent(
        timelineClient, appAttemptID.toString(), DSEvent.DS_APP_ATTEMPT_END);
  } catch (Exception e) {
    // This guards the END event; the message previously said "start" (copy-paste slip).
    LOG.error("App Attempt end event coud not be pulished for " + appAttemptID.toString(), e);
  }
}
@Override protected TaskRunner2Result callInternal() throws Exception { isStarted.set(true); this.startTime = System.currentTimeMillis(); this.threadName = Thread.currentThread().getName(); if (LOG.isDebugEnabled()) { LOG.debug("canFinish: " + taskSpec.getTaskAttemptID() + ": " + canFinish()); } // Unregister from the AMReporter, since the task is now running. this.amReporter.unregisterTask(request.getAmHost(), request.getAmPort()); synchronized (this) { if (!shouldRunTask) { LOG.info("Not starting task {} since it was killed earlier", taskSpec.getTaskAttemptID()); return new TaskRunner2Result(EndReason.KILL_REQUESTED, null, false); } } // TODO This executor seems unnecessary. Here and TezChild ExecutorService executorReal = Executors.newFixedThreadPool( 1, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("TezTaskRunner").build()); executor = MoreExecutors.listeningDecorator(executorReal); // TODO Consolidate this code with TezChild. runtimeWatch.start(); UserGroupInformation taskUgi = UserGroupInformation.createRemoteUser(request.getUser()); taskUgi.addCredentials(credentials); Map<String, ByteBuffer> serviceConsumerMetadata = new HashMap<>(); serviceConsumerMetadata.put( TezConstants.TEZ_SHUFFLE_HANDLER_SERVICE_ID, TezCommonUtils.convertJobTokenToBytes(jobToken)); Multimap<String, String> startedInputsMap = createStartedInputMap(request.getFragmentSpec()); UserGroupInformation taskOwner = UserGroupInformation.createRemoteUser(request.getTokenIdentifier()); final InetSocketAddress address = NetUtils.createSocketAddrForHost(request.getAmHost(), request.getAmPort()); SecurityUtil.setTokenService(jobToken, address); taskOwner.addToken(jobToken); umbilical = taskOwner.doAs( new PrivilegedExceptionAction<LlapTaskUmbilicalProtocol>() { @Override public LlapTaskUmbilicalProtocol run() throws Exception { return RPC.getProxy( LlapTaskUmbilicalProtocol.class, LlapTaskUmbilicalProtocol.versionID, address, conf); } }); taskReporter = new LlapTaskReporter( umbilical, 
confParams.amHeartbeatIntervalMsMax, confParams.amCounterHeartbeatInterval, confParams.amMaxEventsPerHeartbeat, new AtomicLong(0), request.getContainerIdString()); String attemptId = fragmentInfo.getFragmentIdentifierString(); IOContextMap.setThreadAttemptId(attemptId); try { synchronized (this) { if (shouldRunTask) { taskRunner = new TezTaskRunner2( conf, taskUgi, fragmentInfo.getLocalDirs(), taskSpec, request.getAppAttemptNumber(), serviceConsumerMetadata, envMap, startedInputsMap, taskReporter, executor, objectRegistry, pid, executionContext, memoryAvailable, false); } } if (taskRunner == null) { LOG.info("Not starting task {} since it was killed earlier", taskSpec.getTaskAttemptID()); return new TaskRunner2Result(EndReason.KILL_REQUESTED, null, false); } try { TaskRunner2Result result = taskRunner.run(); if (result.isContainerShutdownRequested()) { LOG.warn("Unexpected container shutdown requested while running task. Ignoring"); } isCompleted.set(true); return result; } finally { FileSystem.closeAllForUGI(taskUgi); LOG.info( "ExecutionTime for Container: " + request.getContainerIdString() + "=" + runtimeWatch.stop().elapsedMillis()); if (LOG.isDebugEnabled()) { LOG.debug( "canFinish post completion: " + taskSpec.getTaskAttemptID() + ": " + canFinish()); } } } finally { IOContextMap.clearThreadAttempt(attemptId); } }