public SocialiteAppMaster(Path _jarPath) { jarPath = _jarPath; configuration = new YarnConfiguration(); numAllocedContainers = 0; nmClient = NMClient.createNMClient(); nmClient.init(configuration); nmClient.start(); rmClient = AMRMClientAsync.createAMRMClientAsync(100, this); rmClient.init(getConfiguration()); rmClient.start(); }
public void runMainLoop() throws Exception { rmClient.registerApplicationMaster("", 0, ""); initRegistry(); startMasterNode(); Priority priority = Records.newRecord(Priority.class); priority.setPriority(0); Resource capability = Records.newRecord(Resource.class); capability.setMemory(ClusterConf.get().getWorkerHeapSize()); capability.setVirtualCores(ClusterConf.get().getNumWorkerThreads()); List<ContainerRequest> containerReq = new ArrayList<ContainerRequest>(); for (int i = 0; i < ClusterConf.get().getNumWorkers(); ++i) { ContainerRequest containerAsk = new ContainerRequest(capability, null, null, priority); rmClient.addContainerRequest(containerAsk); containerReq.add(containerAsk); } while (alive) { Thread.sleep(1000); } finish(); /* for (ContainerRequest req : containerReq) { rmClient.removeContainerRequest(req); } int containersToAdd = 2; numContainersToWaitFor = containersToAdd; System.out.println("[Am] finished all containers. Asking for more containers, total=" + numContainersToWaitFor); for (int i = 0; i < containersToAdd; ++i) { ContainerRequest containerAsk = new ContainerRequest(capability, null, null, priority); System.out.println("[AM] Making res-req " + i); rmClient.addContainerRequest(containerAsk); } System.out.println("[AM] waiting for containers to finish once more!!!"); while (!doneWithContainers()) { Thread.sleep(100); } */ }
public YarnService( Config config, String applicationName, String applicationId, YarnConfiguration yarnConfiguration, FileSystem fs, EventBus eventBus) throws Exception { this.applicationName = applicationName; this.applicationId = applicationId; this.config = config; this.eventBus = eventBus; this.gobblinMetrics = config.getBoolean(ConfigurationKeys.METRICS_ENABLED_KEY) ? Optional.of(buildGobblinMetrics()) : Optional.<GobblinMetrics>absent(); this.eventSubmitter = config.getBoolean(ConfigurationKeys.METRICS_ENABLED_KEY) ? Optional.of(buildEventSubmitter()) : Optional.<EventSubmitter>absent(); this.yarnConfiguration = yarnConfiguration; this.fs = fs; this.amrmClientAsync = closer.register( AMRMClientAsync.createAMRMClientAsync(1000, new AMRMClientCallbackHandler())); this.amrmClientAsync.init(this.yarnConfiguration); this.nmClientAsync = closer.register(NMClientAsync.createNMClientAsync(new NMClientCallbackHandler())); this.nmClientAsync.init(this.yarnConfiguration); this.initialContainers = config.getInt(GobblinYarnConfigurationKeys.INITIAL_CONTAINERS_KEY); this.requestedContainerMemoryMbs = config.getInt(GobblinYarnConfigurationKeys.CONTAINER_MEMORY_MBS_KEY); this.requestedContainerCores = config.getInt(GobblinYarnConfigurationKeys.CONTAINER_CORES_KEY); this.containerHostAffinityEnabled = config.getBoolean(GobblinYarnConfigurationKeys.CONTAINER_HOST_AFFINITY_ENABLED); this.helixInstanceMaxRetries = config.getInt(GobblinYarnConfigurationKeys.HELIX_INSTANCE_MAX_RETRIES); this.containerJvmArgs = config.hasPath(GobblinYarnConfigurationKeys.CONTAINER_JVM_ARGS_KEY) ? Optional.of(config.getString(GobblinYarnConfigurationKeys.CONTAINER_JVM_ARGS_KEY)) : Optional.<String>absent(); this.containerLaunchExecutor = Executors.newFixedThreadPool( 10, ExecutorsUtils.newThreadFactory( Optional.of(LOGGER), Optional.of("ContainerLaunchExecutor"))); this.tokens = getSecurityTokens(); }
void finish() throws IOException, YarnException { rmClient.unregisterApplicationMaster(finalStatus, appMessage, null); nmClient.stop(); }
@VisibleForTesting protected boolean finish() { // wait for completion. while (!done && (numCompletedContainers.get() != numTotalContainers)) { try { Thread.sleep(200); } catch (InterruptedException ex) { } } // Join all launched threads // needed for when we time out // and we need to release containers for (Thread launchThread : launchThreads) { try { launchThread.join(10000); } catch (InterruptedException e) { LOG.info("Exception thrown in thread join: " + e.getMessage()); e.printStackTrace(); } } // When the application completes, it should stop all running containers LOG.info("Application completed. Stopping running containers"); nmClientAsync.stop(); // When the application completes, it should send a finish application // signal to the RM LOG.info("Application completed. Signalling finish to RM"); FinalApplicationStatus appStatus; String appMessage = null; boolean success = true; if (numFailedContainers.get() == 0 && numCompletedContainers.get() == numTotalContainers) { appStatus = FinalApplicationStatus.SUCCEEDED; } else { appStatus = FinalApplicationStatus.FAILED; appMessage = "Diagnostics." + ", total=" + numTotalContainers + ", completed=" + numCompletedContainers.get() + ", allocated=" + numAllocatedContainers.get() + ", failed=" + numFailedContainers.get(); success = false; } try { amRMClient.unregisterApplicationMaster(appStatus, appMessage, null); } catch (YarnException ex) { LOG.error("Failed to unregister application", ex); } catch (IOException e) { LOG.error("Failed to unregister application", e); } amRMClient.stop(); return success; }
/** * Main run function for the application master * * @throws YarnException * @throws IOException */ @SuppressWarnings({"unchecked"}) public void run() throws YarnException, IOException { LOG.info("Starting ApplicationMaster"); try { publishApplicationAttemptEvent( timelineClient, appAttemptID.toString(), DSEvent.DS_APP_ATTEMPT_START); } catch (Exception e) { LOG.error("App Attempt start event coud not be pulished for " + appAttemptID.toString(), e); } Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials(); DataOutputBuffer dob = new DataOutputBuffer(); credentials.writeTokenStorageToStream(dob); // Now remove the AM->RM token so that containers cannot access it. Iterator<Token<?>> iter = credentials.getAllTokens().iterator(); LOG.info("Executing with tokens:"); while (iter.hasNext()) { Token<?> token = iter.next(); LOG.info(token); if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) { iter.remove(); } } allTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); // Create appSubmitterUgi and add original tokens to it String appSubmitterUserName = System.getenv(ApplicationConstants.Environment.USER.name()); appSubmitterUgi = UserGroupInformation.createRemoteUser(appSubmitterUserName); appSubmitterUgi.addCredentials(credentials); AMRMClientAsync.CallbackHandler allocListener = new RMCallbackHandler(); amRMClient = AMRMClientAsync.createAMRMClientAsync(1000, allocListener); amRMClient.init(conf); amRMClient.start(); containerListener = createNMCallbackHandler(); nmClientAsync = new NMClientAsyncImpl(containerListener); nmClientAsync.init(conf); nmClientAsync.start(); // Setup local RPC Server to accept status requests directly from clients // TODO need to setup a protocol for client to be able to communicate to // the RPC server // TODO use the rpc port info to register with the RM for the client to // send requests to this app master // Register self with ResourceManager // This will start heartbeating to the RM appMasterHostname = NetUtils.getHostname(); RegisterApplicationMasterResponse response = amRMClient.registerApplicationMaster( appMasterHostname, appMasterRpcPort, appMasterTrackingUrl); // Dump out information about cluster capability as seen by the // resource manager int maxMem = response.getMaximumResourceCapability().getMemory(); LOG.info("Max mem capabililty of resources in this cluster " + maxMem); int maxVCores = response.getMaximumResourceCapability().getVirtualCores(); LOG.info("Max vcores capabililty of resources in this cluster " + maxVCores); // A resource ask cannot exceed the max. if (containerMemory > maxMem) { LOG.info( "Container memory specified above max threshold of cluster." + " Using max value." + ", specified=" + containerMemory + ", max=" + maxMem); containerMemory = maxMem; } if (containerVirtualCores > maxVCores) { LOG.info( "Container virtual cores specified above max threshold of cluster." + " Using max value." + ", specified=" + containerVirtualCores + ", max=" + maxVCores); containerVirtualCores = maxVCores; } List<Container> previousAMRunningContainers = response.getContainersFromPreviousAttempts(); LOG.info( "Received " + previousAMRunningContainers.size() + " previous AM's running containers on AM registration."); numAllocatedContainers.addAndGet(previousAMRunningContainers.size()); int numTotalContainersToRequest = numTotalContainers - previousAMRunningContainers.size(); // Setup ask for containers from RM // Send request for containers to RM // Until we get our fully allocated quota, we keep on polling RM for // containers // Keep looping until all the containers are launched and shell script // executed on them ( regardless of success/failure). for (int i = 0; i < numTotalContainersToRequest; ++i) { ContainerRequest containerAsk = setupContainerAskForRM(); amRMClient.addContainerRequest(containerAsk); } numRequestedContainers.set(numTotalContainersToRequest); try { publishApplicationAttemptEvent( timelineClient, appAttemptID.toString(), DSEvent.DS_APP_ATTEMPT_END); } catch (Exception e) { LOG.error("App Attempt start event coud not be pulished for " + appAttemptID.toString(), e); } }