private ApplicationId createApp(YarnClient rmClient, boolean unmanaged) throws Exception {
  YarnClientApplication newApp = rmClient.createApplication();
  ApplicationId appId = newApp.getNewApplicationResponse().getApplicationId();

  // Create launch context for app master
  ApplicationSubmissionContext appContext = Records.newRecord(ApplicationSubmissionContext.class);

  // set the application id
  appContext.setApplicationId(appId);
  // set the application name
  appContext.setApplicationName("test");

  // Set the priority for the application master
  Priority pri = Records.newRecord(Priority.class);
  pri.setPriority(1);
  appContext.setPriority(pri);

  // Set the queue to which this application is to be submitted in the RM
  appContext.setQueue("default");

  // Set up the container launch context for the application master
  ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);
  appContext.setAMContainerSpec(amContainer);
  appContext.setResource(Resource.newInstance(1024, 1));
  appContext.setUnmanagedAM(unmanaged);

  // Submit the application to the applications manager
  rmClient.submitApplication(appContext);
  return appId;
}
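// Example (not from the original sources): a sketch of how the helpers in
// this section fit together, written as if it lived in the same test class
// as createApp and the waitTillAccepted method shown below. The
// configuration and the unmanaged flag value are illustrative assumptions.
private void submitAndAwaitAccepted() throws Exception {
  YarnClient rmClient = YarnClient.createYarnClient();
  rmClient.init(new YarnConfiguration());
  rmClient.start();
  try {
    ApplicationId appId = createApp(rmClient, /* unmanaged= */ false);
    waitTillAccepted(rmClient, appId);
    System.out.println("Application " + appId + " reached ACCEPTED");
  } finally {
    rmClient.stop();
  }
}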
/**
 * Start a Tez Session
 *
 * @throws TezException
 * @throws IOException
 */
public synchronized void start() throws TezException, IOException {
  yarnClient = YarnClient.createYarnClient();
  yarnClient.init(sessionConfig.getYarnConfiguration());
  yarnClient.start();
  tezJarResources = TezClientUtils.setupTezJarsLocalResources(sessionConfig.getTezConfiguration());
  try {
    if (applicationId == null) {
      applicationId =
          yarnClient.createApplication().getNewApplicationResponse().getApplicationId();
    }
    ApplicationSubmissionContext appContext =
        TezClientUtils.createApplicationSubmissionContext(
            sessionConfig.getTezConfiguration(),
            applicationId,
            null,
            sessionName,
            sessionConfig.getAMConfiguration(),
            tezJarResources);
    // Set Tez Sessions to not retry on AM crashes
    appContext.setMaxAppAttempts(1);
    tezConfPBLRsrc =
        appContext
            .getAMContainerSpec()
            .getLocalResources()
            .get(TezConfiguration.TEZ_PB_BINARY_CONF_NAME);
    yarnClient.submitApplication(appContext);
  } catch (YarnException e) {
    throw new TezException(e);
  }
  sessionStarted = true;
}
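// Example (not from the original sources): driving the session methods shown
// in this section (start, getSessionStatus, stop). The TezSession parameter
// type and the 500 ms poll interval are assumptions made for illustration.
public static void runSession(TezSession session)
    throws TezException, IOException, InterruptedException {
  session.start();
  try {
    // Wait until the AM is up before submitting work.
    while (session.getSessionStatus() == TezSessionStatus.INITIALIZING) {
      Thread.sleep(500);
    }
    // ... submit DAGs against the running session here ...
  } finally {
    session.stop();
  }
}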
@Test
public void testClientStop() {
  Configuration conf = new Configuration();
  ResourceManager rm = new ResourceManager();
  rm.init(conf);
  rm.start();
  YarnClient client = YarnClient.createYarnClient();
  client.init(conf);
  client.start();
  client.stop();
  rm.stop();
}
private void waitTillAccepted(YarnClient rmClient, ApplicationId appId) throws Exception {
  // The original wrapped this body in a catch (Exception ex) { throw new
  // Exception(ex); } block, which only added a redundant wrapper; let the
  // exceptions propagate directly instead.
  long start = System.currentTimeMillis();
  ApplicationReport report = rmClient.getApplicationReport(appId);
  while (YarnApplicationState.ACCEPTED != report.getYarnApplicationState()) {
    if (System.currentTimeMillis() - start > 20 * 1000) {
      throw new Exception("App '" + appId + "' timed out, failed to reach ACCEPTED state");
    }
    Thread.sleep(200);
    report = rmClient.getApplicationReport(appId);
  }
}
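// Example (not from the original sources): the same polling pattern
// generalized to an arbitrary target state and timeout. The method name and
// the 200 ms poll interval are illustrative.
private static void waitForState(
    YarnClient client, ApplicationId appId, YarnApplicationState target, long timeoutMs)
    throws Exception {
  long deadline = System.currentTimeMillis() + timeoutMs;
  while (client.getApplicationReport(appId).getYarnApplicationState() != target) {
    if (System.currentTimeMillis() > deadline) {
      throw new Exception(
          "App '" + appId + "' failed to reach " + target + " within " + timeoutMs + " ms");
    }
    Thread.sleep(200);
  }
}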
public TezSessionStatus getSessionStatus() throws TezException, IOException {
  try {
    ApplicationReport appReport = yarnClient.getApplicationReport(applicationId);
    switch (appReport.getYarnApplicationState()) {
      case NEW:
      case NEW_SAVING:
      case ACCEPTED:
      case SUBMITTED:
        return TezSessionStatus.INITIALIZING;
      case FINISHED:
      case FAILED:
      case KILLED:
        return TezSessionStatus.SHUTDOWN;
      case RUNNING:
        try {
          DAGClientAMProtocolBlockingPB proxy =
              TezClientUtils.getSessionAMProxy(
                  yarnClient, sessionConfig.getYarnConfiguration(), applicationId);
          if (proxy == null) {
            return TezSessionStatus.INITIALIZING;
          }
          GetAMStatusResponseProto response =
              proxy.getAMStatus(null, GetAMStatusRequestProto.newBuilder().build());
          return DagTypeConverters.convertTezSessionStatusFromProto(response.getStatus());
        } catch (TezException e) {
          LOG.info("Failed to retrieve AM Status via proxy", e);
        } catch (ServiceException e) {
          LOG.info("Failed to retrieve AM Status via proxy", e);
        }
    }
  } catch (YarnException e) {
    throw new TezException(e);
  }
  return TezSessionStatus.INITIALIZING;
}
@Test
public void testKillApplication() throws Exception {
  MockRM rm = new MockRM();
  rm.start();
  RMApp app = rm.submitApp(2000);
  Configuration conf = new Configuration();
  @SuppressWarnings("resource")
  final YarnClient client = new MockYarnClient();
  client.init(conf);
  client.start();
  client.killApplication(app.getApplicationId());
  verify(((MockYarnClient) client).getRMClient(), times(2))
      .forceKillApplication(any(KillApplicationRequest.class));
}
/**
 * Shutdown a Tez Session.
 *
 * @throws TezException
 * @throws IOException
 */
public synchronized void stop() throws TezException, IOException {
  LOG.info(
      "Shutting down Tez Session"
          + ", sessionName=" + sessionName
          + ", applicationId=" + applicationId);
  sessionStopped = true;
  try {
    DAGClientAMProtocolBlockingPB proxy =
        TezClientUtils.getSessionAMProxy(
            yarnClient, sessionConfig.getYarnConfiguration(), applicationId);
    if (proxy != null) {
      ShutdownSessionRequestProto request = ShutdownSessionRequestProto.newBuilder().build();
      proxy.shutdownSession(null, request);
      return;
    }
  } catch (TezException e) {
    LOG.info("Failed to shutdown Tez Session via proxy", e);
  } catch (ServiceException e) {
    LOG.info("Failed to shutdown Tez Session via proxy", e);
  }
  LOG.info(
      "Could not connect to AM, killing session via YARN"
          + ", sessionName=" + sessionName
          + ", applicationId=" + applicationId);
  try {
    yarnClient.killApplication(applicationId);
  } catch (YarnException e) {
    throw new TezException(e);
  }
}
@Test(timeout = 10000)
public void testGetContainers() throws YarnException, IOException {
  Configuration conf = new Configuration();
  final YarnClient client = new MockYarnClient();
  client.init(conf);
  client.start();
  ApplicationId applicationId = ApplicationId.newInstance(1234, 5);
  ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(applicationId, 1);
  List<ContainerReport> reports = client.getContainers(appAttemptId);
  Assert.assertNotNull(reports);
  // JUnit's assertEquals takes (expected, actual); keep that order so
  // failure messages read correctly.
  Assert.assertEquals(
      ContainerId.newContainerId(appAttemptId, 1), reports.get(0).getContainerId());
  Assert.assertEquals(
      ContainerId.newContainerId(appAttemptId, 2), reports.get(1).getContainerId());
  client.stop();
}
@Test(timeout = 10000)
public void testGetApplicationAttempt() throws YarnException, IOException {
  Configuration conf = new Configuration();
  final YarnClient client = new MockYarnClient();
  client.init(conf);
  client.start();
  List<ApplicationReport> expectedReports = ((MockYarnClient) client).getReports();
  ApplicationId applicationId = ApplicationId.newInstance(1234, 5);
  ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(applicationId, 1);
  ApplicationAttemptReport report = client.getApplicationAttemptReport(appAttemptId);
  Assert.assertNotNull(report);
  // assertEquals takes (expected, actual).
  Assert.assertEquals(
      expectedReports.get(0).getCurrentApplicationAttemptId().toString(),
      report.getApplicationAttemptId().toString());
  client.stop();
}
/** Test regular operation, including command line parameter parsing. */
@Test(timeout = 60000) // timeout after a minute.
public void testDetachedMode() {
  LOG.info("Starting testDetachedMode()");
  addTestAppender(FlinkYarnSessionCli.class, Level.INFO);
  Runner runner =
      startWithArgs(
          new String[] {
            "-j", flinkUberjar.getAbsolutePath(),
            "-t", flinkLibFolder.getAbsolutePath(),
            "-n", "1",
            "-jm", "768",
            "-tm", "1024",
            "--name", "MyCustomName", // test setting a custom name
            "--detached"
          },
          "Flink JobManager is now running on",
          RunTypes.YARN_SESSION);

  checkForLogString("The Flink YARN client has been started in detached mode");
  Assert.assertFalse("The runner should detach.", runner.isAlive());

  LOG.info("Waiting until two containers are running");
  // wait until two containers are running
  while (getRunningContainers() < 2) {
    sleep(500);
  }
  LOG.info("Two containers are running. Killing the application");

  // kill application "externally".
  try {
    YarnClient yc = YarnClient.createYarnClient();
    yc.init(yarnConfiguration);
    yc.start();
    List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING));
    Assert.assertEquals(1, apps.size()); // Only one running
    ApplicationReport app = apps.get(0);
    Assert.assertEquals("MyCustomName", app.getName());
    ApplicationId id = app.getApplicationId();
    yc.killApplication(id);
    while (yc.getApplications(EnumSet.of(YarnApplicationState.KILLED)).size() == 0) {
      sleep(500);
    }
  } catch (Throwable t) {
    LOG.warn("Killing failed", t);
    Assert.fail();
  }
  LOG.info("Finished testDetachedMode()");
}
@Test(timeout = 10000)
public void testGetApplications() throws YarnException, IOException {
  Configuration conf = new Configuration();
  final YarnClient client = new MockYarnClient();
  client.init(conf);
  client.start();
  List<ApplicationReport> expectedReports = ((MockYarnClient) client).getReports();

  // assertEquals takes (expected, actual).
  List<ApplicationReport> reports = client.getApplications();
  Assert.assertEquals(expectedReports, reports);

  Set<String> appTypes = new HashSet<String>();
  appTypes.add("YARN");
  appTypes.add("NON-YARN");

  reports = client.getApplications(appTypes, null);
  Assert.assertEquals(2, reports.size());
  Assert.assertTrue(
      (reports.get(0).getApplicationType().equals("YARN")
              && reports.get(1).getApplicationType().equals("NON-YARN"))
          || (reports.get(1).getApplicationType().equals("YARN")
              && reports.get(0).getApplicationType().equals("NON-YARN")));
  for (ApplicationReport report : reports) {
    Assert.assertTrue(expectedReports.contains(report));
  }

  EnumSet<YarnApplicationState> appStates = EnumSet.noneOf(YarnApplicationState.class);
  appStates.add(YarnApplicationState.FINISHED);
  appStates.add(YarnApplicationState.FAILED);
  reports = client.getApplications(null, appStates);
  Assert.assertEquals(2, reports.size());
  Assert.assertTrue(
      (reports.get(0).getApplicationType().equals("NON-YARN")
              && reports.get(1).getApplicationType().equals("NON-MAPREDUCE"))
          || (reports.get(1).getApplicationType().equals("NON-YARN")
              && reports.get(0).getApplicationType().equals("NON-MAPREDUCE")));
  for (ApplicationReport report : reports) {
    Assert.assertTrue(expectedReports.contains(report));
  }

  reports = client.getApplications(appTypes, appStates);
  Assert.assertEquals(1, reports.size());
  Assert.assertTrue(reports.get(0).getApplicationType().equals("NON-YARN"));
  for (ApplicationReport report : reports) {
    Assert.assertTrue(expectedReports.contains(report));
  }

  client.stop();
}
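// Example (not from the original sources): the same filtering overload of
// getApplications(Set<String>, EnumSet<YarnApplicationState>) pointed at a
// live cluster. The application type "MAPREDUCE" is only an illustrative value.
public static void printRunningApps() throws Exception {
  YarnClient yarn = YarnClient.createYarnClient();
  yarn.init(new YarnConfiguration());
  yarn.start();
  try {
    Set<String> types = Collections.singleton("MAPREDUCE");
    EnumSet<YarnApplicationState> states = EnumSet.of(YarnApplicationState.RUNNING);
    for (ApplicationReport report : yarn.getApplications(types, states)) {
      System.out.println(report.getApplicationId() + " " + report.getName());
    }
  } finally {
    yarn.stop();
  }
}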
@Before
public void checkClusterEmpty() throws IOException, YarnException {
  if (yarnClient == null) {
    yarnClient = YarnClient.createYarnClient();
    yarnClient.init(yarnConfiguration);
    yarnClient.start();
  }
  List<ApplicationReport> apps = yarnClient.getApplications();
  for (ApplicationReport app : apps) {
    if (app.getYarnApplicationState() != YarnApplicationState.FINISHED
        && app.getYarnApplicationState() != YarnApplicationState.KILLED
        && app.getYarnApplicationState() != YarnApplicationState.FAILED) {
      Assert.fail(
          "There is at least one application on the cluster that is not finished. "
              + "App " + app.getApplicationId()
              + " is in state " + app.getYarnApplicationState());
    }
  }
}
@Override
protected void serviceStop() throws Exception {
  if (this.rmClient != null) {
    RPC.stopProxy(this.rmClient);
  }
  if (historyServiceEnabled) {
    historyClient.stop();
  }
  if (timelineServiceEnabled) {
    timelineClient.stop();
  }
  super.serviceStop();
}
@Override
public void run() {
  while (running.get() && yarnClient.isInState(Service.STATE.STARTED)) {
    try {
      ApplicationReport report = yarnClient.getApplicationReport(appId);
      synchronized (lock) {
        lastReport = report;
      }
    } catch (Exception e) {
      LOG.warn("Error while getting application report", e);
    }
    try {
      Thread.sleep(FlinkYarnCluster.POLLING_THREAD_INTERVAL_MS);
    } catch (InterruptedException e) {
      LOG.error("Polling thread got interrupted", e);
      Thread.currentThread().interrupt(); // pass interrupt.
    }
  }
  if (running.get() && !yarnClient.isInState(Service.STATE.STARTED)) {
    // the polling thread is still running, but the YARN client was stopped.
    LOG.warn("YARN client is unexpectedly in state " + yarnClient.getServiceState());
  }
}
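// Example (not from the original sources): a minimal sketch of how such a
// polling runnable is typically driven. The parameter pairing mirrors the
// fields in the snippet above (running, the runnable itself) but is an
// assumption about the enclosing class.
private void startAndStopPoller(Runnable pollingRunnable, AtomicBoolean running)
    throws InterruptedException {
  Thread poller = new Thread(pollingRunnable, "yarn-report-poller");
  poller.setDaemon(true); // do not keep the JVM alive just for monitoring
  poller.start();
  // ... later, on shutdown:
  running.set(false); // let the while condition fall through
  poller.interrupt(); // wake the thread out of Thread.sleep()
  poller.join();      // wait for the final report to be published
}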
@Override
protected void serviceStart() throws Exception {
  try {
    rmClient = ClientRMProxy.createRMProxy(getConfig(), ApplicationClientProtocol.class);
    if (historyServiceEnabled) {
      historyClient.start();
    }
    if (timelineServiceEnabled) {
      timelineClient.start();
    }
  } catch (IOException e) {
    throw new YarnRuntimeException(e);
  }
  super.serviceStart();
}
@Test
public void testParseTimelineDelegationTokenRenewer() throws Exception {
  // Client side
  YarnClientImpl client = (YarnClientImpl) YarnClient.createYarnClient();
  Configuration conf = new YarnConfiguration();
  conf.setBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, true);
  conf.set(YarnConfiguration.RM_PRINCIPAL, "rm/[email protected]");
  conf.set(YarnConfiguration.RM_ADDRESS, "localhost:8188");
  try {
    client.init(conf);
    client.start();
    Assert.assertEquals("rm/[email protected]", client.timelineDTRenewer);
  } finally {
    client.stop();
  }
}
@SuppressWarnings("deprecation") @Test(timeout = 30000) public void testSubmitApplication() { Configuration conf = new Configuration(); conf.setLong( YarnConfiguration.YARN_CLIENT_APP_SUBMISSION_POLL_INTERVAL_MS, 100); // speed up tests final YarnClient client = new MockYarnClient(); client.init(conf); client.start(); YarnApplicationState[] exitStates = new YarnApplicationState[] { YarnApplicationState.SUBMITTED, YarnApplicationState.ACCEPTED, YarnApplicationState.RUNNING, YarnApplicationState.FINISHED, YarnApplicationState.FAILED, YarnApplicationState.KILLED }; // Submit an application without ApplicationId provided // Should get ApplicationIdNotProvidedException ApplicationSubmissionContext contextWithoutApplicationId = mock(ApplicationSubmissionContext.class); try { client.submitApplication(contextWithoutApplicationId); Assert.fail("Should throw the ApplicationIdNotProvidedException"); } catch (YarnException e) { Assert.assertTrue(e instanceof ApplicationIdNotProvidedException); Assert.assertTrue( e.getMessage().contains("ApplicationId is not provided in ApplicationSubmissionContext")); } catch (IOException e) { Assert.fail("IOException is not expected."); } // Submit the application with applicationId provided // Should be successful for (int i = 0; i < exitStates.length; ++i) { ApplicationSubmissionContext context = mock(ApplicationSubmissionContext.class); ApplicationId applicationId = ApplicationId.newInstance(System.currentTimeMillis(), i); when(context.getApplicationId()).thenReturn(applicationId); ((MockYarnClient) client).setYarnApplicationState(exitStates[i]); try { client.submitApplication(context); } catch (YarnException e) { Assert.fail("Exception is not expected."); } catch (IOException e) { Assert.fail("Exception is not expected."); } verify(((MockYarnClient) client).mockReport, times(4 * i + 4)).getYarnApplicationState(); } client.stop(); }
@SuppressWarnings("deprecation") @Override protected void serviceInit(Configuration conf) throws Exception { asyncApiPollIntervalMillis = conf.getLong( YarnConfiguration.YARN_CLIENT_APPLICATION_CLIENT_PROTOCOL_POLL_INTERVAL_MS, YarnConfiguration.DEFAULT_YARN_CLIENT_APPLICATION_CLIENT_PROTOCOL_POLL_INTERVAL_MS); asyncApiPollTimeoutMillis = conf.getLong( YarnConfiguration.YARN_CLIENT_APPLICATION_CLIENT_PROTOCOL_POLL_TIMEOUT_MS, YarnConfiguration.DEFAULT_YARN_CLIENT_APPLICATION_CLIENT_PROTOCOL_POLL_TIMEOUT_MS); submitPollIntervalMillis = asyncApiPollIntervalMillis; if (conf.get(YarnConfiguration.YARN_CLIENT_APP_SUBMISSION_POLL_INTERVAL_MS) != null) { submitPollIntervalMillis = conf.getLong( YarnConfiguration.YARN_CLIENT_APP_SUBMISSION_POLL_INTERVAL_MS, YarnConfiguration.DEFAULT_YARN_CLIENT_APPLICATION_CLIENT_PROTOCOL_POLL_INTERVAL_MS); } if (conf.getBoolean( YarnConfiguration.APPLICATION_HISTORY_ENABLED, YarnConfiguration.DEFAULT_APPLICATION_HISTORY_ENABLED)) { historyServiceEnabled = true; historyClient = AHSClient.createAHSClient(); historyClient.init(conf); } if (conf.getBoolean( YarnConfiguration.TIMELINE_SERVICE_ENABLED, YarnConfiguration.DEFAULT_TIMELINE_SERVICE_ENABLED)) { timelineServiceEnabled = true; timelineClient = createTimelineClient(); timelineClient.init(conf); timelineDTRenewer = getTimelineDelegationTokenRenewer(conf); timelineService = TimelineUtils.buildTimelineTokenService(conf); } timelineServiceBestEffort = conf.getBoolean( YarnConfiguration.TIMELINE_SERVICE_CLIENT_BEST_EFFORT, YarnConfiguration.DEFAULT_TIMELINE_SERVICE_CLIENT_BEST_EFFORT); super.serviceInit(conf); }
/**
 * Create a new Flink on YARN cluster.
 *
 * @param yarnClient
 * @param appId the YARN application ID
 * @param hadoopConfig
 * @param flinkConfig
 * @param sessionFilesDir
 * @param detached Set to true if no actor system or RPC communication with the cluster should be
 *     established
 * @throws IOException
 * @throws YarnException
 */
public FlinkYarnCluster(
    final YarnClient yarnClient,
    final ApplicationId appId,
    Configuration hadoopConfig,
    org.apache.flink.configuration.Configuration flinkConfig,
    Path sessionFilesDir,
    boolean detached)
    throws IOException, YarnException {
  this.akkaDuration = AkkaUtils.getTimeout(flinkConfig);
  this.akkaTimeout = Timeout.durationToTimeout(akkaDuration);
  this.yarnClient = yarnClient;
  this.hadoopConfig = hadoopConfig;
  this.sessionFilesDir = sessionFilesDir;
  this.applicationId = appId;
  this.detached = detached;
  this.flinkConfig = flinkConfig;
  this.appId = appId;

  // get one application report manually
  intialAppReport = yarnClient.getApplicationReport(appId);
  String jobManagerHost = intialAppReport.getHost();
  int jobManagerPort = intialAppReport.getRpcPort();
  this.jobManagerAddress = new InetSocketAddress(jobManagerHost, jobManagerPort);
}
public void run(String[] args) throws Exception {
  final String command = args[0];
  final int n = Integer.valueOf(args[1]);
  final Path jarPath = new Path(args[2]);

  // Create yarnClient
  YarnConfiguration conf = new YarnConfiguration();
  YarnClient yarnClient = YarnClient.createYarnClient();
  yarnClient.init(conf);
  yarnClient.start();

  // Create application via yarnClient
  YarnClientApplication app = yarnClient.createApplication();

  // Set up the container launch context for the application master
  ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);
  amContainer.setCommands(
      Collections.singletonList(
          "$JAVA_HOME/bin/java"
              + " -Xmx256M"
              + " com.hortonworks.simpleyarnapp.ApplicationMaster"
              + " " + command
              + " " + String.valueOf(n)
              + " 1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout"
              + " 2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr"));

  // Setup jar for ApplicationMaster
  LocalResource appMasterJar = Records.newRecord(LocalResource.class);
  setupAppMasterJar(jarPath, appMasterJar);
  System.out.println("Jar name is " + jarPath.getName());
  amContainer.setLocalResources(Collections.singletonMap(jarPath.getName(), appMasterJar));

  // Setup CLASSPATH for ApplicationMaster
  Map<String, String> appMasterEnv = new HashMap<String, String>();
  setupAppMasterEnv(appMasterEnv);
  amContainer.setEnvironment(appMasterEnv);

  // Set up resource type requirements for ApplicationMaster
  Resource capability = Records.newRecord(Resource.class);
  capability.setMemory(256);
  capability.setVirtualCores(1);

  // Finally, set-up ApplicationSubmissionContext for the application
  ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
  appContext.setApplicationName("apache-yarn-example"); // application name
  appContext.setAMContainerSpec(amContainer);
  appContext.setResource(capability);
  appContext.setQueue("default"); // queue

  // Submit application
  ApplicationId appId = appContext.getApplicationId();
  System.out.println("Submitting application " + appId);
  yarnClient.submitApplication(appContext);

  ApplicationReport appReport = yarnClient.getApplicationReport(appId);
  YarnApplicationState appState = appReport.getYarnApplicationState();
  while (appState != YarnApplicationState.FINISHED
      && appState != YarnApplicationState.KILLED
      && appState != YarnApplicationState.FAILED) {
    System.out.println("App Status = " + appState);
    Thread.sleep(100);
    appReport = yarnClient.getApplicationReport(appId);
    appState = appReport.getYarnApplicationState();
  }

  System.out.println(
      "Application " + appId + " finished with"
          + " state " + appState
          + " at " + appReport.getFinishTime());
}
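// Example (not from the original sources): the run() method above calls two
// helpers that are not shown. These are hedged sketches of what such helpers
// typically look like against the Hadoop 2.x client API; the original
// implementations may differ.
private void setupAppMasterJar(Path jarPath, LocalResource appMasterJar) throws IOException {
  // Register the AM jar as a localized FILE resource.
  FileStatus jarStat = FileSystem.get(new YarnConfiguration()).getFileStatus(jarPath);
  appMasterJar.setResource(ConverterUtils.getYarnUrlFromPath(jarPath));
  appMasterJar.setSize(jarStat.getLen());
  appMasterJar.setTimestamp(jarStat.getModificationTime());
  appMasterJar.setType(LocalResourceType.FILE);
  appMasterJar.setVisibility(LocalResourceVisibility.PUBLIC);
}

private void setupAppMasterEnv(Map<String, String> appMasterEnv) {
  // Build the AM CLASSPATH from the cluster's configured YARN application
  // classpath plus the container working directory.
  YarnConfiguration conf = new YarnConfiguration();
  for (String c :
      conf.getStrings(
          YarnConfiguration.YARN_APPLICATION_CLASSPATH,
          YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) {
    Apps.addToEnvironment(
        appMasterEnv, ApplicationConstants.Environment.CLASSPATH.name(), c.trim());
  }
  Apps.addToEnvironment(
      appMasterEnv,
      ApplicationConstants.Environment.CLASSPATH.name(),
      ApplicationConstants.Environment.PWD.$() + File.separator + "*");
}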
/** Test TaskManager failure */
@Test(timeout = 100000) // timeout after 100 seconds
public void testTaskManagerFailure() {
  LOG.info("Starting testTaskManagerFailure()");
  Runner runner =
      startWithArgs(
          new String[] {
            "-j", flinkUberjar.getAbsolutePath(),
            "-t", flinkLibFolder.getAbsolutePath(),
            "-n", "1",
            "-jm", "768",
            "-tm", "1024",
            "-nm", "customName",
            "-Dfancy-configuration-value=veryFancy",
            "-Dyarn.maximum-failed-containers=3"
          },
          "Number of connected TaskManagers changed to 1. Slots available: 1",
          RunTypes.YARN_SESSION);

  Assert.assertEquals(2, getRunningContainers());

  // ------------------------ Test if JobManager web interface is accessible -------
  try {
    YarnClient yc = YarnClient.createYarnClient();
    yc.init(yarnConfiguration);
    yc.start();
    List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING));
    Assert.assertEquals(1, apps.size()); // Only one running
    ApplicationReport app = apps.get(0);
    Assert.assertEquals("customName", app.getName());
    String url = app.getTrackingUrl();
    if (!url.endsWith("/")) {
      url += "/";
    }
    if (!url.startsWith("http://")) {
      url = "http://" + url;
    }
    LOG.info("Got application URL from YARN {}", url);

    String response = TestBaseUtils.getFromHTTP(url + "taskmanagers/");
    JSONObject parsedTMs = new JSONObject(response);
    JSONArray taskManagers = parsedTMs.getJSONArray("taskmanagers");
    Assert.assertNotNull(taskManagers);
    Assert.assertEquals(1, taskManagers.length());
    Assert.assertEquals(1, taskManagers.getJSONObject(0).getInt("slotsNumber"));

    // get the configuration from webinterface & check if the dynamic properties from YARN
    // show up there.
    String jsonConfig = TestBaseUtils.getFromHTTP(url + "jobmanager/config");
    JSONArray parsed = new JSONArray(jsonConfig);
    Map<String, String> parsedConfig = WebMonitorUtils.fromKeyValueJsonArray(parsed);

    Assert.assertEquals("veryFancy", parsedConfig.get("fancy-configuration-value"));
    Assert.assertEquals("3", parsedConfig.get("yarn.maximum-failed-containers"));

    // -------------- FLINK-1902: check if jobmanager hostname/port are shown in web interface
    // first, get the hostname/port
    String oC = outContent.toString();
    Pattern p = Pattern.compile("Flink JobManager is now running on ([a-zA-Z0-9.-]+):([0-9]+)");
    Matcher matches = p.matcher(oC);
    String hostname = null;
    String port = null;
    while (matches.find()) {
      hostname = matches.group(1).toLowerCase();
      port = matches.group(2);
    }
    LOG.info("Extracted hostname:port: {} {}", hostname, port);

    Assert.assertEquals(
        "unable to find hostname in " + parsed,
        hostname,
        parsedConfig.get(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY));
    Assert.assertEquals(
        "unable to find port in " + parsed,
        port,
        parsedConfig.get(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY));

    // test logfile access
    String logs = TestBaseUtils.getFromHTTP(url + "jobmanager/log");
    Assert.assertTrue(logs.contains("Starting YARN ApplicationMaster/JobManager (Version"));
  } catch (Throwable e) {
    LOG.warn("Error while running test", e);
    Assert.fail(e.getMessage());
  }

  // ------------------------ Kill container with TaskManager -------

  // find container id of taskManager:
  ContainerId taskManagerContainer = null;
  NodeManager nodeManager = null;
  UserGroupInformation remoteUgi = null;
  NMTokenIdentifier nmIdent = null;
  try {
    remoteUgi = UserGroupInformation.getCurrentUser();
  } catch (IOException e) {
    LOG.warn("Unable to get curr user", e);
    Assert.fail();
  }
  for (int nmId = 0; nmId < NUM_NODEMANAGERS; nmId++) {
    NodeManager nm = yarnCluster.getNodeManager(nmId);
    ConcurrentMap<ContainerId, Container> containers = nm.getNMContext().getContainers();
    for (Map.Entry<ContainerId, Container> entry : containers.entrySet()) {
      String command = Joiner.on(" ").join(entry.getValue().getLaunchContext().getCommands());
      if (command.contains(YarnTaskManagerRunner.class.getSimpleName())) {
        taskManagerContainer = entry.getKey();
        nodeManager = nm;
        nmIdent =
            new NMTokenIdentifier(taskManagerContainer.getApplicationAttemptId(), null, "", 0);
        // allow myself to do stuff with the container
        // remoteUgi.addCredentials(entry.getValue().getCredentials());
        remoteUgi.addTokenIdentifier(nmIdent);
      }
    }
    sleep(500);
  }

  Assert.assertNotNull("Unable to find container with TaskManager", taskManagerContainer);
  Assert.assertNotNull("Illegal state", nodeManager);

  List<ContainerId> toStop = new LinkedList<ContainerId>();
  toStop.add(taskManagerContainer);
  StopContainersRequest scr = StopContainersRequest.newInstance(toStop);
  try {
    nodeManager.getNMContext().getContainerManager().stopContainers(scr);
  } catch (Throwable e) {
    LOG.warn("Error stopping container", e);
    Assert.fail("Error stopping container: " + e.getMessage());
  }

  // stateful termination check:
  // wait until we saw a container being killed and AFTERWARDS a new one launched
  boolean ok = false;
  do {
    LOG.debug("Waiting for correct order of events. Output: {}", errContent.toString());
    String o = errContent.toString();
    int killedOff = o.indexOf("Container killed by the ApplicationMaster");
    if (killedOff != -1) {
      o = o.substring(killedOff);
      ok = o.indexOf("Launching container") > 0;
    }
    sleep(1000);
  } while (!ok);

  // send "stop" command to command line interface
  runner.sendStop();
  // wait for the thread to stop
  try {
    runner.join(1000);
  } catch (InterruptedException e) {
    LOG.warn("Interrupted while stopping runner", e);
  }
  LOG.warn("stopped");

  // ----------- Send output to logger
  System.setOut(originalStdout);
  System.setErr(originalStderr);
  String oC = outContent.toString();
  String eC = errContent.toString();
  LOG.info("Sending stdout content through logger: \n\n{}\n\n", oC);
  LOG.info("Sending stderr content through logger: \n\n{}\n\n", eC);

  // ------ Check if everything happened correctly
  Assert.assertTrue(
      "Expect to see failed container", eC.contains("New messages from the YARN cluster"));
  Assert.assertTrue(
      "Expect to see failed container", eC.contains("Container killed by the ApplicationMaster"));
  Assert.assertTrue(
      "Expect to see new container started",
      eC.contains("Launching container") && eC.contains("on host"));

  // cleanup auth for the subsequent tests.
  remoteUgi.getTokenIdentifiers().remove(nmIdent);

  LOG.info("Finished testTaskManagerFailure()");
}
private void testDetachedPerJobYarnClusterInternal(String job) {
  YarnClient yc = YarnClient.createYarnClient();
  yc.init(yarnConfiguration);
  yc.start();

  // get temporary folder for writing output of wordcount example
  File tmpOutFolder = null;
  try {
    tmpOutFolder = tmp.newFolder();
  } catch (IOException e) {
    throw new RuntimeException(e);
  }

  // get temporary file for reading input data for wordcount example
  File tmpInFile;
  try {
    tmpInFile = tmp.newFile();
    FileUtils.writeStringToFile(tmpInFile, WordCountData.TEXT);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }

  Runner runner =
      startWithArgs(
          new String[] {
            "run", "-m", "yarn-cluster",
            "-yj", flinkUberjar.getAbsolutePath(),
            "-yt", flinkLibFolder.getAbsolutePath(),
            "-yn", "1",
            "-yjm", "768",
            "-yD", "yarn.heap-cutoff-ratio=0.5", // test if the cutoff is passed correctly
            "-ytm", "1024",
            "-ys", "2", // test requesting slots from YARN.
            "--yarndetached",
            job,
            tmpInFile.getAbsoluteFile().toString(),
            tmpOutFolder.getAbsoluteFile().toString()
          },
          "Job has been submitted with JobID",
          RunTypes.CLI_FRONTEND);

  // it should usually be 2, but on slow machines, the number varies
  Assert.assertTrue("There should be at most 2 containers running", getRunningContainers() <= 2);
  // give the runner some time to detach
  for (int attempt = 0; runner.isAlive() && attempt < 5; attempt++) {
    try {
      Thread.sleep(500);
    } catch (InterruptedException e) {
      // ignore and retry; the loop bound limits the total wait
    }
  }
  Assert.assertFalse("The runner should detach.", runner.isAlive());
  LOG.info("CLI Frontend has returned, so the job is running");

  // find out the application id and wait until it has finished.
  try {
    List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING));

    ApplicationId tmpAppId;
    if (apps.size() == 1) {
      // Better method to find the right appId. But sometimes the app is shutting down very fast
      // Only one running
      tmpAppId = apps.get(0).getApplicationId();

      LOG.info("waiting for the job with appId {} to finish", tmpAppId);
      // wait until the app has finished
      while (yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING)).size() > 0) {
        sleep(500);
      }
    } else {
      // get appId by finding the latest finished appid
      apps = yc.getApplications();
      Collections.sort(
          apps,
          new Comparator<ApplicationReport>() {
            @Override
            public int compare(ApplicationReport o1, ApplicationReport o2) {
              return o1.getApplicationId().compareTo(o2.getApplicationId()) * -1;
            }
          });
      tmpAppId = apps.get(0).getApplicationId();
      LOG.info("Selected {} as the last appId from {}", tmpAppId, Arrays.toString(apps.toArray()));
    }
    final ApplicationId id = tmpAppId;

    // now it has finished.
    // check the output files.
    File[] listOfOutputFiles = tmpOutFolder.listFiles();

    Assert.assertNotNull("Taskmanager output not found", listOfOutputFiles);
    LOG.info("The job has finished. TaskManager output files found in {}", tmpOutFolder);

    // read all output files in output folder to one output string
    String content = "";
    for (File f : listOfOutputFiles) {
      if (f.isFile()) {
        content += FileUtils.readFileToString(f) + "\n";
      }
    }
    // String content = FileUtils.readFileToString(taskmanagerOut);

    // check for some of the wordcount outputs.
    Assert.assertTrue(
        "Expected string 'da 5' or '(all,2)' not found in string '" + content + "'",
        content.contains("da 5") || content.contains("(da,5)") || content.contains("(all,2)"));
    Assert.assertTrue(
        "Expected string 'der 29' or '(mind,1)' not found in string '" + content + "'",
        content.contains("der 29") || content.contains("(der,29)") || content.contains("(mind,1)"));

    // check if the heap size for the TaskManager was set correctly
    File jobmanagerLog =
        YarnTestBase.findFile(
            "..",
            new FilenameFilter() {
              @Override
              public boolean accept(File dir, String name) {
                return name.contains("jobmanager.log")
                    && dir.getAbsolutePath().contains(id.toString());
              }
            });
    Assert.assertNotNull("Unable to locate JobManager log", jobmanagerLog);
    content = FileUtils.readFileToString(jobmanagerLog);
    // TM was started with 1024 but we cut off 50% (NOT THE DEFAULT VALUE)
    String expected = "Starting TM with command=$JAVA_HOME/bin/java -Xms424m -Xmx424m";
    Assert.assertTrue(
        "Expected string '" + expected + "' not found in JobManager log: '" + jobmanagerLog + "'",
        content.contains(expected));
    expected = " (2/2) (attempt #0) to ";
    Assert.assertTrue(
        "Expected string '"
            + expected
            + "' not found in JobManager log. "
            + "This string checks that the job has been started with a parallelism of 2. "
            + "Log contents: '" + jobmanagerLog + "'",
        content.contains(expected));

    // make sure the detached app is really finished.
    LOG.info("Checking again that app has finished");
    ApplicationReport rep;
    do {
      sleep(500);
      rep = yc.getApplicationReport(id);
      LOG.info("Got report {}", rep);
    } while (rep.getYarnApplicationState() == YarnApplicationState.RUNNING);
  } catch (Throwable t) {
    LOG.warn("Error while detached yarn session was running", t);
    Assert.fail(t.getMessage());
  }
}
/** * Submits application to YARN * * <p><br> * * @since 0.3.2 */ @InterfaceAudience.Public @InterfaceStability.Unstable public class StramClient { private static final Logger LOG = LoggerFactory.getLogger(StramClient.class); public static final String YARN_APPLICATION_TYPE = "DataTorrent"; public static final String LIB_JARS_SEP = ","; // TODO: HADOOP UPGRADE - replace with YarnConfiguration constants private static final String RM_HA_PREFIX = YarnConfiguration.RM_PREFIX + "ha."; private static final String RM_HA_IDS = RM_HA_PREFIX + "rm-ids"; private static final String RM_HA_ENABLED = RM_HA_PREFIX + "enabled"; private static final boolean DEFAULT_RM_HA_ENABLED = false; private static final String RM_HOSTNAME_PREFIX = YarnConfiguration.RM_PREFIX + "hostname."; // Configuration private final Configuration conf; // Handle to talk to the Resource Manager/Applications Manager private final YarnClient yarnClient = YarnClient.createYarnClient(); // Application master specific info to register a new Application with RM/ASM // App master priority private final int amPriority = 0; private ApplicationId appId; private final LogicalPlan dag; public String javaCmd = "${JAVA_HOME}" + "/bin/java"; // log4j.properties file // if available, add to local resources and set into classpath private final String log4jPropFile = ""; // Timeout threshold for client. Kill app after time interval expires. private long clientTimeout = 600000; private String originalAppId; private String queueName; private String applicationType = YARN_APPLICATION_TYPE; private String archives; private String files; private LinkedHashSet<String> resources; // platform dependencies that are not part of Hadoop and need to be deployed, // entry below will cause containing jar file from client to be copied to cluster private static final Class<?>[] DATATORRENT_CLASSES = new Class<?>[] { com.datatorrent.netlet.util.Slice.class, com.datatorrent.netlet.EventLoop.class, com.datatorrent.bufferserver.server.Server.class, com.datatorrent.stram.StreamingAppMaster.class, com.datatorrent.api.StreamCodec.class, javax.validation.ConstraintViolationException.class, com.ning.http.client.websocket.WebSocketUpgradeHandler.class, com.esotericsoftware.kryo.Kryo.class, org.apache.bval.jsr303.ApacheValidationProvider.class, org.apache.bval.BeanValidationContext.class, org.apache.commons.lang3.ClassUtils.class, net.engio.mbassy.bus.MBassador.class, org.codehaus.jackson.annotate.JsonUnwrapped.class, org.codehaus.jackson.map.ser.std.RawSerializer.class, org.apache.commons.beanutils.BeanUtils.class, org.apache.http.client.utils.URLEncodedUtils.class, org.apache.http.message.BasicHeaderValueParser.class, com.esotericsoftware.minlog.Log.class, org.objectweb.asm.tree.ClassNode.class, org.mozilla.javascript.Scriptable.class }; private static final Class<?>[] DATATORRENT_SECURITY_SPECIFIC_CLASSES = new Class<?>[] {com.sun.jersey.client.apache4.ApacheHttpClient4Handler.class}; private static final Class<?>[] DATATORRENT_SECURITY_CLASSES = (Class<?>[]) ArrayUtils.addAll(DATATORRENT_CLASSES, DATATORRENT_SECURITY_SPECIFIC_CLASSES); public StramClient(Configuration conf, LogicalPlan dag) throws Exception { this.conf = conf; this.dag = dag; dag.validate(); yarnClient.init(conf); } public void start() { yarnClient.start(); } public void stop() { yarnClient.stop(); } public static LinkedHashSet<String> findJars(LogicalPlan dag, Class<?>[] defaultClasses) { List<Class<?>> jarClasses = new ArrayList<Class<?>>(); for (String className : dag.getClassNames()) { try { Class<?> 
clazz = Thread.currentThread().getContextClassLoader().loadClass(className); jarClasses.add(clazz); } catch (ClassNotFoundException e) { throw new IllegalArgumentException("Failed to load class " + className, e); } } for (Class<?> clazz : Lists.newArrayList(jarClasses)) { // process class and super classes (super does not require deploy annotation) for (Class<?> c = clazz; c != Object.class && c != null; c = c.getSuperclass()) { // LOG.debug("checking " + c); jarClasses.add(c); jarClasses.addAll(Arrays.asList(c.getInterfaces())); } } jarClasses.addAll(Arrays.asList(defaultClasses)); if (dag.isDebug()) { LOG.debug("Deploy dependencies: {}", jarClasses); } LinkedHashSet<String> localJarFiles = new LinkedHashSet<String>(); // avoid duplicates HashMap<String, String> sourceToJar = new HashMap<String, String>(); for (Class<?> jarClass : jarClasses) { if (jarClass.getProtectionDomain().getCodeSource() == null) { // system class continue; } String sourceLocation = jarClass.getProtectionDomain().getCodeSource().getLocation().toString(); String jar = sourceToJar.get(sourceLocation); if (jar == null) { // don't create jar file from folders multiple times jar = JarFinder.getJar(jarClass); sourceToJar.put(sourceLocation, jar); LOG.debug("added sourceLocation {} as {}", sourceLocation, jar); } if (jar == null) { throw new AssertionError("Cannot resolve jar file for " + jarClass); } localJarFiles.add(jar); } String libJarsPath = dag.getValue(LogicalPlan.LIBRARY_JARS); if (!StringUtils.isEmpty(libJarsPath)) { String[] libJars = StringUtils.splitByWholeSeparator(libJarsPath, LIB_JARS_SEP); localJarFiles.addAll(Arrays.asList(libJars)); } LOG.info("Local jar file dependencies: " + localJarFiles); return localJarFiles; } private String copyFromLocal(FileSystem fs, Path basePath, String[] files) throws IOException { StringBuilder csv = new StringBuilder(files.length * (basePath.toString().length() + 16)); for (String localFile : files) { Path src = new Path(localFile); String filename = src.getName(); Path dst = new Path(basePath, filename); URI localFileURI = null; try { localFileURI = new URI(localFile); } catch (URISyntaxException e) { throw new IOException(e); } if (localFileURI.getScheme() == null || localFileURI.getScheme().startsWith("file")) { LOG.info("Copy {} from local filesystem to {}", localFile, dst); fs.copyFromLocalFile(false, true, src, dst); } else { LOG.info("Copy {} from DFS to {}", localFile, dst); FileUtil.copy(fs, src, fs, dst, false, true, conf); } if (csv.length() > 0) { csv.append(LIB_JARS_SEP); } csv.append(dst.toString()); } return csv.toString(); } public void copyInitialState(Path origAppDir) throws IOException { // locate previous snapshot String newAppDir = this.dag.assertAppPath(); FSRecoveryHandler recoveryHandler = new FSRecoveryHandler(origAppDir.toString(), conf); // read snapshot against new dependencies Object snapshot = recoveryHandler.restore(); if (snapshot == null) { throw new IllegalArgumentException("No previous application state found in " + origAppDir); } InputStream logIs = recoveryHandler.getLog(); // modify snapshot state to switch app id ((StreamingContainerManager.CheckpointState) snapshot).setApplicationId(this.dag, conf); Path checkpointPath = new Path(newAppDir, LogicalPlan.SUBDIR_CHECKPOINTS); FileSystem fs = FileSystem.newInstance(origAppDir.toUri(), conf); // remove the path that was created by the storage agent during deserialization and replacement fs.delete(checkpointPath, true); // write snapshot to new location recoveryHandler = new 
FSRecoveryHandler(newAppDir, conf); recoveryHandler.save(snapshot); OutputStream logOs = recoveryHandler.rotateLog(); IOUtils.copy(logIs, logOs); logOs.flush(); logOs.close(); logIs.close(); // copy sub directories that are not present in target FileStatus[] lFiles = fs.listStatus(origAppDir); for (FileStatus f : lFiles) { if (f.isDirectory()) { String targetPath = f.getPath().toString().replace(origAppDir.toString(), newAppDir); if (!fs.exists(new Path(targetPath))) { LOG.debug("Copying {} to {}", f.getPath(), targetPath); FileUtil.copy(fs, f.getPath(), fs, new Path(targetPath), false, conf); // FSUtil.copy(fs, f, fs, new Path(targetPath), false, false, conf); } else { LOG.debug("Ignoring {} as it already exists under {}", f.getPath(), targetPath); // FSUtil.setPermission(fs, new Path(targetPath), new FsPermission((short)0777)); } } } } // TODO: HADOOP UPGRADE - replace with YarnConfiguration constants private Token<RMDelegationTokenIdentifier> getRMHAToken( org.apache.hadoop.yarn.api.records.Token rmDelegationToken) { // Build a list of service addresses to form the service name ArrayList<String> services = new ArrayList<String>(); for (String rmId : conf.getStringCollection(RM_HA_IDS)) { LOG.info("Yarn Resource Manager id: {}", rmId); // Set RM_ID to get the corresponding RM_ADDRESS services.add( SecurityUtil.buildTokenService( NetUtils.createSocketAddr( conf.get(RM_HOSTNAME_PREFIX + rmId), YarnConfiguration.DEFAULT_RM_PORT, RM_HOSTNAME_PREFIX + rmId)) .toString()); } Text rmTokenService = new Text(Joiner.on(',').join(services)); return new Token<RMDelegationTokenIdentifier>( rmDelegationToken.getIdentifier().array(), rmDelegationToken.getPassword().array(), new Text(rmDelegationToken.getKind()), rmTokenService); } private void addRMDelegationToken(final String renewer, final Credentials credentials) throws IOException, YarnException { // Get the ResourceManager delegation rmToken final org.apache.hadoop.yarn.api.records.Token rmDelegationToken = yarnClient.getRMDelegationToken(new Text(renewer)); Token<RMDelegationTokenIdentifier> token; // TODO: Use the utility method getRMDelegationTokenService in ClientRMProxy to remove the // separate handling of // TODO: HA and non-HA cases when hadoop dependency is changed to hadoop 2.4 or above if (conf.getBoolean(RM_HA_ENABLED, DEFAULT_RM_HA_ENABLED)) { LOG.info("Yarn Resource Manager HA is enabled"); token = getRMHAToken(rmDelegationToken); } else { LOG.info("Yarn Resource Manager HA is not enabled"); InetSocketAddress rmAddress = conf.getSocketAddr( YarnConfiguration.RM_ADDRESS, YarnConfiguration.DEFAULT_RM_ADDRESS, YarnConfiguration.DEFAULT_RM_PORT); token = ConverterUtils.convertFromYarn(rmDelegationToken, rmAddress); } LOG.info("RM dt {}", token); credentials.addToken(token.getService(), token); } /** * Launch application for the dag represented by this client. * * @throws YarnException * @throws IOException */ public void startApplication() throws YarnException, IOException { Class<?>[] defaultClasses; if (applicationType.equals(YARN_APPLICATION_TYPE)) { // TODO restrict the security check to only check if security is enabled for webservices. 
if (UserGroupInformation.isSecurityEnabled()) { defaultClasses = DATATORRENT_SECURITY_CLASSES; } else { defaultClasses = DATATORRENT_CLASSES; } } else { throw new IllegalStateException(applicationType + " is not a valid application type."); } LinkedHashSet<String> localJarFiles = findJars(dag, defaultClasses); if (resources != null) { localJarFiles.addAll(resources); } YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics(); LOG.info( "Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers()); // GetClusterNodesRequest clusterNodesReq = Records.newRecord(GetClusterNodesRequest.class); // GetClusterNodesResponse clusterNodesResp = // rmClient.clientRM.getClusterNodes(clusterNodesReq); // LOG.info("Got Cluster node info from ASM"); // for (NodeReport node : clusterNodesResp.getNodeReports()) { // LOG.info("Got node report from ASM for" // + ", nodeId=" + node.getNodeId() // + ", nodeAddress" + node.getHttpAddress() // + ", nodeRackName" + node.getRackName() // + ", nodeNumContainers" + node.getNumContainers() // + ", nodeHealthStatus" + node.getHealthReport()); // } List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo(); for (QueueUserACLInfo aclInfo : listAclInfo) { for (QueueACL userAcl : aclInfo.getUserAcls()) { LOG.info( "User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl=" + userAcl.name()); } } // Get a new application id YarnClientApplication newApp = yarnClient.createApplication(); appId = newApp.getNewApplicationResponse().getApplicationId(); // Dump out information about cluster capability as seen by the resource manager int maxMem = newApp.getNewApplicationResponse().getMaximumResourceCapability().getMemory(); LOG.info("Max mem capabililty of resources in this cluster " + maxMem); int amMemory = dag.getMasterMemoryMB(); if (amMemory > maxMem) { LOG.info( "AM memory specified above max threshold of cluster. Using max value." + ", specified=" + amMemory + ", max=" + maxMem); amMemory = maxMem; } if (dag.getAttributes().get(LogicalPlan.APPLICATION_ID) == null) { dag.setAttribute(LogicalPlan.APPLICATION_ID, appId.toString()); } // Create launch context for app master LOG.info("Setting up application submission context for ASM"); ApplicationSubmissionContext appContext = Records.newRecord(ApplicationSubmissionContext.class); // set the application id appContext.setApplicationId(appId); // set the application name appContext.setApplicationName(dag.getValue(LogicalPlan.APPLICATION_NAME)); appContext.setApplicationType(this.applicationType); if (YARN_APPLICATION_TYPE.equals(this.applicationType)) { // appContext.setMaxAppAttempts(1); // no retries until Stram is HA } // Set up the container launch context for the application master ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class); // Setup security tokens // If security is enabled get ResourceManager and NameNode delegation tokens. // Set these tokens on the container so that they are sent as part of application submission. // This also sets them up for renewal by ResourceManager. The NameNode delegation rmToken // is also used by ResourceManager to fetch the jars from HDFS and set them up for the // application master launch. 
if (UserGroupInformation.isSecurityEnabled()) { Credentials credentials = new Credentials(); String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL); if (tokenRenewer == null || tokenRenewer.length() == 0) { throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer"); } // For now, only getting tokens for the default file-system. FileSystem fs = StramClientUtils.newFileSystemInstance(conf); try { final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials); if (tokens != null) { for (Token<?> token : tokens) { LOG.info("Got dt for " + fs.getUri() + "; " + token); } } } finally { fs.close(); } addRMDelegationToken(tokenRenewer, credentials); DataOutputBuffer dob = new DataOutputBuffer(); credentials.writeTokenStorageToStream(dob); ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); amContainer.setTokens(fsTokens); } // set local resources for the application master // local files or archives as needed // In this scenario, the jar file for the application master is part of the local resources Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(); // copy required jar files to dfs, to be localized for containers FileSystem fs = StramClientUtils.newFileSystemInstance(conf); try { Path appsBasePath = new Path(StramClientUtils.getDTDFSRootDir(fs, conf), StramClientUtils.SUBDIR_APPS); Path appPath = new Path(appsBasePath, appId.toString()); String libJarsCsv = copyFromLocal(fs, appPath, localJarFiles.toArray(new String[] {})); LOG.info("libjars: {}", libJarsCsv); dag.getAttributes().put(LogicalPlan.LIBRARY_JARS, libJarsCsv); LaunchContainerRunnable.addFilesToLocalResources( LocalResourceType.FILE, libJarsCsv, localResources, fs); if (archives != null) { String[] localFiles = archives.split(","); String archivesCsv = copyFromLocal(fs, appPath, localFiles); LOG.info("archives: {}", archivesCsv); dag.getAttributes().put(LogicalPlan.ARCHIVES, archivesCsv); LaunchContainerRunnable.addFilesToLocalResources( LocalResourceType.ARCHIVE, archivesCsv, localResources, fs); } if (files != null) { String[] localFiles = files.split(","); String filesCsv = copyFromLocal(fs, appPath, localFiles); LOG.info("files: {}", filesCsv); dag.getAttributes().put(LogicalPlan.FILES, filesCsv); LaunchContainerRunnable.addFilesToLocalResources( LocalResourceType.FILE, filesCsv, localResources, fs); } dag.getAttributes().put(LogicalPlan.APPLICATION_PATH, appPath.toString()); if (dag.getAttributes().get(OperatorContext.STORAGE_AGENT) == null) { /* which would be the most likely case */ Path checkpointPath = new Path(appPath, LogicalPlan.SUBDIR_CHECKPOINTS); // use conf client side to pickup any proxy settings from dt-site.xml dag.setAttribute( OperatorContext.STORAGE_AGENT, new FSStorageAgent(checkpointPath.toString(), conf)); } if (dag.getAttributes().get(LogicalPlan.CONTAINER_OPTS_CONFIGURATOR) == null) { dag.setAttribute( LogicalPlan.CONTAINER_OPTS_CONFIGURATOR, new BasicContainerOptConfigurator()); } // Set the log4j properties if needed if (!log4jPropFile.isEmpty()) { Path log4jSrc = new Path(log4jPropFile); Path log4jDst = new Path(appPath, "log4j.props"); fs.copyFromLocalFile(false, true, log4jSrc, log4jDst); FileStatus log4jFileStatus = fs.getFileStatus(log4jDst); LocalResource log4jRsrc = Records.newRecord(LocalResource.class); log4jRsrc.setType(LocalResourceType.FILE); log4jRsrc.setVisibility(LocalResourceVisibility.APPLICATION); log4jRsrc.setResource(ConverterUtils.getYarnUrlFromURI(log4jDst.toUri())); 
log4jRsrc.setTimestamp(log4jFileStatus.getModificationTime()); log4jRsrc.setSize(log4jFileStatus.getLen()); localResources.put("log4j.properties", log4jRsrc); } if (originalAppId != null) { Path origAppPath = new Path(appsBasePath, this.originalAppId); LOG.info("Restart from {}", origAppPath); copyInitialState(origAppPath); } // push logical plan to DFS location Path cfgDst = new Path(appPath, LogicalPlan.SER_FILE_NAME); FSDataOutputStream outStream = fs.create(cfgDst, true); LogicalPlan.write(this.dag, outStream); outStream.close(); Path launchConfigDst = new Path(appPath, LogicalPlan.LAUNCH_CONFIG_FILE_NAME); outStream = fs.create(launchConfigDst, true); conf.writeXml(outStream); outStream.close(); FileStatus topologyFileStatus = fs.getFileStatus(cfgDst); LocalResource topologyRsrc = Records.newRecord(LocalResource.class); topologyRsrc.setType(LocalResourceType.FILE); topologyRsrc.setVisibility(LocalResourceVisibility.APPLICATION); topologyRsrc.setResource(ConverterUtils.getYarnUrlFromURI(cfgDst.toUri())); topologyRsrc.setTimestamp(topologyFileStatus.getModificationTime()); topologyRsrc.setSize(topologyFileStatus.getLen()); localResources.put(LogicalPlan.SER_FILE_NAME, topologyRsrc); // Set local resource info into app master container launch context amContainer.setLocalResources(localResources); // Set the necessary security tokens as needed // amContainer.setContainerTokens(containerToken); // Set the env variables to be setup in the env where the application master will be run LOG.info("Set the environment for the application master"); Map<String, String> env = new HashMap<String, String>(); // Add application jar(s) location to classpath // At some point we should not be required to add // the hadoop specific classpaths to the env. // It should be provided out of the box. // For now setting all required classpaths including // the classpath to "." for the application jar(s) // including ${CLASSPATH} will duplicate the class path in app master, removing it for now // StringBuilder classPathEnv = new StringBuilder("${CLASSPATH}:./*"); StringBuilder classPathEnv = new StringBuilder("./*"); String classpath = conf.get(YarnConfiguration.YARN_APPLICATION_CLASSPATH); for (String c : StringUtils.isBlank(classpath) ? YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH : classpath.split(",")) { if (c.equals("$HADOOP_CLIENT_CONF_DIR")) { // SPOI-2501 continue; } classPathEnv.append(':'); classPathEnv.append(c.trim()); } env.put("CLASSPATH", classPathEnv.toString()); // propagate to replace node managers user name (effective in non-secure mode) env.put("HADOOP_USER_NAME", UserGroupInformation.getLoginUser().getUserName()); amContainer.setEnvironment(env); // Set the necessary command to execute the application master ArrayList<CharSequence> vargs = new ArrayList<CharSequence>(30); // Set java executable command LOG.info("Setting up app master command"); vargs.add(javaCmd); if (dag.isDebug()) { vargs.add("-agentlib:jdwp=transport=dt_socket,server=y,suspend=n"); } // Set Xmx based on am memory size // default heap size 75% of total memory vargs.add("-Xmx" + (amMemory * 3 / 4) + "m"); vargs.add("-XX:+HeapDumpOnOutOfMemoryError"); vargs.add("-XX:HeapDumpPath=/tmp/dt-heap-" + appId.getId() + ".bin"); vargs.add("-Dhadoop.root.logger=" + (dag.isDebug() ? 
"DEBUG" : "INFO") + ",RFA"); vargs.add("-Dhadoop.log.dir=" + ApplicationConstants.LOG_DIR_EXPANSION_VAR); vargs.add(String.format("-D%s=%s", StreamingContainer.PROP_APP_PATH, dag.assertAppPath())); if (dag.isDebug()) { vargs.add("-Dlog4j.debug=true"); } String loggersLevel = conf.get(DTLoggerFactory.DT_LOGGERS_LEVEL); if (loggersLevel != null) { vargs.add(String.format("-D%s=%s", DTLoggerFactory.DT_LOGGERS_LEVEL, loggersLevel)); } vargs.add(StreamingAppMaster.class.getName()); vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout"); vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr"); // Get final command StringBuilder command = new StringBuilder(9 * vargs.size()); for (CharSequence str : vargs) { command.append(str).append(" "); } LOG.info("Completed setting up app master command " + command.toString()); List<String> commands = new ArrayList<String>(); commands.add(command.toString()); amContainer.setCommands(commands); // Set up resource type requirements // For now, only memory is supported so we set memory requirements Resource capability = Records.newRecord(Resource.class); capability.setMemory(amMemory); appContext.setResource(capability); // Service data is a binary blob that can be passed to the application // Not needed in this scenario // amContainer.setServiceData(serviceData); appContext.setAMContainerSpec(amContainer); // Set the priority for the application master Priority pri = Records.newRecord(Priority.class); pri.setPriority(amPriority); appContext.setPriority(pri); // Set the queue to which this application is to be submitted in the RM appContext.setQueue(queueName); // Submit the application to the applications manager // SubmitApplicationResponse submitResp = rmClient.submitApplication(appRequest); // Ignore the response as either a valid response object is returned on success // or an exception thrown to denote some form of a failure String specStr = Objects.toStringHelper("Submitting application: ") .add("name", appContext.getApplicationName()) .add("queue", appContext.getQueue()) .add("user", UserGroupInformation.getLoginUser()) .add("resource", appContext.getResource()) .toString(); LOG.info(specStr); if (dag.isDebug()) { // LOG.info("Full submission context: " + appContext); } yarnClient.submitApplication(appContext); } finally { fs.close(); } } public ApplicationReport getApplicationReport() throws YarnException, IOException { return yarnClient.getApplicationReport(this.appId); } public void killApplication() throws YarnException, IOException { yarnClient.killApplication(this.appId); } public void setClientTimeout(long timeoutMillis) { this.clientTimeout = timeoutMillis; } /** * Monitor the submitted application for completion. Kill application if time expires. 
* * @return true if application completed successfully * @throws YarnException * @throws IOException */ public boolean monitorApplication() throws YarnException, IOException { ClientRMHelper.AppStatusCallback callback = new ClientRMHelper.AppStatusCallback() { @Override public boolean exitLoop(ApplicationReport report) { LOG.info( "Got application report from ASM for" + ", appId=" + appId.getId() + ", clientToken=" + report.getClientToAMToken() + ", appDiagnostics=" + report.getDiagnostics() + ", appMasterHost=" + report.getHost() + ", appQueue=" + report.getQueue() + ", appMasterRpcPort=" + report.getRpcPort() + ", appStartTime=" + report.getStartTime() + ", yarnAppState=" + report.getYarnApplicationState().toString() + ", distributedFinalState=" + report.getFinalApplicationStatus().toString() + ", appTrackingUrl=" + report.getTrackingUrl() + ", appUser=" + report.getUser()); return false; } }; ClientRMHelper rmClient = new ClientRMHelper(yarnClient); return rmClient.waitForCompletion(appId, callback, clientTimeout); } public void setApplicationType(String type) { this.applicationType = type; } public void setOriginalAppId(String appId) { this.originalAppId = appId; } public String getQueueName() { return queueName; } public void setQueueName(String queueName) { this.queueName = queueName; } public void setResources(LinkedHashSet<String> resources) { this.resources = resources; } public void setArchives(String archives) { this.archives = archives; } public void setFiles(String files) { this.files = files; } }
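// Example (not from the original sources): a hypothetical end-to-end use of
// the StramClient class above. Building the Configuration and LogicalPlan is
// elided and assumed; the queue name and timeout are illustrative values.
public static boolean launchAndMonitor(Configuration conf, LogicalPlan dag) throws Exception {
  StramClient client = new StramClient(conf, dag);
  client.setQueueName("default");
  client.setClientTimeout(600000); // kill the app if it outlives this
  client.start();
  try {
    client.startApplication();
    LOG.info("Submitted {}", client.getApplicationReport().getApplicationId());
    return client.monitorApplication();
  } finally {
    client.stop();
  }
}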
public void stop() {
  yarnClient.stop();
}

public void start() {
  yarnClient.start();
}
public StramClient(Configuration conf, LogicalPlan dag) throws Exception {
  this.conf = conf;
  this.dag = dag;
  dag.validate();
  // Assumes a yarnClient field created elsewhere in the class,
  // e.g. via YarnClient.createYarnClient(), as in the earlier snippets.
  yarnClient.init(conf);
}
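Taken together, these methods suggest the client lifecycle below. This is a minimal sketch, not DataTorrent's actual launcher code; it assumes only the methods shown in these snippets and a valid LogicalPlan:

// Sketch: drive a StramClient end to end under the assumptions stated above.
public static boolean runApplication(Configuration conf, LogicalPlan dag) throws Exception {
  StramClient client = new StramClient(conf, dag); // validates the DAG, inits the YarnClient
  client.start();                                  // starts the underlying YarnClient
  try {
    client.startApplication();                     // stages resources on DFS and submits the AM
    client.setClientTimeout(10 * 60 * 1000);       // give up monitoring after ten minutes
    return client.monitorApplication();            // true if the application completed successfully
  } finally {
    client.stop();                                 // stops the underlying YarnClient
  }
}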