@Test public void testOperatorFailureRecovery() throws Exception { LogicalPlan dag = new LogicalPlan(); dag.setAttribute(LogicalPlan.APPLICATION_PATH, testMeta.toURI().toString()); FailingOperator badOperator = dag.addOperator("badOperator", FailingOperator.class); dag.getContextAttributes(badOperator).put(OperatorContext.RECOVERY_ATTEMPTS, 1); LOG.info("Initializing Client"); StramClient client = new StramClient(conf, dag); if (StringUtils.isBlank(System.getenv("JAVA_HOME"))) { client.javaCmd = "java"; // JAVA_HOME not set in the yarn mini cluster } try { client.start(); client.startApplication(); client.setClientTimeout(120000); boolean result = client.monitorApplication(); LOG.info("Client run completed. Result=" + result); Assert.assertFalse("should fail", result); ApplicationReport ar = client.getApplicationReport(); Assert.assertEquals( "should fail", FinalApplicationStatus.FAILED, ar.getFinalApplicationStatus()); // unable to get the diagnostics message set by the AM here - see YARN-208 // diagnostics message does not make it here even with Hadoop 2.2 (but works on standalone // cluster) // Assert.assertTrue("appReport " + ar, ar.getDiagnostics().contains("badOperator")); } finally { client.stop(); } }
public TezSessionStatus getSessionStatus() throws TezException, IOException { try { ApplicationReport appReport = yarnClient.getApplicationReport(applicationId); switch (appReport.getYarnApplicationState()) { case NEW: case NEW_SAVING: case ACCEPTED: case SUBMITTED: return TezSessionStatus.INITIALIZING; case FINISHED: case FAILED: case KILLED: return TezSessionStatus.SHUTDOWN; case RUNNING: try { DAGClientAMProtocolBlockingPB proxy = TezClientUtils.getSessionAMProxy( yarnClient, sessionConfig.getYarnConfiguration(), applicationId); if (proxy == null) { return TezSessionStatus.INITIALIZING; } GetAMStatusResponseProto response = proxy.getAMStatus(null, GetAMStatusRequestProto.newBuilder().build()); return DagTypeConverters.convertTezSessionStatusFromProto(response.getStatus()); } catch (TezException e) { LOG.info("Failed to retrieve AM Status via proxy", e); } catch (ServiceException e) { LOG.info("Failed to retrieve AM Status via proxy", e); } } } catch (YarnException e) { throw new TezException(e); } return TezSessionStatus.INITIALIZING; }
@Override public boolean hasFailed() { if (!isConnected) { throw new IllegalStateException("The cluster has been connected to the ApplicationMaster."); } if (pollingRunner == null) { LOG.warn( "FlinkYarnCluster.hasFailed() has been called on an uninitialized cluster." + "The system might be in an erroneous state"); } ApplicationReport lastReport = pollingRunner.getLastReport(); if (lastReport == null) { LOG.warn( "FlinkYarnCluster.hasFailed() has been called on a cluster that didn't receive a status so far." + "The system might be in an erroneous state"); return false; } else { YarnApplicationState appState = lastReport.getYarnApplicationState(); boolean status = (appState == YarnApplicationState.FAILED || appState == YarnApplicationState.KILLED); if (status) { LOG.warn("YARN reported application state {}", appState); LOG.warn("Diagnostics: {}", lastReport.getDiagnostics()); } return status; } }
/** Test regular operation, including command line parameter parsing. */ @Test(timeout = 60000) // timeout after a minute. public void testDetachedMode() { LOG.info("Starting testDetachedMode()"); addTestAppender(FlinkYarnSessionCli.class, Level.INFO); Runner runner = startWithArgs( new String[] { "-j", flinkUberjar.getAbsolutePath(), "-t", flinkLibFolder.getAbsolutePath(), "-n", "1", "-jm", "768", "-tm", "1024", "--name", "MyCustomName", // test setting a custom name "--detached" }, "Flink JobManager is now running on", RunTypes.YARN_SESSION); checkForLogString("The Flink YARN client has been started in detached mode"); Assert.assertFalse("The runner should detach.", runner.isAlive()); LOG.info("Waiting until two containers are running"); // wait until two containers are running while (getRunningContainers() < 2) { sleep(500); } LOG.info("Two containers are running. Killing the application"); // kill application "externally". try { YarnClient yc = YarnClient.createYarnClient(); yc.init(yarnConfiguration); yc.start(); List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING)); Assert.assertEquals(1, apps.size()); // Only one running ApplicationReport app = apps.get(0); Assert.assertEquals("MyCustomName", app.getName()); ApplicationId id = app.getApplicationId(); yc.killApplication(id); while (yc.getApplications(EnumSet.of(YarnApplicationState.KILLED)).size() == 0) { sleep(500); } } catch (Throwable t) { LOG.warn("Killing failed", t); Assert.fail(); } LOG.info("Finished testDetachedMode()"); }
public static String appReportToString(ApplicationReport r, String separator) { StringBuilder builder = new StringBuilder(512); builder.append("application ").append(r.getName()).append("/").append(r.getApplicationType()); builder.append(separator).append("state: ").append(r.getYarnApplicationState()); builder.append(separator).append("URL: ").append(r.getTrackingUrl()); builder .append(separator) .append("Started ") .append(new Date(r.getStartTime()).toLocaleString()); long finishTime = r.getFinishTime(); if (finishTime > 0) { builder.append(separator).append("Finished ").append(new Date(finishTime).toLocaleString()); } builder .append(separator) .append("RPC :") .append(r.getHost()) .append(':') .append(r.getRpcPort()); String diagnostics = r.getDiagnostics(); if (!diagnostics.isEmpty()) { builder.append(separator).append("Diagnostics :").append(diagnostics); } return builder.toString(); }
private void waitTillAccepted(YarnClient rmClient, ApplicationId appId) throws Exception { try { long start = System.currentTimeMillis(); ApplicationReport report = rmClient.getApplicationReport(appId); while (YarnApplicationState.ACCEPTED != report.getYarnApplicationState()) { if (System.currentTimeMillis() - start > 20 * 1000) { throw new Exception("App '" + appId + "' time out, failed to reach ACCEPTED state"); } Thread.sleep(200); report = rmClient.getApplicationReport(appId); } } catch (Exception ex) { throw new Exception(ex); } }
public void setYarnApplicationState(YarnApplicationState state) { when(mockReport.getYarnApplicationState()) .thenReturn( YarnApplicationState.NEW, YarnApplicationState.NEW_SAVING, YarnApplicationState.NEW_SAVING, state); }
@Override public String getDiagnostics() { if (!isConnected) { throw new IllegalStateException("The cluster has been connected to the ApplicationMaster."); } if (!hasFailed()) { LOG.warn("getDiagnostics() called for cluster which is not in failed state"); } ApplicationReport lastReport = pollingRunner.getLastReport(); if (lastReport == null) { LOG.warn("Last report is null"); return null; } else { return lastReport.getDiagnostics(); } }
@Test public void testHistoryServerNotConfigured() throws Exception { // RM doesn't have app report and job History Server is not configured ClientServiceDelegate clientServiceDelegate = getClientServiceDelegate(null, getRMDelegate()); JobStatus jobStatus = clientServiceDelegate.getJobStatus(oldJobId); Assert.assertEquals("N/A", jobStatus.getUsername()); Assert.assertEquals(JobStatus.State.PREP, jobStatus.getState()); // RM has app report and job History Server is not configured ResourceMgrDelegate rm = mock(ResourceMgrDelegate.class); ApplicationReport applicationReport = getFinishedApplicationReport(); when(rm.getApplicationReport(jobId.getAppId())).thenReturn(applicationReport); clientServiceDelegate = getClientServiceDelegate(null, rm); jobStatus = clientServiceDelegate.getJobStatus(oldJobId); Assert.assertEquals(applicationReport.getUser(), jobStatus.getUsername()); Assert.assertEquals(JobStatus.State.SUCCEEDED, jobStatus.getState()); }
@Test(timeout = 20000) public void testJobSubmissionFailure() throws Exception { when(resourceMgrDelegate.submitApplication(any(ApplicationSubmissionContext.class))) .thenReturn(appId); ApplicationReport report = mock(ApplicationReport.class); when(report.getApplicationId()).thenReturn(appId); when(report.getDiagnostics()).thenReturn(failString); when(report.getYarnApplicationState()).thenReturn(YarnApplicationState.FAILED); when(resourceMgrDelegate.getApplicationReport(appId)).thenReturn(report); Credentials credentials = new Credentials(); File jobxml = new File(testWorkDir, "job.xml"); OutputStream out = new FileOutputStream(jobxml); conf.writeXml(out); out.close(); try { yarnRunner.submitJob(jobId, testWorkDir.getAbsolutePath().toString(), credentials); } catch (IOException io) { LOG.info("Logging exception:", io); assertTrue(io.getLocalizedMessage().contains(failString)); } }
private List<ApplicationReport> getApplicationReports( List<ApplicationReport> applicationReports, Set<String> applicationTypes, EnumSet<YarnApplicationState> applicationStates) { List<ApplicationReport> appReports = new ArrayList<ApplicationReport>(); for (ApplicationReport appReport : applicationReports) { if (applicationTypes != null && !applicationTypes.isEmpty()) { if (!applicationTypes.contains(appReport.getApplicationType())) { continue; } } if (applicationStates != null && !applicationStates.isEmpty()) { if (!applicationStates.contains(appReport.getYarnApplicationState())) { continue; } } appReports.add(appReport); } return appReports; }
@Before public void checkClusterEmpty() throws IOException, YarnException { if (yarnClient == null) { yarnClient = YarnClient.createYarnClient(); yarnClient.init(yarnConfiguration); yarnClient.start(); } List<ApplicationReport> apps = yarnClient.getApplications(); for (ApplicationReport app : apps) { if (app.getYarnApplicationState() != YarnApplicationState.FINISHED && app.getYarnApplicationState() != YarnApplicationState.KILLED && app.getYarnApplicationState() != YarnApplicationState.FAILED) { Assert.fail( "There is at least one application on the cluster is not finished." + "App " + app.getApplicationId() + " is in state " + app.getYarnApplicationState()); } } }
/** * Create a new Flink on YARN cluster. * * @param yarnClient * @param appId the YARN application ID * @param hadoopConfig * @param flinkConfig * @param sessionFilesDir * @param detached Set to true if no actor system or RPC communication with the cluster should be * established * @throws IOException * @throws YarnException */ public FlinkYarnCluster( final YarnClient yarnClient, final ApplicationId appId, Configuration hadoopConfig, org.apache.flink.configuration.Configuration flinkConfig, Path sessionFilesDir, boolean detached) throws IOException, YarnException { this.akkaDuration = AkkaUtils.getTimeout(flinkConfig); this.akkaTimeout = Timeout.durationToTimeout(akkaDuration); this.yarnClient = yarnClient; this.hadoopConfig = hadoopConfig; this.sessionFilesDir = sessionFilesDir; this.applicationId = appId; this.detached = detached; this.flinkConfig = flinkConfig; this.appId = appId; // get one application report manually intialAppReport = yarnClient.getApplicationReport(appId); String jobManagerHost = intialAppReport.getHost(); int jobManagerPort = intialAppReport.getRpcPort(); this.jobManagerAddress = new InetSocketAddress(jobManagerHost, jobManagerPort); }
/** * convert an AM report to a string for diagnostics * * @param report the report * @return the string value */ public static String reportToString(ApplicationReport report) { if (report == null) { return "Null application report"; } return "App " + report.getName() + "/" + report.getApplicationType() + "# " + report.getApplicationId() + " user " + report.getUser() + " is in state " + report.getYarnApplicationState() + "RPC: " + report.getHost() + ":" + report.getRpcPort(); }
private ApplicationReport getRunningApplicationReport(String host, int port) { ApplicationId appId = ApplicationId.newInstance(1234, 5); ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(appId, 0); return ApplicationReport.newInstance( appId, attemptId, "user", "queue", "appname", host, port, null, YarnApplicationState.RUNNING, "diagnostics", "url", 0, 0, FinalApplicationStatus.UNDEFINED, null, "N/A", 0.0f, YarnConfiguration.DEFAULT_APPLICATION_TYPE, null); }
private ApplicationReport getFinishedApplicationReport() { ApplicationId appId = ApplicationId.newInstance(1234, 5); ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(appId, 0); return ApplicationReport.newInstance( appId, attemptId, "user", "queue", "appname", "host", 124, null, YarnApplicationState.FINISHED, "diagnostics", "url", 0, 0, FinalApplicationStatus.SUCCEEDED, null, "N/A", 0.0f, YarnConfiguration.DEFAULT_APPLICATION_TYPE, null); }
@Override public ApplicationId submitApplication(ApplicationSubmissionContext appContext) throws YarnException, IOException { ApplicationId applicationId = appContext.getApplicationId(); if (applicationId == null) { throw new ApplicationIdNotProvidedException( "ApplicationId is not provided in ApplicationSubmissionContext"); } SubmitApplicationRequest request = Records.newRecord(SubmitApplicationRequest.class); request.setApplicationSubmissionContext(appContext); // Automatically add the timeline DT into the CLC // Only when the security and the timeline service are both enabled if (isSecurityEnabled() && timelineServiceEnabled) { addTimelineDelegationToken(appContext.getAMContainerSpec()); } // TODO: YARN-1763:Handle RM failovers during the submitApplication call. rmClient.submitApplication(request); int pollCount = 0; long startTime = System.currentTimeMillis(); EnumSet<YarnApplicationState> waitingStates = EnumSet.of( YarnApplicationState.NEW, YarnApplicationState.NEW_SAVING, YarnApplicationState.SUBMITTED); EnumSet<YarnApplicationState> failToSubmitStates = EnumSet.of(YarnApplicationState.FAILED, YarnApplicationState.KILLED); while (true) { try { ApplicationReport appReport = getApplicationReport(applicationId); YarnApplicationState state = appReport.getYarnApplicationState(); if (!waitingStates.contains(state)) { if (failToSubmitStates.contains(state)) { throw new YarnException( "Failed to submit " + applicationId + " to YARN : " + appReport.getDiagnostics()); } LOG.info("Submitted application " + applicationId); break; } long elapsedMillis = System.currentTimeMillis() - startTime; if (enforceAsyncAPITimeout() && elapsedMillis >= asyncApiPollTimeoutMillis) { throw new YarnException( "Timed out while waiting for application " + applicationId + " to be submitted successfully"); } // Notify the client through the log every 10 poll, in case the client // is blocked here too long. if (++pollCount % 10 == 0) { LOG.info( "Application submission is not finished, " + "submitted application " + applicationId + " is still in " + state); } try { Thread.sleep(submitPollIntervalMillis); } catch (InterruptedException ie) { LOG.error( "Interrupted while waiting for application " + applicationId + " to be successfully submitted."); } } catch (ApplicationNotFoundException ex) { // FailOver or RM restart happens before RMStateStore saves // ApplicationState LOG.info( "Re-submit application " + applicationId + "with the " + "same ApplicationSubmissionContext"); rmClient.submitApplication(request); } } return applicationId; }
public static boolean hasAppFinished(ApplicationReport report) { return report == null || report.getYarnApplicationState().ordinal() >= YarnApplicationState.FINISHED.ordinal(); }
private void testDetachedPerJobYarnClusterInternal(String job) { YarnClient yc = YarnClient.createYarnClient(); yc.init(yarnConfiguration); yc.start(); // get temporary folder for writing output of wordcount example File tmpOutFolder = null; try { tmpOutFolder = tmp.newFolder(); } catch (IOException e) { throw new RuntimeException(e); } // get temporary file for reading input data for wordcount example File tmpInFile; try { tmpInFile = tmp.newFile(); FileUtils.writeStringToFile(tmpInFile, WordCountData.TEXT); } catch (IOException e) { throw new RuntimeException(e); } Runner runner = startWithArgs( new String[] { "run", "-m", "yarn-cluster", "-yj", flinkUberjar.getAbsolutePath(), "-yt", flinkLibFolder.getAbsolutePath(), "-yn", "1", "-yjm", "768", "-yD", "yarn.heap-cutoff-ratio=0.5", // test if the cutoff is passed correctly "-ytm", "1024", "-ys", "2", // test requesting slots from YARN. "--yarndetached", job, tmpInFile.getAbsoluteFile().toString(), tmpOutFolder.getAbsoluteFile().toString() }, "Job has been submitted with JobID", RunTypes.CLI_FRONTEND); // it should usually be 2, but on slow machines, the number varies Assert.assertTrue("There should be at most 2 containers running", getRunningContainers() <= 2); // give the runner some time to detach for (int attempt = 0; runner.isAlive() && attempt < 5; attempt++) { try { Thread.sleep(500); } catch (InterruptedException e) { } } Assert.assertFalse("The runner should detach.", runner.isAlive()); LOG.info("CLI Frontend has returned, so the job is running"); // find out the application id and wait until it has finished. try { List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING)); ApplicationId tmpAppId; if (apps.size() == 1) { // Better method to find the right appId. But sometimes the app is shutting down very fast // Only one running tmpAppId = apps.get(0).getApplicationId(); LOG.info("waiting for the job with appId {} to finish", tmpAppId); // wait until the app has finished while (yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING)).size() > 0) { sleep(500); } } else { // get appId by finding the latest finished appid apps = yc.getApplications(); Collections.sort( apps, new Comparator<ApplicationReport>() { @Override public int compare(ApplicationReport o1, ApplicationReport o2) { return o1.getApplicationId().compareTo(o2.getApplicationId()) * -1; } }); tmpAppId = apps.get(0).getApplicationId(); LOG.info( "Selected {} as the last appId from {}", tmpAppId, Arrays.toString(apps.toArray())); } final ApplicationId id = tmpAppId; // now it has finished. // check the output files. File[] listOfOutputFiles = tmpOutFolder.listFiles(); Assert.assertNotNull("Taskmanager output not found", listOfOutputFiles); LOG.info("The job has finished. TaskManager output files found in {}", tmpOutFolder); // read all output files in output folder to one output string String content = ""; for (File f : listOfOutputFiles) { if (f.isFile()) { content += FileUtils.readFileToString(f) + "\n"; } } // String content = FileUtils.readFileToString(taskmanagerOut); // check for some of the wordcount outputs. Assert.assertTrue( "Expected string 'da 5' or '(all,2)' not found in string '" + content + "'", content.contains("da 5") || content.contains("(da,5)") || content.contains("(all,2)")); Assert.assertTrue( "Expected string 'der 29' or '(mind,1)' not found in string'" + content + "'", content.contains("der 29") || content.contains("(der,29)") || content.contains("(mind,1)")); // check if the heap size for the TaskManager was set correctly File jobmanagerLog = YarnTestBase.findFile( "..", new FilenameFilter() { @Override public boolean accept(File dir, String name) { return name.contains("jobmanager.log") && dir.getAbsolutePath().contains(id.toString()); } }); Assert.assertNotNull("Unable to locate JobManager log", jobmanagerLog); content = FileUtils.readFileToString(jobmanagerLog); // TM was started with 1024 but we cut off 50% (NOT THE DEFAULT VALUE) String expected = "Starting TM with command=$JAVA_HOME/bin/java -Xms424m -Xmx424m"; Assert.assertTrue( "Expected string '" + expected + "' not found in JobManager log: '" + jobmanagerLog + "'", content.contains(expected)); expected = " (2/2) (attempt #0) to "; Assert.assertTrue( "Expected string '" + expected + "' not found in JobManager log." + "This string checks that the job has been started with a parallelism of 2. Log contents: '" + jobmanagerLog + "'", content.contains(expected)); // make sure the detached app is really finished. LOG.info("Checking again that app has finished"); ApplicationReport rep; do { sleep(500); rep = yc.getApplicationReport(id); LOG.info("Got report {}", rep); } while (rep.getYarnApplicationState() == YarnApplicationState.RUNNING); } catch (Throwable t) { LOG.warn("Error while detached yarn session was running", t); Assert.fail(t.getMessage()); } }
/** Test TaskManager failure */ @Test(timeout = 100000) // timeout after 100 seconds public void testTaskManagerFailure() { LOG.info("Starting testTaskManagerFailure()"); Runner runner = startWithArgs( new String[] { "-j", flinkUberjar.getAbsolutePath(), "-t", flinkLibFolder.getAbsolutePath(), "-n", "1", "-jm", "768", "-tm", "1024", "-nm", "customName", "-Dfancy-configuration-value=veryFancy", "-Dyarn.maximum-failed-containers=3" }, "Number of connected TaskManagers changed to 1. Slots available: 1", RunTypes.YARN_SESSION); Assert.assertEquals(2, getRunningContainers()); // ------------------------ Test if JobManager web interface is accessible ------- try { YarnClient yc = YarnClient.createYarnClient(); yc.init(yarnConfiguration); yc.start(); List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING)); Assert.assertEquals(1, apps.size()); // Only one running ApplicationReport app = apps.get(0); Assert.assertEquals("customName", app.getName()); String url = app.getTrackingUrl(); if (!url.endsWith("/")) { url += "/"; } if (!url.startsWith("http://")) { url = "http://" + url; } LOG.info("Got application URL from YARN {}", url); String response = TestBaseUtils.getFromHTTP(url + "taskmanagers/"); JSONObject parsedTMs = new JSONObject(response); JSONArray taskManagers = parsedTMs.getJSONArray("taskmanagers"); Assert.assertNotNull(taskManagers); Assert.assertEquals(1, taskManagers.length()); Assert.assertEquals(1, taskManagers.getJSONObject(0).getInt("slotsNumber")); // get the configuration from webinterface & check if the dynamic properties from YARN show up // there. String jsonConfig = TestBaseUtils.getFromHTTP(url + "jobmanager/config"); JSONArray parsed = new JSONArray(jsonConfig); Map<String, String> parsedConfig = WebMonitorUtils.fromKeyValueJsonArray(parsed); Assert.assertEquals("veryFancy", parsedConfig.get("fancy-configuration-value")); Assert.assertEquals("3", parsedConfig.get("yarn.maximum-failed-containers")); // -------------- FLINK-1902: check if jobmanager hostname/port are shown in web interface // first, get the hostname/port String oC = outContent.toString(); Pattern p = Pattern.compile("Flink JobManager is now running on ([a-zA-Z0-9.-]+):([0-9]+)"); Matcher matches = p.matcher(oC); String hostname = null; String port = null; while (matches.find()) { hostname = matches.group(1).toLowerCase(); port = matches.group(2); } LOG.info("Extracted hostname:port: {} {}", hostname, port); Assert.assertEquals( "unable to find hostname in " + parsed, hostname, parsedConfig.get(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY)); Assert.assertEquals( "unable to find port in " + parsed, port, parsedConfig.get(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY)); // test logfile access String logs = TestBaseUtils.getFromHTTP(url + "jobmanager/log"); Assert.assertTrue(logs.contains("Starting YARN ApplicationMaster/JobManager (Version")); } catch (Throwable e) { LOG.warn("Error while running test", e); Assert.fail(e.getMessage()); } // ------------------------ Kill container with TaskManager ------- // find container id of taskManager: ContainerId taskManagerContainer = null; NodeManager nodeManager = null; UserGroupInformation remoteUgi = null; NMTokenIdentifier nmIdent = null; try { remoteUgi = UserGroupInformation.getCurrentUser(); } catch (IOException e) { LOG.warn("Unable to get curr user", e); Assert.fail(); } for (int nmId = 0; nmId < NUM_NODEMANAGERS; nmId++) { NodeManager nm = yarnCluster.getNodeManager(nmId); ConcurrentMap<ContainerId, Container> containers = nm.getNMContext().getContainers(); for (Map.Entry<ContainerId, Container> entry : containers.entrySet()) { String command = Joiner.on(" ").join(entry.getValue().getLaunchContext().getCommands()); if (command.contains(YarnTaskManagerRunner.class.getSimpleName())) { taskManagerContainer = entry.getKey(); nodeManager = nm; nmIdent = new NMTokenIdentifier(taskManagerContainer.getApplicationAttemptId(), null, "", 0); // allow myself to do stuff with the container // remoteUgi.addCredentials(entry.getValue().getCredentials()); remoteUgi.addTokenIdentifier(nmIdent); } } sleep(500); } Assert.assertNotNull("Unable to find container with TaskManager", taskManagerContainer); Assert.assertNotNull("Illegal state", nodeManager); List<ContainerId> toStop = new LinkedList<ContainerId>(); toStop.add(taskManagerContainer); StopContainersRequest scr = StopContainersRequest.newInstance(toStop); try { nodeManager.getNMContext().getContainerManager().stopContainers(scr); } catch (Throwable e) { LOG.warn("Error stopping container", e); Assert.fail("Error stopping container: " + e.getMessage()); } // stateful termination check: // wait until we saw a container being killed and AFTERWARDS a new one launched boolean ok = false; do { LOG.debug("Waiting for correct order of events. Output: {}", errContent.toString()); String o = errContent.toString(); int killedOff = o.indexOf("Container killed by the ApplicationMaster"); if (killedOff != -1) { o = o.substring(killedOff); ok = o.indexOf("Launching container") > 0; } sleep(1000); } while (!ok); // send "stop" command to command line interface runner.sendStop(); // wait for the thread to stop try { runner.join(1000); } catch (InterruptedException e) { LOG.warn("Interrupted while stopping runner", e); } LOG.warn("stopped"); // ----------- Send output to logger System.setOut(originalStdout); System.setErr(originalStderr); String oC = outContent.toString(); String eC = errContent.toString(); LOG.info("Sending stdout content through logger: \n\n{}\n\n", oC); LOG.info("Sending stderr content through logger: \n\n{}\n\n", eC); // ------ Check if everything happened correctly Assert.assertTrue( "Expect to see failed container", eC.contains("New messages from the YARN cluster")); Assert.assertTrue( "Expect to see failed container", eC.contains("Container killed by the ApplicationMaster")); Assert.assertTrue( "Expect to see new container started", eC.contains("Launching container") && eC.contains("on host")); // cleanup auth for the subsequent tests. remoteUgi.getTokenIdentifiers().remove(nmIdent); LOG.info("Finished testTaskManagerFailure()"); }
public void run(String[] args) throws Exception { final String command = args[0]; final int n = Integer.valueOf(args[1]); final Path jarPath = new Path(args[2]); // Create yarnClient YarnConfiguration conf = new YarnConfiguration(); YarnClient yarnClient = YarnClient.createYarnClient(); yarnClient.init(conf); yarnClient.start(); // Create application via yarnClient YarnClientApplication app = yarnClient.createApplication(); // Set up the container launch context for the application master ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class); amContainer.setCommands( Collections.singletonList( "$JAVA_HOME/bin/java" + " -Xmx256M" + " com.hortonworks.simpleyarnapp.ApplicationMaster" + " " + command + " " + String.valueOf(n) + " 1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout" + " 2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr")); // Setup jar for ApplicationMaster LocalResource appMasterJar = Records.newRecord(LocalResource.class); setupAppMasterJar(jarPath, appMasterJar); System.out.println("Jar name is " + jarPath.getName()); amContainer.setLocalResources(Collections.singletonMap(jarPath.getName(), appMasterJar)); // Setup CLASSPATH for ApplicationMaster Map<String, String> appMasterEnv = new HashMap<String, String>(); setupAppMasterEnv(appMasterEnv); amContainer.setEnvironment(appMasterEnv); // Set up resource type requirements for ApplicationMaster Resource capability = Records.newRecord(Resource.class); capability.setMemory(256); capability.setVirtualCores(1); // Finally, set-up ApplicationSubmissionContext for the application ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext(); appContext.setApplicationName("apache-yarn-example"); // application name appContext.setAMContainerSpec(amContainer); appContext.setResource(capability); appContext.setQueue("default"); // queue // Submit application ApplicationId appId = appContext.getApplicationId(); System.out.println("Submitting application " + appId); yarnClient.submitApplication(appContext); ApplicationReport appReport = yarnClient.getApplicationReport(appId); YarnApplicationState appState = appReport.getYarnApplicationState(); while (appState != YarnApplicationState.FINISHED && appState != YarnApplicationState.KILLED && appState != YarnApplicationState.FAILED) { System.out.println("App Status = " + appState); Thread.sleep(100); appReport = yarnClient.getApplicationReport(appId); appState = appReport.getYarnApplicationState(); } System.out.println( "Application " + appId + " finished with" + " state " + appState + " at " + appReport.getFinishTime()); }
@Private @Unstable public static ApplicationReport newInstance( ApplicationId applicationId, ApplicationAttemptId applicationAttemptId, String user, String queue, String name, String host, int rpcPort, Token clientToAMToken, YarnApplicationState state, String diagnostics, String url, long startTime, long finishTime, FinalApplicationStatus finalStatus, ApplicationResourceUsageReport appResources, String origTrackingUrl, float progress, String applicationType, Token amRmToken) { ApplicationReport report = Records.newRecord(ApplicationReport.class); report.setApplicationId(applicationId); report.setCurrentApplicationAttemptId(applicationAttemptId); report.setUser(user); report.setQueue(queue); report.setName(name); report.setHost(host); report.setRpcPort(rpcPort); report.setClientToAMToken(clientToAMToken); report.setYarnApplicationState(state); report.setDiagnostics(diagnostics); report.setTrackingUrl(url); report.setStartTime(startTime); report.setFinishTime(finishTime); report.setFinalApplicationStatus(finalStatus); report.setApplicationResourceUsageReport(appResources); report.setOriginalTrackingUrl(origTrackingUrl); report.setProgress(progress); report.setApplicationType(applicationType); report.setAMRMToken(amRmToken); return report; }
/** * Verify the web service deployment and lifecycle functionality * * @throws Exception */ @Ignore // disabled due to web service init delay issue @Test public void testWebService() throws Exception { // single container topology of inline input and module Properties props = new Properties(); props.put( StreamingApplication.DT_PREFIX + "stream.input.classname", TestGeneratorInputOperator.class.getName()); props.put(StreamingApplication.DT_PREFIX + "stream.input.outputNode", "module1"); props.put( StreamingApplication.DT_PREFIX + "module.module1.classname", GenericTestOperator.class.getName()); LOG.info("Initializing Client"); LogicalPlanConfiguration tb = new LogicalPlanConfiguration(new Configuration(false)); tb.addFromProperties(props, null); StramClient client = new StramClient(new Configuration(yarnCluster.getConfig()), createDAG(tb)); if (StringUtils.isBlank(System.getenv("JAVA_HOME"))) { client.javaCmd = "java"; // JAVA_HOME not set in the yarn mini cluster } try { client.start(); client.startApplication(); // attempt web service connection ApplicationReport appReport = client.getApplicationReport(); Thread.sleep(5000); // delay to give web service time to fully initialize Client wsClient = Client.create(); wsClient.setFollowRedirects(true); WebResource r = wsClient .resource("http://" + appReport.getTrackingUrl()) .path(StramWebServices.PATH) .path(StramWebServices.PATH_INFO); LOG.info("Requesting: " + r.getURI()); ClientResponse response = r.accept(MediaType.APPLICATION_JSON).get(ClientResponse.class); assertEquals(MediaType.APPLICATION_JSON_TYPE, response.getType()); JSONObject json = response.getEntity(JSONObject.class); LOG.info("Got response: " + json.toString()); assertEquals("incorrect number of elements", 1, json.length()); assertEquals("appId", appReport.getApplicationId().toString(), json.get("id")); r = wsClient .resource("http://" + appReport.getTrackingUrl()) .path(StramWebServices.PATH) .path(StramWebServices.PATH_PHYSICAL_PLAN_OPERATORS); LOG.info("Requesting: " + r.getURI()); response = r.accept(MediaType.APPLICATION_JSON).get(ClientResponse.class); assertEquals(MediaType.APPLICATION_JSON_TYPE, response.getType()); json = response.getEntity(JSONObject.class); LOG.info("Got response: " + json.toString()); } finally { // LOG.info("waiting..."); // synchronized (this) { // this.wait(); // } // boolean result = client.monitorApplication(); client.killApplication(); client.stop(); } }
private List<ApplicationReport> createAppReports() { ApplicationId applicationId = ApplicationId.newInstance(1234, 5); ApplicationReport newApplicationReport = ApplicationReport.newInstance( applicationId, ApplicationAttemptId.newInstance(applicationId, 1), "user", "queue", "appname", "host", 124, null, YarnApplicationState.RUNNING, "diagnostics", "url", 0, 0, FinalApplicationStatus.SUCCEEDED, null, "N/A", 0.53789f, "YARN", null); List<ApplicationReport> applicationReports = new ArrayList<ApplicationReport>(); applicationReports.add(newApplicationReport); List<ApplicationAttemptReport> appAttempts = new ArrayList<ApplicationAttemptReport>(); ApplicationAttemptReport attempt = ApplicationAttemptReport.newInstance( ApplicationAttemptId.newInstance(applicationId, 1), "host", 124, "url", "oUrl", "diagnostics", YarnApplicationAttemptState.FINISHED, ContainerId.newContainerId(newApplicationReport.getCurrentApplicationAttemptId(), 1)); appAttempts.add(attempt); ApplicationAttemptReport attempt1 = ApplicationAttemptReport.newInstance( ApplicationAttemptId.newInstance(applicationId, 2), "host", 124, "url", "oUrl", "diagnostics", YarnApplicationAttemptState.FINISHED, ContainerId.newContainerId(newApplicationReport.getCurrentApplicationAttemptId(), 2)); appAttempts.add(attempt1); attempts.put(applicationId, appAttempts); List<ContainerReport> containerReports = new ArrayList<ContainerReport>(); ContainerReport container = ContainerReport.newInstance( ContainerId.newContainerId(attempt.getApplicationAttemptId(), 1), null, NodeId.newInstance("host", 1234), Priority.UNDEFINED, 1234, 5678, "diagnosticInfo", "logURL", 0, ContainerState.COMPLETE, "http://" + NodeId.newInstance("host", 2345).toString()); containerReports.add(container); ContainerReport container1 = ContainerReport.newInstance( ContainerId.newContainerId(attempt.getApplicationAttemptId(), 2), null, NodeId.newInstance("host", 1234), Priority.UNDEFINED, 1234, 5678, "diagnosticInfo", "logURL", 0, ContainerState.COMPLETE, "http://" + NodeId.newInstance("host", 2345).toString()); containerReports.add(container1); containers.put(attempt.getApplicationAttemptId(), containerReports); ApplicationId applicationId2 = ApplicationId.newInstance(1234, 6); ApplicationReport newApplicationReport2 = ApplicationReport.newInstance( applicationId2, ApplicationAttemptId.newInstance(applicationId2, 2), "user2", "queue2", "appname2", "host2", 125, null, YarnApplicationState.FINISHED, "diagnostics2", "url2", 2, 2, FinalApplicationStatus.SUCCEEDED, null, "N/A", 0.63789f, "NON-YARN", null); applicationReports.add(newApplicationReport2); ApplicationId applicationId3 = ApplicationId.newInstance(1234, 7); ApplicationReport newApplicationReport3 = ApplicationReport.newInstance( applicationId3, ApplicationAttemptId.newInstance(applicationId3, 3), "user3", "queue3", "appname3", "host3", 126, null, YarnApplicationState.RUNNING, "diagnostics3", "url3", 3, 3, FinalApplicationStatus.SUCCEEDED, null, "N/A", 0.73789f, "MAPREDUCE", null); applicationReports.add(newApplicationReport3); ApplicationId applicationId4 = ApplicationId.newInstance(1234, 8); ApplicationReport newApplicationReport4 = ApplicationReport.newInstance( applicationId4, ApplicationAttemptId.newInstance(applicationId4, 4), "user4", "queue4", "appname4", "host4", 127, null, YarnApplicationState.FAILED, "diagnostics4", "url4", 4, 4, FinalApplicationStatus.SUCCEEDED, null, "N/A", 0.83789f, "NON-MAPREDUCE", null); applicationReports.add(newApplicationReport4); return applicationReports; }
@Override public GetApplicationReportResponse getApplicationReport(GetApplicationReportRequest request) throws IOException { ApplicationId applicationId = request.getApplicationId(); ApplicationReport application = recordFactory.newRecordInstance(ApplicationReport.class); application.setApplicationId(applicationId); application.setFinalApplicationStatus(FinalApplicationStatus.UNDEFINED); if (amRunning) { application.setYarnApplicationState(YarnApplicationState.RUNNING); } else if (amRestarting) { application.setYarnApplicationState(YarnApplicationState.SUBMITTED); } else { application.setYarnApplicationState(YarnApplicationState.FINISHED); application.setFinalApplicationStatus(FinalApplicationStatus.SUCCEEDED); } String[] split = AMHOSTADDRESS.split(":"); application.setHost(split[0]); application.setRpcPort(Integer.parseInt(split[1])); application.setUser("TestClientRedirect-user"); application.setName("N/A"); application.setQueue("N/A"); application.setStartTime(0); application.setFinishTime(0); application.setTrackingUrl("N/A"); application.setDiagnostics("N/A"); GetApplicationReportResponse response = recordFactory.newRecordInstance(GetApplicationReportResponse.class); response.setApplicationReport(application); return response; }