@Override public boolean hasFailed() { if (!isConnected) { throw new IllegalStateException("The cluster has been connected to the ApplicationMaster."); } if (pollingRunner == null) { LOG.warn( "FlinkYarnCluster.hasFailed() has been called on an uninitialized cluster." + "The system might be in an erroneous state"); } ApplicationReport lastReport = pollingRunner.getLastReport(); if (lastReport == null) { LOG.warn( "FlinkYarnCluster.hasFailed() has been called on a cluster that didn't receive a status so far." + "The system might be in an erroneous state"); return false; } else { YarnApplicationState appState = lastReport.getYarnApplicationState(); boolean status = (appState == YarnApplicationState.FAILED || appState == YarnApplicationState.KILLED); if (status) { LOG.warn("YARN reported application state {}", appState); LOG.warn("Diagnostics: {}", lastReport.getDiagnostics()); } return status; } }
public TezSessionStatus getSessionStatus() throws TezException, IOException { try { ApplicationReport appReport = yarnClient.getApplicationReport(applicationId); switch (appReport.getYarnApplicationState()) { case NEW: case NEW_SAVING: case ACCEPTED: case SUBMITTED: return TezSessionStatus.INITIALIZING; case FINISHED: case FAILED: case KILLED: return TezSessionStatus.SHUTDOWN; case RUNNING: try { DAGClientAMProtocolBlockingPB proxy = TezClientUtils.getSessionAMProxy( yarnClient, sessionConfig.getYarnConfiguration(), applicationId); if (proxy == null) { return TezSessionStatus.INITIALIZING; } GetAMStatusResponseProto response = proxy.getAMStatus(null, GetAMStatusRequestProto.newBuilder().build()); return DagTypeConverters.convertTezSessionStatusFromProto(response.getStatus()); } catch (TezException e) { LOG.info("Failed to retrieve AM Status via proxy", e); } catch (ServiceException e) { LOG.info("Failed to retrieve AM Status via proxy", e); } } } catch (YarnException e) { throw new TezException(e); } return TezSessionStatus.INITIALIZING; }
public static String appReportToString(ApplicationReport r, String separator) { StringBuilder builder = new StringBuilder(512); builder.append("application ").append(r.getName()).append("/").append(r.getApplicationType()); builder.append(separator).append("state: ").append(r.getYarnApplicationState()); builder.append(separator).append("URL: ").append(r.getTrackingUrl()); builder .append(separator) .append("Started ") .append(new Date(r.getStartTime()).toLocaleString()); long finishTime = r.getFinishTime(); if (finishTime > 0) { builder.append(separator).append("Finished ").append(new Date(finishTime).toLocaleString()); } builder .append(separator) .append("RPC :") .append(r.getHost()) .append(':') .append(r.getRpcPort()); String diagnostics = r.getDiagnostics(); if (!diagnostics.isEmpty()) { builder.append(separator).append("Diagnostics :").append(diagnostics); } return builder.toString(); }
public void setYarnApplicationState(YarnApplicationState state) { when(mockReport.getYarnApplicationState()) .thenReturn( YarnApplicationState.NEW, YarnApplicationState.NEW_SAVING, YarnApplicationState.NEW_SAVING, state); }
@Before public void checkClusterEmpty() throws IOException, YarnException { if (yarnClient == null) { yarnClient = YarnClient.createYarnClient(); yarnClient.init(yarnConfiguration); yarnClient.start(); } List<ApplicationReport> apps = yarnClient.getApplications(); for (ApplicationReport app : apps) { if (app.getYarnApplicationState() != YarnApplicationState.FINISHED && app.getYarnApplicationState() != YarnApplicationState.KILLED && app.getYarnApplicationState() != YarnApplicationState.FAILED) { Assert.fail( "There is at least one application on the cluster is not finished." + "App " + app.getApplicationId() + " is in state " + app.getYarnApplicationState()); } } }
private void waitTillAccepted(YarnClient rmClient, ApplicationId appId) throws Exception { try { long start = System.currentTimeMillis(); ApplicationReport report = rmClient.getApplicationReport(appId); while (YarnApplicationState.ACCEPTED != report.getYarnApplicationState()) { if (System.currentTimeMillis() - start > 20 * 1000) { throw new Exception("App '" + appId + "' time out, failed to reach ACCEPTED state"); } Thread.sleep(200); report = rmClient.getApplicationReport(appId); } } catch (Exception ex) { throw new Exception(ex); } }
/** * convert an AM report to a string for diagnostics * * @param report the report * @return the string value */ public static String reportToString(ApplicationReport report) { if (report == null) { return "Null application report"; } return "App " + report.getName() + "/" + report.getApplicationType() + "# " + report.getApplicationId() + " user " + report.getUser() + " is in state " + report.getYarnApplicationState() + "RPC: " + report.getHost() + ":" + report.getRpcPort(); }
@Test(timeout = 20000) public void testJobSubmissionFailure() throws Exception { when(resourceMgrDelegate.submitApplication(any(ApplicationSubmissionContext.class))) .thenReturn(appId); ApplicationReport report = mock(ApplicationReport.class); when(report.getApplicationId()).thenReturn(appId); when(report.getDiagnostics()).thenReturn(failString); when(report.getYarnApplicationState()).thenReturn(YarnApplicationState.FAILED); when(resourceMgrDelegate.getApplicationReport(appId)).thenReturn(report); Credentials credentials = new Credentials(); File jobxml = new File(testWorkDir, "job.xml"); OutputStream out = new FileOutputStream(jobxml); conf.writeXml(out); out.close(); try { yarnRunner.submitJob(jobId, testWorkDir.getAbsolutePath().toString(), credentials); } catch (IOException io) { LOG.info("Logging exception:", io); assertTrue(io.getLocalizedMessage().contains(failString)); } }
private List<ApplicationReport> getApplicationReports( List<ApplicationReport> applicationReports, Set<String> applicationTypes, EnumSet<YarnApplicationState> applicationStates) { List<ApplicationReport> appReports = new ArrayList<ApplicationReport>(); for (ApplicationReport appReport : applicationReports) { if (applicationTypes != null && !applicationTypes.isEmpty()) { if (!applicationTypes.contains(appReport.getApplicationType())) { continue; } } if (applicationStates != null && !applicationStates.isEmpty()) { if (!applicationStates.contains(appReport.getYarnApplicationState())) { continue; } } appReports.add(appReport); } return appReports; }
@Override public ApplicationId submitApplication(ApplicationSubmissionContext appContext) throws YarnException, IOException { ApplicationId applicationId = appContext.getApplicationId(); if (applicationId == null) { throw new ApplicationIdNotProvidedException( "ApplicationId is not provided in ApplicationSubmissionContext"); } SubmitApplicationRequest request = Records.newRecord(SubmitApplicationRequest.class); request.setApplicationSubmissionContext(appContext); // Automatically add the timeline DT into the CLC // Only when the security and the timeline service are both enabled if (isSecurityEnabled() && timelineServiceEnabled) { addTimelineDelegationToken(appContext.getAMContainerSpec()); } // TODO: YARN-1763:Handle RM failovers during the submitApplication call. rmClient.submitApplication(request); int pollCount = 0; long startTime = System.currentTimeMillis(); EnumSet<YarnApplicationState> waitingStates = EnumSet.of( YarnApplicationState.NEW, YarnApplicationState.NEW_SAVING, YarnApplicationState.SUBMITTED); EnumSet<YarnApplicationState> failToSubmitStates = EnumSet.of(YarnApplicationState.FAILED, YarnApplicationState.KILLED); while (true) { try { ApplicationReport appReport = getApplicationReport(applicationId); YarnApplicationState state = appReport.getYarnApplicationState(); if (!waitingStates.contains(state)) { if (failToSubmitStates.contains(state)) { throw new YarnException( "Failed to submit " + applicationId + " to YARN : " + appReport.getDiagnostics()); } LOG.info("Submitted application " + applicationId); break; } long elapsedMillis = System.currentTimeMillis() - startTime; if (enforceAsyncAPITimeout() && elapsedMillis >= asyncApiPollTimeoutMillis) { throw new YarnException( "Timed out while waiting for application " + applicationId + " to be submitted successfully"); } // Notify the client through the log every 10 poll, in case the client // is blocked here too long. if (++pollCount % 10 == 0) { LOG.info( "Application submission is not finished, " + "submitted application " + applicationId + " is still in " + state); } try { Thread.sleep(submitPollIntervalMillis); } catch (InterruptedException ie) { LOG.error( "Interrupted while waiting for application " + applicationId + " to be successfully submitted."); } } catch (ApplicationNotFoundException ex) { // FailOver or RM restart happens before RMStateStore saves // ApplicationState LOG.info( "Re-submit application " + applicationId + "with the " + "same ApplicationSubmissionContext"); rmClient.submitApplication(request); } } return applicationId; }
private void testDetachedPerJobYarnClusterInternal(String job) { YarnClient yc = YarnClient.createYarnClient(); yc.init(yarnConfiguration); yc.start(); // get temporary folder for writing output of wordcount example File tmpOutFolder = null; try { tmpOutFolder = tmp.newFolder(); } catch (IOException e) { throw new RuntimeException(e); } // get temporary file for reading input data for wordcount example File tmpInFile; try { tmpInFile = tmp.newFile(); FileUtils.writeStringToFile(tmpInFile, WordCountData.TEXT); } catch (IOException e) { throw new RuntimeException(e); } Runner runner = startWithArgs( new String[] { "run", "-m", "yarn-cluster", "-yj", flinkUberjar.getAbsolutePath(), "-yt", flinkLibFolder.getAbsolutePath(), "-yn", "1", "-yjm", "768", "-yD", "yarn.heap-cutoff-ratio=0.5", // test if the cutoff is passed correctly "-ytm", "1024", "-ys", "2", // test requesting slots from YARN. "--yarndetached", job, tmpInFile.getAbsoluteFile().toString(), tmpOutFolder.getAbsoluteFile().toString() }, "Job has been submitted with JobID", RunTypes.CLI_FRONTEND); // it should usually be 2, but on slow machines, the number varies Assert.assertTrue("There should be at most 2 containers running", getRunningContainers() <= 2); // give the runner some time to detach for (int attempt = 0; runner.isAlive() && attempt < 5; attempt++) { try { Thread.sleep(500); } catch (InterruptedException e) { } } Assert.assertFalse("The runner should detach.", runner.isAlive()); LOG.info("CLI Frontend has returned, so the job is running"); // find out the application id and wait until it has finished. try { List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING)); ApplicationId tmpAppId; if (apps.size() == 1) { // Better method to find the right appId. But sometimes the app is shutting down very fast // Only one running tmpAppId = apps.get(0).getApplicationId(); LOG.info("waiting for the job with appId {} to finish", tmpAppId); // wait until the app has finished while (yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING)).size() > 0) { sleep(500); } } else { // get appId by finding the latest finished appid apps = yc.getApplications(); Collections.sort( apps, new Comparator<ApplicationReport>() { @Override public int compare(ApplicationReport o1, ApplicationReport o2) { return o1.getApplicationId().compareTo(o2.getApplicationId()) * -1; } }); tmpAppId = apps.get(0).getApplicationId(); LOG.info( "Selected {} as the last appId from {}", tmpAppId, Arrays.toString(apps.toArray())); } final ApplicationId id = tmpAppId; // now it has finished. // check the output files. File[] listOfOutputFiles = tmpOutFolder.listFiles(); Assert.assertNotNull("Taskmanager output not found", listOfOutputFiles); LOG.info("The job has finished. TaskManager output files found in {}", tmpOutFolder); // read all output files in output folder to one output string String content = ""; for (File f : listOfOutputFiles) { if (f.isFile()) { content += FileUtils.readFileToString(f) + "\n"; } } // String content = FileUtils.readFileToString(taskmanagerOut); // check for some of the wordcount outputs. Assert.assertTrue( "Expected string 'da 5' or '(all,2)' not found in string '" + content + "'", content.contains("da 5") || content.contains("(da,5)") || content.contains("(all,2)")); Assert.assertTrue( "Expected string 'der 29' or '(mind,1)' not found in string'" + content + "'", content.contains("der 29") || content.contains("(der,29)") || content.contains("(mind,1)")); // check if the heap size for the TaskManager was set correctly File jobmanagerLog = YarnTestBase.findFile( "..", new FilenameFilter() { @Override public boolean accept(File dir, String name) { return name.contains("jobmanager.log") && dir.getAbsolutePath().contains(id.toString()); } }); Assert.assertNotNull("Unable to locate JobManager log", jobmanagerLog); content = FileUtils.readFileToString(jobmanagerLog); // TM was started with 1024 but we cut off 50% (NOT THE DEFAULT VALUE) String expected = "Starting TM with command=$JAVA_HOME/bin/java -Xms424m -Xmx424m"; Assert.assertTrue( "Expected string '" + expected + "' not found in JobManager log: '" + jobmanagerLog + "'", content.contains(expected)); expected = " (2/2) (attempt #0) to "; Assert.assertTrue( "Expected string '" + expected + "' not found in JobManager log." + "This string checks that the job has been started with a parallelism of 2. Log contents: '" + jobmanagerLog + "'", content.contains(expected)); // make sure the detached app is really finished. LOG.info("Checking again that app has finished"); ApplicationReport rep; do { sleep(500); rep = yc.getApplicationReport(id); LOG.info("Got report {}", rep); } while (rep.getYarnApplicationState() == YarnApplicationState.RUNNING); } catch (Throwable t) { LOG.warn("Error while detached yarn session was running", t); Assert.fail(t.getMessage()); } }
public void run(String[] args) throws Exception { final String command = args[0]; final int n = Integer.valueOf(args[1]); final Path jarPath = new Path(args[2]); // Create yarnClient YarnConfiguration conf = new YarnConfiguration(); YarnClient yarnClient = YarnClient.createYarnClient(); yarnClient.init(conf); yarnClient.start(); // Create application via yarnClient YarnClientApplication app = yarnClient.createApplication(); // Set up the container launch context for the application master ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class); amContainer.setCommands( Collections.singletonList( "$JAVA_HOME/bin/java" + " -Xmx256M" + " com.hortonworks.simpleyarnapp.ApplicationMaster" + " " + command + " " + String.valueOf(n) + " 1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout" + " 2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr")); // Setup jar for ApplicationMaster LocalResource appMasterJar = Records.newRecord(LocalResource.class); setupAppMasterJar(jarPath, appMasterJar); System.out.println("Jar name is " + jarPath.getName()); amContainer.setLocalResources(Collections.singletonMap(jarPath.getName(), appMasterJar)); // Setup CLASSPATH for ApplicationMaster Map<String, String> appMasterEnv = new HashMap<String, String>(); setupAppMasterEnv(appMasterEnv); amContainer.setEnvironment(appMasterEnv); // Set up resource type requirements for ApplicationMaster Resource capability = Records.newRecord(Resource.class); capability.setMemory(256); capability.setVirtualCores(1); // Finally, set-up ApplicationSubmissionContext for the application ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext(); appContext.setApplicationName("apache-yarn-example"); // application name appContext.setAMContainerSpec(amContainer); appContext.setResource(capability); appContext.setQueue("default"); // queue // Submit application ApplicationId appId = appContext.getApplicationId(); System.out.println("Submitting application " + appId); yarnClient.submitApplication(appContext); ApplicationReport appReport = yarnClient.getApplicationReport(appId); YarnApplicationState appState = appReport.getYarnApplicationState(); while (appState != YarnApplicationState.FINISHED && appState != YarnApplicationState.KILLED && appState != YarnApplicationState.FAILED) { System.out.println("App Status = " + appState); Thread.sleep(100); appReport = yarnClient.getApplicationReport(appId); appState = appReport.getYarnApplicationState(); } System.out.println( "Application " + appId + " finished with" + " state " + appState + " at " + appReport.getFinishTime()); }
public static boolean hasAppFinished(ApplicationReport report) { return report == null || report.getYarnApplicationState().ordinal() >= YarnApplicationState.FINISHED.ordinal(); }