private void waitTillAccepted(YarnClient rmClient, ApplicationId appId) throws Exception { try { long start = System.currentTimeMillis(); ApplicationReport report = rmClient.getApplicationReport(appId); while (YarnApplicationState.ACCEPTED != report.getYarnApplicationState()) { if (System.currentTimeMillis() - start > 20 * 1000) { throw new Exception("App '" + appId + "' time out, failed to reach ACCEPTED state"); } Thread.sleep(200); report = rmClient.getApplicationReport(appId); } } catch (Exception ex) { throw new Exception(ex); } }
public TezSessionStatus getSessionStatus() throws TezException, IOException { try { ApplicationReport appReport = yarnClient.getApplicationReport(applicationId); switch (appReport.getYarnApplicationState()) { case NEW: case NEW_SAVING: case ACCEPTED: case SUBMITTED: return TezSessionStatus.INITIALIZING; case FINISHED: case FAILED: case KILLED: return TezSessionStatus.SHUTDOWN; case RUNNING: try { DAGClientAMProtocolBlockingPB proxy = TezClientUtils.getSessionAMProxy( yarnClient, sessionConfig.getYarnConfiguration(), applicationId); if (proxy == null) { return TezSessionStatus.INITIALIZING; } GetAMStatusResponseProto response = proxy.getAMStatus(null, GetAMStatusRequestProto.newBuilder().build()); return DagTypeConverters.convertTezSessionStatusFromProto(response.getStatus()); } catch (TezException e) { LOG.info("Failed to retrieve AM Status via proxy", e); } catch (ServiceException e) { LOG.info("Failed to retrieve AM Status via proxy", e); } } } catch (YarnException e) { throw new TezException(e); } return TezSessionStatus.INITIALIZING; }
@Override public void run() { while (running.get() && yarnClient.isInState(Service.STATE.STARTED)) { try { ApplicationReport report = yarnClient.getApplicationReport(appId); synchronized (lock) { lastReport = report; } } catch (Exception e) { LOG.warn("Error while getting application report", e); } try { Thread.sleep(FlinkYarnCluster.POLLING_THREAD_INTERVAL_MS); } catch (InterruptedException e) { LOG.error("Polling thread got interrupted", e); Thread.currentThread().interrupt(); // pass interrupt. } } if (running.get() && !yarnClient.isInState(Service.STATE.STARTED)) { // == if the polling thread is still running but the yarn client is stopped. LOG.warn("YARN client is unexpected in state " + yarnClient.getServiceState()); } }
/** * Create a new Flink on YARN cluster. * * @param yarnClient * @param appId the YARN application ID * @param hadoopConfig * @param flinkConfig * @param sessionFilesDir * @param detached Set to true if no actor system or RPC communication with the cluster should be * established * @throws IOException * @throws YarnException */ public FlinkYarnCluster( final YarnClient yarnClient, final ApplicationId appId, Configuration hadoopConfig, org.apache.flink.configuration.Configuration flinkConfig, Path sessionFilesDir, boolean detached) throws IOException, YarnException { this.akkaDuration = AkkaUtils.getTimeout(flinkConfig); this.akkaTimeout = Timeout.durationToTimeout(akkaDuration); this.yarnClient = yarnClient; this.hadoopConfig = hadoopConfig; this.sessionFilesDir = sessionFilesDir; this.applicationId = appId; this.detached = detached; this.flinkConfig = flinkConfig; this.appId = appId; // get one application report manually intialAppReport = yarnClient.getApplicationReport(appId); String jobManagerHost = intialAppReport.getHost(); int jobManagerPort = intialAppReport.getRpcPort(); this.jobManagerAddress = new InetSocketAddress(jobManagerHost, jobManagerPort); }
public ApplicationReport getApplicationReport() throws YarnException, IOException { return yarnClient.getApplicationReport(this.appId); }
private void testDetachedPerJobYarnClusterInternal(String job) { YarnClient yc = YarnClient.createYarnClient(); yc.init(yarnConfiguration); yc.start(); // get temporary folder for writing output of wordcount example File tmpOutFolder = null; try { tmpOutFolder = tmp.newFolder(); } catch (IOException e) { throw new RuntimeException(e); } // get temporary file for reading input data for wordcount example File tmpInFile; try { tmpInFile = tmp.newFile(); FileUtils.writeStringToFile(tmpInFile, WordCountData.TEXT); } catch (IOException e) { throw new RuntimeException(e); } Runner runner = startWithArgs( new String[] { "run", "-m", "yarn-cluster", "-yj", flinkUberjar.getAbsolutePath(), "-yt", flinkLibFolder.getAbsolutePath(), "-yn", "1", "-yjm", "768", "-yD", "yarn.heap-cutoff-ratio=0.5", // test if the cutoff is passed correctly "-ytm", "1024", "-ys", "2", // test requesting slots from YARN. "--yarndetached", job, tmpInFile.getAbsoluteFile().toString(), tmpOutFolder.getAbsoluteFile().toString() }, "Job has been submitted with JobID", RunTypes.CLI_FRONTEND); // it should usually be 2, but on slow machines, the number varies Assert.assertTrue("There should be at most 2 containers running", getRunningContainers() <= 2); // give the runner some time to detach for (int attempt = 0; runner.isAlive() && attempt < 5; attempt++) { try { Thread.sleep(500); } catch (InterruptedException e) { } } Assert.assertFalse("The runner should detach.", runner.isAlive()); LOG.info("CLI Frontend has returned, so the job is running"); // find out the application id and wait until it has finished. try { List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING)); ApplicationId tmpAppId; if (apps.size() == 1) { // Better method to find the right appId. But sometimes the app is shutting down very fast // Only one running tmpAppId = apps.get(0).getApplicationId(); LOG.info("waiting for the job with appId {} to finish", tmpAppId); // wait until the app has finished while (yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING)).size() > 0) { sleep(500); } } else { // get appId by finding the latest finished appid apps = yc.getApplications(); Collections.sort( apps, new Comparator<ApplicationReport>() { @Override public int compare(ApplicationReport o1, ApplicationReport o2) { return o1.getApplicationId().compareTo(o2.getApplicationId()) * -1; } }); tmpAppId = apps.get(0).getApplicationId(); LOG.info( "Selected {} as the last appId from {}", tmpAppId, Arrays.toString(apps.toArray())); } final ApplicationId id = tmpAppId; // now it has finished. // check the output files. File[] listOfOutputFiles = tmpOutFolder.listFiles(); Assert.assertNotNull("Taskmanager output not found", listOfOutputFiles); LOG.info("The job has finished. TaskManager output files found in {}", tmpOutFolder); // read all output files in output folder to one output string String content = ""; for (File f : listOfOutputFiles) { if (f.isFile()) { content += FileUtils.readFileToString(f) + "\n"; } } // String content = FileUtils.readFileToString(taskmanagerOut); // check for some of the wordcount outputs. Assert.assertTrue( "Expected string 'da 5' or '(all,2)' not found in string '" + content + "'", content.contains("da 5") || content.contains("(da,5)") || content.contains("(all,2)")); Assert.assertTrue( "Expected string 'der 29' or '(mind,1)' not found in string'" + content + "'", content.contains("der 29") || content.contains("(der,29)") || content.contains("(mind,1)")); // check if the heap size for the TaskManager was set correctly File jobmanagerLog = YarnTestBase.findFile( "..", new FilenameFilter() { @Override public boolean accept(File dir, String name) { return name.contains("jobmanager.log") && dir.getAbsolutePath().contains(id.toString()); } }); Assert.assertNotNull("Unable to locate JobManager log", jobmanagerLog); content = FileUtils.readFileToString(jobmanagerLog); // TM was started with 1024 but we cut off 50% (NOT THE DEFAULT VALUE) String expected = "Starting TM with command=$JAVA_HOME/bin/java -Xms424m -Xmx424m"; Assert.assertTrue( "Expected string '" + expected + "' not found in JobManager log: '" + jobmanagerLog + "'", content.contains(expected)); expected = " (2/2) (attempt #0) to "; Assert.assertTrue( "Expected string '" + expected + "' not found in JobManager log." + "This string checks that the job has been started with a parallelism of 2. Log contents: '" + jobmanagerLog + "'", content.contains(expected)); // make sure the detached app is really finished. LOG.info("Checking again that app has finished"); ApplicationReport rep; do { sleep(500); rep = yc.getApplicationReport(id); LOG.info("Got report {}", rep); } while (rep.getYarnApplicationState() == YarnApplicationState.RUNNING); } catch (Throwable t) { LOG.warn("Error while detached yarn session was running", t); Assert.fail(t.getMessage()); } }
public void run(String[] args) throws Exception { final String command = args[0]; final int n = Integer.valueOf(args[1]); final Path jarPath = new Path(args[2]); // Create yarnClient YarnConfiguration conf = new YarnConfiguration(); YarnClient yarnClient = YarnClient.createYarnClient(); yarnClient.init(conf); yarnClient.start(); // Create application via yarnClient YarnClientApplication app = yarnClient.createApplication(); // Set up the container launch context for the application master ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class); amContainer.setCommands( Collections.singletonList( "$JAVA_HOME/bin/java" + " -Xmx256M" + " com.hortonworks.simpleyarnapp.ApplicationMaster" + " " + command + " " + String.valueOf(n) + " 1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout" + " 2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr")); // Setup jar for ApplicationMaster LocalResource appMasterJar = Records.newRecord(LocalResource.class); setupAppMasterJar(jarPath, appMasterJar); System.out.println("Jar name is " + jarPath.getName()); amContainer.setLocalResources(Collections.singletonMap(jarPath.getName(), appMasterJar)); // Setup CLASSPATH for ApplicationMaster Map<String, String> appMasterEnv = new HashMap<String, String>(); setupAppMasterEnv(appMasterEnv); amContainer.setEnvironment(appMasterEnv); // Set up resource type requirements for ApplicationMaster Resource capability = Records.newRecord(Resource.class); capability.setMemory(256); capability.setVirtualCores(1); // Finally, set-up ApplicationSubmissionContext for the application ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext(); appContext.setApplicationName("apache-yarn-example"); // application name appContext.setAMContainerSpec(amContainer); appContext.setResource(capability); appContext.setQueue("default"); // queue // Submit application ApplicationId appId = appContext.getApplicationId(); System.out.println("Submitting application " + appId); yarnClient.submitApplication(appContext); ApplicationReport appReport = yarnClient.getApplicationReport(appId); YarnApplicationState appState = appReport.getYarnApplicationState(); while (appState != YarnApplicationState.FINISHED && appState != YarnApplicationState.KILLED && appState != YarnApplicationState.FAILED) { System.out.println("App Status = " + appState); Thread.sleep(100); appReport = yarnClient.getApplicationReport(appId); appState = appReport.getYarnApplicationState(); } System.out.println( "Application " + appId + " finished with" + " state " + appState + " at " + appReport.getFinishTime()); }