/** Test regular operation, including command line parameter parsing. */ @Test(timeout = 60000) // timeout after a minute. public void testDetachedMode() { LOG.info("Starting testDetachedMode()"); addTestAppender(FlinkYarnSessionCli.class, Level.INFO); Runner runner = startWithArgs( new String[] { "-j", flinkUberjar.getAbsolutePath(), "-t", flinkLibFolder.getAbsolutePath(), "-n", "1", "-jm", "768", "-tm", "1024", "--name", "MyCustomName", // test setting a custom name "--detached" }, "Flink JobManager is now running on", RunTypes.YARN_SESSION); checkForLogString("The Flink YARN client has been started in detached mode"); Assert.assertFalse("The runner should detach.", runner.isAlive()); LOG.info("Waiting until two containers are running"); // wait until two containers are running while (getRunningContainers() < 2) { sleep(500); } LOG.info("Two containers are running. Killing the application"); // kill application "externally". try { YarnClient yc = YarnClient.createYarnClient(); yc.init(yarnConfiguration); yc.start(); List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING)); Assert.assertEquals(1, apps.size()); // Only one running ApplicationReport app = apps.get(0); Assert.assertEquals("MyCustomName", app.getName()); ApplicationId id = app.getApplicationId(); yc.killApplication(id); while (yc.getApplications(EnumSet.of(YarnApplicationState.KILLED)).size() == 0) { sleep(500); } } catch (Throwable t) { LOG.warn("Killing failed", t); Assert.fail(); } LOG.info("Finished testDetachedMode()"); }
/** This method returns once the "startedAfterString" has been seen. */ protected Runner startWithArgs(String[] args, String startedAfterString, RunTypes type) { LOG.info("Running with args {}", Arrays.toString(args)); outContent = new ByteArrayOutputStream(); errContent = new ByteArrayOutputStream(); System.setOut(new PrintStream(outContent)); System.setErr(new PrintStream(errContent)); final int START_TIMEOUT_SECONDS = 60; Runner runner = new Runner(args, type, 0); runner.setName("Frontend (CLI/YARN Client) runner thread (startWithArgs())."); runner.start(); for (int second = 0; second < START_TIMEOUT_SECONDS; second++) { sleep(1000); // check output for correct TaskManager startup. if (outContent.toString().contains(startedAfterString) || errContent.toString().contains(startedAfterString)) { LOG.info("Found expected output in redirected streams"); return runner; } // check if thread died if (!runner.isAlive()) { sendOutput(); if (runner.getRunnerError() != null) { throw new RuntimeException("Runner failed with exception.", runner.getRunnerError()); } Assert.fail("Runner thread died before the test was finished."); } } sendOutput(); Assert.fail( "During the timeout period of " + START_TIMEOUT_SECONDS + " seconds the " + "expected string did not show up"); return null; }
/** * The test has been passed once the "terminateAfterString" has been seen. * * @param args Command line arguments for the runner * @param terminateAfterString the runner is searching the stdout and stderr for this string. as * soon as it appears, the test has passed * @param failOnPatterns The runner is searching stdout and stderr for the pattern (regexp) * specified here. If one appears, the test has failed * @param type Set the type of the runner * @param expectedReturnValue Expected return code from the runner. * @param checkLogForTerminateString If true, the runner checks also the log4j logger for the * terminate string */ protected void runWithArgs( String[] args, String terminateAfterString, String[] failOnPatterns, RunTypes type, int expectedReturnValue, boolean checkLogForTerminateString) { LOG.info("Running with args {}", Arrays.toString(args)); outContent = new ByteArrayOutputStream(); errContent = new ByteArrayOutputStream(); System.setOut(new PrintStream(outContent)); System.setErr(new PrintStream(errContent)); // we wait for at most three minutes final int START_TIMEOUT_SECONDS = 180; final long deadline = System.currentTimeMillis() + (START_TIMEOUT_SECONDS * 1000); Runner runner = new Runner(args, type, expectedReturnValue); runner.start(); boolean expectedStringSeen = false; boolean testPassedFromLog4j = false; do { sleep(1000); String outContentString = outContent.toString(); String errContentString = errContent.toString(); if (failOnPatterns != null) { for (String failOnString : failOnPatterns) { Pattern pattern = Pattern.compile(failOnString); if (pattern.matcher(outContentString).find() || pattern.matcher(errContentString).find()) { LOG.warn("Failing test. Output contained illegal string '" + failOnString + "'"); sendOutput(); // stopping runner. runner.sendStop(); Assert.fail("Output contained illegal string '" + failOnString + "'"); } } } // check output for the expected terminateAfterString. if (checkLogForTerminateString) { LoggingEvent matchedEvent = UtilsTest.getEventContainingString(terminateAfterString); if (matchedEvent != null) { testPassedFromLog4j = true; LOG.info("Found expected output in logging event {}", matchedEvent); } } if (outContentString.contains(terminateAfterString) || errContentString.contains(terminateAfterString) || testPassedFromLog4j) { expectedStringSeen = true; LOG.info("Found expected output in redirected streams"); // send "stop" command to command line interface LOG.info("RunWithArgs: request runner to stop"); runner.sendStop(); // wait for the thread to stop try { runner.join(30000); } catch (InterruptedException e) { LOG.warn("Interrupted while stopping runner", e); } LOG.warn("RunWithArgs runner stopped."); } else { // check if thread died if (!runner.isAlive()) { // leave loop: the runner died, so we can not expect new strings to show up. break; } } } while (runner.getRunnerError() == null && !expectedStringSeen && System.currentTimeMillis() < deadline); sendOutput(); if (runner.getRunnerError() != null) { // this lets the test fail. throw new RuntimeException("Runner failed", runner.getRunnerError()); } Assert.assertTrue( "During the timeout period of " + START_TIMEOUT_SECONDS + " seconds the " + "expected string did not show up", expectedStringSeen); LOG.info("Test was successful"); }
private void testDetachedPerJobYarnClusterInternal(String job) { YarnClient yc = YarnClient.createYarnClient(); yc.init(yarnConfiguration); yc.start(); // get temporary folder for writing output of wordcount example File tmpOutFolder = null; try { tmpOutFolder = tmp.newFolder(); } catch (IOException e) { throw new RuntimeException(e); } // get temporary file for reading input data for wordcount example File tmpInFile; try { tmpInFile = tmp.newFile(); FileUtils.writeStringToFile(tmpInFile, WordCountData.TEXT); } catch (IOException e) { throw new RuntimeException(e); } Runner runner = startWithArgs( new String[] { "run", "-m", "yarn-cluster", "-yj", flinkUberjar.getAbsolutePath(), "-yt", flinkLibFolder.getAbsolutePath(), "-yn", "1", "-yjm", "768", "-yD", "yarn.heap-cutoff-ratio=0.5", // test if the cutoff is passed correctly "-ytm", "1024", "-ys", "2", // test requesting slots from YARN. "--yarndetached", job, tmpInFile.getAbsoluteFile().toString(), tmpOutFolder.getAbsoluteFile().toString() }, "Job has been submitted with JobID", RunTypes.CLI_FRONTEND); // it should usually be 2, but on slow machines, the number varies Assert.assertTrue("There should be at most 2 containers running", getRunningContainers() <= 2); // give the runner some time to detach for (int attempt = 0; runner.isAlive() && attempt < 5; attempt++) { try { Thread.sleep(500); } catch (InterruptedException e) { } } Assert.assertFalse("The runner should detach.", runner.isAlive()); LOG.info("CLI Frontend has returned, so the job is running"); // find out the application id and wait until it has finished. try { List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING)); ApplicationId tmpAppId; if (apps.size() == 1) { // Better method to find the right appId. But sometimes the app is shutting down very fast // Only one running tmpAppId = apps.get(0).getApplicationId(); LOG.info("waiting for the job with appId {} to finish", tmpAppId); // wait until the app has finished while (yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING)).size() > 0) { sleep(500); } } else { // get appId by finding the latest finished appid apps = yc.getApplications(); Collections.sort( apps, new Comparator<ApplicationReport>() { @Override public int compare(ApplicationReport o1, ApplicationReport o2) { return o1.getApplicationId().compareTo(o2.getApplicationId()) * -1; } }); tmpAppId = apps.get(0).getApplicationId(); LOG.info( "Selected {} as the last appId from {}", tmpAppId, Arrays.toString(apps.toArray())); } final ApplicationId id = tmpAppId; // now it has finished. // check the output files. File[] listOfOutputFiles = tmpOutFolder.listFiles(); Assert.assertNotNull("Taskmanager output not found", listOfOutputFiles); LOG.info("The job has finished. TaskManager output files found in {}", tmpOutFolder); // read all output files in output folder to one output string String content = ""; for (File f : listOfOutputFiles) { if (f.isFile()) { content += FileUtils.readFileToString(f) + "\n"; } } // String content = FileUtils.readFileToString(taskmanagerOut); // check for some of the wordcount outputs. Assert.assertTrue( "Expected string 'da 5' or '(all,2)' not found in string '" + content + "'", content.contains("da 5") || content.contains("(da,5)") || content.contains("(all,2)")); Assert.assertTrue( "Expected string 'der 29' or '(mind,1)' not found in string'" + content + "'", content.contains("der 29") || content.contains("(der,29)") || content.contains("(mind,1)")); // check if the heap size for the TaskManager was set correctly File jobmanagerLog = YarnTestBase.findFile( "..", new FilenameFilter() { @Override public boolean accept(File dir, String name) { return name.contains("jobmanager.log") && dir.getAbsolutePath().contains(id.toString()); } }); Assert.assertNotNull("Unable to locate JobManager log", jobmanagerLog); content = FileUtils.readFileToString(jobmanagerLog); // TM was started with 1024 but we cut off 50% (NOT THE DEFAULT VALUE) String expected = "Starting TM with command=$JAVA_HOME/bin/java -Xms424m -Xmx424m"; Assert.assertTrue( "Expected string '" + expected + "' not found in JobManager log: '" + jobmanagerLog + "'", content.contains(expected)); expected = " (2/2) (attempt #0) to "; Assert.assertTrue( "Expected string '" + expected + "' not found in JobManager log." + "This string checks that the job has been started with a parallelism of 2. Log contents: '" + jobmanagerLog + "'", content.contains(expected)); // make sure the detached app is really finished. LOG.info("Checking again that app has finished"); ApplicationReport rep; do { sleep(500); rep = yc.getApplicationReport(id); LOG.info("Got report {}", rep); } while (rep.getYarnApplicationState() == YarnApplicationState.RUNNING); } catch (Throwable t) { LOG.warn("Error while detached yarn session was running", t); Assert.fail(t.getMessage()); } }