/**
   * Tests regular operation in detached mode, including command line parameter parsing.
   *
   * <p>A YARN session is started with {@code --detached} and a custom name. Once two containers
   * are running, the test looks up the single running application, checks its name and kills it
   * through a separate {@link YarnClient}, which is stopped again afterwards.
   */
  @Test(timeout = 60000) // timeout after a minute.
  public void testDetachedMode() {
    LOG.info("Starting testDetachedMode()");
    addTestAppender(FlinkYarnSessionCli.class, Level.INFO);
    Runner runner =
        startWithArgs(
            new String[] {
              "-j",
              flinkUberjar.getAbsolutePath(),
              "-t",
              flinkLibFolder.getAbsolutePath(),
              "-n",
              "1",
              "-jm",
              "768",
              "-tm",
              "1024",
              "--name",
              "MyCustomName", // test setting a custom name
              "--detached"
            },
            "Flink JobManager is now running on",
            RunTypes.YARN_SESSION);

    checkForLogString("The Flink YARN client has been started in detached mode");

    Assert.assertFalse("The runner should detach.", runner.isAlive());

    LOG.info("Waiting until two containers are running");
    // wait until two containers are running
    while (getRunningContainers() < 2) {
      sleep(500);
    }
    LOG.info("Two containers are running. Killing the application");

    // kill application "externally".
    YarnClient yc = null;
    try {
      yc = YarnClient.createYarnClient();
      yc.init(yarnConfiguration);
      yc.start();
      List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING));
      Assert.assertEquals(1, apps.size()); // Only one running
      ApplicationReport app = apps.get(0);

      Assert.assertEquals("MyCustomName", app.getName());
      ApplicationId id = app.getApplicationId();
      yc.killApplication(id);

      // poll until YARN reports at least one application in state KILLED
      while (yc.getApplications(EnumSet.of(YarnApplicationState.KILLED)).size() == 0) {
        sleep(500);
      }
    } catch (Throwable t) {
      LOG.warn("Killing failed", t);
      // include the cause so the failure is understandable without digging through the log
      Assert.fail("Killing the application failed: " + t);
    } finally {
      // release the client regardless of the outcome of the checks above
      if (yc != null) {
        yc.stop();
      }
    }

    LOG.info("Finished testDetachedMode()");
  }
  // Example #2 (listing separator; trailing counter: 0)
  /**
   * Launches a {@link Runner} thread with the given arguments and blocks until {@code
   * startedAfterString} has shown up in the redirected stdout or stderr.
   *
   * <p>The captured output is inspected once per second for at most 60 seconds. The test is failed
   * if the runner thread dies before the string was seen, or if the string does not show up within
   * that period.
   *
   * @param args command line arguments handed to the runner
   * @param startedAfterString text whose appearance in stdout or stderr ends the waiting
   * @param type the type of the runner
   * @return the started runner
   */
  protected Runner startWithArgs(String[] args, String startedAfterString, RunTypes type) {
    LOG.info("Running with args {}", Arrays.toString(args));

    // capture stdout and stderr in buffers that can be searched below
    outContent = new ByteArrayOutputStream();
    errContent = new ByteArrayOutputStream();
    System.setOut(new PrintStream(outContent));
    System.setErr(new PrintStream(errContent));

    final int timeoutInSeconds = 60;

    Runner frontendRunner = new Runner(args, type, 0);
    frontendRunner.setName("Frontend (CLI/YARN Client) runner thread (startWithArgs()).");
    frontendRunner.start();

    int elapsedSeconds = 0;
    while (elapsedSeconds < timeoutInSeconds) {
      elapsedSeconds++;
      sleep(1000);

      // look for the marker string in both captured streams
      boolean markerSeen =
          outContent.toString().contains(startedAfterString)
              || errContent.toString().contains(startedAfterString);
      if (markerSeen) {
        LOG.info("Found expected output in redirected streams");
        return frontendRunner;
      }

      if (frontendRunner.isAlive()) {
        // still running: keep polling
        continue;
      }

      // the runner thread is gone although the marker string never appeared
      sendOutput();
      if (frontendRunner.getRunnerError() != null) {
        throw new RuntimeException(
            "Runner failed with exception.", frontendRunner.getRunnerError());
      }
      Assert.fail("Runner thread died before the test was finished.");
    }

    sendOutput();
    Assert.fail(
        "During the timeout period of "
            + timeoutInSeconds
            + " seconds the expected string did not show up");
    return null;
  }
  // Example #3 (listing separator; trailing counter: 0)
  /**
   * Starts a runner and polls its output once per second. The test has been passed once the
   * "terminateAfterString" has been seen.
   *
   * <p>The method waits for at most three minutes. If the loop ends without the expected string
   * and the runner thread is still alive, the runner is asked to stop before the test is failed.
   *
   * @param args Command line arguments for the runner
   * @param terminateAfterString the runner is searching the stdout and stderr for this string. as
   *     soon as it appears, the test has passed
   * @param failOnPatterns The runner is searching stdout and stderr for the pattern (regexp)
   *     specified here. If one appears, the test has failed. May be {@code null}
   * @param type Set the type of the runner
   * @param expectedReturnValue Expected return code from the runner.
   * @param checkLogForTerminateString If true, the runner checks also the log4j logger for the
   *     terminate string
   * @throws RuntimeException if the runner reported an error
   */
  protected void runWithArgs(
      String[] args,
      String terminateAfterString,
      String[] failOnPatterns,
      RunTypes type,
      int expectedReturnValue,
      boolean checkLogForTerminateString) {
    LOG.info("Running with args {}", Arrays.toString(args));

    // Compile the fail patterns once instead of on every polling round. Doing it before the
    // streams are redirected and the runner is started also means that an invalid regexp is
    // reported without leaving a running thread behind.
    final Pattern[] compiledFailPatterns =
        new Pattern[failOnPatterns == null ? 0 : failOnPatterns.length];
    for (int i = 0; i < compiledFailPatterns.length; i++) {
      compiledFailPatterns[i] = Pattern.compile(failOnPatterns[i]);
    }

    outContent = new ByteArrayOutputStream();
    errContent = new ByteArrayOutputStream();
    System.setOut(new PrintStream(outContent));
    System.setErr(new PrintStream(errContent));

    // we wait for at most three minutes
    final int startTimeoutSeconds = 180;
    final long deadline = System.currentTimeMillis() + (startTimeoutSeconds * 1000L);

    Runner runner = new Runner(args, type, expectedReturnValue);
    runner.start();

    boolean expectedStringSeen = false;
    boolean testPassedFromLog4j = false;
    do {
      sleep(1000);
      String outContentString = outContent.toString();
      String errContentString = errContent.toString();
      for (Pattern pattern : compiledFailPatterns) {
        if (pattern.matcher(outContentString).find()
            || pattern.matcher(errContentString).find()) {
          String failOnString = pattern.pattern();
          LOG.warn("Failing test. Output contained illegal string '" + failOnString + "'");
          sendOutput();
          // stopping runner.
          runner.sendStop();
          Assert.fail("Output contained illegal string '" + failOnString + "'");
        }
      }
      // check output for the expected terminateAfterString.
      if (checkLogForTerminateString) {
        LoggingEvent matchedEvent = UtilsTest.getEventContainingString(terminateAfterString);
        if (matchedEvent != null) {
          testPassedFromLog4j = true;
          LOG.info("Found expected output in logging event {}", matchedEvent);
        }
      }

      if (outContentString.contains(terminateAfterString)
          || errContentString.contains(terminateAfterString)
          || testPassedFromLog4j) {
        expectedStringSeen = true;
        LOG.info("Found expected output in redirected streams");
        // send "stop" command to command line interface
        LOG.info("RunWithArgs: request runner to stop");
        runner.sendStop();
        // wait for the thread to stop
        try {
          runner.join(30000);
        } catch (InterruptedException e) {
          LOG.warn("Interrupted while stopping runner", e);
          // keep the interrupt visible to whoever runs this test thread
          Thread.currentThread().interrupt();
        }
        // join() may return because of its timeout, so check what actually happened
        if (runner.isAlive()) {
          LOG.warn("RunWithArgs runner did not stop within 30 seconds.");
        } else {
          LOG.warn("RunWithArgs runner stopped.");
        }
      } else {
        // check if thread died
        if (!runner.isAlive()) {
          // leave loop: the runner died, so we can not expect new strings to show up.
          break;
        }
      }
    } while (runner.getRunnerError() == null
        && !expectedStringSeen
        && System.currentTimeMillis() < deadline);

    // The loop ended without success (deadline passed or runner error) while the runner thread
    // is still alive: ask it to stop so that it does not keep running after this test failed.
    if (!expectedStringSeen && runner.isAlive()) {
      LOG.warn("RunWithArgs: expected string not seen, request runner to stop");
      runner.sendStop();
    }

    sendOutput();

    if (runner.getRunnerError() != null) {
      // this lets the test fail.
      throw new RuntimeException("Runner failed", runner.getRunnerError());
    }
    Assert.assertTrue(
        "During the timeout period of "
            + startTimeoutSeconds
            + " seconds the expected string \""
            + terminateAfterString
            + "\" did not show up",
        expectedStringSeen);

    LOG.info("Test was successful");
  }
  /**
   * Submits the given job to a detached per-job YARN cluster through the CLI frontend and verifies
   * the outcome.
   *
   * <p>After the frontend has returned, the method determines the YARN application id, checks the
   * WordCount results written to a temporary output folder, inspects the JobManager log for the
   * expected TaskManager heap settings and parallelism, and finally waits until the application is
   * no longer reported as RUNNING. The {@link YarnClient} used for the lookups is always stopped.
   *
   * @param job argument placed on the command line right after {@code --yarndetached};
   *     presumably the path of the job jar (confirm with callers)
   */
  private void testDetachedPerJobYarnClusterInternal(String job) {
    YarnClient yc = YarnClient.createYarnClient();
    yc.init(yarnConfiguration);
    yc.start();
    try {
      // temporary folder for the wordcount output and temporary file holding its input data
      final File tmpOutFolder;
      final File tmpInFile;
      try {
        tmpOutFolder = tmp.newFolder();
        tmpInFile = tmp.newFile();
        FileUtils.writeStringToFile(tmpInFile, WordCountData.TEXT);
      } catch (IOException e) {
        throw new RuntimeException(e);
      }

      Runner runner =
          startWithArgs(
              new String[] {
                "run",
                "-m",
                "yarn-cluster",
                "-yj",
                flinkUberjar.getAbsolutePath(),
                "-yt",
                flinkLibFolder.getAbsolutePath(),
                "-yn",
                "1",
                "-yjm",
                "768",
                "-yD",
                "yarn.heap-cutoff-ratio=0.5", // test if the cutoff is passed correctly
                "-ytm",
                "1024",
                "-ys",
                "2", // test requesting slots from YARN.
                "--yarndetached",
                job,
                tmpInFile.getAbsoluteFile().toString(),
                tmpOutFolder.getAbsoluteFile().toString()
              },
              "Job has been submitted with JobID",
              RunTypes.CLI_FRONTEND);

      // it should usually be 2, but on slow machines, the number varies
      Assert.assertTrue(
          "There should be at most 2 containers running", getRunningContainers() <= 2);
      // give the runner some time to detach
      for (int attempt = 0; runner.isAlive() && attempt < 5; attempt++) {
        try {
          Thread.sleep(500);
        } catch (InterruptedException e) {
          // keep the interrupt visible to the caller and stop waiting
          Thread.currentThread().interrupt();
          break;
        }
      }
      Assert.assertFalse("The runner should detach.", runner.isAlive());
      LOG.info("CLI Frontend has returned, so the job is running");

      try {
        // find out the application id and wait until it has finished.
        final ApplicationId id = findDetachedApplicationId(yc);

        // now it has finished.
        verifyWordCountOutput(tmpOutFolder);
        verifyJobManagerLog(id);

        // make sure the detached app is really finished.
        LOG.info("Checking again that app has finished");
        ApplicationReport rep;
        do {
          sleep(500);
          rep = yc.getApplicationReport(id);
          LOG.info("Got report {}", rep);
        } while (rep.getYarnApplicationState() == YarnApplicationState.RUNNING);

      } catch (Throwable t) {
        LOG.warn("Error while detached yarn session was running", t);
        // t.getMessage() may be null (e.g. for a NullPointerException), so report the throwable
        Assert.fail("Error while detached yarn session was running: " + t);
      }
    } finally {
      // release the client regardless of the outcome of the checks above
      yc.stop();
    }
  }

  /**
   * Determines the id of the detached application. If exactly one application is RUNNING, that
   * one is chosen and the method blocks until no application is RUNNING anymore; otherwise the
   * application with the greatest id among all known applications is chosen.
   *
   * <p>Declared with a broad {@code throws} clause because the caller turns every failure into a
   * test failure.
   */
  private ApplicationId findDetachedApplicationId(YarnClient yc) throws Exception {
    List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING));

    if (apps.size() == 1) {
      // Better method to find the right appId. But sometimes the app is shutting down very fast
      // Only one running
      ApplicationId runningAppId = apps.get(0).getApplicationId();

      LOG.info("waiting for the job with appId {} to finish", runningAppId);
      // wait until the app has finished
      while (yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING)).size() > 0) {
        sleep(500);
      }
      return runningAppId;
    }

    // get appId by finding the latest finished appid
    apps = yc.getApplications();
    Assert.assertFalse("No YARN application has been found", apps.isEmpty());
    Collections.sort(
        apps,
        new Comparator<ApplicationReport>() {
          @Override
          public int compare(ApplicationReport o1, ApplicationReport o2) {
            // descending order: swap the operands instead of negating the result
            return o2.getApplicationId().compareTo(o1.getApplicationId());
          }
        });
    ApplicationId latestAppId = apps.get(0).getApplicationId();
    LOG.info(
        "Selected {} as the last appId from {}", latestAppId, Arrays.toString(apps.toArray()));
    return latestAppId;
  }

  /** Reads all regular files in the output folder and checks for some of the wordcount results. */
  private void verifyWordCountOutput(File tmpOutFolder) throws IOException {
    File[] listOfOutputFiles = tmpOutFolder.listFiles();

    Assert.assertNotNull("Taskmanager output not found", listOfOutputFiles);
    LOG.info("The job has finished. TaskManager output files found in {}", tmpOutFolder);

    // read all output files in output folder to one output string
    StringBuilder collected = new StringBuilder();
    for (File f : listOfOutputFiles) {
      if (f.isFile()) {
        collected.append(FileUtils.readFileToString(f)).append("\n");
      }
    }
    String content = collected.toString();

    // check for some of the wordcount outputs.
    Assert.assertTrue(
        "Expected string 'da 5' or '(all,2)' not found in string '" + content + "'",
        content.contains("da 5") || content.contains("(da,5)") || content.contains("(all,2)"));
    Assert.assertTrue(
        "Expected string 'der 29' or '(mind,1)' not found in string '" + content + "'",
        content.contains("der 29")
            || content.contains("(der,29)")
            || content.contains("(mind,1)"));
  }

  /**
   * Locates the JobManager log of the given application and checks the TaskManager heap settings
   * and the parallelism the job was started with.
   *
   * <p>Declared with a broad {@code throws} clause because the caller turns every failure into a
   * test failure.
   */
  private void verifyJobManagerLog(final ApplicationId id) throws Exception {
    // check if the heap size for the TaskManager was set correctly
    File jobmanagerLog =
        YarnTestBase.findFile(
            "..",
            new FilenameFilter() {
              @Override
              public boolean accept(File dir, String name) {
                return name.contains("jobmanager.log")
                    && dir.getAbsolutePath().contains(id.toString());
              }
            });
    Assert.assertNotNull("Unable to locate JobManager log", jobmanagerLog);
    String content = FileUtils.readFileToString(jobmanagerLog);
    // TM was started with 1024 but we cut off 50% (NOT THE DEFAULT VALUE)
    String expected = "Starting TM with command=$JAVA_HOME/bin/java -Xms424m -Xmx424m";
    Assert.assertTrue(
        "Expected string '" + expected + "' not found in JobManager log: '" + jobmanagerLog + "'",
        content.contains(expected));
    expected = " (2/2) (attempt #0) to ";
    Assert.assertTrue(
        "Expected string '"
            + expected
            + "' not found in JobManager log. "
            + "This string checks that the job has been started with a parallelism of 2. Log file: '"
            + jobmanagerLog
            + "'",
        content.contains(expected));
  }