예제 #1
0
 /**
  * Renders an application report as a multi-field, human-readable string.
  *
  * <p>The output contains, in order: name and application type, YARN application state,
  * tracking URL, start time, finish time (only when it is greater than zero), the RPC
  * host/port and finally the diagnostics text (only when it is present and non-empty).
  *
  * @param r the application report to render; must not be {@code null}
  * @param separator the text inserted between consecutive fields
  * @return the formatted report
  */
 public static String appReportToString(ApplicationReport r, String separator) {
   StringBuilder builder = new StringBuilder(512);
   builder.append("application ").append(r.getName()).append("/").append(r.getApplicationType());
   builder.append(separator).append("state: ").append(r.getYarnApplicationState());
   builder.append(separator).append("URL: ").append(r.getTrackingUrl());
   builder
       .append(separator)
       .append("Started ")
       .append(new Date(r.getStartTime()).toLocaleString());
   long finishTime = r.getFinishTime();
   // A non-positive finish time is treated as "not finished yet" and is left out.
   if (finishTime > 0) {
     builder.append(separator).append("Finished ").append(new Date(finishTime).toLocaleString());
   }
   builder
       .append(separator)
       .append("RPC :")
       .append(r.getHost())
       .append(':')
       .append(r.getRpcPort());
   String diagnostics = r.getDiagnostics();
   // Guard against a missing diagnostics string so the call below cannot throw an NPE.
   if (diagnostics != null && !diagnostics.isEmpty()) {
     builder.append(separator).append("Diagnostics :").append(diagnostics);
   }
   return builder.toString();
 }
예제 #2
0
  /**
   * Test regular operation, including command line parameter parsing.
   *
   * <p>Starts a session in detached mode with a custom application name, waits until two
   * containers are running, then kills the application through a {@link YarnClient} and waits
   * until YARN reports an application in state KILLED.
   */
  @Test(timeout = 60000) // timeout after a minute.
  public void testDetachedMode() {
    LOG.info("Starting testDetachedMode()");
    addTestAppender(FlinkYarnSessionCli.class, Level.INFO);
    Runner runner =
        startWithArgs(
            new String[] {
              "-j",
              flinkUberjar.getAbsolutePath(),
              "-t",
              flinkLibFolder.getAbsolutePath(),
              "-n",
              "1",
              "-jm",
              "768",
              "-tm",
              "1024",
              "--name",
              "MyCustomName", // test setting a custom name
              "--detached"
            },
            "Flink JobManager is now running on",
            RunTypes.YARN_SESSION);

    checkForLogString("The Flink YARN client has been started in detached mode");

    Assert.assertFalse("The runner should detach.", runner.isAlive());

    LOG.info("Waiting until two containers are running");
    // wait until two containers are running
    while (getRunningContainers() < 2) {
      sleep(500);
    }
    LOG.info("Two containers are running. Killing the application");

    // kill application "externally".
    YarnClient yc = null;
    try {
      yc = YarnClient.createYarnClient();
      yc.init(yarnConfiguration);
      yc.start();
      List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING));
      Assert.assertEquals(1, apps.size()); // Only one running
      ApplicationReport app = apps.get(0);

      Assert.assertEquals("MyCustomName", app.getName());
      ApplicationId id = app.getApplicationId();
      yc.killApplication(id);

      // poll until YARN lists at least one application as KILLED
      while (yc.getApplications(EnumSet.of(YarnApplicationState.KILLED)).size() == 0) {
        sleep(500);
      }
    } catch (Throwable t) {
      LOG.warn("Killing failed", t);
      Assert.fail("Killing failed: " + t.getMessage());
    } finally {
      // stop the client on every path so the started service is not leaked
      if (yc != null) {
        yc.stop();
      }
    }

    LOG.info("Finished testDetachedMode()");
  }
예제 #3
0
  /**
   * Converts an application report to a single-line string for diagnostics.
   *
   * <p>The result lists the application name and type, its application id, the owning user,
   * the YARN application state and the RPC host and port.
   *
   * @param report the report; may be {@code null}
   * @return the string value, or {@code "Null application report"} when {@code report} is
   *     {@code null}
   */
  public static String reportToString(ApplicationReport report) {
    if (report == null) {
      return "Null application report";
    }

    return "App "
        + report.getName()
        + "/"
        + report.getApplicationType()
        + "# "
        + report.getApplicationId()
        + " user "
        + report.getUser()
        + " is in state "
        + report.getYarnApplicationState()
        // leading space keeps the state and the RPC label from running together
        + " RPC: "
        + report.getHost()
        + ":"
        + report.getRpcPort();
  }
예제 #4
0
  /**
   * Test TaskManager failure.
   *
   * <p>Steps performed by this test:
   *
   * <ol>
   *   <li>start a session and check that two containers are running;
   *   <li>query the web interface reached through the YARN tracking URL (task managers,
   *       configuration including dynamic properties, JobManager host/port, log file);
   *   <li>locate the container whose launch command mentions {@code YarnTaskManagerRunner} and
   *       stop it through its NodeManager;
   *   <li>wait until stderr shows a killed container followed by a newly launched one;
   *   <li>stop the runner, replay the captured stdout/stderr through the logger and assert on
   *       the expected messages.
   * </ol>
   */
  @Test(timeout = 100000) // timeout after 100 seconds
  public void testTaskManagerFailure() {
    LOG.info("Starting testTaskManagerFailure()");
    Runner runner =
        startWithArgs(
            new String[] {
              "-j",
              flinkUberjar.getAbsolutePath(),
              "-t",
              flinkLibFolder.getAbsolutePath(),
              "-n",
              "1",
              "-jm",
              "768",
              "-tm",
              "1024",
              "-nm",
              "customName",
              "-Dfancy-configuration-value=veryFancy",
              "-Dyarn.maximum-failed-containers=3"
            },
            "Number of connected TaskManagers changed to 1. Slots available: 1",
            RunTypes.YARN_SESSION);

    Assert.assertEquals(2, getRunningContainers());

    // ------------------------ Test if JobManager web interface is accessible -------
    YarnClient yc = null;
    try {
      yc = YarnClient.createYarnClient();
      yc.init(yarnConfiguration);
      yc.start();
      List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING));
      Assert.assertEquals(1, apps.size()); // Only one running
      ApplicationReport app = apps.get(0);
      Assert.assertEquals("customName", app.getName());
      // normalize the tracking URL: trailing slash and explicit http scheme
      String url = app.getTrackingUrl();
      if (!url.endsWith("/")) {
        url += "/";
      }
      if (!url.startsWith("http://")) {
        url = "http://" + url;
      }
      LOG.info("Got application URL from YARN {}", url);

      String response = TestBaseUtils.getFromHTTP(url + "taskmanagers/");
      JSONObject parsedTMs = new JSONObject(response);
      JSONArray taskManagers = parsedTMs.getJSONArray("taskmanagers");
      Assert.assertNotNull(taskManagers);
      Assert.assertEquals(1, taskManagers.length());
      Assert.assertEquals(1, taskManagers.getJSONObject(0).getInt("slotsNumber"));

      // get the configuration from webinterface & check if the dynamic properties from YARN show up
      // there.
      String jsonConfig = TestBaseUtils.getFromHTTP(url + "jobmanager/config");
      JSONArray parsed = new JSONArray(jsonConfig);
      Map<String, String> parsedConfig = WebMonitorUtils.fromKeyValueJsonArray(parsed);

      Assert.assertEquals("veryFancy", parsedConfig.get("fancy-configuration-value"));
      Assert.assertEquals("3", parsedConfig.get("yarn.maximum-failed-containers"));

      // -------------- FLINK-1902: check if jobmanager hostname/port are shown in web interface
      // first, get the hostname/port
      String oC = outContent.toString();
      Pattern p = Pattern.compile("Flink JobManager is now running on ([a-zA-Z0-9.-]+):([0-9]+)");
      Matcher matches = p.matcher(oC);
      String hostname = null;
      String port = null;
      // keep iterating so that the LAST match in the captured output wins
      while (matches.find()) {
        hostname = matches.group(1).toLowerCase();
        port = matches.group(2);
      }
      LOG.info("Extracted hostname:port: {} {}", hostname, port);

      Assert.assertEquals(
          "unable to find hostname in " + parsed,
          hostname,
          parsedConfig.get(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY));
      Assert.assertEquals(
          "unable to find port in " + parsed,
          port,
          parsedConfig.get(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY));

      // test logfile access
      String logs = TestBaseUtils.getFromHTTP(url + "jobmanager/log");
      Assert.assertTrue(logs.contains("Starting YARN ApplicationMaster/JobManager (Version"));
    } catch (Throwable e) {
      LOG.warn("Error while running test", e);
      Assert.fail(e.getMessage());
    } finally {
      // stop the client on every path so the started service is not leaked
      if (yc != null) {
        yc.stop();
      }
    }

    // ------------------------ Kill container with TaskManager  -------

    // find container id of taskManager:
    ContainerId taskManagerContainer = null;
    NodeManager nodeManager = null;
    UserGroupInformation remoteUgi = null;
    NMTokenIdentifier nmIdent = null;
    try {
      remoteUgi = UserGroupInformation.getCurrentUser();
    } catch (IOException e) {
      LOG.warn("Unable to get curr user", e);
      Assert.fail("Unable to get current user: " + e.getMessage());
    }
    for (int nmId = 0; nmId < NUM_NODEMANAGERS; nmId++) {
      NodeManager nm = yarnCluster.getNodeManager(nmId);
      ConcurrentMap<ContainerId, Container> containers = nm.getNMContext().getContainers();
      for (Map.Entry<ContainerId, Container> entry : containers.entrySet()) {
        // a container is identified as the TaskManager by its launch command line
        String command = Joiner.on(" ").join(entry.getValue().getLaunchContext().getCommands());
        if (command.contains(YarnTaskManagerRunner.class.getSimpleName())) {
          taskManagerContainer = entry.getKey();
          nodeManager = nm;
          nmIdent =
              new NMTokenIdentifier(taskManagerContainer.getApplicationAttemptId(), null, "", 0);
          // allow myself to do stuff with the container
          // remoteUgi.addCredentials(entry.getValue().getCredentials());
          remoteUgi.addTokenIdentifier(nmIdent);
        }
      }
      sleep(500);
    }

    Assert.assertNotNull("Unable to find container with TaskManager", taskManagerContainer);
    Assert.assertNotNull("Illegal state", nodeManager);

    List<ContainerId> toStop = new LinkedList<ContainerId>();
    toStop.add(taskManagerContainer);
    StopContainersRequest scr = StopContainersRequest.newInstance(toStop);

    try {
      nodeManager.getNMContext().getContainerManager().stopContainers(scr);
    } catch (Throwable e) {
      LOG.warn("Error stopping container", e);
      Assert.fail("Error stopping container: " + e.getMessage());
    }

    // stateful termination check:
    // wait until we saw a container being killed and AFTERWARDS a new one launched
    boolean ok = false;
    do {
      LOG.debug("Waiting for correct order of events. Output: {}", errContent.toString());

      String o = errContent.toString();
      int killedOff = o.indexOf("Container killed by the ApplicationMaster");
      if (killedOff != -1) {
        // only look at output written after the kill message
        o = o.substring(killedOff);
        ok = o.indexOf("Launching container") > 0;
      }
      sleep(1000);
    } while (!ok);

    // send "stop" command to command line interface
    runner.sendStop();
    // wait for the thread to stop
    try {
      runner.join(1000);
    } catch (InterruptedException e) {
      LOG.warn("Interrupted while stopping runner", e);
    }
    LOG.warn("stopped");

    // ----------- Send output to logger
    System.setOut(originalStdout);
    System.setErr(originalStderr);
    String oC = outContent.toString();
    String eC = errContent.toString();
    LOG.info("Sending stdout content through logger: \n\n{}\n\n", oC);
    LOG.info("Sending stderr content through logger: \n\n{}\n\n", eC);

    // ------ Check if everything happened correctly
    Assert.assertTrue(
        "Expect to see failed container", eC.contains("New messages from the YARN cluster"));
    Assert.assertTrue(
        "Expect to see failed container", eC.contains("Container killed by the ApplicationMaster"));
    Assert.assertTrue(
        "Expect to see new container started",
        eC.contains("Launching container") && eC.contains("on host"));

    // cleanup auth for the subsequent tests.
    remoteUgi.getTokenIdentifiers().remove(nmIdent);

    LOG.info("Finished testTaskManagerFailure()");
  }