Exemplo n.º 1
0
  /**
   * Runs a plan whose single operator is a {@code FailingOperator} with {@code RECOVERY_ATTEMPTS}
   * set to 1 and expects both the client result and the final application status to indicate
   * failure.
   */
  @Test
  public void testOperatorFailureRecovery() throws Exception {
    final LogicalPlan plan = new LogicalPlan();
    final String applicationPath = testMeta.toURI().toString();
    plan.setAttribute(LogicalPlan.APPLICATION_PATH, applicationPath);
    final FailingOperator failingOp = plan.addOperator("badOperator", FailingOperator.class);
    plan.getContextAttributes(failingOp).put(OperatorContext.RECOVERY_ATTEMPTS, 1);

    LOG.info("Initializing Client");
    final StramClient stramClient = new StramClient(conf, plan);
    final String javaHome = System.getenv("JAVA_HOME");
    if (StringUtils.isBlank(javaHome)) {
      // JAVA_HOME not set in the yarn mini cluster
      stramClient.javaCmd = "java";
    }

    try {
      stramClient.start();
      stramClient.startApplication();
      stramClient.setClientTimeout(120000);

      final boolean succeeded = stramClient.monitorApplication();
      LOG.info("Client run completed. Result=" + succeeded);
      Assert.assertFalse("should fail", succeeded);

      final ApplicationReport finalReport = stramClient.getApplicationReport();
      final FinalApplicationStatus finalStatus = finalReport.getFinalApplicationStatus();
      Assert.assertEquals("should fail", FinalApplicationStatus.FAILED, finalStatus);
      // unable to get the diagnostics message set by the AM here - see YARN-208
      // diagnostics message does not make it here even with Hadoop 2.2 (but works on standalone
      // cluster)
      // Assert.assertTrue("appReport " + ar, ar.getDiagnostics().contains("badOperator"));
    } finally {
      // always shut the client down, whether or not the assertions passed
      stramClient.stop();
    }
  }
 /**
  * Derives the status of this Tez session from the YARN application report and, while the
  * application is running, from the status reported by the session's application master.
  *
  * @return {@code SHUTDOWN} once the application is finished, failed or killed; the status
  *     reported by the AM while the application is running and the AM can be queried; {@code
  *     INITIALIZING} in every other case, including when no AM proxy is available or the AM
  *     status request fails
  * @throws TezException wrapping any {@code YarnException} raised inside this method
  * @throws IOException propagated unchanged from the calls made inside this method
  */
 public TezSessionStatus getSessionStatus() throws TezException, IOException {
   try {
     ApplicationReport report = yarnClient.getApplicationReport(applicationId);
     switch (report.getYarnApplicationState()) {
       case FINISHED:
       case FAILED:
       case KILLED:
         return TezSessionStatus.SHUTDOWN;
       case NEW:
       case NEW_SAVING:
       case ACCEPTED:
       case SUBMITTED:
         return TezSessionStatus.INITIALIZING;
       case RUNNING:
         try {
           DAGClientAMProtocolBlockingPB amProxy =
               TezClientUtils.getSessionAMProxy(
                   yarnClient, sessionConfig.getYarnConfiguration(), applicationId);
           if (amProxy == null) {
             return TezSessionStatus.INITIALIZING;
           }
           GetAMStatusRequestProto statusRequest = GetAMStatusRequestProto.newBuilder().build();
           GetAMStatusResponseProto statusResponse = amProxy.getAMStatus(null, statusRequest);
           return DagTypeConverters.convertTezSessionStatusFromProto(statusResponse.getStatus());
         } catch (TezException e) {
           LOG.info("Failed to retrieve AM Status via proxy", e);
         } catch (ServiceException e) {
           LOG.info("Failed to retrieve AM Status via proxy", e);
         }
         // a failed status request is only logged; fall through to the default answer below
         break;
       default:
         break;
     }
   } catch (YarnException e) {
     throw new TezException(e);
   }
   return TezSessionStatus.INITIALIZING;
 }
Exemplo n.º 3
0
 /**
  * Reports whether the last application report obtained by the polling runner shows the YARN
  * application in state {@code FAILED} or {@code KILLED}.
  *
  * @return {@code true} if the last known application state is {@code FAILED} or {@code KILLED};
  *     {@code false} otherwise, including when there is no polling runner or no report yet
  * @throws IllegalStateException if the cluster is not connected
  */
 @Override
 public boolean hasFailed() {
   if (!isConnected) {
     // The guard fires when the cluster is NOT connected, so the message must say so.
     throw new IllegalStateException(
         "The cluster has not been connected to the ApplicationMaster.");
   }
   if (pollingRunner == null) {
     LOG.warn(
         "FlinkYarnCluster.hasFailed() has been called on an uninitialized cluster. "
             + "The system might be in an erroneous state");
     // Without a polling runner there is no report to inspect. Falling through would
     // dereference null, so answer the same way as for a missing report.
     return false;
   }
   ApplicationReport lastReport = pollingRunner.getLastReport();
   if (lastReport == null) {
     LOG.warn(
         "FlinkYarnCluster.hasFailed() has been called on a cluster that didn't receive a status so far. "
             + "The system might be in an erroneous state");
     return false;
   }
   YarnApplicationState appState = lastReport.getYarnApplicationState();
   boolean failed =
       (appState == YarnApplicationState.FAILED || appState == YarnApplicationState.KILLED);
   if (failed) {
     LOG.warn("YARN reported application state {}", appState);
     LOG.warn("Diagnostics: {}", lastReport.getDiagnostics());
   }
   return failed;
 }
Exemplo n.º 4
0
  /**
   * Test regular operation, including command line parameter parsing.
   *
   * <p>Starts a detached YARN session with a custom application name, waits until two containers
   * are running, then kills the single running application through a separate {@code YarnClient}
   * and waits until YARN lists an application in state {@code KILLED}.
   */
  @Test(timeout = 60000) // timeout after a minute.
  public void testDetachedMode() {
    LOG.info("Starting testDetachedMode()");
    addTestAppender(FlinkYarnSessionCli.class, Level.INFO);
    Runner runner =
        startWithArgs(
            new String[] {
              "-j",
              flinkUberjar.getAbsolutePath(),
              "-t",
              flinkLibFolder.getAbsolutePath(),
              "-n",
              "1",
              "-jm",
              "768",
              "-tm",
              "1024",
              "--name",
              "MyCustomName", // test setting a custom name
              "--detached"
            },
            "Flink JobManager is now running on",
            RunTypes.YARN_SESSION);

    checkForLogString("The Flink YARN client has been started in detached mode");

    Assert.assertFalse("The runner should detach.", runner.isAlive());

    LOG.info("Waiting until two containers are running");
    // wait until two containers are running
    while (getRunningContainers() < 2) {
      sleep(500);
    }
    LOG.info("Two containers are running. Killing the application");

    // kill application "externally".
    YarnClient yc = null;
    try {
      yc = YarnClient.createYarnClient();
      yc.init(yarnConfiguration);
      yc.start();
      List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING));
      Assert.assertEquals(1, apps.size()); // Only one running
      ApplicationReport app = apps.get(0);

      Assert.assertEquals("MyCustomName", app.getName());
      ApplicationId id = app.getApplicationId();
      yc.killApplication(id);

      while (yc.getApplications(EnumSet.of(YarnApplicationState.KILLED)).size() == 0) {
        sleep(500);
      }
    } catch (Throwable t) {
      LOG.warn("Killing failed", t);
      // carry the cause into the failure so the test report is self-explanatory
      Assert.fail("Killing the application failed: " + t);
    } finally {
      // the client was started above; stop it so it does not outlive this test
      if (yc != null) {
        yc.stop();
      }
    }

    LOG.info("Finished testDetachedMode()");
  }
Exemplo n.º 5
0
 /**
  * Renders an application report as a single string for display.
  *
  * <p>The output contains the application name and type, state, tracking URL, start time, finish
  * time (only when greater than zero), RPC host and port, and the diagnostics text (only when
  * present and non-empty). Consecutive sections are joined with {@code separator}.
  *
  * @param r the report to render; must not be null
  * @param separator text inserted between sections
  * @return the rendered report
  */
 public static String appReportToString(ApplicationReport r, String separator) {
   StringBuilder builder = new StringBuilder(512);
   builder.append("application ").append(r.getName()).append("/").append(r.getApplicationType());
   builder.append(separator).append("state: ").append(r.getYarnApplicationState());
   builder.append(separator).append("URL: ").append(r.getTrackingUrl());
   builder
       .append(separator)
       .append("Started ")
       .append(new Date(r.getStartTime()).toLocaleString());
   long finishTime = r.getFinishTime();
   if (finishTime > 0) {
     builder.append(separator).append("Finished ").append(new Date(finishTime).toLocaleString());
   }
   builder
       .append(separator)
       .append("RPC :")
       .append(r.getHost())
       .append(':')
       .append(r.getRpcPort());
   String diagnostics = r.getDiagnostics();
   // A report may carry no diagnostics at all; treat null like the empty string instead of
   // failing with a NullPointerException.
   if (diagnostics != null && !diagnostics.isEmpty()) {
     builder.append(separator).append("Diagnostics :").append(diagnostics);
   }
   return builder.toString();
 }
Exemplo n.º 6
0
 /**
  * Polls the application report every 200 ms until the application is in state {@code ACCEPTED}.
  *
  * @param rmClient client used to fetch the application report
  * @param appId id of the application to wait for
  * @throws Exception wrapping any failure raised while polling, including the timeout raised
  *     when more than 20 seconds have elapsed without observing {@code ACCEPTED}
  */
 private void waitTillAccepted(YarnClient rmClient, ApplicationId appId) throws Exception {
   try {
     final long begin = System.currentTimeMillis();
     for (ApplicationReport current = rmClient.getApplicationReport(appId);
         current.getYarnApplicationState() != YarnApplicationState.ACCEPTED;
         current = rmClient.getApplicationReport(appId)) {
       final long waitedMillis = System.currentTimeMillis() - begin;
       if (waitedMillis > 20 * 1000) {
         throw new Exception("App '" + appId + "' time out, failed to reach ACCEPTED state");
       }
       Thread.sleep(200);
     }
   } catch (Exception cause) {
     throw new Exception(cause);
   }
 }
Exemplo n.º 7
0
 /**
  * Stubs {@code mockReport.getYarnApplicationState()} to answer {@code NEW}, then {@code
  * NEW_SAVING} twice, and then the given state.
  *
  * @param state the state returned after the three preliminary answers
  */
 public void setYarnApplicationState(YarnApplicationState state) {
   when(mockReport.getYarnApplicationState())
       .thenReturn(YarnApplicationState.NEW)
       .thenReturn(YarnApplicationState.NEW_SAVING)
       .thenReturn(YarnApplicationState.NEW_SAVING)
       .thenReturn(state);
 }
Exemplo n.º 8
0
  /**
   * Returns the diagnostics text of the last application report held by the polling runner.
   *
   * <p>A warning is logged when the cluster is not in a failed state, but the diagnostics are
   * returned regardless.
   *
   * @return the diagnostics of the last report, or {@code null} when there is no polling runner
   *     or no report yet
   * @throws IllegalStateException if the cluster is not connected
   */
  @Override
  public String getDiagnostics() {
    if (!isConnected) {
      // The guard fires when the cluster is NOT connected, so the message must say so.
      throw new IllegalStateException(
          "The cluster has not been connected to the ApplicationMaster.");
    }

    if (pollingRunner == null) {
      // Nothing can be read without a polling runner; answer like the missing-report case
      // instead of dereferencing null below.
      LOG.warn("getDiagnostics() called for cluster without a polling runner");
      return null;
    }

    if (!hasFailed()) {
      LOG.warn("getDiagnostics() called for cluster which is not in failed state");
    }
    ApplicationReport lastReport = pollingRunner.getLastReport();
    if (lastReport == null) {
      LOG.warn("Last report is null");
      return null;
    }
    return lastReport.getDiagnostics();
  }
  /**
   * Checks the job status returned by a {@code ClientServiceDelegate} created with {@code null}
   * as its first argument, first with the delegate from {@code getRMDelegate()} and then with a
   * mocked resource manager delegate that returns a finished application report.
   */
  @Test
  public void testHistoryServerNotConfigured() throws Exception {
    // RM doesn't have app report and job History Server is not configured
    ClientServiceDelegate delegateWithoutReport =
        getClientServiceDelegate(null, getRMDelegate());
    JobStatus statusWithoutReport = delegateWithoutReport.getJobStatus(oldJobId);
    Assert.assertEquals("N/A", statusWithoutReport.getUsername());
    Assert.assertEquals(JobStatus.State.PREP, statusWithoutReport.getState());

    // RM has app report and job History Server is not configured
    ApplicationReport finishedReport = getFinishedApplicationReport();
    ResourceMgrDelegate rmWithReport = mock(ResourceMgrDelegate.class);
    when(rmWithReport.getApplicationReport(jobId.getAppId())).thenReturn(finishedReport);

    ClientServiceDelegate delegateWithReport = getClientServiceDelegate(null, rmWithReport);
    JobStatus statusWithReport = delegateWithReport.getJobStatus(oldJobId);
    Assert.assertEquals(finishedReport.getUser(), statusWithReport.getUsername());
    Assert.assertEquals(JobStatus.State.SUCCEEDED, statusWithReport.getState());
  }
Exemplo n.º 10
0
 /**
  * Verifies that submitting a job fails with an {@code IOException} carrying the diagnostics
  * text when the resource manager delegate reports the application as {@code FAILED}.
  */
 @Test(timeout = 20000)
 public void testJobSubmissionFailure() throws Exception {
   when(resourceMgrDelegate.submitApplication(any(ApplicationSubmissionContext.class)))
       .thenReturn(appId);
   ApplicationReport report = mock(ApplicationReport.class);
   when(report.getApplicationId()).thenReturn(appId);
   when(report.getDiagnostics()).thenReturn(failString);
   when(report.getYarnApplicationState()).thenReturn(YarnApplicationState.FAILED);
   when(resourceMgrDelegate.getApplicationReport(appId)).thenReturn(report);
   Credentials credentials = new Credentials();
   File jobxml = new File(testWorkDir, "job.xml");
   OutputStream out = new FileOutputStream(jobxml);
   try {
     conf.writeXml(out);
   } finally {
     // close the stream even when writing the configuration fails
     out.close();
   }
   boolean submissionFailed = false;
   try {
     yarnRunner.submitJob(jobId, testWorkDir.getAbsolutePath(), credentials);
   } catch (IOException io) {
     submissionFailed = true;
     LOG.info("Logging exception:", io);
     assertTrue(io.getLocalizedMessage().contains(failString));
   }
   // Without this check the test would pass silently if submitJob returned normally.
   assertTrue("submitJob should have failed with an IOException", submissionFailed);
 }
Exemplo n.º 11
0
    /**
     * Filters application reports by application type and application state.
     *
     * <p>A filter that is {@code null} or empty is not applied; a report is kept only when it
     * satisfies every filter that is applied.
     *
     * @param applicationReports the reports to filter
     * @param applicationTypes accepted application types, or {@code null}/empty for any type
     * @param applicationStates accepted application states, or {@code null}/empty for any state
     * @return a new list holding the matching reports in their original order
     */
    private List<ApplicationReport> getApplicationReports(
        List<ApplicationReport> applicationReports,
        Set<String> applicationTypes,
        EnumSet<YarnApplicationState> applicationStates) {

      final boolean filterByType = applicationTypes != null && !applicationTypes.isEmpty();
      final boolean filterByState = applicationStates != null && !applicationStates.isEmpty();

      final List<ApplicationReport> matching = new ArrayList<ApplicationReport>();
      for (ApplicationReport candidate : applicationReports) {
        if (filterByType && !applicationTypes.contains(candidate.getApplicationType())) {
          continue;
        }
        if (filterByState
            && !applicationStates.contains(candidate.getYarnApplicationState())) {
          continue;
        }
        matching.add(candidate);
      }
      return matching;
    }
Exemplo n.º 12
0
  /**
   * Runs before each test: lazily creates and starts the shared {@code yarnClient}, then fails
   * the test if any application known to the cluster is in a state other than {@code FINISHED},
   * {@code KILLED} or {@code FAILED}.
   */
  @Before
  public void checkClusterEmpty() throws IOException, YarnException {
    if (yarnClient == null) {
      yarnClient = YarnClient.createYarnClient();
      yarnClient.init(yarnConfiguration);
      yarnClient.start();
    }

    for (ApplicationReport report : yarnClient.getApplications()) {
      final YarnApplicationState state = report.getYarnApplicationState();
      final boolean terminated =
          state == YarnApplicationState.FINISHED
              || state == YarnApplicationState.KILLED
              || state == YarnApplicationState.FAILED;
      if (terminated) {
        continue;
      }
      Assert.fail(
          "There is at least one application on the cluster is not finished."
              + "App "
              + report.getApplicationId()
              + " is in state "
              + report.getYarnApplicationState());
    }
  }
Exemplo n.º 13
0
  /**
   * Create a new Flink on YARN cluster.
   *
   * <p>Stores the given arguments, derives the Akka timeout from the Flink configuration and
   * fetches one application report to determine the JobManager address.
   *
   * @param yarnClient client used to fetch the initial application report
   * @param appId the YARN application ID
   * @param hadoopConfig the Hadoop configuration kept by this cluster
   * @param flinkConfig the Flink configuration; also the source of the Akka timeout
   * @param sessionFilesDir the session files directory kept by this cluster
   * @param detached Set to true if no actor system or RPC communication with the cluster should be
   *     established
   * @throws IOException declared for the application report lookup
   * @throws YarnException declared for the application report lookup
   */
  public FlinkYarnCluster(
      final YarnClient yarnClient,
      final ApplicationId appId,
      Configuration hadoopConfig,
      org.apache.flink.configuration.Configuration flinkConfig,
      Path sessionFilesDir,
      boolean detached)
      throws IOException, YarnException {
    // timeouts are derived from the Flink configuration
    this.akkaDuration = AkkaUtils.getTimeout(flinkConfig);
    this.akkaTimeout = Timeout.durationToTimeout(akkaDuration);

    // plain copies of the constructor arguments
    this.flinkConfig = flinkConfig;
    this.hadoopConfig = hadoopConfig;
    this.yarnClient = yarnClient;
    this.sessionFilesDir = sessionFilesDir;
    this.detached = detached;
    this.applicationId = appId;
    this.appId = appId;

    // get one application report manually
    intialAppReport = yarnClient.getApplicationReport(appId);
    this.jobManagerAddress =
        new InetSocketAddress(intialAppReport.getHost(), intialAppReport.getRpcPort());
  }
Exemplo n.º 14
0
  /**
   * Convert an application report to a string for diagnostics.
   *
   * <p>The text lists the application name, type, id, user, YARN application state and the RPC
   * host and port.
   *
   * @param report the report; may be null
   * @return the string value, or a fixed placeholder text when {@code report} is null
   */
  public static String reportToString(ApplicationReport report) {
    if (report == null) {
      return "Null application report";
    }

    return "App "
        + report.getName()
        + "/"
        + report.getApplicationType()
        + "# "
        + report.getApplicationId()
        + " user "
        + report.getUser()
        + " is in state "
        + report.getYarnApplicationState()
        // leading space keeps the state and the RPC label from running together
        + " RPC: "
        + report.getHost()
        + ":"
        + report.getRpcPort();
  }
 /**
  * Builds an application report in state {@code RUNNING} with final status {@code UNDEFINED}
  * for a fixed application id, using the given host and port.
  *
  * @param host host placed into the report
  * @param port port placed into the report
  * @return the newly created report
  */
 private ApplicationReport getRunningApplicationReport(String host, int port) {
   final ApplicationId applicationId = ApplicationId.newInstance(1234, 5);
   final ApplicationAttemptId firstAttempt = ApplicationAttemptId.newInstance(applicationId, 0);
   final String user = "user";
   final String queue = "queue";
   final String appName = "appname";
   final String diagnostics = "diagnostics";
   final String url = "url";
   return ApplicationReport.newInstance(
       applicationId,
       firstAttempt,
       user,
       queue,
       appName,
       host,
       port,
       null,
       YarnApplicationState.RUNNING,
       diagnostics,
       url,
       0,
       0,
       FinalApplicationStatus.UNDEFINED,
       null,
       "N/A",
       0.0f,
       YarnConfiguration.DEFAULT_APPLICATION_TYPE,
       null);
 }
 /**
  * Builds an application report in state {@code FINISHED} with final status {@code SUCCEEDED}
  * for a fixed application id, host {@code "host"} and port 124.
  *
  * @return the newly created report
  */
 private ApplicationReport getFinishedApplicationReport() {
   final ApplicationId applicationId = ApplicationId.newInstance(1234, 5);
   final ApplicationAttemptId firstAttempt = ApplicationAttemptId.newInstance(applicationId, 0);
   final String user = "user";
   final String queue = "queue";
   final String appName = "appname";
   final String host = "host";
   final String diagnostics = "diagnostics";
   final String url = "url";
   return ApplicationReport.newInstance(
       applicationId,
       firstAttempt,
       user,
       queue,
       appName,
       host,
       124,
       null,
       YarnApplicationState.FINISHED,
       diagnostics,
       url,
       0,
       0,
       FinalApplicationStatus.SUCCEEDED,
       null,
       "N/A",
       0.0f,
       YarnConfiguration.DEFAULT_APPLICATION_TYPE,
       null);
 }
Exemplo n.º 17
0
  /**
   * Submits the application described by {@code appContext} and polls its report until it has
   * left the states {@code NEW}, {@code NEW_SAVING} and {@code SUBMITTED}.
   *
   * <p>If the application report cannot be found while polling, the same request is submitted
   * again. Every tenth poll a progress message is logged.
   *
   * @param appContext the submission context; must carry an application id
   * @return the application id taken from {@code appContext}
   * @throws ApplicationIdNotProvidedException if {@code appContext} has no application id
   * @throws YarnException if the application ends up {@code FAILED} or {@code KILLED}, if the
   *     polling timeout is enforced and exceeded, or if the calling thread is interrupted while
   *     waiting between polls
   * @throws IOException declared for the underlying client calls
   */
  @Override
  public ApplicationId submitApplication(ApplicationSubmissionContext appContext)
      throws YarnException, IOException {
    ApplicationId applicationId = appContext.getApplicationId();
    if (applicationId == null) {
      throw new ApplicationIdNotProvidedException(
          "ApplicationId is not provided in ApplicationSubmissionContext");
    }
    SubmitApplicationRequest request = Records.newRecord(SubmitApplicationRequest.class);
    request.setApplicationSubmissionContext(appContext);

    // Automatically add the timeline DT into the CLC
    // Only when the security and the timeline service are both enabled
    if (isSecurityEnabled() && timelineServiceEnabled) {
      addTimelineDelegationToken(appContext.getAMContainerSpec());
    }

    // TODO: YARN-1763:Handle RM failovers during the submitApplication call.
    rmClient.submitApplication(request);

    int pollCount = 0;
    long startTime = System.currentTimeMillis();
    EnumSet<YarnApplicationState> waitingStates =
        EnumSet.of(
            YarnApplicationState.NEW,
            YarnApplicationState.NEW_SAVING,
            YarnApplicationState.SUBMITTED);
    EnumSet<YarnApplicationState> failToSubmitStates =
        EnumSet.of(YarnApplicationState.FAILED, YarnApplicationState.KILLED);
    while (true) {
      try {
        ApplicationReport appReport = getApplicationReport(applicationId);
        YarnApplicationState state = appReport.getYarnApplicationState();
        if (!waitingStates.contains(state)) {
          if (failToSubmitStates.contains(state)) {
            throw new YarnException(
                "Failed to submit " + applicationId + " to YARN : " + appReport.getDiagnostics());
          }
          LOG.info("Submitted application " + applicationId);
          break;
        }

        long elapsedMillis = System.currentTimeMillis() - startTime;
        if (enforceAsyncAPITimeout() && elapsedMillis >= asyncApiPollTimeoutMillis) {
          throw new YarnException(
              "Timed out while waiting for application "
                  + applicationId
                  + " to be submitted successfully");
        }

        // Notify the client through the log every 10 poll, in case the client
        // is blocked here too long.
        if (++pollCount % 10 == 0) {
          LOG.info(
              "Application submission is not finished, "
                  + "submitted application "
                  + applicationId
                  + " is still in "
                  + state);
        }
        try {
          Thread.sleep(submitPollIntervalMillis);
        } catch (InterruptedException ie) {
          String msg =
              "Interrupted while waiting for application "
                  + applicationId
                  + " to be successfully submitted.";
          LOG.error(msg);
          // Do not keep polling after an interrupt: restore the flag for the caller and abort
          // instead of silently swallowing the request to stop.
          Thread.currentThread().interrupt();
          throw new YarnException(msg, ie);
        }
      } catch (ApplicationNotFoundException ex) {
        // FailOver or RM restart happens before RMStateStore saves
        // ApplicationState
        LOG.info(
            "Re-submit application "
                + applicationId
                + " with the "
                + "same ApplicationSubmissionContext");
        rmClient.submitApplication(request);
      }
    }

    return applicationId;
  }
Exemplo n.º 18
0
 /**
  * Tells whether an application has reached a terminal YARN state.
  *
  * <p>The terminal states are named explicitly rather than compared by {@code ordinal()}, so the
  * result does not depend on the declaration order of the enum constants.
  *
  * @param report the application report; may be null
  * @return {@code true} if {@code report} is null or its state is {@code FINISHED}, {@code
  *     FAILED} or {@code KILLED}
  */
 public static boolean hasAppFinished(ApplicationReport report) {
   if (report == null) {
     return true;
   }
   YarnApplicationState state = report.getYarnApplicationState();
   return state == YarnApplicationState.FINISHED
       || state == YarnApplicationState.FAILED
       || state == YarnApplicationState.KILLED;
 }
Exemplo n.º 19
0
  /**
   * Runs a job through the CLI frontend in detached YARN per-job mode, waits until no application
   * is reported as running, and then checks the job output files and the JobManager log.
   *
   * <p>Any {@code Throwable} raised during the verification phase is logged and turned into a
   * test failure.
   *
   * @param job passed to the CLI directly after {@code --yarndetached}, followed by the input
   *     file and the output folder; presumably the path of the job jar - confirm against callers
   */
  private void testDetachedPerJobYarnClusterInternal(String job) {
    YarnClient yc = YarnClient.createYarnClient();
    yc.init(yarnConfiguration);
    yc.start();

    // get temporary folder for writing output of wordcount example
    File tmpOutFolder = null;
    try {
      tmpOutFolder = tmp.newFolder();
    } catch (IOException e) {
      throw new RuntimeException(e);
    }

    // get temporary file for reading input data for wordcount example
    File tmpInFile;
    try {
      tmpInFile = tmp.newFile();
      FileUtils.writeStringToFile(tmpInFile, WordCountData.TEXT);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }

    // launch the CLI frontend with the per-job YARN arguments
    Runner runner =
        startWithArgs(
            new String[] {
              "run",
              "-m",
              "yarn-cluster",
              "-yj",
              flinkUberjar.getAbsolutePath(),
              "-yt",
              flinkLibFolder.getAbsolutePath(),
              "-yn",
              "1",
              "-yjm",
              "768",
              "-yD",
              "yarn.heap-cutoff-ratio=0.5", // test if the cutoff is passed correctly
              "-ytm",
              "1024",
              "-ys",
              "2", // test requesting slots from YARN.
              "--yarndetached",
              job,
              tmpInFile.getAbsoluteFile().toString(),
              tmpOutFolder.getAbsoluteFile().toString()
            },
            "Job has been submitted with JobID",
            RunTypes.CLI_FRONTEND);

    // it should usually be 2, but on slow machines, the number varies
    Assert.assertTrue("There should be at most 2 containers running", getRunningContainers() <= 2);
    // give the runner some time to detach
    for (int attempt = 0; runner.isAlive() && attempt < 5; attempt++) {
      try {
        Thread.sleep(500);
      } catch (InterruptedException e) {
        // interruption is ignored here; the loop condition is simply evaluated again
      }
    }
    Assert.assertFalse("The runner should detach.", runner.isAlive());
    LOG.info("CLI Frontend has returned, so the job is running");

    // find out the application id and wait until it has finished.
    try {
      List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING));

      ApplicationId tmpAppId;
      if (apps.size() == 1) {
        // Better method to find the right appId. But sometimes the app is shutting down very fast
        // Only one running
        tmpAppId = apps.get(0).getApplicationId();

        LOG.info("waiting for the job with appId {} to finish", tmpAppId);
        // wait until the app has finished
        while (yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING)).size() > 0) {
          sleep(500);
        }
      } else {
        // get appId by finding the latest finished appid
        apps = yc.getApplications();
        // sort in descending application id order so that index 0 holds the highest id
        Collections.sort(
            apps,
            new Comparator<ApplicationReport>() {
              @Override
              public int compare(ApplicationReport o1, ApplicationReport o2) {
                return o1.getApplicationId().compareTo(o2.getApplicationId()) * -1;
              }
            });
        tmpAppId = apps.get(0).getApplicationId();
        LOG.info(
            "Selected {} as the last appId from {}", tmpAppId, Arrays.toString(apps.toArray()));
      }
      // final copy so the anonymous FilenameFilter below can capture the id
      final ApplicationId id = tmpAppId;

      // now it has finished.
      // check the output files.
      File[] listOfOutputFiles = tmpOutFolder.listFiles();

      Assert.assertNotNull("Taskmanager output not found", listOfOutputFiles);
      LOG.info("The job has finished. TaskManager output files found in {}", tmpOutFolder);

      // read all output files in output folder to one output string
      String content = "";
      for (File f : listOfOutputFiles) {
        if (f.isFile()) {
          content += FileUtils.readFileToString(f) + "\n";
        }
      }
      // String content = FileUtils.readFileToString(taskmanagerOut);
      // check for some of the wordcount outputs.
      Assert.assertTrue(
          "Expected string 'da 5' or '(all,2)' not found in string '" + content + "'",
          content.contains("da 5") || content.contains("(da,5)") || content.contains("(all,2)"));
      Assert.assertTrue(
          "Expected string 'der 29' or '(mind,1)' not found in string'" + content + "'",
          content.contains("der 29")
              || content.contains("(der,29)")
              || content.contains("(mind,1)"));

      // check if the heap size for the TaskManager was set correctly
      // the log is searched for below ".." and must live in a directory whose path contains the id
      File jobmanagerLog =
          YarnTestBase.findFile(
              "..",
              new FilenameFilter() {
                @Override
                public boolean accept(File dir, String name) {
                  return name.contains("jobmanager.log")
                      && dir.getAbsolutePath().contains(id.toString());
                }
              });
      Assert.assertNotNull("Unable to locate JobManager log", jobmanagerLog);
      content = FileUtils.readFileToString(jobmanagerLog);
      // TM was started with 1024 but we cut off 50% (NOT THE DEFAULT VALUE)
      String expected = "Starting TM with command=$JAVA_HOME/bin/java -Xms424m -Xmx424m";
      Assert.assertTrue(
          "Expected string '" + expected + "' not found in JobManager log: '" + jobmanagerLog + "'",
          content.contains(expected));
      expected = " (2/2) (attempt #0) to ";
      Assert.assertTrue(
          "Expected string '"
              + expected
              + "' not found in JobManager log."
              + "This string checks that the job has been started with a parallelism of 2. Log contents: '"
              + jobmanagerLog
              + "'",
          content.contains(expected));

      // make sure the detached app is really finished.
      LOG.info("Checking again that app has finished");
      ApplicationReport rep;
      do {
        sleep(500);
        rep = yc.getApplicationReport(id);
        LOG.info("Got report {}", rep);
      } while (rep.getYarnApplicationState() == YarnApplicationState.RUNNING);

    } catch (Throwable t) {
      LOG.warn("Error while detached yarn session was running", t);
      Assert.fail(t.getMessage());
    }
  }
Exemplo n.º 20
0
  /**
   * Test TaskManager failure.
   *
   * <p>Starts a YARN session, checks the JobManager web interface (TaskManager list, configuration
   * and log access), stops the container whose launch command mentions {@code
   * YarnTaskManagerRunner}, and waits until the captured stderr shows a container being killed
   * and afterwards a new one being launched. Finally the session is stopped and the captured
   * output is checked.
   */
  @Test(timeout = 100000) // timeout after 100 seconds
  public void testTaskManagerFailure() {
    LOG.info("Starting testTaskManagerFailure()");
    Runner runner =
        startWithArgs(
            new String[] {
              "-j",
              flinkUberjar.getAbsolutePath(),
              "-t",
              flinkLibFolder.getAbsolutePath(),
              "-n",
              "1",
              "-jm",
              "768",
              "-tm",
              "1024",
              "-nm",
              "customName",
              "-Dfancy-configuration-value=veryFancy",
              "-Dyarn.maximum-failed-containers=3"
            },
            "Number of connected TaskManagers changed to 1. Slots available: 1",
            RunTypes.YARN_SESSION);

    Assert.assertEquals(2, getRunningContainers());

    // ------------------------ Test if JobManager web interface is accessible -------
    try {
      YarnClient yc = YarnClient.createYarnClient();
      yc.init(yarnConfiguration);
      yc.start();
      List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING));
      Assert.assertEquals(1, apps.size()); // Only one running
      ApplicationReport app = apps.get(0);
      Assert.assertEquals("customName", app.getName());
      // normalize the tracking URL: ensure a trailing slash and an http:// prefix
      String url = app.getTrackingUrl();
      if (!url.endsWith("/")) {
        url += "/";
      }
      if (!url.startsWith("http://")) {
        url = "http://" + url;
      }
      LOG.info("Got application URL from YARN {}", url);

      String response = TestBaseUtils.getFromHTTP(url + "taskmanagers/");
      JSONObject parsedTMs = new JSONObject(response);
      JSONArray taskManagers = parsedTMs.getJSONArray("taskmanagers");
      Assert.assertNotNull(taskManagers);
      Assert.assertEquals(1, taskManagers.length());
      Assert.assertEquals(1, taskManagers.getJSONObject(0).getInt("slotsNumber"));

      // get the configuration from webinterface & check if the dynamic properties from YARN show up
      // there.
      String jsonConfig = TestBaseUtils.getFromHTTP(url + "jobmanager/config");
      JSONArray parsed = new JSONArray(jsonConfig);
      Map<String, String> parsedConfig = WebMonitorUtils.fromKeyValueJsonArray(parsed);

      Assert.assertEquals("veryFancy", parsedConfig.get("fancy-configuration-value"));
      Assert.assertEquals("3", parsedConfig.get("yarn.maximum-failed-containers"));

      // -------------- FLINK-1902: check if jobmanager hostname/port are shown in web interface
      // first, get the hostname/port
      String oC = outContent.toString();
      Pattern p = Pattern.compile("Flink JobManager is now running on ([a-zA-Z0-9.-]+):([0-9]+)");
      Matcher matches = p.matcher(oC);
      String hostname = null;
      String port = null;
      // iterate over all matches so that the values of the last match are kept
      while (matches.find()) {
        hostname = matches.group(1).toLowerCase();
        port = matches.group(2);
      }
      LOG.info("Extracted hostname:port: {} {}", hostname, port);

      Assert.assertEquals(
          "unable to find hostname in " + parsed,
          hostname,
          parsedConfig.get(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY));
      Assert.assertEquals(
          "unable to find port in " + parsed,
          port,
          parsedConfig.get(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY));

      // test logfile access
      String logs = TestBaseUtils.getFromHTTP(url + "jobmanager/log");
      Assert.assertTrue(logs.contains("Starting YARN ApplicationMaster/JobManager (Version"));
    } catch (Throwable e) {
      LOG.warn("Error while running test", e);
      Assert.fail(e.getMessage());
    }

    // ------------------------ Kill container with TaskManager  -------

    // find container id of taskManager:
    ContainerId taskManagerContainer = null;
    NodeManager nodeManager = null;
    UserGroupInformation remoteUgi = null;
    NMTokenIdentifier nmIdent = null;
    try {
      remoteUgi = UserGroupInformation.getCurrentUser();
    } catch (IOException e) {
      LOG.warn("Unable to get curr user", e);
      Assert.fail();
    }
    // scan the containers of every NodeManager for one whose launch command mentions
    // YarnTaskManagerRunner; the last match found is the one that gets stopped below
    for (int nmId = 0; nmId < NUM_NODEMANAGERS; nmId++) {
      NodeManager nm = yarnCluster.getNodeManager(nmId);
      ConcurrentMap<ContainerId, Container> containers = nm.getNMContext().getContainers();
      for (Map.Entry<ContainerId, Container> entry : containers.entrySet()) {
        String command = Joiner.on(" ").join(entry.getValue().getLaunchContext().getCommands());
        if (command.contains(YarnTaskManagerRunner.class.getSimpleName())) {
          taskManagerContainer = entry.getKey();
          nodeManager = nm;
          nmIdent =
              new NMTokenIdentifier(taskManagerContainer.getApplicationAttemptId(), null, "", 0);
          // allow myself to do stuff with the container
          // remoteUgi.addCredentials(entry.getValue().getCredentials());
          remoteUgi.addTokenIdentifier(nmIdent);
        }
      }
      sleep(500);
    }

    Assert.assertNotNull("Unable to find container with TaskManager", taskManagerContainer);
    Assert.assertNotNull("Illegal state", nodeManager);

    List<ContainerId> toStop = new LinkedList<ContainerId>();
    toStop.add(taskManagerContainer);
    StopContainersRequest scr = StopContainersRequest.newInstance(toStop);

    try {
      nodeManager.getNMContext().getContainerManager().stopContainers(scr);
    } catch (Throwable e) {
      LOG.warn("Error stopping container", e);
      Assert.fail("Error stopping container: " + e.getMessage());
    }

    // stateful termination check:
    // wait until we saw a container being killed and AFTERWARDS a new one launched
    boolean ok = false;
    do {
      LOG.debug("Waiting for correct order of events. Output: {}", errContent.toString());

      String o = errContent.toString();
      int killedOff = o.indexOf("Container killed by the ApplicationMaster");
      if (killedOff != -1) {
        // only look at the output that follows the kill message
        o = o.substring(killedOff);
        ok = o.indexOf("Launching container") > 0;
      }
      sleep(1000);
    } while (!ok);

    // send "stop" command to command line interface
    runner.sendStop();
    // wait for the thread to stop
    try {
      runner.join(1000);
    } catch (InterruptedException e) {
      LOG.warn("Interrupted while stopping runner", e);
    }
    LOG.warn("stopped");

    // ----------- Send output to logger
    System.setOut(originalStdout);
    System.setErr(originalStderr);
    String oC = outContent.toString();
    String eC = errContent.toString();
    LOG.info("Sending stdout content through logger: \n\n{}\n\n", oC);
    LOG.info("Sending stderr content through logger: \n\n{}\n\n", eC);

    // ------ Check if everything happened correctly
    Assert.assertTrue(
        "Expect to see failed container", eC.contains("New messages from the YARN cluster"));
    Assert.assertTrue(
        "Expect to see failed container", eC.contains("Container killed by the ApplicationMaster"));
    Assert.assertTrue(
        "Expect to see new container started",
        eC.contains("Launching container") && eC.contains("on host"));

    // cleanup auth for the subsequent tests.
    remoteUgi.getTokenIdentifiers().remove(nmIdent);

    LOG.info("Finished testTaskManagerFailure()");
  }
Exemplo n.º 21
0
  /**
   * Submits a YARN application whose application master is launched from the given jar, then
   * polls the application report until the application is FINISHED, KILLED or FAILED.
   *
   * <p>Expected arguments, in order: {@code args[0]} is a command string and {@code args[1]} an
   * integer, both appended to the application master's command line; {@code args[2]} is the path
   * of the jar registered as the application master's local resource.
   *
   * @param args command, integer and jar path, in that order
   * @throws IllegalArgumentException if fewer than three arguments are supplied
   * @throws NumberFormatException if {@code args[1]} is not a valid integer
   * @throws Exception if creating, submitting or polling the application fails
   */
  public void run(String[] args) throws Exception {
    // Fail fast with a readable message instead of an ArrayIndexOutOfBoundsException.
    if (args == null || args.length < 3) {
      throw new IllegalArgumentException(
          "Expected 3 arguments (<command> <n> <jarPath>) but got "
              + (args == null ? 0 : args.length));
    }
    final String command = args[0];
    final int n = Integer.parseInt(args[1]);
    final Path jarPath = new Path(args[2]);

    // Create yarnClient
    YarnConfiguration conf = new YarnConfiguration();
    YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(conf);
    yarnClient.start();
    try {
      // Create application via yarnClient
      YarnClientApplication app = yarnClient.createApplication();

      // Set up the container launch context for the application master;
      // stdout/stderr of the AM process are redirected into the container log directory.
      ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);
      amContainer.setCommands(
          Collections.singletonList(
              "$JAVA_HOME/bin/java"
                  + " -Xmx256M"
                  + " com.hortonworks.simpleyarnapp.ApplicationMaster"
                  + " "
                  + command
                  + " "
                  + n
                  + " 1>"
                  + ApplicationConstants.LOG_DIR_EXPANSION_VAR
                  + "/stdout"
                  + " 2>"
                  + ApplicationConstants.LOG_DIR_EXPANSION_VAR
                  + "/stderr"));

      // Setup jar for ApplicationMaster
      LocalResource appMasterJar = Records.newRecord(LocalResource.class);
      setupAppMasterJar(jarPath, appMasterJar);
      System.out.println("Jar name is " + jarPath.getName());
      amContainer.setLocalResources(Collections.singletonMap(jarPath.getName(), appMasterJar));

      // Setup CLASSPATH for ApplicationMaster
      Map<String, String> appMasterEnv = new HashMap<String, String>();
      setupAppMasterEnv(appMasterEnv);
      amContainer.setEnvironment(appMasterEnv);

      // Set up resource type requirements for ApplicationMaster
      Resource capability = Records.newRecord(Resource.class);
      capability.setMemory(256);
      capability.setVirtualCores(1);

      // Finally, set-up ApplicationSubmissionContext for the application
      ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
      appContext.setApplicationName("apache-yarn-example"); // application name
      appContext.setAMContainerSpec(amContainer);
      appContext.setResource(capability);
      appContext.setQueue("default"); // queue

      // Submit application
      ApplicationId appId = appContext.getApplicationId();
      System.out.println("Submitting application " + appId);
      yarnClient.submitApplication(appContext);

      // Poll the report until the application reaches a terminal state.
      ApplicationReport appReport = yarnClient.getApplicationReport(appId);
      YarnApplicationState appState = appReport.getYarnApplicationState();
      while (appState != YarnApplicationState.FINISHED
          && appState != YarnApplicationState.KILLED
          && appState != YarnApplicationState.FAILED) {
        System.out.println("App Status = " + appState);
        Thread.sleep(100);
        appReport = yarnClient.getApplicationReport(appId);
        appState = appReport.getYarnApplicationState();
      }

      System.out.println(
          "Application "
              + appId
              + " finished with"
              + " state "
              + appState
              + " at "
              + appReport.getFinishTime());
    } finally {
      // Release the client even when submission or polling throws.
      yarnClient.stop();
    }
  }
Exemplo n.º 22
0
 /**
  * Creates a new {@link ApplicationReport} record and copies every argument into it through the
  * corresponding setter.
  *
  * @param applicationId value passed to {@code setApplicationId}
  * @param applicationAttemptId value passed to {@code setCurrentApplicationAttemptId}
  * @param user value passed to {@code setUser}
  * @param queue value passed to {@code setQueue}
  * @param name value passed to {@code setName}
  * @param host value passed to {@code setHost}
  * @param rpcPort value passed to {@code setRpcPort}
  * @param clientToAMToken value passed to {@code setClientToAMToken}
  * @param state value passed to {@code setYarnApplicationState}
  * @param diagnostics value passed to {@code setDiagnostics}
  * @param url value passed to {@code setTrackingUrl}
  * @param startTime value passed to {@code setStartTime}
  * @param finishTime value passed to {@code setFinishTime}
  * @param finalStatus value passed to {@code setFinalApplicationStatus}
  * @param appResources value passed to {@code setApplicationResourceUsageReport}
  * @param origTrackingUrl value passed to {@code setOriginalTrackingUrl}
  * @param progress value passed to {@code setProgress}
  * @param applicationType value passed to {@code setApplicationType}
  * @param amRmToken value passed to {@code setAMRMToken}
  * @return the populated report
  */
 @Private
 @Unstable
 public static ApplicationReport newInstance(
     ApplicationId applicationId,
     ApplicationAttemptId applicationAttemptId,
     String user,
     String queue,
     String name,
     String host,
     int rpcPort,
     Token clientToAMToken,
     YarnApplicationState state,
     String diagnostics,
     String url,
     long startTime,
     long finishTime,
     FinalApplicationStatus finalStatus,
     ApplicationResourceUsageReport appResources,
     String origTrackingUrl,
     float progress,
     String applicationType,
     Token amRmToken) {
   final ApplicationReport appReport = Records.newRecord(ApplicationReport.class);

   // Identity and ownership
   appReport.setApplicationId(applicationId);
   appReport.setCurrentApplicationAttemptId(applicationAttemptId);
   appReport.setName(name);
   appReport.setApplicationType(applicationType);
   appReport.setUser(user);
   appReport.setQueue(queue);

   // Endpoint and tokens
   appReport.setHost(host);
   appReport.setRpcPort(rpcPort);
   appReport.setClientToAMToken(clientToAMToken);
   appReport.setAMRMToken(amRmToken);

   // State, progress and diagnostics
   appReport.setYarnApplicationState(state);
   appReport.setFinalApplicationStatus(finalStatus);
   appReport.setProgress(progress);
   appReport.setDiagnostics(diagnostics);

   // Tracking URLs
   appReport.setTrackingUrl(url);
   appReport.setOriginalTrackingUrl(origTrackingUrl);

   // Timing and resource usage
   appReport.setStartTime(startTime);
   appReport.setFinishTime(finishTime);
   appReport.setApplicationResourceUsageReport(appResources);

   return appReport;
 }
Exemplo n.º 23
0
  /**
   * Verify the web service deployment and lifecycle functionality.
   *
   * <p>Starts an application built from an inline input operator feeding a single module, then
   * queries the web service behind the application's tracking URL: first the info resource
   * (expected to be a one-element JSON object whose {@code id} is the application id), then the
   * physical plan operators resource (expected to answer with JSON). The application is killed
   * and the client stopped afterwards, regardless of the outcome.
   *
   * @throws Exception if starting the application or talking to the web service fails
   */
  @Ignore // disabled due to web service init delay issue
  @Test
  public void testWebService() throws Exception {

    // single container topology of inline input and module
    Properties props = new Properties();
    props.put(
        StreamingApplication.DT_PREFIX + "stream.input.classname",
        TestGeneratorInputOperator.class.getName());
    props.put(StreamingApplication.DT_PREFIX + "stream.input.outputNode", "module1");
    props.put(
        StreamingApplication.DT_PREFIX + "module.module1.classname",
        GenericTestOperator.class.getName());

    LOG.info("Initializing Client");
    LogicalPlanConfiguration tb = new LogicalPlanConfiguration(new Configuration(false));
    tb.addFromProperties(props, null);

    StramClient client = new StramClient(new Configuration(yarnCluster.getConfig()), createDAG(tb));
    if (StringUtils.isBlank(System.getenv("JAVA_HOME"))) {
      client.javaCmd = "java"; // JAVA_HOME not set in the yarn mini cluster
    }
    try {
      client.start();
      client.startApplication();

      // attempt web service connection
      ApplicationReport appReport = client.getApplicationReport();
      Thread.sleep(5000); // delay to give web service time to fully initialize
      Client wsClient = Client.create();
      wsClient.setFollowRedirects(true);

      // 1) application info resource
      WebResource r =
          wsClient
              .resource("http://" + appReport.getTrackingUrl())
              .path(StramWebServices.PATH)
              .path(StramWebServices.PATH_INFO);
      LOG.info("Requesting: " + r.getURI());
      ClientResponse response = r.accept(MediaType.APPLICATION_JSON).get(ClientResponse.class);
      assertEquals(MediaType.APPLICATION_JSON_TYPE, response.getType());
      JSONObject json = response.getEntity(JSONObject.class);
      LOG.info("Got response: " + json.toString());
      assertEquals("incorrect number of elements", 1, json.length());
      assertEquals("appId", appReport.getApplicationId().toString(), json.get("id"));

      // 2) physical plan operators resource; only the content type is checked
      r =
          wsClient
              .resource("http://" + appReport.getTrackingUrl())
              .path(StramWebServices.PATH)
              .path(StramWebServices.PATH_PHYSICAL_PLAN_OPERATORS);
      LOG.info("Requesting: " + r.getURI());
      response = r.accept(MediaType.APPLICATION_JSON).get(ClientResponse.class);
      assertEquals(MediaType.APPLICATION_JSON_TYPE, response.getType());
      json = response.getEntity(JSONObject.class);
      LOG.info("Got response: " + json.toString());

    } finally {
      // Always tear the application down, even when a request or assertion above fails.
      try {
        client.killApplication();
      } finally {
        // stop() must still run when killing the application throws, otherwise the client leaks.
        client.stop();
      }
    }
  }
Exemplo n.º 24
0
    /**
     * Builds the fixed set of four application reports used as test data.
     *
     * <p>The reports use application ids {@code (1234, 5)} through {@code (1234, 8)} and are
     * returned in that order with states RUNNING, FINISHED, RUNNING and FAILED. As a side effect,
     * two attempt reports for the first application are stored in {@code attempts}, and two
     * container reports for its first attempt are stored in {@code containers}.
     *
     * @return the four application reports, in creation order
     */
    private List<ApplicationReport> createAppReports() {
      List<ApplicationReport> reports = new ArrayList<ApplicationReport>();

      // ---- application 1: RUNNING, type "YARN"; the only one with attempts and containers ----
      ApplicationId firstAppId = ApplicationId.newInstance(1234, 5);
      ApplicationReport firstApp =
          ApplicationReport.newInstance(
              firstAppId,
              ApplicationAttemptId.newInstance(firstAppId, 1),
              "user",
              "queue",
              "appname",
              "host",
              124,
              null,
              YarnApplicationState.RUNNING,
              "diagnostics",
              "url",
              0,
              0,
              FinalApplicationStatus.SUCCEEDED,
              null,
              "N/A",
              0.53789f,
              "YARN",
              null);
      reports.add(firstApp);

      // Two finished attempts for application 1.
      // NOTE(review): both container ids below are derived from the report's current attempt id
      // (attempt 1), including the one for attempt 2 - presumably intentional; verify.
      List<ApplicationAttemptReport> firstAppAttempts = new ArrayList<ApplicationAttemptReport>();
      ApplicationAttemptReport attemptOne =
          ApplicationAttemptReport.newInstance(
              ApplicationAttemptId.newInstance(firstAppId, 1),
              "host",
              124,
              "url",
              "oUrl",
              "diagnostics",
              YarnApplicationAttemptState.FINISHED,
              ContainerId.newContainerId(firstApp.getCurrentApplicationAttemptId(), 1));
      firstAppAttempts.add(attemptOne);
      ApplicationAttemptReport attemptTwo =
          ApplicationAttemptReport.newInstance(
              ApplicationAttemptId.newInstance(firstAppId, 2),
              "host",
              124,
              "url",
              "oUrl",
              "diagnostics",
              YarnApplicationAttemptState.FINISHED,
              ContainerId.newContainerId(firstApp.getCurrentApplicationAttemptId(), 2));
      firstAppAttempts.add(attemptTwo);
      attempts.put(firstAppId, firstAppAttempts);

      // Two completed containers registered under the first attempt.
      List<ContainerReport> attemptOneContainers = new ArrayList<ContainerReport>();
      ContainerReport containerOne =
          ContainerReport.newInstance(
              ContainerId.newContainerId(attemptOne.getApplicationAttemptId(), 1),
              null,
              NodeId.newInstance("host", 1234),
              Priority.UNDEFINED,
              1234,
              5678,
              "diagnosticInfo",
              "logURL",
              0,
              ContainerState.COMPLETE,
              "http://" + NodeId.newInstance("host", 2345).toString());
      attemptOneContainers.add(containerOne);

      ContainerReport containerTwo =
          ContainerReport.newInstance(
              ContainerId.newContainerId(attemptOne.getApplicationAttemptId(), 2),
              null,
              NodeId.newInstance("host", 1234),
              Priority.UNDEFINED,
              1234,
              5678,
              "diagnosticInfo",
              "logURL",
              0,
              ContainerState.COMPLETE,
              "http://" + NodeId.newInstance("host", 2345).toString());

      attemptOneContainers.add(containerTwo);
      containers.put(attemptOne.getApplicationAttemptId(), attemptOneContainers);

      // ---- application 2: FINISHED, type "NON-YARN" ----
      ApplicationId secondAppId = ApplicationId.newInstance(1234, 6);
      ApplicationReport secondApp =
          ApplicationReport.newInstance(
              secondAppId,
              ApplicationAttemptId.newInstance(secondAppId, 2),
              "user2",
              "queue2",
              "appname2",
              "host2",
              125,
              null,
              YarnApplicationState.FINISHED,
              "diagnostics2",
              "url2",
              2,
              2,
              FinalApplicationStatus.SUCCEEDED,
              null,
              "N/A",
              0.63789f,
              "NON-YARN",
              null);
      reports.add(secondApp);

      // ---- application 3: RUNNING, type "MAPREDUCE" ----
      ApplicationId thirdAppId = ApplicationId.newInstance(1234, 7);
      ApplicationReport thirdApp =
          ApplicationReport.newInstance(
              thirdAppId,
              ApplicationAttemptId.newInstance(thirdAppId, 3),
              "user3",
              "queue3",
              "appname3",
              "host3",
              126,
              null,
              YarnApplicationState.RUNNING,
              "diagnostics3",
              "url3",
              3,
              3,
              FinalApplicationStatus.SUCCEEDED,
              null,
              "N/A",
              0.73789f,
              "MAPREDUCE",
              null);
      reports.add(thirdApp);

      // ---- application 4: FAILED, type "NON-MAPREDUCE" ----
      ApplicationId fourthAppId = ApplicationId.newInstance(1234, 8);
      ApplicationReport fourthApp =
          ApplicationReport.newInstance(
              fourthAppId,
              ApplicationAttemptId.newInstance(fourthAppId, 4),
              "user4",
              "queue4",
              "appname4",
              "host4",
              127,
              null,
              YarnApplicationState.FAILED,
              "diagnostics4",
              "url4",
              4,
              4,
              FinalApplicationStatus.SUCCEEDED,
              null,
              "N/A",
              0.83789f,
              "NON-MAPREDUCE",
              null);
      reports.add(fourthApp);

      return reports;
    }
    /**
     * Returns a canned report for the requested application.
     *
     * <p>The reported state follows the {@code amRunning} / {@code amRestarting} flags: RUNNING
     * when {@code amRunning} is set, otherwise SUBMITTED when {@code amRestarting} is set,
     * otherwise FINISHED with final status SUCCEEDED. In the first two cases the final status is
     * UNDEFINED. Host and RPC port are taken from the {@code host:port} value of
     * {@code AMHOSTADDRESS}; the remaining fields are fixed placeholders.
     *
     * @param request carries the id of the application to report on
     * @return a response wrapping the generated report
     * @throws IOException declared by the overridden method; not thrown by this code directly
     */
    @Override
    public GetApplicationReportResponse getApplicationReport(GetApplicationReportRequest request)
        throws IOException {
      ApplicationId requestedId = request.getApplicationId();
      ApplicationReport report = recordFactory.newRecordInstance(ApplicationReport.class);
      report.setApplicationId(requestedId);

      // Decide state and final status up front; only a finished application has a final status.
      final YarnApplicationState state;
      final FinalApplicationStatus finalStatus;
      if (amRunning) {
        state = YarnApplicationState.RUNNING;
        finalStatus = FinalApplicationStatus.UNDEFINED;
      } else if (amRestarting) {
        state = YarnApplicationState.SUBMITTED;
        finalStatus = FinalApplicationStatus.UNDEFINED;
      } else {
        state = YarnApplicationState.FINISHED;
        finalStatus = FinalApplicationStatus.SUCCEEDED;
      }
      report.setFinalApplicationStatus(finalStatus);
      report.setYarnApplicationState(state);

      // AMHOSTADDRESS is split on ':' into host and port.
      String[] hostAndPort = AMHOSTADDRESS.split(":");
      report.setHost(hostAndPort[0]);
      report.setRpcPort(Integer.parseInt(hostAndPort[1]));

      // Placeholder values for everything else.
      report.setUser("TestClientRedirect-user");
      report.setName("N/A");
      report.setQueue("N/A");
      report.setStartTime(0);
      report.setFinishTime(0);
      report.setTrackingUrl("N/A");
      report.setDiagnostics("N/A");

      GetApplicationReportResponse reportResponse =
          recordFactory.newRecordInstance(GetApplicationReportResponse.class);
      reportResponse.setApplicationReport(report);
      return reportResponse;
    }