示例#1
0
 @Override
 public boolean hasFailed() {
   if (!isConnected) {
     throw new IllegalStateException("The cluster has been connected to the ApplicationMaster.");
   }
   if (pollingRunner == null) {
     LOG.warn(
         "FlinkYarnCluster.hasFailed() has been called on an uninitialized cluster."
             + "The system might be in an erroneous state");
   }
   ApplicationReport lastReport = pollingRunner.getLastReport();
   if (lastReport == null) {
     LOG.warn(
         "FlinkYarnCluster.hasFailed() has been called on a cluster that didn't receive a status so far."
             + "The system might be in an erroneous state");
     return false;
   } else {
     YarnApplicationState appState = lastReport.getYarnApplicationState();
     boolean status =
         (appState == YarnApplicationState.FAILED || appState == YarnApplicationState.KILLED);
     if (status) {
       LOG.warn("YARN reported application state {}", appState);
       LOG.warn("Diagnostics: {}", lastReport.getDiagnostics());
     }
     return status;
   }
 }
 public TezSessionStatus getSessionStatus() throws TezException, IOException {
   try {
     ApplicationReport appReport = yarnClient.getApplicationReport(applicationId);
     switch (appReport.getYarnApplicationState()) {
       case NEW:
       case NEW_SAVING:
       case ACCEPTED:
       case SUBMITTED:
         return TezSessionStatus.INITIALIZING;
       case FINISHED:
       case FAILED:
       case KILLED:
         return TezSessionStatus.SHUTDOWN;
       case RUNNING:
         try {
           DAGClientAMProtocolBlockingPB proxy =
               TezClientUtils.getSessionAMProxy(
                   yarnClient, sessionConfig.getYarnConfiguration(), applicationId);
           if (proxy == null) {
             return TezSessionStatus.INITIALIZING;
           }
           GetAMStatusResponseProto response =
               proxy.getAMStatus(null, GetAMStatusRequestProto.newBuilder().build());
           return DagTypeConverters.convertTezSessionStatusFromProto(response.getStatus());
         } catch (TezException e) {
           LOG.info("Failed to retrieve AM Status via proxy", e);
         } catch (ServiceException e) {
           LOG.info("Failed to retrieve AM Status via proxy", e);
         }
     }
   } catch (YarnException e) {
     throw new TezException(e);
   }
   return TezSessionStatus.INITIALIZING;
 }
示例#3
0
 public static String appReportToString(ApplicationReport r, String separator) {
   StringBuilder builder = new StringBuilder(512);
   builder.append("application ").append(r.getName()).append("/").append(r.getApplicationType());
   builder.append(separator).append("state: ").append(r.getYarnApplicationState());
   builder.append(separator).append("URL: ").append(r.getTrackingUrl());
   builder
       .append(separator)
       .append("Started ")
       .append(new Date(r.getStartTime()).toLocaleString());
   long finishTime = r.getFinishTime();
   if (finishTime > 0) {
     builder.append(separator).append("Finished ").append(new Date(finishTime).toLocaleString());
   }
   builder
       .append(separator)
       .append("RPC :")
       .append(r.getHost())
       .append(':')
       .append(r.getRpcPort());
   String diagnostics = r.getDiagnostics();
   if (!diagnostics.isEmpty()) {
     builder.append(separator).append("Diagnostics :").append(diagnostics);
   }
   return builder.toString();
 }
 public void setYarnApplicationState(YarnApplicationState state) {
   when(mockReport.getYarnApplicationState())
       .thenReturn(
           YarnApplicationState.NEW,
           YarnApplicationState.NEW_SAVING,
           YarnApplicationState.NEW_SAVING,
           state);
 }
示例#5
0
  @Before
  public void checkClusterEmpty() throws IOException, YarnException {
    if (yarnClient == null) {
      yarnClient = YarnClient.createYarnClient();
      yarnClient.init(yarnConfiguration);
      yarnClient.start();
    }

    List<ApplicationReport> apps = yarnClient.getApplications();
    for (ApplicationReport app : apps) {
      if (app.getYarnApplicationState() != YarnApplicationState.FINISHED
          && app.getYarnApplicationState() != YarnApplicationState.KILLED
          && app.getYarnApplicationState() != YarnApplicationState.FAILED) {
        Assert.fail(
            "There is at least one application on the cluster is not finished."
                + "App "
                + app.getApplicationId()
                + " is in state "
                + app.getYarnApplicationState());
      }
    }
  }
 private void waitTillAccepted(YarnClient rmClient, ApplicationId appId) throws Exception {
   try {
     long start = System.currentTimeMillis();
     ApplicationReport report = rmClient.getApplicationReport(appId);
     while (YarnApplicationState.ACCEPTED != report.getYarnApplicationState()) {
       if (System.currentTimeMillis() - start > 20 * 1000) {
         throw new Exception("App '" + appId + "' time out, failed to reach ACCEPTED state");
       }
       Thread.sleep(200);
       report = rmClient.getApplicationReport(appId);
     }
   } catch (Exception ex) {
     throw new Exception(ex);
   }
 }
示例#7
0
  /**
   * convert an AM report to a string for diagnostics
   *
   * @param report the report
   * @return the string value
   */
  public static String reportToString(ApplicationReport report) {
    if (report == null) {
      return "Null application report";
    }

    return "App "
        + report.getName()
        + "/"
        + report.getApplicationType()
        + "# "
        + report.getApplicationId()
        + " user "
        + report.getUser()
        + " is in state "
        + report.getYarnApplicationState()
        + "RPC: "
        + report.getHost()
        + ":"
        + report.getRpcPort();
  }
示例#8
0
 @Test(timeout = 20000)
 public void testJobSubmissionFailure() throws Exception {
   when(resourceMgrDelegate.submitApplication(any(ApplicationSubmissionContext.class)))
       .thenReturn(appId);
   ApplicationReport report = mock(ApplicationReport.class);
   when(report.getApplicationId()).thenReturn(appId);
   when(report.getDiagnostics()).thenReturn(failString);
   when(report.getYarnApplicationState()).thenReturn(YarnApplicationState.FAILED);
   when(resourceMgrDelegate.getApplicationReport(appId)).thenReturn(report);
   Credentials credentials = new Credentials();
   File jobxml = new File(testWorkDir, "job.xml");
   OutputStream out = new FileOutputStream(jobxml);
   conf.writeXml(out);
   out.close();
   try {
     yarnRunner.submitJob(jobId, testWorkDir.getAbsolutePath().toString(), credentials);
   } catch (IOException io) {
     LOG.info("Logging exception:", io);
     assertTrue(io.getLocalizedMessage().contains(failString));
   }
 }
    private List<ApplicationReport> getApplicationReports(
        List<ApplicationReport> applicationReports,
        Set<String> applicationTypes,
        EnumSet<YarnApplicationState> applicationStates) {

      List<ApplicationReport> appReports = new ArrayList<ApplicationReport>();
      for (ApplicationReport appReport : applicationReports) {
        if (applicationTypes != null && !applicationTypes.isEmpty()) {
          if (!applicationTypes.contains(appReport.getApplicationType())) {
            continue;
          }
        }

        if (applicationStates != null && !applicationStates.isEmpty()) {
          if (!applicationStates.contains(appReport.getYarnApplicationState())) {
            continue;
          }
        }
        appReports.add(appReport);
      }
      return appReports;
    }
示例#10
0
  @Override
  public ApplicationId submitApplication(ApplicationSubmissionContext appContext)
      throws YarnException, IOException {
    ApplicationId applicationId = appContext.getApplicationId();
    if (applicationId == null) {
      throw new ApplicationIdNotProvidedException(
          "ApplicationId is not provided in ApplicationSubmissionContext");
    }
    SubmitApplicationRequest request = Records.newRecord(SubmitApplicationRequest.class);
    request.setApplicationSubmissionContext(appContext);

    // Automatically add the timeline DT into the CLC
    // Only when the security and the timeline service are both enabled
    if (isSecurityEnabled() && timelineServiceEnabled) {
      addTimelineDelegationToken(appContext.getAMContainerSpec());
    }

    // TODO: YARN-1763:Handle RM failovers during the submitApplication call.
    rmClient.submitApplication(request);

    int pollCount = 0;
    long startTime = System.currentTimeMillis();
    EnumSet<YarnApplicationState> waitingStates =
        EnumSet.of(
            YarnApplicationState.NEW,
            YarnApplicationState.NEW_SAVING,
            YarnApplicationState.SUBMITTED);
    EnumSet<YarnApplicationState> failToSubmitStates =
        EnumSet.of(YarnApplicationState.FAILED, YarnApplicationState.KILLED);
    while (true) {
      try {
        ApplicationReport appReport = getApplicationReport(applicationId);
        YarnApplicationState state = appReport.getYarnApplicationState();
        if (!waitingStates.contains(state)) {
          if (failToSubmitStates.contains(state)) {
            throw new YarnException(
                "Failed to submit " + applicationId + " to YARN : " + appReport.getDiagnostics());
          }
          LOG.info("Submitted application " + applicationId);
          break;
        }

        long elapsedMillis = System.currentTimeMillis() - startTime;
        if (enforceAsyncAPITimeout() && elapsedMillis >= asyncApiPollTimeoutMillis) {
          throw new YarnException(
              "Timed out while waiting for application "
                  + applicationId
                  + " to be submitted successfully");
        }

        // Notify the client through the log every 10 poll, in case the client
        // is blocked here too long.
        if (++pollCount % 10 == 0) {
          LOG.info(
              "Application submission is not finished, "
                  + "submitted application "
                  + applicationId
                  + " is still in "
                  + state);
        }
        try {
          Thread.sleep(submitPollIntervalMillis);
        } catch (InterruptedException ie) {
          LOG.error(
              "Interrupted while waiting for application "
                  + applicationId
                  + " to be successfully submitted.");
        }
      } catch (ApplicationNotFoundException ex) {
        // FailOver or RM restart happens before RMStateStore saves
        // ApplicationState
        LOG.info(
            "Re-submit application "
                + applicationId
                + "with the "
                + "same ApplicationSubmissionContext");
        rmClient.submitApplication(request);
      }
    }

    return applicationId;
  }
示例#11
0
  private void testDetachedPerJobYarnClusterInternal(String job) {
    YarnClient yc = YarnClient.createYarnClient();
    yc.init(yarnConfiguration);
    yc.start();

    // get temporary folder for writing output of wordcount example
    File tmpOutFolder = null;
    try {
      tmpOutFolder = tmp.newFolder();
    } catch (IOException e) {
      throw new RuntimeException(e);
    }

    // get temporary file for reading input data for wordcount example
    File tmpInFile;
    try {
      tmpInFile = tmp.newFile();
      FileUtils.writeStringToFile(tmpInFile, WordCountData.TEXT);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }

    Runner runner =
        startWithArgs(
            new String[] {
              "run",
              "-m",
              "yarn-cluster",
              "-yj",
              flinkUberjar.getAbsolutePath(),
              "-yt",
              flinkLibFolder.getAbsolutePath(),
              "-yn",
              "1",
              "-yjm",
              "768",
              "-yD",
              "yarn.heap-cutoff-ratio=0.5", // test if the cutoff is passed correctly
              "-ytm",
              "1024",
              "-ys",
              "2", // test requesting slots from YARN.
              "--yarndetached",
              job,
              tmpInFile.getAbsoluteFile().toString(),
              tmpOutFolder.getAbsoluteFile().toString()
            },
            "Job has been submitted with JobID",
            RunTypes.CLI_FRONTEND);

    // it should usually be 2, but on slow machines, the number varies
    Assert.assertTrue("There should be at most 2 containers running", getRunningContainers() <= 2);
    // give the runner some time to detach
    for (int attempt = 0; runner.isAlive() && attempt < 5; attempt++) {
      try {
        Thread.sleep(500);
      } catch (InterruptedException e) {
      }
    }
    Assert.assertFalse("The runner should detach.", runner.isAlive());
    LOG.info("CLI Frontend has returned, so the job is running");

    // find out the application id and wait until it has finished.
    try {
      List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING));

      ApplicationId tmpAppId;
      if (apps.size() == 1) {
        // Better method to find the right appId. But sometimes the app is shutting down very fast
        // Only one running
        tmpAppId = apps.get(0).getApplicationId();

        LOG.info("waiting for the job with appId {} to finish", tmpAppId);
        // wait until the app has finished
        while (yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING)).size() > 0) {
          sleep(500);
        }
      } else {
        // get appId by finding the latest finished appid
        apps = yc.getApplications();
        Collections.sort(
            apps,
            new Comparator<ApplicationReport>() {
              @Override
              public int compare(ApplicationReport o1, ApplicationReport o2) {
                return o1.getApplicationId().compareTo(o2.getApplicationId()) * -1;
              }
            });
        tmpAppId = apps.get(0).getApplicationId();
        LOG.info(
            "Selected {} as the last appId from {}", tmpAppId, Arrays.toString(apps.toArray()));
      }
      final ApplicationId id = tmpAppId;

      // now it has finished.
      // check the output files.
      File[] listOfOutputFiles = tmpOutFolder.listFiles();

      Assert.assertNotNull("Taskmanager output not found", listOfOutputFiles);
      LOG.info("The job has finished. TaskManager output files found in {}", tmpOutFolder);

      // read all output files in output folder to one output string
      String content = "";
      for (File f : listOfOutputFiles) {
        if (f.isFile()) {
          content += FileUtils.readFileToString(f) + "\n";
        }
      }
      // String content = FileUtils.readFileToString(taskmanagerOut);
      // check for some of the wordcount outputs.
      Assert.assertTrue(
          "Expected string 'da 5' or '(all,2)' not found in string '" + content + "'",
          content.contains("da 5") || content.contains("(da,5)") || content.contains("(all,2)"));
      Assert.assertTrue(
          "Expected string 'der 29' or '(mind,1)' not found in string'" + content + "'",
          content.contains("der 29")
              || content.contains("(der,29)")
              || content.contains("(mind,1)"));

      // check if the heap size for the TaskManager was set correctly
      File jobmanagerLog =
          YarnTestBase.findFile(
              "..",
              new FilenameFilter() {
                @Override
                public boolean accept(File dir, String name) {
                  return name.contains("jobmanager.log")
                      && dir.getAbsolutePath().contains(id.toString());
                }
              });
      Assert.assertNotNull("Unable to locate JobManager log", jobmanagerLog);
      content = FileUtils.readFileToString(jobmanagerLog);
      // TM was started with 1024 but we cut off 50% (NOT THE DEFAULT VALUE)
      String expected = "Starting TM with command=$JAVA_HOME/bin/java -Xms424m -Xmx424m";
      Assert.assertTrue(
          "Expected string '" + expected + "' not found in JobManager log: '" + jobmanagerLog + "'",
          content.contains(expected));
      expected = " (2/2) (attempt #0) to ";
      Assert.assertTrue(
          "Expected string '"
              + expected
              + "' not found in JobManager log."
              + "This string checks that the job has been started with a parallelism of 2. Log contents: '"
              + jobmanagerLog
              + "'",
          content.contains(expected));

      // make sure the detached app is really finished.
      LOG.info("Checking again that app has finished");
      ApplicationReport rep;
      do {
        sleep(500);
        rep = yc.getApplicationReport(id);
        LOG.info("Got report {}", rep);
      } while (rep.getYarnApplicationState() == YarnApplicationState.RUNNING);

    } catch (Throwable t) {
      LOG.warn("Error while detached yarn session was running", t);
      Assert.fail(t.getMessage());
    }
  }
示例#12
0
  public void run(String[] args) throws Exception {
    final String command = args[0];
    final int n = Integer.valueOf(args[1]);
    final Path jarPath = new Path(args[2]);

    // Create yarnClient
    YarnConfiguration conf = new YarnConfiguration();
    YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(conf);
    yarnClient.start();

    // Create application via yarnClient
    YarnClientApplication app = yarnClient.createApplication();

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);
    amContainer.setCommands(
        Collections.singletonList(
            "$JAVA_HOME/bin/java"
                + " -Xmx256M"
                + " com.hortonworks.simpleyarnapp.ApplicationMaster"
                + " "
                + command
                + " "
                + String.valueOf(n)
                + " 1>"
                + ApplicationConstants.LOG_DIR_EXPANSION_VAR
                + "/stdout"
                + " 2>"
                + ApplicationConstants.LOG_DIR_EXPANSION_VAR
                + "/stderr"));

    // Setup jar for ApplicationMaster
    LocalResource appMasterJar = Records.newRecord(LocalResource.class);
    setupAppMasterJar(jarPath, appMasterJar);
    System.out.println("Jar name is " + jarPath.getName());
    amContainer.setLocalResources(Collections.singletonMap(jarPath.getName(), appMasterJar));

    // Setup CLASSPATH for ApplicationMaster
    Map<String, String> appMasterEnv = new HashMap<String, String>();
    setupAppMasterEnv(appMasterEnv);
    amContainer.setEnvironment(appMasterEnv);

    // Set up resource type requirements for ApplicationMaster
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(256);
    capability.setVirtualCores(1);

    // Finally, set-up ApplicationSubmissionContext for the application
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    appContext.setApplicationName("apache-yarn-example"); // application name
    appContext.setAMContainerSpec(amContainer);
    appContext.setResource(capability);
    appContext.setQueue("default"); // queue

    // Submit application
    ApplicationId appId = appContext.getApplicationId();
    System.out.println("Submitting application " + appId);
    yarnClient.submitApplication(appContext);

    ApplicationReport appReport = yarnClient.getApplicationReport(appId);
    YarnApplicationState appState = appReport.getYarnApplicationState();
    while (appState != YarnApplicationState.FINISHED
        && appState != YarnApplicationState.KILLED
        && appState != YarnApplicationState.FAILED) {
      System.out.println("App Status = " + appState);
      Thread.sleep(100);
      appReport = yarnClient.getApplicationReport(appId);
      appState = appReport.getYarnApplicationState();
    }

    System.out.println(
        "Application "
            + appId
            + " finished with"
            + " state "
            + appState
            + " at "
            + appReport.getFinishTime());
  }
示例#13
0
 public static boolean hasAppFinished(ApplicationReport report) {
   return report == null
       || report.getYarnApplicationState().ordinal() >= YarnApplicationState.FINISHED.ordinal();
 }