예제 #1
0
 private void waitTillAccepted(YarnClient rmClient, ApplicationId appId) throws Exception {
   try {
     long start = System.currentTimeMillis();
     ApplicationReport report = rmClient.getApplicationReport(appId);
     while (YarnApplicationState.ACCEPTED != report.getYarnApplicationState()) {
       if (System.currentTimeMillis() - start > 20 * 1000) {
         throw new Exception("App '" + appId + "' time out, failed to reach ACCEPTED state");
       }
       Thread.sleep(200);
       report = rmClient.getApplicationReport(appId);
     }
   } catch (Exception ex) {
     throw new Exception(ex);
   }
 }
 public TezSessionStatus getSessionStatus() throws TezException, IOException {
   try {
     ApplicationReport appReport = yarnClient.getApplicationReport(applicationId);
     switch (appReport.getYarnApplicationState()) {
       case NEW:
       case NEW_SAVING:
       case ACCEPTED:
       case SUBMITTED:
         return TezSessionStatus.INITIALIZING;
       case FINISHED:
       case FAILED:
       case KILLED:
         return TezSessionStatus.SHUTDOWN;
       case RUNNING:
         try {
           DAGClientAMProtocolBlockingPB proxy =
               TezClientUtils.getSessionAMProxy(
                   yarnClient, sessionConfig.getYarnConfiguration(), applicationId);
           if (proxy == null) {
             return TezSessionStatus.INITIALIZING;
           }
           GetAMStatusResponseProto response =
               proxy.getAMStatus(null, GetAMStatusRequestProto.newBuilder().build());
           return DagTypeConverters.convertTezSessionStatusFromProto(response.getStatus());
         } catch (TezException e) {
           LOG.info("Failed to retrieve AM Status via proxy", e);
         } catch (ServiceException e) {
           LOG.info("Failed to retrieve AM Status via proxy", e);
         }
     }
   } catch (YarnException e) {
     throw new TezException(e);
   }
   return TezSessionStatus.INITIALIZING;
 }
예제 #3
0
 @Override
 public void run() {
   while (running.get() && yarnClient.isInState(Service.STATE.STARTED)) {
     try {
       ApplicationReport report = yarnClient.getApplicationReport(appId);
       synchronized (lock) {
         lastReport = report;
       }
     } catch (Exception e) {
       LOG.warn("Error while getting application report", e);
     }
     try {
       Thread.sleep(FlinkYarnCluster.POLLING_THREAD_INTERVAL_MS);
     } catch (InterruptedException e) {
       LOG.error("Polling thread got interrupted", e);
       Thread.currentThread().interrupt(); // pass interrupt.
     }
   }
   if (running.get() && !yarnClient.isInState(Service.STATE.STARTED)) {
     // == if the polling thread is still running but the yarn client is stopped.
     LOG.warn("YARN client is unexpected in state " + yarnClient.getServiceState());
   }
 }
예제 #4
0
  /**
   * Create a new Flink on YARN cluster.
   *
   * @param yarnClient
   * @param appId the YARN application ID
   * @param hadoopConfig
   * @param flinkConfig
   * @param sessionFilesDir
   * @param detached Set to true if no actor system or RPC communication with the cluster should be
   *     established
   * @throws IOException
   * @throws YarnException
   */
  public FlinkYarnCluster(
      final YarnClient yarnClient,
      final ApplicationId appId,
      Configuration hadoopConfig,
      org.apache.flink.configuration.Configuration flinkConfig,
      Path sessionFilesDir,
      boolean detached)
      throws IOException, YarnException {
    this.akkaDuration = AkkaUtils.getTimeout(flinkConfig);
    this.akkaTimeout = Timeout.durationToTimeout(akkaDuration);
    this.yarnClient = yarnClient;
    this.hadoopConfig = hadoopConfig;
    this.sessionFilesDir = sessionFilesDir;
    this.applicationId = appId;
    this.detached = detached;
    this.flinkConfig = flinkConfig;
    this.appId = appId;

    // get one application report manually
    intialAppReport = yarnClient.getApplicationReport(appId);
    String jobManagerHost = intialAppReport.getHost();
    int jobManagerPort = intialAppReport.getRpcPort();
    this.jobManagerAddress = new InetSocketAddress(jobManagerHost, jobManagerPort);
  }
예제 #5
0
 public ApplicationReport getApplicationReport() throws YarnException, IOException {
   return yarnClient.getApplicationReport(this.appId);
 }
예제 #6
0
  private void testDetachedPerJobYarnClusterInternal(String job) {
    YarnClient yc = YarnClient.createYarnClient();
    yc.init(yarnConfiguration);
    yc.start();

    // get temporary folder for writing output of wordcount example
    File tmpOutFolder = null;
    try {
      tmpOutFolder = tmp.newFolder();
    } catch (IOException e) {
      throw new RuntimeException(e);
    }

    // get temporary file for reading input data for wordcount example
    File tmpInFile;
    try {
      tmpInFile = tmp.newFile();
      FileUtils.writeStringToFile(tmpInFile, WordCountData.TEXT);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }

    Runner runner =
        startWithArgs(
            new String[] {
              "run",
              "-m",
              "yarn-cluster",
              "-yj",
              flinkUberjar.getAbsolutePath(),
              "-yt",
              flinkLibFolder.getAbsolutePath(),
              "-yn",
              "1",
              "-yjm",
              "768",
              "-yD",
              "yarn.heap-cutoff-ratio=0.5", // test if the cutoff is passed correctly
              "-ytm",
              "1024",
              "-ys",
              "2", // test requesting slots from YARN.
              "--yarndetached",
              job,
              tmpInFile.getAbsoluteFile().toString(),
              tmpOutFolder.getAbsoluteFile().toString()
            },
            "Job has been submitted with JobID",
            RunTypes.CLI_FRONTEND);

    // it should usually be 2, but on slow machines, the number varies
    Assert.assertTrue("There should be at most 2 containers running", getRunningContainers() <= 2);
    // give the runner some time to detach
    for (int attempt = 0; runner.isAlive() && attempt < 5; attempt++) {
      try {
        Thread.sleep(500);
      } catch (InterruptedException e) {
      }
    }
    Assert.assertFalse("The runner should detach.", runner.isAlive());
    LOG.info("CLI Frontend has returned, so the job is running");

    // find out the application id and wait until it has finished.
    try {
      List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING));

      ApplicationId tmpAppId;
      if (apps.size() == 1) {
        // Better method to find the right appId. But sometimes the app is shutting down very fast
        // Only one running
        tmpAppId = apps.get(0).getApplicationId();

        LOG.info("waiting for the job with appId {} to finish", tmpAppId);
        // wait until the app has finished
        while (yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING)).size() > 0) {
          sleep(500);
        }
      } else {
        // get appId by finding the latest finished appid
        apps = yc.getApplications();
        Collections.sort(
            apps,
            new Comparator<ApplicationReport>() {
              @Override
              public int compare(ApplicationReport o1, ApplicationReport o2) {
                return o1.getApplicationId().compareTo(o2.getApplicationId()) * -1;
              }
            });
        tmpAppId = apps.get(0).getApplicationId();
        LOG.info(
            "Selected {} as the last appId from {}", tmpAppId, Arrays.toString(apps.toArray()));
      }
      final ApplicationId id = tmpAppId;

      // now it has finished.
      // check the output files.
      File[] listOfOutputFiles = tmpOutFolder.listFiles();

      Assert.assertNotNull("Taskmanager output not found", listOfOutputFiles);
      LOG.info("The job has finished. TaskManager output files found in {}", tmpOutFolder);

      // read all output files in output folder to one output string
      String content = "";
      for (File f : listOfOutputFiles) {
        if (f.isFile()) {
          content += FileUtils.readFileToString(f) + "\n";
        }
      }
      // String content = FileUtils.readFileToString(taskmanagerOut);
      // check for some of the wordcount outputs.
      Assert.assertTrue(
          "Expected string 'da 5' or '(all,2)' not found in string '" + content + "'",
          content.contains("da 5") || content.contains("(da,5)") || content.contains("(all,2)"));
      Assert.assertTrue(
          "Expected string 'der 29' or '(mind,1)' not found in string'" + content + "'",
          content.contains("der 29")
              || content.contains("(der,29)")
              || content.contains("(mind,1)"));

      // check if the heap size for the TaskManager was set correctly
      File jobmanagerLog =
          YarnTestBase.findFile(
              "..",
              new FilenameFilter() {
                @Override
                public boolean accept(File dir, String name) {
                  return name.contains("jobmanager.log")
                      && dir.getAbsolutePath().contains(id.toString());
                }
              });
      Assert.assertNotNull("Unable to locate JobManager log", jobmanagerLog);
      content = FileUtils.readFileToString(jobmanagerLog);
      // TM was started with 1024 but we cut off 50% (NOT THE DEFAULT VALUE)
      String expected = "Starting TM with command=$JAVA_HOME/bin/java -Xms424m -Xmx424m";
      Assert.assertTrue(
          "Expected string '" + expected + "' not found in JobManager log: '" + jobmanagerLog + "'",
          content.contains(expected));
      expected = " (2/2) (attempt #0) to ";
      Assert.assertTrue(
          "Expected string '"
              + expected
              + "' not found in JobManager log."
              + "This string checks that the job has been started with a parallelism of 2. Log contents: '"
              + jobmanagerLog
              + "'",
          content.contains(expected));

      // make sure the detached app is really finished.
      LOG.info("Checking again that app has finished");
      ApplicationReport rep;
      do {
        sleep(500);
        rep = yc.getApplicationReport(id);
        LOG.info("Got report {}", rep);
      } while (rep.getYarnApplicationState() == YarnApplicationState.RUNNING);

    } catch (Throwable t) {
      LOG.warn("Error while detached yarn session was running", t);
      Assert.fail(t.getMessage());
    }
  }
예제 #7
0
  public void run(String[] args) throws Exception {
    final String command = args[0];
    final int n = Integer.valueOf(args[1]);
    final Path jarPath = new Path(args[2]);

    // Create yarnClient
    YarnConfiguration conf = new YarnConfiguration();
    YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(conf);
    yarnClient.start();

    // Create application via yarnClient
    YarnClientApplication app = yarnClient.createApplication();

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);
    amContainer.setCommands(
        Collections.singletonList(
            "$JAVA_HOME/bin/java"
                + " -Xmx256M"
                + " com.hortonworks.simpleyarnapp.ApplicationMaster"
                + " "
                + command
                + " "
                + String.valueOf(n)
                + " 1>"
                + ApplicationConstants.LOG_DIR_EXPANSION_VAR
                + "/stdout"
                + " 2>"
                + ApplicationConstants.LOG_DIR_EXPANSION_VAR
                + "/stderr"));

    // Setup jar for ApplicationMaster
    LocalResource appMasterJar = Records.newRecord(LocalResource.class);
    setupAppMasterJar(jarPath, appMasterJar);
    System.out.println("Jar name is " + jarPath.getName());
    amContainer.setLocalResources(Collections.singletonMap(jarPath.getName(), appMasterJar));

    // Setup CLASSPATH for ApplicationMaster
    Map<String, String> appMasterEnv = new HashMap<String, String>();
    setupAppMasterEnv(appMasterEnv);
    amContainer.setEnvironment(appMasterEnv);

    // Set up resource type requirements for ApplicationMaster
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(256);
    capability.setVirtualCores(1);

    // Finally, set-up ApplicationSubmissionContext for the application
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    appContext.setApplicationName("apache-yarn-example"); // application name
    appContext.setAMContainerSpec(amContainer);
    appContext.setResource(capability);
    appContext.setQueue("default"); // queue

    // Submit application
    ApplicationId appId = appContext.getApplicationId();
    System.out.println("Submitting application " + appId);
    yarnClient.submitApplication(appContext);

    ApplicationReport appReport = yarnClient.getApplicationReport(appId);
    YarnApplicationState appState = appReport.getYarnApplicationState();
    while (appState != YarnApplicationState.FINISHED
        && appState != YarnApplicationState.KILLED
        && appState != YarnApplicationState.FAILED) {
      System.out.println("App Status = " + appState);
      Thread.sleep(100);
      appReport = yarnClient.getApplicationReport(appId);
      appState = appReport.getYarnApplicationState();
    }

    System.out.println(
        "Application "
            + appId
            + " finished with"
            + " state "
            + appState
            + " at "
            + appReport.getFinishTime());
  }