Example 1
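 // Appends namedOutput to the JobID's jtIdentifier, unless the id is null or the
 // identifier already contains it, so each named output gets a distinguishable job id.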
 private static void setJobID(Job job, JobID jobID, String namedOutput) {
   JobID newJobID =
       jobID == null || jobID.getJtIdentifier().contains(namedOutput)
           ? jobID
           : new JobID(jobID.getJtIdentifier() + "_" + namedOutput, jobID.getId());
   job.setJobID(newJobID);
 }
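
To make the branch concrete, here is a minimal, self-contained sketch of the suffixing behaviour. The class name SetJobIDDemo, the identifier 20240101120000 and the named output "stats" are illustrative assumptions; the helper is copied in because it is private in its source class.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobID;

public class SetJobIDDemo {
  // copied from Example 1 so the sketch compiles on its own
  private static void setJobID(Job job, JobID jobID, String namedOutput) {
    JobID newJobID =
        jobID == null || jobID.getJtIdentifier().contains(namedOutput)
            ? jobID
            : new JobID(jobID.getJtIdentifier() + "_" + namedOutput, jobID.getId());
    job.setJobID(newJobID);
  }

  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration());
    JobID original = new JobID("20240101120000", 1); // prints as job_20240101120000_0001
    setJobID(job, original, "stats");
    System.out.println(job.getJobID()); // job_20240101120000_stats_0001
    setJobID(job, job.getJobID(), "stats"); // no-op: the identifier already contains "stats"
    System.out.println(job.getJobID()); // unchanged
  }
}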
Example 2
  /**
   * Internal method for submitting jobs to the system.
   *
   * <p>The job submission process involves:
   *
   * <ol>
   *   <li>Checking the input and output specifications of the job.
   *   <li>Computing the {@link InputSplit}s for the job.
   *   <li>Setup the requisite accounting information for the {@link DistributedCache} of the job,
   *       if necessary.
   *   <li>Copying the job's jar and configuration to the map-reduce system directory on the
   *       distributed file-system.
   *   <li>Submitting the job to the <code>JobTracker</code> and optionally monitoring its status.
   * </ol>
   *
   * @param job the configuration to submit
   * @param cluster the handle to the Cluster
   * @throws ClassNotFoundException
   * @throws InterruptedException
   * @throws IOException
   */
  JobStatus submitJobInternal(Job job, Cluster cluster)
      throws ClassNotFoundException, InterruptedException, IOException {

    // validate the job's output specs
    //// picks the old or new API as configured, then checks that the output
    //// specification is valid (e.g. the output directory must not already exist).
    checkSpecs(job);

    //// the staging area is a per-user directory, e.g. /tmp/hadoop-yarn/staging/<user>/.staging on YARN.
    Path jobStagingArea = JobSubmissionFiles.getStagingDir(cluster, job.getConfiguration());
    // configure the command line options correctly on the submitting dfs
    Configuration conf = job.getConfiguration();
    InetAddress ip = InetAddress.getLocalHost();
    if (ip != null) {
      submitHostAddress = ip.getHostAddress();
      submitHostName = ip.getHostName();
      conf.set(MRJobConfig.JOB_SUBMITHOST, submitHostName);
      conf.set(MRJobConfig.JOB_SUBMITHOSTADDR, submitHostAddress);
    }
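    //// ask the cluster (YARN ResourceManager or LocalJobRunner) for a fresh, unique job id.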
    JobID jobId = submitClient.getNewJobID();
    job.setJobID(jobId);
    Path submitJobDir = new Path(jobStagingArea, jobId.toString());
    JobStatus status = null;
    try {
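      //// route the job's web UI through the YARN web proxy (AmFilterInitializer).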
      conf.set(
          "hadoop.http.filter.initializers",
          "org.apache.hadoop.yarn.server.webproxy.amfilter.AmFilterInitializer");

      //// record the per-job submit dir in the config under mapreduce.job.dir.
      conf.set(MRJobConfig.MAPREDUCE_JOB_DIR, submitJobDir.toString());
      LOG.debug("Configuring job " + jobId + " with " + submitJobDir + " as the submit dir");
      // get the delegation tokens for the dir

      //// TokenCache.obtainTokensForNamenodes is a static helper: the secrets are
      //// stored in the TokenCache before the job is submitted and read back by
      //// the tasks while the job runs.
      TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] {submitJobDir}, conf);

      //// fetch the job's secret keys and store them in the TokenCache.
      populateTokenCache(conf, job.getCredentials());

      //// copy the job jar and any -libjars, -files and -archives into submitJobDir.
      copyAndConfigureFiles(job, submitJobDir);
      //// submitJobFile is the Path of job.xml inside submitJobDir.
      Path submitJobFile = JobSubmissionFiles.getJobConfPath(submitJobDir);

      // Create the splits for the job
      LOG.debug("Creating splits at " + jtFs.makeQualified(submitJobDir));
      //// InputFormat.getSplits()
      int maps = writeSplits(job, submitJobDir);
      conf.setInt(MRJobConfig.NUM_MAPS, maps);
      LOG.info("number of splits:" + maps);

      // write "queue admins of the queue to which job is being submitted"
      // to job file.
      String queue = conf.get(MRJobConfig.QUEUE_NAME, JobConf.DEFAULT_QUEUE_NAME);
      AccessControlList acl = submitClient.getQueueAdmins(queue);
      conf.set(
          toFullPropertyName(queue, QueueACL.ADMINISTER_JOBS.getAclName()), acl.getAclString());

      // removing jobtoken referrals before copying the jobconf to HDFS
      // as the tasks don't need this setting, actually they may break
      // because of it if present as the referral will point to a
      // different job.
      TokenCache.cleanUpTokenReferral(conf);

      // Write job file to submit dir
      writeConf(conf, submitJobFile);

      //
      // Now, actually submit the job (using the submit name)
      //
      printTokens(jobId, job.getCredentials());
      //// ClientProtocol.submitJob has two implementations: YARNRunner (cluster) and LocalJobRunner (local mode).
      status = submitClient.submitJob(jobId, submitJobDir.toString(), job.getCredentials());
      if (status != null) {
        return status;
      } else {
        throw new IOException("Could not launch job");
      }
    } finally {
      if (status == null) {
        LOG.info("Cleaning up the staging area " + submitJobDir);
        if (jtFs != null && submitJobDir != null) {
          jtFs.delete(submitJobDir, true);
        }
      }
    }
  }
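
This method is not called directly by user code: a driver reaches it through Job.waitForCompletion(), which calls Job.submit() and in turn JobSubmitter.submitJobInternal(job, cluster). A minimal driver sketch follows; the class name SubmitDemo is an assumption, and no Mapper or Reducer is set, so the identity defaults pass each TextInputFormat record (LongWritable offset, Text line) straight through.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SubmitDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "submit-demo");
    job.setJarByClass(SubmitDemo.class);
    // identity Mapper/Reducer defaults: output types match the input records
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    // waitForCompletion() -> submit() -> submitJobInternal(...) shown above
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}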