/**
   * Sets the connector information needed to communicate with Accumulo in this job.
   *
   * <p><b>WARNING:</b> Some tokens, when serialized, divulge sensitive information in the
   * configuration as a means to pass the token to MapReduce tasks. This information is BASE64
   * encoded to provide a charset safe conversion to a string, but this conversion is not intended
   * to be secure. {@link PasswordToken} is one example that is insecure in this way; however {@link
   * DelegationToken}s, acquired using {@link
   * SecurityOperations#getDelegationToken(DelegationTokenConfig)}, is not subject to this concern.
   *
   * @param job the Hadoop job instance to be configured
   * @param principal a valid Accumulo user name (user must have Table.CREATE permission)
   * @param token the user's password
   * @since 1.5.0
   */
  public static void setConnectorInfo(Job job, String principal, AuthenticationToken token)
      throws AccumuloSecurityException {
    if (token instanceof KerberosToken) {
      log.info("Received KerberosToken, attempting to fetch DelegationToken");
      try {
        Instance instance = getInstance(job);
        Connector conn = instance.getConnector(principal, token);
        token = conn.securityOperations().getDelegationToken(new DelegationTokenConfig());
      } catch (Exception e) {
        log.warn(
            "Failed to automatically obtain DelegationToken, Mappers/Reducers will likely fail to communicate with Accumulo",
            e);
      }
    }
    // DelegationTokens can be passed securely from user to task without serializing insecurely in
    // the configuration
    if (token instanceof DelegationTokenImpl) {
      DelegationTokenImpl delegationToken = (DelegationTokenImpl) token;

      // Convert it into a Hadoop Token
      AuthenticationTokenIdentifier identifier = delegationToken.getIdentifier();
      Token<AuthenticationTokenIdentifier> hadoopToken =
          new Token<>(
              identifier.getBytes(),
              delegationToken.getPassword(),
              identifier.getKind(),
              delegationToken.getServiceName());

      // Add the Hadoop Token to the Job so it gets serialized and passed along.
      job.getCredentials().addToken(hadoopToken.getService(), hadoopToken);
    }

    InputConfigurator.setConnectorInfo(CLASS, job.getConfiguration(), principal, token);
  }
Example #2
0
 /**
  * Create input listing by invoking an appropriate copy listing implementation. Also add
  * delegation tokens for each path to job's credential store
  *
  * @param job - Handle to job
  * @return Returns the path where the copy listing is created
  * @throws IOException - If any
  */
 protected Path createInputFileListing(Job job) throws IOException {
   Path fileListingPath = getFileListingPath();
   CopyListing copyListing =
       CopyListing.getCopyListing(job.getConfiguration(), job.getCredentials(), inputOptions);
   copyListing.buildListing(fileListingPath, inputOptions);
   LOG.info("Number of paths considered for copy: " + copyListing.getNumberOfPaths());
   LOG.info(
       "Number of bytes considered for copy: "
           + copyListing.getBytesToCopy()
           + " (Actual number of bytes copied depends on whether any files are "
           + "skipped or overwritten.)");
   return fileListingPath;
 }
 private void populateTokens(Job job) {
   // Credentials in the job will not have delegation tokens
   // because security is disabled. Fetch delegation tokens
   // and populate the credential in the job.
   try {
     Credentials ts = job.getCredentials();
     Path p1 = new Path("file1");
     p1 = p1.getFileSystem(job.getConfiguration()).makeQualified(p1);
     Credentials cred = new Credentials();
     TokenCache.obtainTokensForNamenodesInternal(cred, new Path[] {p1}, job.getConfiguration());
     for (Token<? extends TokenIdentifier> t : cred.getAllTokens()) {
       ts.addToken(new Text("Hdfs"), t);
     }
   } catch (IOException e) {
     Assert.fail("Exception " + e);
   }
 }
Example #4
0
  /**
   * Internal method for submitting jobs to the system.
   *
   * <p>The job submission process involves:
   *
   * <ol>
   *   <li>Checking the input and output specifications of the job.
   *   <li>Computing the {@link InputSplit}s for the job.
   *   <li>Setup the requisite accounting information for the {@link DistributedCache} of the job,
   *       if necessary.
   *   <li>Copying the job's jar and configuration to the map-reduce system directory on the
   *       distributed file-system.
   *   <li>Submitting the job to the <code>JobTracker</code> and optionally monitoring it's status.
   * </ol>
   *
   * @param job the configuration to submit
   * @param cluster the handle to the Cluster
   * @throws ClassNotFoundException
   * @throws InterruptedException
   * @throws IOException
   */
  JobStatus submitJobInternal(Job job, Cluster cluster)
      throws ClassNotFoundException, InterruptedException, IOException {

    // validate the jobs output specs
    //// check use new api or not. check output valid or invalid.
    checkSpecs(job);

    //// usually, /tmp/username as staging area.
    Path jobStagingArea = JobSubmissionFiles.getStagingDir(cluster, job.getConfiguration());
    // configure the command line options correctly on the submitting dfs
    Configuration conf = job.getConfiguration();
    InetAddress ip = InetAddress.getLocalHost();
    if (ip != null) {
      submitHostAddress = ip.getHostAddress();
      submitHostName = ip.getHostName();
      conf.set(MRJobConfig.JOB_SUBMITHOST, submitHostName);
      conf.set(MRJobConfig.JOB_SUBMITHOSTADDR, submitHostAddress);
    }
    JobID jobId = submitClient.getNewJobID();
    job.setJobID(jobId);
    Path submitJobDir = new Path(jobStagingArea, jobId.toString());
    JobStatus status = null;
    try {
      conf.set(
          "hadoop.http.filter.initializers",
          "org.apache.hadoop.yarn.server.webproxy.amfilter.AmFilterInitializer");

      //// set mapreduce_job_dir as job staging tmp dir.
      conf.set(MRJobConfig.MAPREDUCE_JOB_DIR, submitJobDir.toString());
      LOG.debug("Configuring job " + jobId + " with " + submitJobDir + " as the submit dir");
      // get delegation token for the dir

      // secret be stored in TokenCache before job be submitted. and get secret during job running.
      //// this is a static method.
      //// secret stored before submission and read during task.
      TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] {submitJobDir}, conf);

      //// get secret keys and store them into TokenCache.
      populateTokenCache(conf, job.getCredentials());

      //// copy lib、files... into submitJobDir
      copyAndConfigureFiles(job, submitJobDir);
      //// change job.xml dir to Path.
      Path submitJobFile = JobSubmissionFiles.getJobConfPath(submitJobDir);

      // Create the splits for the job
      LOG.debug("Creating splits at " + jtFs.makeQualified(submitJobDir));
      //// InputFormat.getSplits()
      int maps = writeSplits(job, submitJobDir);
      conf.setInt(MRJobConfig.NUM_MAPS, maps);
      LOG.info("number of splits:" + maps);

      // write "queue admins of the queue to which job is being submitted"
      // to job file.
      String queue = conf.get(MRJobConfig.QUEUE_NAME, JobConf.DEFAULT_QUEUE_NAME);
      AccessControlList acl = submitClient.getQueueAdmins(queue);
      conf.set(
          toFullPropertyName(queue, QueueACL.ADMINISTER_JOBS.getAclName()), acl.getAclString());

      // removing jobtoken referrals before copying the jobconf to HDFS
      // as the tasks don't need this setting, actually they may break
      // because of it if present as the referral will point to a
      // different job.
      TokenCache.cleanUpTokenReferral(conf);

      // Write job file to submit dir
      writeConf(conf, submitJobFile);

      //
      // Now, actually submit the job (using the submit name)
      //
      printTokens(jobId, job.getCredentials());
      //// find implement usage of submitJob, result: YARNRunner and LocalJobRunner
      status = submitClient.submitJob(jobId, submitJobDir.toString(), job.getCredentials());
      if (status != null) {
        return status;
      } else {
        throw new IOException("Could not launch job");
      }
    } finally {
      if (status == null) {
        LOG.info("Cleaning up the staging area " + submitJobDir);
        if (jtFs != null && submitJobDir != null) jtFs.delete(submitJobDir, true);
      }
    }
  }
Example #5
0
  // configures -files, -libjars and -archives.
  //// default replication is 10
  private void copyAndConfigureFiles(Job job, Path submitJobDir, short replication)
      throws IOException {
    Configuration conf = job.getConfiguration();
    if (!(conf.getBoolean(Job.USED_GENERIC_PARSER, false))) {
      LOG.warn(
          "Use GenericOptionsParser for parsing the arguments. "
              + "Applications should implement Tool for the same.");
    }

    // get all the command line arguments passed in by the user conf
    String files = conf.get("tmpfiles");
    String libjars = conf.get("tmpjars");
    String archives = conf.get("tmparchives");
    //// "mapreduce.job.jar"
    String jobJar = job.getJar();

    //
    // Figure out what fs the JobTracker is using.  Copy the
    // job to it, under a temporary name.  This allows DFS to work,
    // and under the local fs also provides UNIX-like object loading
    // semantics.  (that is, if the job file is deleted right after
    // submission, we can still run the submission to completion)
    //

    // Create a number of filenames in the JobTracker's fs namespace
    LOG.debug("default FileSystem: " + jtFs.getUri());
    if (jtFs.exists(submitJobDir)) {
      throw new IOException(
          "Not submitting job. Job directory "
              + submitJobDir
              + " already exists!! This is unexpected.Please check what's there in"
              + " that directory");
    }
    submitJobDir = jtFs.makeQualified(submitJobDir);
    submitJobDir = new Path(submitJobDir.toUri().getPath());
    FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JOB_DIR_PERMISSION);
    FileSystem.mkdirs(jtFs, submitJobDir, mapredSysPerms);
    Path filesDir = JobSubmissionFiles.getJobDistCacheFiles(submitJobDir);
    Path archivesDir = JobSubmissionFiles.getJobDistCacheArchives(submitJobDir);
    Path libjarsDir = JobSubmissionFiles.getJobDistCacheLibjars(submitJobDir);
    // add all the command line files/ jars and archive
    // first copy them to jobtrackers filesystem

    if (files != null) {
      FileSystem.mkdirs(jtFs, filesDir, mapredSysPerms);
      String[] fileArr = files.split(",");
      for (String tmpFile : fileArr) {
        URI tmpURI = null;
        try {
          tmpURI = new URI(tmpFile);
        } catch (URISyntaxException e) {
          throw new IllegalArgumentException(e);
        }
        Path tmp = new Path(tmpURI);
        Path newPath = copyRemoteFiles(filesDir, tmp, conf, replication);
        try {
          URI pathURI = getPathURI(newPath, tmpURI.getFragment());
          DistributedCache.addCacheFile(pathURI, conf);
        } catch (URISyntaxException ue) {
          // should not throw a uri exception
          throw new IOException("Failed to create uri for " + tmpFile, ue);
        }
        DistributedCache.createSymlink(conf);
      }
    }

    if (libjars != null) {
      FileSystem.mkdirs(jtFs, libjarsDir, mapredSysPerms);
      String[] libjarsArr = libjars.split(",");
      for (String tmpjars : libjarsArr) {
        Path tmp = new Path(tmpjars);
        Path newPath = copyRemoteFiles(libjarsDir, tmp, conf, replication);
        DistributedCache.addFileToClassPath(new Path(newPath.toUri().getPath()), conf);
      }
    }

    if (archives != null) {
      FileSystem.mkdirs(jtFs, archivesDir, mapredSysPerms);
      String[] archivesArr = archives.split(",");
      for (String tmpArchives : archivesArr) {
        URI tmpURI;
        try {
          tmpURI = new URI(tmpArchives);
        } catch (URISyntaxException e) {
          throw new IllegalArgumentException(e);
        }
        Path tmp = new Path(tmpURI);
        Path newPath = copyRemoteFiles(archivesDir, tmp, conf, replication);
        try {
          URI pathURI = getPathURI(newPath, tmpURI.getFragment());
          DistributedCache.addCacheArchive(pathURI, conf);
        } catch (URISyntaxException ue) {
          // should not throw an uri excpetion
          throw new IOException("Failed to create uri for " + tmpArchives, ue);
        }
        DistributedCache.createSymlink(conf);
      }
    }

    if (jobJar != null) { // copy jar to JobTracker's fs
      // use jar name if job is not named.
      if ("".equals(job.getJobName())) {
        job.setJobName(new Path(jobJar).getName());
      }
      copyJar(new Path(jobJar), JobSubmissionFiles.getJobJar(submitJobDir), replication);
      job.setJar(JobSubmissionFiles.getJobJar(submitJobDir).toString());
    } else {
      LOG.warn(
          "No job jar file set.  User classes may not be found. "
              + "See Job or Job#setJar(String).");
    }

    //  set the timestamps of the archives and files
    ClientDistributedCacheManager.determineTimestamps(conf);
    //  set the public/private visibility of the archives and files
    ClientDistributedCacheManager.determineCacheVisibilities(conf);
    // get DelegationToken for each cached file
    ClientDistributedCacheManager.getDelegationTokens(conf, job.getCredentials());
  }