/**
 * Sets the connector information needed to communicate with Accumulo in this job.
 *
 * <p><b>WARNING:</b> Some tokens, when serialized, divulge sensitive information in the
 * configuration as a means to pass the token to MapReduce tasks. This information is BASE64
 * encoded to provide a charset safe conversion to a string, but this conversion is not intended
 * to be secure. {@link PasswordToken} is one example that is insecure in this way; however,
 * {@link DelegationToken}s, acquired using
 * {@link SecurityOperations#getDelegationToken(DelegationTokenConfig)}, are not subject to this
 * concern.
 *
 * @param job the Hadoop job instance to be configured
 * @param principal a valid Accumulo user name (user must have Table.CREATE permission)
 * @param token the user's {@link AuthenticationToken}
 * @since 1.5.0
 */
public static void setConnectorInfo(Job job, String principal, AuthenticationToken token)
    throws AccumuloSecurityException {
  if (token instanceof KerberosToken) {
    log.info("Received KerberosToken, attempting to fetch DelegationToken");
    try {
      Instance instance = getInstance(job);
      Connector conn = instance.getConnector(principal, token);
      token = conn.securityOperations().getDelegationToken(new DelegationTokenConfig());
    } catch (Exception e) {
      log.warn(
          "Failed to automatically obtain DelegationToken, "
              + "Mappers/Reducers will likely fail to communicate with Accumulo",
          e);
    }
  }
  // DelegationTokens can be passed securely from user to task without serializing insecurely in
  // the configuration
  if (token instanceof DelegationTokenImpl) {
    DelegationTokenImpl delegationToken = (DelegationTokenImpl) token;
    // Convert it into a Hadoop Token
    AuthenticationTokenIdentifier identifier = delegationToken.getIdentifier();
    Token<AuthenticationTokenIdentifier> hadoopToken = new Token<>(
        identifier.getBytes(), delegationToken.getPassword(), identifier.getKind(),
        delegationToken.getServiceName());
    // Add the Hadoop Token to the Job so it gets serialized and passed along.
    job.getCredentials().addToken(hadoopToken.getService(), hadoopToken);
  }
  InputConfigurator.setConnectorInfo(CLASS, job.getConfiguration(), principal, token);
}
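A minimal driver sketch may help show how this method is typically invoked. The snippet below assumes Accumulo's 1.6/1.7-era mapreduce API; the driver class, principal, and table names are placeholders, and the ZooKeeper instance call is shown only as one plausible way to satisfy getInstance(job).

// Hypothetical driver fragment (class, principal and table names are placeholders).
Job job = Job.getInstance(new Configuration(), "accumulo-scan-example");
job.setJarByClass(ExampleDriver.class);              // placeholder driver class
job.setInputFormatClass(AccumuloInputFormat.class);
AccumuloInputFormat.setZooKeeperInstance(job, ClientConfiguration.loadDefault());
// With Kerberos enabled, setConnectorInfo() above swaps the KerberosToken for a
// DelegationToken and stores it in the job's Credentials rather than the configuration.
AccumuloInputFormat.setConnectorInfo(job, "reader@EXAMPLE.COM", new KerberosToken());
AccumuloInputFormat.setInputTableName(job, "example_table");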
/**
 * Creates the input listing by invoking an appropriate copy listing implementation. Also adds
 * delegation tokens for each path to the job's credential store.
 *
 * @param job - Handle to job
 * @return Returns the path where the copy listing is created
 * @throws IOException - If any
 */
protected Path createInputFileListing(Job job) throws IOException {
  Path fileListingPath = getFileListingPath();
  CopyListing copyListing =
      CopyListing.getCopyListing(job.getConfiguration(), job.getCredentials(), inputOptions);
  copyListing.buildListing(fileListingPath, inputOptions);
  LOG.info("Number of paths considered for copy: " + copyListing.getNumberOfPaths());
  LOG.info("Number of bytes considered for copy: " + copyListing.getBytesToCopy()
      + " (Actual number of bytes copied depends on whether any files are "
      + "skipped or overwritten.)");
  return fileListingPath;
}
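For context, the sketch below shows one way this method ends up being exercised: constructing DistCp programmatically and calling execute(). It assumes the Hadoop 2.x-era DistCpOptions constructor (later releases use a builder); the namenode addresses and paths are placeholders.

// Hypothetical usage (paths and namenode addresses are placeholders).
public static Job runCopy() throws Exception {
  Configuration conf = new Configuration();
  List<Path> sources = Collections.singletonList(new Path("hdfs://nn1:8020/data/in"));
  Path target = new Path("hdfs://nn2:8020/data/out");
  DistCpOptions options = new DistCpOptions(sources, target); // 2.x-era constructor
  DistCp distCp = new DistCp(conf, options);
  // execute() builds the copy listing (via createInputFileListing) and submits the copy job
  return distCp.execute();
}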
private void populateTokens(Job job) {
  // Credentials in the job will not have delegation tokens
  // because security is disabled. Fetch delegation tokens
  // and populate the credentials in the job.
  try {
    Credentials ts = job.getCredentials();
    Path p1 = new Path("file1");
    p1 = p1.getFileSystem(job.getConfiguration()).makeQualified(p1);
    Credentials cred = new Credentials();
    TokenCache.obtainTokensForNamenodesInternal(cred, new Path[] {p1}, job.getConfiguration());
    for (Token<? extends TokenIdentifier> t : cred.getAllTokens()) {
      ts.addToken(new Text("Hdfs"), t);
    }
  } catch (IOException e) {
    Assert.fail("Exception " + e);
  }
}
/**
 * Internal method for submitting jobs to the system.
 *
 * <p>The job submission process involves:
 *
 * <ol>
 *   <li>Checking the input and output specifications of the job.
 *   <li>Computing the {@link InputSplit}s for the job.
 *   <li>Setting up the requisite accounting information for the {@link DistributedCache} of the
 *       job, if necessary.
 *   <li>Copying the job's jar and configuration to the map-reduce system directory on the
 *       distributed file-system.
 *   <li>Submitting the job to the <code>JobTracker</code> and optionally monitoring its status.
 * </ol>
 *
 * @param job the configuration to submit
 * @param cluster the handle to the Cluster
 * @throws ClassNotFoundException
 * @throws InterruptedException
 * @throws IOException
 */
JobStatus submitJobInternal(Job job, Cluster cluster)
    throws ClassNotFoundException, InterruptedException, IOException {

  // validate the job's output specs
  //// checks whether the new or old API is in use and whether the output spec is valid
  checkSpecs(job);

  //// the staging area is usually /tmp/username on the cluster file system
  Path jobStagingArea = JobSubmissionFiles.getStagingDir(cluster, job.getConfiguration());

  // configure the command line options correctly on the submitting dfs
  Configuration conf = job.getConfiguration();
  InetAddress ip = InetAddress.getLocalHost();
  if (ip != null) {
    submitHostAddress = ip.getHostAddress();
    submitHostName = ip.getHostName();
    conf.set(MRJobConfig.JOB_SUBMITHOST, submitHostName);
    conf.set(MRJobConfig.JOB_SUBMITHOSTADDR, submitHostAddress);
  }
  JobID jobId = submitClient.getNewJobID();
  job.setJobID(jobId);
  Path submitJobDir = new Path(jobStagingArea, jobId.toString());
  JobStatus status = null;
  try {
    conf.set(
        "hadoop.http.filter.initializers",
        "org.apache.hadoop.yarn.server.webproxy.amfilter.AmFilterInitializer");
    //// record submitJobDir (staging area + job id) as the job's staging directory
    conf.set(MRJobConfig.MAPREDUCE_JOB_DIR, submitJobDir.toString());
    LOG.debug("Configuring job " + jobId + " with " + submitJobDir + " as the submit dir");

    // get delegation token for the dir
    //// static helper: secrets are stored in the TokenCache before the job is submitted
    //// and read back by the tasks while the job runs
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] {submitJobDir}, conf);

    //// fetch user-provided secret keys and store them in the TokenCache as well
    populateTokenCache(conf, job.getCredentials());

    //// copy -files, -libjars, -archives and the job jar into submitJobDir
    copyAndConfigureFiles(job, submitJobDir);

    //// path of job.xml inside the submit directory
    Path submitJobFile = JobSubmissionFiles.getJobConfPath(submitJobDir);

    // Create the splits for the job
    LOG.debug("Creating splits at " + jtFs.makeQualified(submitJobDir));
    //// delegates to InputFormat.getSplits()
    int maps = writeSplits(job, submitJobDir);
    conf.setInt(MRJobConfig.NUM_MAPS, maps);
    LOG.info("number of splits:" + maps);

    // write "queue admins of the queue to which job is being submitted"
    // to job file.
    String queue = conf.get(MRJobConfig.QUEUE_NAME, JobConf.DEFAULT_QUEUE_NAME);
    AccessControlList acl = submitClient.getQueueAdmins(queue);
    conf.set(
        toFullPropertyName(queue, QueueACL.ADMINISTER_JOBS.getAclName()), acl.getAclString());

    // removing jobtoken referrals before copying the jobconf to HDFS
    // as the tasks don't need this setting, actually they may break
    // because of it if present as the referral will point to a
    // different job.
    TokenCache.cleanUpTokenReferral(conf);

    // Write job file to submit dir
    writeConf(conf, submitJobFile);

    //
    // Now, actually submit the job (using the submit name)
    //
    printTokens(jobId, job.getCredentials());
    //// submitClient.submitJob() is implemented by YARNRunner and LocalJobRunner
    status = submitClient.submitJob(jobId, submitJobDir.toString(), job.getCredentials());
    if (status != null) {
      return status;
    } else {
      throw new IOException("Could not launch job");
    }
  } finally {
    if (status == null) {
      LOG.info("Cleaning up the staging area " + submitJobDir);
      if (jtFs != null && submitJobDir != null) jtFs.delete(submitJobDir, true);
    }
  }
}
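For reference, a standard driver like the one sketched below reaches submitJobInternal() through Job.waitForCompletion(), which calls Job.submit() and, via JobSubmitter, this method. The driver, mapper, and reducer class names are placeholders.

// Hypothetical word-count style driver (WordCountDriver, TokenizerMapper and IntSumReducer
// are placeholder classes, not defined in the source above).
public static void main(String[] args) throws Exception {
  Job job = Job.getInstance(new Configuration(), "word-count-example");
  job.setJarByClass(WordCountDriver.class);
  job.setMapperClass(TokenizerMapper.class);
  job.setReducerClass(IntSumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  // waitForCompletion(true) submits the job (ultimately via submitJobInternal) and
  // monitors it until completion
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}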
// configures -files, -libjars and -archives.
//// default replication is 10
private void copyAndConfigureFiles(Job job, Path submitJobDir, short replication)
    throws IOException {
  Configuration conf = job.getConfiguration();
  if (!(conf.getBoolean(Job.USED_GENERIC_PARSER, false))) {
    LOG.warn(
        "Use GenericOptionsParser for parsing the arguments. "
            + "Applications should implement Tool for the same.");
  }

  // get all the command line arguments passed in by the user conf
  String files = conf.get("tmpfiles");
  String libjars = conf.get("tmpjars");
  String archives = conf.get("tmparchives");
  //// "mapreduce.job.jar"
  String jobJar = job.getJar();

  //
  // Figure out what fs the JobTracker is using. Copy the
  // job to it, under a temporary name. This allows DFS to work,
  // and under the local fs also provides UNIX-like object loading
  // semantics. (that is, if the job file is deleted right after
  // submission, we can still run the submission to completion)
  //

  // Create a number of filenames in the JobTracker's fs namespace
  LOG.debug("default FileSystem: " + jtFs.getUri());
  if (jtFs.exists(submitJobDir)) {
    throw new IOException(
        "Not submitting job. Job directory " + submitJobDir
            + " already exists!! This is unexpected. Please check what's there in"
            + " that directory");
  }
  submitJobDir = jtFs.makeQualified(submitJobDir);
  submitJobDir = new Path(submitJobDir.toUri().getPath());
  FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JOB_DIR_PERMISSION);
  FileSystem.mkdirs(jtFs, submitJobDir, mapredSysPerms);
  Path filesDir = JobSubmissionFiles.getJobDistCacheFiles(submitJobDir);
  Path archivesDir = JobSubmissionFiles.getJobDistCacheArchives(submitJobDir);
  Path libjarsDir = JobSubmissionFiles.getJobDistCacheLibjars(submitJobDir);

  // add all the command line files/jars and archives;
  // first copy them to the JobTracker's filesystem
  if (files != null) {
    FileSystem.mkdirs(jtFs, filesDir, mapredSysPerms);
    String[] fileArr = files.split(",");
    for (String tmpFile : fileArr) {
      URI tmpURI = null;
      try {
        tmpURI = new URI(tmpFile);
      } catch (URISyntaxException e) {
        throw new IllegalArgumentException(e);
      }
      Path tmp = new Path(tmpURI);
      Path newPath = copyRemoteFiles(filesDir, tmp, conf, replication);
      try {
        URI pathURI = getPathURI(newPath, tmpURI.getFragment());
        DistributedCache.addCacheFile(pathURI, conf);
      } catch (URISyntaxException ue) {
        // should not throw a URI exception
        throw new IOException("Failed to create uri for " + tmpFile, ue);
      }
      DistributedCache.createSymlink(conf);
    }
  }

  if (libjars != null) {
    FileSystem.mkdirs(jtFs, libjarsDir, mapredSysPerms);
    String[] libjarsArr = libjars.split(",");
    for (String tmpjars : libjarsArr) {
      Path tmp = new Path(tmpjars);
      Path newPath = copyRemoteFiles(libjarsDir, tmp, conf, replication);
      DistributedCache.addFileToClassPath(new Path(newPath.toUri().getPath()), conf);
    }
  }

  if (archives != null) {
    FileSystem.mkdirs(jtFs, archivesDir, mapredSysPerms);
    String[] archivesArr = archives.split(",");
    for (String tmpArchives : archivesArr) {
      URI tmpURI;
      try {
        tmpURI = new URI(tmpArchives);
      } catch (URISyntaxException e) {
        throw new IllegalArgumentException(e);
      }
      Path tmp = new Path(tmpURI);
      Path newPath = copyRemoteFiles(archivesDir, tmp, conf, replication);
      try {
        URI pathURI = getPathURI(newPath, tmpURI.getFragment());
        DistributedCache.addCacheArchive(pathURI, conf);
      } catch (URISyntaxException ue) {
        // should not throw a URI exception
        throw new IOException("Failed to create uri for " + tmpArchives, ue);
      }
      DistributedCache.createSymlink(conf);
    }
  }

  if (jobJar != null) { // copy jar to the JobTracker's fs
    // use jar name if job is not named.
    if ("".equals(job.getJobName())) {
      job.setJobName(new Path(jobJar).getName());
    }
    copyJar(new Path(jobJar), JobSubmissionFiles.getJobJar(submitJobDir), replication);
    job.setJar(JobSubmissionFiles.getJobJar(submitJobDir).toString());
  } else {
    LOG.warn(
        "No job jar file set. User classes may not be found. "
            + "See Job or Job#setJar(String).");
  }

  // set the timestamps of the archives and files
  ClientDistributedCacheManager.determineTimestamps(conf);
  // set the public/private visibility of the archives and files
  ClientDistributedCacheManager.determineCacheVisibilities(conf);
  // get a DelegationToken for each cached file
  ClientDistributedCacheManager.getDelegationTokens(conf, job.getCredentials());
}
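Since the method warns when GenericOptionsParser was not used, the sketch below shows the recommended pattern: implement Tool and launch through ToolRunner, so that -files, -libjars and -archives are parsed into the tmpfiles/tmpjars/tmparchives properties read above. The class and file names are placeholders, and the mapper/reducer setup is elided.

// Hypothetical Tool-based driver (CacheAwareDriver and the listed files are placeholders).
public class CacheAwareDriver extends Configured implements Tool {
  @Override
  public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf(), "cache-aware-example");
    job.setJarByClass(CacheAwareDriver.class);
    // mapper/reducer/input/output configuration elided for brevity
    return job.waitForCompletion(true) ? 0 : 1;
  }

  public static void main(String[] args) throws Exception {
    // Example invocation:
    //   hadoop jar app.jar CacheAwareDriver \
    //       -files lookup.txt -libjars extra.jar -archives ref-data.zip \
    //       /input /output
    System.exit(ToolRunner.run(new Configuration(), new CacheAwareDriver(), args));
  }
}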