// configures -files, -libjars and -archives. //// default replication is 10 private void copyAndConfigureFiles(Job job, Path submitJobDir, short replication) throws IOException { Configuration conf = job.getConfiguration(); if (!(conf.getBoolean(Job.USED_GENERIC_PARSER, false))) { LOG.warn( "Use GenericOptionsParser for parsing the arguments. " + "Applications should implement Tool for the same."); } // get all the command line arguments passed in by the user conf String files = conf.get("tmpfiles"); String libjars = conf.get("tmpjars"); String archives = conf.get("tmparchives"); //// "mapreduce.job.jar" String jobJar = job.getJar(); // // Figure out what fs the JobTracker is using. Copy the // job to it, under a temporary name. This allows DFS to work, // and under the local fs also provides UNIX-like object loading // semantics. (that is, if the job file is deleted right after // submission, we can still run the submission to completion) // // Create a number of filenames in the JobTracker's fs namespace LOG.debug("default FileSystem: " + jtFs.getUri()); if (jtFs.exists(submitJobDir)) { throw new IOException( "Not submitting job. Job directory " + submitJobDir + " already exists!! This is unexpected.Please check what's there in" + " that directory"); } submitJobDir = jtFs.makeQualified(submitJobDir); submitJobDir = new Path(submitJobDir.toUri().getPath()); FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JOB_DIR_PERMISSION); FileSystem.mkdirs(jtFs, submitJobDir, mapredSysPerms); Path filesDir = JobSubmissionFiles.getJobDistCacheFiles(submitJobDir); Path archivesDir = JobSubmissionFiles.getJobDistCacheArchives(submitJobDir); Path libjarsDir = JobSubmissionFiles.getJobDistCacheLibjars(submitJobDir); // add all the command line files/ jars and archive // first copy them to jobtrackers filesystem if (files != null) { FileSystem.mkdirs(jtFs, filesDir, mapredSysPerms); String[] fileArr = files.split(","); for (String tmpFile : fileArr) { URI tmpURI = null; try { tmpURI = new URI(tmpFile); } catch (URISyntaxException e) { throw new IllegalArgumentException(e); } Path tmp = new Path(tmpURI); Path newPath = copyRemoteFiles(filesDir, tmp, conf, replication); try { URI pathURI = getPathURI(newPath, tmpURI.getFragment()); DistributedCache.addCacheFile(pathURI, conf); } catch (URISyntaxException ue) { // should not throw a uri exception throw new IOException("Failed to create uri for " + tmpFile, ue); } DistributedCache.createSymlink(conf); } } if (libjars != null) { FileSystem.mkdirs(jtFs, libjarsDir, mapredSysPerms); String[] libjarsArr = libjars.split(","); for (String tmpjars : libjarsArr) { Path tmp = new Path(tmpjars); Path newPath = copyRemoteFiles(libjarsDir, tmp, conf, replication); DistributedCache.addFileToClassPath(new Path(newPath.toUri().getPath()), conf); } } if (archives != null) { FileSystem.mkdirs(jtFs, archivesDir, mapredSysPerms); String[] archivesArr = archives.split(","); for (String tmpArchives : archivesArr) { URI tmpURI; try { tmpURI = new URI(tmpArchives); } catch (URISyntaxException e) { throw new IllegalArgumentException(e); } Path tmp = new Path(tmpURI); Path newPath = copyRemoteFiles(archivesDir, tmp, conf, replication); try { URI pathURI = getPathURI(newPath, tmpURI.getFragment()); DistributedCache.addCacheArchive(pathURI, conf); } catch (URISyntaxException ue) { // should not throw an uri excpetion throw new IOException("Failed to create uri for " + tmpArchives, ue); } DistributedCache.createSymlink(conf); } } if (jobJar != null) { // copy jar to JobTracker's fs // use jar name if job is not named. if ("".equals(job.getJobName())) { job.setJobName(new Path(jobJar).getName()); } copyJar(new Path(jobJar), JobSubmissionFiles.getJobJar(submitJobDir), replication); job.setJar(JobSubmissionFiles.getJobJar(submitJobDir).toString()); } else { LOG.warn( "No job jar file set. User classes may not be found. " + "See Job or Job#setJar(String)."); } // set the timestamps of the archives and files ClientDistributedCacheManager.determineTimestamps(conf); // set the public/private visibility of the archives and files ClientDistributedCacheManager.determineCacheVisibilities(conf); // get DelegationToken for each cached file ClientDistributedCacheManager.getDelegationTokens(conf, job.getCredentials()); }