/** * This is the typical flow for using the DistributedCache classes. * * @throws IOException * @throws LoginException */ public void testManagerFlow() throws IOException, LoginException { if (!canRun()) { return; } // ****** Imitate JobClient code // Configures a task/job with both a regular file and a "classpath" file. Configuration subConf = new Configuration(conf); String userName = getJobOwnerName(); subConf.set("user.name", userName); JobID jobid = new JobID("jt", 1); DistributedCache.addCacheFile(firstCacheFile.toUri(), subConf); DistributedCache.addFileToClassPath(secondCacheFile, subConf, FileSystem.get(subConf)); TrackerDistributedCacheManager.determineTimestamps(subConf); TrackerDistributedCacheManager.determineCacheVisibilities(subConf); // ****** End of imitating JobClient code Path jobFile = new Path(TEST_ROOT_DIR, "job.xml"); FileOutputStream os = new FileOutputStream(new File(jobFile.toString())); subConf.writeXml(os); os.close(); // ****** Imitate TaskRunner code. TrackerDistributedCacheManager manager = new TrackerDistributedCacheManager(conf, taskController); TaskDistributedCacheManager handle = manager.newTaskDistributedCacheManager(jobid, subConf); assertNull(null, DistributedCache.getLocalCacheFiles(subConf)); File workDir = new File(new Path(TEST_ROOT_DIR, "workdir").toString()); handle.setupCache( subConf, TaskTracker.getPublicDistributedCacheDir(), TaskTracker.getPrivateDistributedCacheDir(userName)); JobLocalizer.downloadPrivateCache(subConf); // DOESN'T ACTUALLY HAPPEN IN THE TaskRunner (THIS IS A TODO) // handle.setupPrivateCache(localDirAllocator, TaskTracker // .getPrivateDistributedCacheDir(userName)); // // ****** End of imitating TaskRunner code Path[] localCacheFiles = DistributedCache.getLocalCacheFiles(subConf); assertNotNull(null, localCacheFiles); assertEquals(2, localCacheFiles.length); Path cachedFirstFile = localCacheFiles[0]; Path cachedSecondFile = localCacheFiles[1]; assertFileLengthEquals(firstCacheFile, cachedFirstFile); assertFalse("Paths should be different.", firstCacheFile.equals(cachedFirstFile)); assertEquals(1, handle.getClassPaths().size()); assertEquals(cachedSecondFile.toString(), handle.getClassPaths().get(0)); checkFilePermissions(localCacheFiles); // Cleanup handle.release(); manager.purgeCache(); assertFalse(pathToFile(cachedFirstFile).exists()); }
static List<String> getClassPaths( JobConf conf, File workDir, TaskDistributedCacheManager taskDistributedCacheManager) throws IOException { // Accumulates class paths for child. List<String> classPaths = new ArrayList<String>(); boolean userClassesTakesPrecedence = conf.userClassesTakesPrecedence(); if (!userClassesTakesPrecedence) { // start with same classpath as parent process appendSystemClasspaths(classPaths); } // include the user specified classpath appendJobJarClasspaths(conf.getJar(), classPaths); // Distributed cache paths if (taskDistributedCacheManager != null) classPaths.addAll(taskDistributedCacheManager.getClassPaths()); // Include the working dir too classPaths.add(workDir.toString()); if (userClassesTakesPrecedence) { // parent process's classpath is added last appendSystemClasspaths(classPaths); } return classPaths; }
public Job(JobID jobid, String jobSubmitDir) throws IOException { this.systemJobDir = new Path(jobSubmitDir); this.systemJobFile = new Path(systemJobDir, "job.xml"); this.id = jobid; this.localFs = FileSystem.getLocal(conf); this.localJobDir = localFs.makeQualified(conf.getLocalPath(jobDir)); this.localJobFile = new Path(this.localJobDir, id + ".xml"); // Manage the distributed cache. If there are files to be copied, // this will trigger localFile to be re-written again. this.trackerDistributedCacheManager = new TrackerDistributedCacheManager(conf, taskController); this.taskDistributedCacheManager = trackerDistributedCacheManager.newTaskDistributedCacheManager(jobid, conf); taskDistributedCacheManager.setupCache(conf, "archive", "archive"); if (DistributedCache.getSymlink(conf)) { // This is not supported largely because, // for a Child subprocess, the cwd in LocalJobRunner // is not a fresh slate, but rather the user's working directory. // This is further complicated because the logic in // setupWorkDir only creates symlinks if there's a jarfile // in the configuration. LOG.warn("LocalJobRunner does not support " + "symlinking into current working dir."); } // Setup the symlinks for the distributed cache. TaskRunner.setupWorkDir(conf, new File(localJobDir.toUri()).getAbsoluteFile()); // Write out configuration file. Instead of copying it from // systemJobFile, we re-write it, since setup(), above, may have // updated it. OutputStream out = localFs.create(localJobFile); try { conf.writeXml(out); } finally { out.close(); } this.job = new JobConf(localJobFile); // Job (the current object) is a Thread, so we wrap its class loader. if (!taskDistributedCacheManager.getClassPaths().isEmpty()) { setContextClassLoader(taskDistributedCacheManager.makeClassLoader(getContextClassLoader())); } profile = new JobProfile( job.getUser(), id, systemJobFile.toString(), "http://localhost:8080/", job.getJobName()); status = new JobStatus(id, 0.0f, 0.0f, JobStatus.RUNNING); jobs.put(id, this); this.start(); }
private static List<String> getClassPaths( JobConf conf, File workDir, TaskDistributedCacheManager taskDistributedCacheManager) throws IOException { // Accumulates class paths for child. List<String> classPaths = new ArrayList<String>(); // start with same classpath as parent process appendSystemClasspaths(classPaths); // include the user specified classpath appendJobJarClasspaths(conf.getJar(), classPaths); // Distributed cache paths classPaths.addAll(taskDistributedCacheManager.getClassPaths()); // Include the working dir too classPaths.add(workDir.toString()); return classPaths; }