private void checkLocalizedPath(boolean visibility) throws IOException, LoginException, InterruptedException { TrackerDistributedCacheManager manager = new TrackerDistributedCacheManager(conf, taskController); String userName = getJobOwnerName(); File workDir = new File(TEST_ROOT_DIR, "workdir"); Path cacheFile = new Path(TEST_ROOT_DIR, "fourthcachefile"); if (visibility) { createPublicTempFile(cacheFile); } else { createPrivateTempFile(cacheFile); } Configuration conf1 = new Configuration(conf); conf1.set("user.name", userName); DistributedCache.addCacheFile(cacheFile.toUri(), conf1); TrackerDistributedCacheManager.determineTimestamps(conf1); TrackerDistributedCacheManager.determineCacheVisibilities(conf1); dumpState(conf1); // Task localizing for job TaskDistributedCacheManager handle = manager.newTaskDistributedCacheManager(new JobID("jt", 1), conf1); handle.setupCache( conf1, TaskTracker.getPublicDistributedCacheDir(), TaskTracker.getPrivateDistributedCacheDir(userName)); JobLocalizer.downloadPrivateCache(conf1); TaskDistributedCacheManager.CacheFile c = handle.getCacheFiles().get(0); String distCacheDir; if (visibility) { distCacheDir = TaskTracker.getPublicDistributedCacheDir(); } else { distCacheDir = TaskTracker.getPrivateDistributedCacheDir(userName); } Path localizedPath = manager.getLocalCache( cacheFile.toUri(), conf1, distCacheDir, fs.getFileStatus(cacheFile), false, c.timestamp, visibility, c); assertTrue( "Cache file didn't get localized in the expected directory. " + "Expected localization to happen within " + ROOT_MAPRED_LOCAL_DIR + "/" + distCacheDir + ", but was localized at " + localizedPath, localizedPath.toString().contains(distCacheDir)); if (visibility) { checkPublicFilePermissions(new Path[] {localizedPath}); } else { checkFilePermissions(new Path[] {localizedPath}); } }
public Job(JobID jobid, String jobSubmitDir) throws IOException { this.systemJobDir = new Path(jobSubmitDir); this.systemJobFile = new Path(systemJobDir, "job.xml"); this.id = jobid; this.localFs = FileSystem.getLocal(conf); this.localJobDir = localFs.makeQualified(conf.getLocalPath(jobDir)); this.localJobFile = new Path(this.localJobDir, id + ".xml"); // Manage the distributed cache. If there are files to be copied, // this will trigger localFile to be re-written again. this.trackerDistributedCacheManager = new TrackerDistributedCacheManager(conf, taskController); this.taskDistributedCacheManager = trackerDistributedCacheManager.newTaskDistributedCacheManager(jobid, conf); taskDistributedCacheManager.setupCache(conf, "archive", "archive"); if (DistributedCache.getSymlink(conf)) { // This is not supported largely because, // for a Child subprocess, the cwd in LocalJobRunner // is not a fresh slate, but rather the user's working directory. // This is further complicated because the logic in // setupWorkDir only creates symlinks if there's a jarfile // in the configuration. LOG.warn("LocalJobRunner does not support " + "symlinking into current working dir."); } // Setup the symlinks for the distributed cache. TaskRunner.setupWorkDir(conf, new File(localJobDir.toUri()).getAbsoluteFile()); // Write out configuration file. Instead of copying it from // systemJobFile, we re-write it, since setup(), above, may have // updated it. OutputStream out = localFs.create(localJobFile); try { conf.writeXml(out); } finally { out.close(); } this.job = new JobConf(localJobFile); // Job (the current object) is a Thread, so we wrap its class loader. if (!taskDistributedCacheManager.getClassPaths().isEmpty()) { setContextClassLoader(taskDistributedCacheManager.makeClassLoader(getContextClassLoader())); } profile = new JobProfile( job.getUser(), id, systemJobFile.toString(), "http://localhost:8080/", job.getJobName()); status = new JobStatus(id, 0.0f, 0.0f, JobStatus.RUNNING); jobs.put(id, this); this.start(); }
/** * This is the typical flow for using the DistributedCache classes. * * @throws IOException * @throws LoginException */ public void testManagerFlow() throws IOException, LoginException { if (!canRun()) { return; } // ****** Imitate JobClient code // Configures a task/job with both a regular file and a "classpath" file. Configuration subConf = new Configuration(conf); String userName = getJobOwnerName(); subConf.set("user.name", userName); JobID jobid = new JobID("jt", 1); DistributedCache.addCacheFile(firstCacheFile.toUri(), subConf); DistributedCache.addFileToClassPath(secondCacheFile, subConf, FileSystem.get(subConf)); TrackerDistributedCacheManager.determineTimestamps(subConf); TrackerDistributedCacheManager.determineCacheVisibilities(subConf); // ****** End of imitating JobClient code Path jobFile = new Path(TEST_ROOT_DIR, "job.xml"); FileOutputStream os = new FileOutputStream(new File(jobFile.toString())); subConf.writeXml(os); os.close(); // ****** Imitate TaskRunner code. TrackerDistributedCacheManager manager = new TrackerDistributedCacheManager(conf, taskController); TaskDistributedCacheManager handle = manager.newTaskDistributedCacheManager(jobid, subConf); assertNull(null, DistributedCache.getLocalCacheFiles(subConf)); File workDir = new File(new Path(TEST_ROOT_DIR, "workdir").toString()); handle.setupCache( subConf, TaskTracker.getPublicDistributedCacheDir(), TaskTracker.getPrivateDistributedCacheDir(userName)); JobLocalizer.downloadPrivateCache(subConf); // DOESN'T ACTUALLY HAPPEN IN THE TaskRunner (THIS IS A TODO) // handle.setupPrivateCache(localDirAllocator, TaskTracker // .getPrivateDistributedCacheDir(userName)); // // ****** End of imitating TaskRunner code Path[] localCacheFiles = DistributedCache.getLocalCacheFiles(subConf); assertNotNull(null, localCacheFiles); assertEquals(2, localCacheFiles.length); Path cachedFirstFile = localCacheFiles[0]; Path cachedSecondFile = localCacheFiles[1]; assertFileLengthEquals(firstCacheFile, cachedFirstFile); assertFalse("Paths should be different.", firstCacheFile.equals(cachedFirstFile)); assertEquals(1, handle.getClassPaths().size()); assertEquals(cachedSecondFile.toString(), handle.getClassPaths().get(0)); checkFilePermissions(localCacheFiles); // Cleanup handle.release(); manager.purgeCache(); assertFalse(pathToFile(cachedFirstFile).exists()); }
public void testFreshness() throws Exception { if (!canRun()) { return; } Configuration myConf = new Configuration(conf); myConf.set("fs.default.name", "refresh:///"); myConf.setClass("fs.refresh.impl", FakeFileSystem.class, FileSystem.class); String userName = getJobOwnerName(); TrackerDistributedCacheManager manager = new TrackerDistributedCacheManager(myConf, taskController); // ****** Imitate JobClient code // Configures a task/job with both a regular file and a "classpath" file. Configuration subConf = new Configuration(myConf); subConf.set("user.name", userName); DistributedCache.addCacheFile(firstCacheFile.toUri(), subConf); TrackerDistributedCacheManager.determineTimestamps(subConf); TrackerDistributedCacheManager.determineCacheVisibilities(subConf); // ****** End of imitating JobClient code // ****** Imitate TaskRunner code. TaskDistributedCacheManager handle = manager.newTaskDistributedCacheManager(new JobID("jt", 1), subConf); assertNull(null, DistributedCache.getLocalCacheFiles(subConf)); File workDir = new File(new Path(TEST_ROOT_DIR, "workdir").toString()); handle.setupCache( subConf, TaskTracker.getPublicDistributedCacheDir(), TaskTracker.getPrivateDistributedCacheDir(userName)); // TODO this doesn't really happen in the TaskRunner // handle.setupPrivateCache(localDirAllocator, TaskTracker // .getPrivateDistributedCacheDir(userName)); // ****** End of imitating TaskRunner code Path[] localCacheFiles = DistributedCache.getLocalCacheFiles(subConf); assertNotNull(null, localCacheFiles); assertEquals(1, localCacheFiles.length); Path cachedFirstFile = localCacheFiles[0]; assertFileLengthEquals(firstCacheFile, cachedFirstFile); assertFalse("Paths should be different.", firstCacheFile.equals(cachedFirstFile)); // release handle.release(); // change the file timestamp FileSystem fs = FileSystem.get(myConf); ((FakeFileSystem) fs).advanceClock(1); // running a task of the same job Throwable th = null; try { handle.setupCache( subConf, TaskTracker.getPublicDistributedCacheDir(), TaskTracker.getPrivateDistributedCacheDir(userName)); // handle.setupPrivateCache(localDirAllocator, TaskTracker // .getPrivateDistributedCacheDir(userName)); } catch (IOException ie) { th = ie; } assertNotNull("Throwable is null", th); assertTrue( "Exception message does not match", th.getMessage().contains("has changed on HDFS since job started")); // release handle.release(); // submit another job Configuration subConf2 = new Configuration(myConf); subConf2.set("user.name", userName); DistributedCache.addCacheFile(firstCacheFile.toUri(), subConf2); TrackerDistributedCacheManager.determineTimestamps(subConf2); TrackerDistributedCacheManager.determineCacheVisibilities(subConf2); handle = manager.newTaskDistributedCacheManager(new JobID("jt", 2), subConf2); handle.setupCache( subConf2, TaskTracker.getPublicDistributedCacheDir(), TaskTracker.getPrivateDistributedCacheDir(userName)); Path[] localCacheFiles2 = DistributedCache.getLocalCacheFiles(subConf2); assertNotNull(null, localCacheFiles2); assertEquals(1, localCacheFiles2.length); Path cachedFirstFile2 = localCacheFiles2[0]; assertFileLengthEquals(firstCacheFile, cachedFirstFile2); assertFalse("Paths should be different.", firstCacheFile.equals(cachedFirstFile2)); // assert that two localizations point to different paths assertFalse( "two jobs with different timestamps did not localize" + " in different paths", cachedFirstFile.equals(cachedFirstFile2)); // release handle.release(); }
public void testReferenceCount() throws IOException, LoginException, URISyntaxException, InterruptedException { if (!canRun()) { return; } TrackerDistributedCacheManager manager = new FakeTrackerDistributedCacheManager(conf); String userName = getJobOwnerName(); File workDir = new File(new Path(TEST_ROOT_DIR, "workdir").toString()); // Configures a job with a regular file Job job1 = new Job(conf); Configuration conf1 = job1.getConfiguration(); conf1.set("user.name", userName); DistributedCache.addCacheFile(secondCacheFile.toUri(), conf1); TrackerDistributedCacheManager.determineTimestamps(conf1); TrackerDistributedCacheManager.determineCacheVisibilities(conf1); // Task localizing for first job TaskDistributedCacheManager handle = manager.newTaskDistributedCacheManager(new JobID("jt", 1), conf1); handle.setupCache( conf1, TaskTracker.getPublicDistributedCacheDir(), TaskTracker.getPrivateDistributedCacheDir(userName)); JobLocalizer.downloadPrivateCache(conf1); handle.release(); for (TaskDistributedCacheManager.CacheFile c : handle.getCacheFiles()) { assertEquals(0, manager.getReferenceCount(c.getStatus())); } Path thirdCacheFile = new Path(TEST_ROOT_DIR, "thirdcachefile"); createPrivateTempFile(thirdCacheFile); // Configures another job with three regular files. Job job2 = new Job(conf); Configuration conf2 = job2.getConfiguration(); conf2.set("user.name", userName); // add a file that would get failed to localize DistributedCache.addCacheFile(firstCacheFilePublic.toUri(), conf2); // add a file that is already localized by different job DistributedCache.addCacheFile(secondCacheFile.toUri(), conf2); // add a file that is never localized DistributedCache.addCacheFile(thirdCacheFile.toUri(), conf2); TrackerDistributedCacheManager.determineTimestamps(conf2); TrackerDistributedCacheManager.determineCacheVisibilities(conf2); // Task localizing for second job // localization for the "firstCacheFile" will fail. handle = manager.newTaskDistributedCacheManager(new JobID("jt", 2), conf2); Throwable th = null; try { handle.setupCache( conf2, TaskTracker.getPublicDistributedCacheDir(), TaskTracker.getPrivateDistributedCacheDir(userName)); JobLocalizer.downloadPrivateCache(conf2); } catch (IOException e) { th = e; LOG.info("Exception during setup", e); } assertNotNull(th); assertTrue(th.getMessage().contains("fake fail")); handle.release(); th = null; for (TaskDistributedCacheManager.CacheFile c : handle.getCacheFiles()) { try { int refcount = manager.getReferenceCount(c.getStatus()); LOG.info("checking refcount " + c.uri + " of " + refcount); assertEquals(0, refcount); } catch (NullPointerException ie) { th = ie; LOG.info("Exception getting reference count for " + c.uri, ie); } } assertNotNull(th); fs.delete(thirdCacheFile, false); }