private void checkLocalizedPath(boolean visibility)
    throws IOException, LoginException, InterruptedException {
  TrackerDistributedCacheManager manager =
      new TrackerDistributedCacheManager(conf, taskController);
  String userName = getJobOwnerName();
  File workDir = new File(TEST_ROOT_DIR, "workdir");
  Path cacheFile = new Path(TEST_ROOT_DIR, "fourthcachefile");
  if (visibility) {
    createPublicTempFile(cacheFile);
  } else {
    createPrivateTempFile(cacheFile);
  }

  Configuration conf1 = new Configuration(conf);
  conf1.set("user.name", userName);
  DistributedCache.addCacheFile(cacheFile.toUri(), conf1);
  TrackerDistributedCacheManager.determineTimestamps(conf1);
  TrackerDistributedCacheManager.determineCacheVisibilities(conf1);
  dumpState(conf1);

  // Task localizing for job
  TaskDistributedCacheManager handle =
      manager.newTaskDistributedCacheManager(new JobID("jt", 1), conf1);
  handle.setupCache(conf1, TaskTracker.getPublicDistributedCacheDir(),
      TaskTracker.getPrivateDistributedCacheDir(userName));
  JobLocalizer.downloadPrivateCache(conf1);
  TaskDistributedCacheManager.CacheFile c = handle.getCacheFiles().get(0);
  String distCacheDir;
  if (visibility) {
    distCacheDir = TaskTracker.getPublicDistributedCacheDir();
  } else {
    distCacheDir = TaskTracker.getPrivateDistributedCacheDir(userName);
  }
  Path localizedPath = manager.getLocalCache(cacheFile.toUri(), conf1,
      distCacheDir, fs.getFileStatus(cacheFile), false,
      c.timestamp, visibility, c);
  assertTrue("Cache file didn't get localized in the expected directory. "
      + "Expected localization to happen within "
      + ROOT_MAPRED_LOCAL_DIR + "/" + distCacheDir
      + ", but was localized at " + localizedPath,
      localizedPath.toString().contains(distCacheDir));
  if (visibility) {
    checkPublicFilePermissions(new Path[]{localizedPath});
  } else {
    checkFilePermissions(new Path[]{localizedPath});
  }
}
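The createPublicTempFile and createPrivateTempFile helpers used above are defined elsewhere in this test class. Below is a minimal, hedged sketch of what they plausibly do; the permission bits are assumptions: a "public" cache file must be world-readable (and, in the real manager, its ancestor directories world-executable), while a "private" one is readable only by its owner.

// Hedged sketch; not the test class's actual helpers.
private void createTempFile(Path p, FsPermission perm) throws IOException {
  // Create the file with an explicit permission and write a small payload.
  FSDataOutputStream out = fs.create(p, perm, true, 4096,
      fs.getDefaultReplication(), fs.getDefaultBlockSize(), null);
  try {
    out.write("some test data".getBytes());
  } finally {
    out.close();
  }
}

private void createPublicTempFile(Path p) throws IOException {
  createTempFile(p, new FsPermission((short) 0644)); // world-readable
}

private void createPrivateTempFile(Path p) throws IOException {
  createTempFile(p, new FsPermission((short) 0600)); // owner-only
}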
/**
 * This is the typical flow for using the DistributedCache classes.
 *
 * @throws IOException
 * @throws LoginException
 */
public void testManagerFlow() throws IOException, LoginException {
  if (!canRun()) {
    return;
  }

  // ****** Imitate JobClient code
  // Configures a task/job with both a regular file and a "classpath" file.
  Configuration subConf = new Configuration(conf);
  String userName = getJobOwnerName();
  subConf.set("user.name", userName);
  JobID jobid = new JobID("jt", 1);
  DistributedCache.addCacheFile(firstCacheFile.toUri(), subConf);
  DistributedCache.addFileToClassPath(secondCacheFile, subConf,
      FileSystem.get(subConf));
  TrackerDistributedCacheManager.determineTimestamps(subConf);
  TrackerDistributedCacheManager.determineCacheVisibilities(subConf);
  // ****** End of imitating JobClient code

  Path jobFile = new Path(TEST_ROOT_DIR, "job.xml");
  FileOutputStream os = new FileOutputStream(new File(jobFile.toString()));
  subConf.writeXml(os);
  os.close();

  // ****** Imitate TaskRunner code.
  TrackerDistributedCacheManager manager =
      new TrackerDistributedCacheManager(conf, taskController);
  TaskDistributedCacheManager handle =
      manager.newTaskDistributedCacheManager(jobid, subConf);
  assertNull("Local cache files should not be set before setup",
      DistributedCache.getLocalCacheFiles(subConf));
  File workDir = new File(new Path(TEST_ROOT_DIR, "workdir").toString());
  handle.setupCache(subConf, TaskTracker.getPublicDistributedCacheDir(),
      TaskTracker.getPrivateDistributedCacheDir(userName));
  JobLocalizer.downloadPrivateCache(subConf);
  // DOESN'T ACTUALLY HAPPEN IN THE TaskRunner (THIS IS A TODO)
  // handle.setupPrivateCache(localDirAllocator, TaskTracker
  //     .getPrivateDistributedCacheDir(userName));
  // ****** End of imitating TaskRunner code

  Path[] localCacheFiles = DistributedCache.getLocalCacheFiles(subConf);
  assertNotNull("Local cache files should be set after setup",
      localCacheFiles);
  assertEquals(2, localCacheFiles.length);
  Path cachedFirstFile = localCacheFiles[0];
  Path cachedSecondFile = localCacheFiles[1];
  assertFileLengthEquals(firstCacheFile, cachedFirstFile);
  assertFalse("Paths should be different.",
      firstCacheFile.equals(cachedFirstFile));

  assertEquals(1, handle.getClassPaths().size());
  assertEquals(cachedSecondFile.toString(), handle.getClassPaths().get(0));

  checkFilePermissions(localCacheFiles);

  // Cleanup
  handle.release();
  manager.purgeCache();
  assertFalse(pathToFile(cachedFirstFile).exists());
}
private void dumpState(Configuration conf1) throws IOException {
  StringBuilder buf = new StringBuilder();
  buf.append("\nFiles:");
  appendUriArray(buf, DistributedCache.getCacheFiles(conf1));
  buf.append("\nArchives:");
  appendUriArray(buf, DistributedCache.getCacheArchives(conf1));
  buf.append("\nFile Visible:");
  appendBooleanArray(buf, TrackerDistributedCacheManager.getFileVisibilities(conf1));
  buf.append("\nArchive Visible:");
  appendBooleanArray(buf, TrackerDistributedCacheManager.getArchiveVisibilities(conf1));
  buf.append("\nFile timestamps:");
  appendLongArray(buf, DistributedCache.getFileTimestamps(conf1));
  buf.append("\nArchive timestamps:");
  appendLongArray(buf, DistributedCache.getArchiveTimestamps(conf1));
  LOG.info("state = " + buf.toString());
}
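dumpState relies on three small append* helpers defined elsewhere in this class. The bodies below are illustrative sketches inferred only from the call sites above, not the class's actual code:

// Hedged sketches; signatures inferred from the calls in dumpState.
private static void appendUriArray(StringBuilder buf, URI[] uris) {
  if (uris != null) {
    for (URI u : uris) {
      buf.append(" ").append(u);
    }
  }
}

private static void appendBooleanArray(StringBuilder buf, boolean[] flags) {
  if (flags != null) {
    for (boolean b : flags) {
      buf.append(" ").append(b);
    }
  }
}

private static void appendLongArray(StringBuilder buf, long[] longs) {
  if (longs != null) {
    for (long l : longs) {
      buf.append(" ").append(l);
    }
  }
}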
public Job(JobID jobid, String jobSubmitDir) throws IOException {
  this.systemJobDir = new Path(jobSubmitDir);
  this.systemJobFile = new Path(systemJobDir, "job.xml");
  this.id = jobid;

  this.localFs = FileSystem.getLocal(conf);
  this.localJobDir = localFs.makeQualified(conf.getLocalPath(jobDir));
  this.localJobFile = new Path(this.localJobDir, id + ".xml");

  // Manage the distributed cache. If there are files to be copied,
  // this will trigger localFile to be re-written again.
  this.trackerDistributedCacheManager =
      new TrackerDistributedCacheManager(conf, taskController);
  this.taskDistributedCacheManager =
      trackerDistributedCacheManager.newTaskDistributedCacheManager(jobid, conf);
  taskDistributedCacheManager.setupCache(conf, "archive", "archive");

  if (DistributedCache.getSymlink(conf)) {
    // This is not supported largely because,
    // for a Child subprocess, the cwd in LocalJobRunner
    // is not a fresh slate, but rather the user's working directory.
    // This is further complicated because the logic in
    // setupWorkDir only creates symlinks if there's a jarfile
    // in the configuration.
    LOG.warn("LocalJobRunner does not support "
        + "symlinking into current working dir.");
  }
  // Setup the symlinks for the distributed cache.
  TaskRunner.setupWorkDir(conf, new File(localJobDir.toUri()).getAbsoluteFile());

  // Write out configuration file. Instead of copying it from
  // systemJobFile, we re-write it, since setup(), above, may have
  // updated it.
  OutputStream out = localFs.create(localJobFile);
  try {
    conf.writeXml(out);
  } finally {
    out.close();
  }
  this.job = new JobConf(localJobFile);

  // Job (the current object) is a Thread, so we wrap its class loader.
  if (!taskDistributedCacheManager.getClassPaths().isEmpty()) {
    setContextClassLoader(
        taskDistributedCacheManager.makeClassLoader(getContextClassLoader()));
  }

  profile = new JobProfile(job.getUser(), id, systemJobFile.toString(),
      "http://localhost:8080/", job.getJobName());
  status = new JobStatus(id, 0.0f, 0.0f, JobStatus.RUNNING);

  jobs.put(id, this);

  this.start();
}
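For context, here is a hedged sketch of what taskDistributedCacheManager.makeClassLoader(parent) plausibly does: front the parent loader with the localized "classpath" cache entries via a URLClassLoader. The classPaths field is an assumption for illustration; the real method lives in TaskDistributedCacheManager.

// Hedged sketch; requires java.net.{URL, URLClassLoader, MalformedURLException}.
public ClassLoader makeClassLoader(final ClassLoader parent)
    throws MalformedURLException {
  // classPaths is assumed to be the List<String> returned by getClassPaths().
  URL[] urls = new URL[classPaths.size()];
  for (int i = 0; i < urls.length; i++) {
    urls[i] = new File(classPaths.get(i)).toURI().toURL();
  }
  // Localized entries are consulted before delegating to the parent loader.
  return new URLClassLoader(urls, parent);
}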
public void testFileSystemOtherThanDefault() throws Exception {
  if (!canRun()) {
    return;
  }
  TrackerDistributedCacheManager manager =
      new TrackerDistributedCacheManager(conf, taskController);
  conf.set("fs.fakefile.impl", conf.get("fs.file.impl"));
  String userName = getJobOwnerName();
  conf.set("user.name", userName);
  Path fileToCache = new Path("fakefile:///" + firstCacheFile.toUri().getPath());
  CacheFile file = new CacheFile(fileToCache.toUri(),
      CacheFile.FileType.REGULAR, false, 0, false);
  Path result = manager.getLocalCache(fileToCache.toUri(), conf,
      TaskTracker.getPrivateDistributedCacheDir(userName),
      fs.getFileStatus(firstCacheFile), false,
      System.currentTimeMillis(), false, file);
  assertNotNull("DistributedCache failed to cache file on the non-default "
      + "filesystem.", result);
}
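The trick this test relies on is Hadoop's configuration-driven filesystem registration: setting fs.<scheme>.impl maps a URI scheme to an implementation class, so the fakefile:/// URI above resolves to the ordinary local filesystem implementation. A minimal usage sketch; the "myfs" scheme and path are made up for illustration:

// Hedged sketch of scheme registration via configuration.
Configuration c = new Configuration();
// Map the hypothetical "myfs" scheme to the plain local filesystem.
c.set("fs.myfs.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
// FileSystem.get consults fs.myfs.impl to instantiate the right class.
FileSystem myFs = FileSystem.get(java.net.URI.create("myfs:///tmp"), c);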
/**
 * Creates distributed cache symlinks and tmp directory, as appropriate.
 * Note that when we setup the distributed cache, we didn't create the
 * symlinks. This is done on a per task basis by the currently executing
 * task.
 *
 * @param conf The job configuration.
 * @param workDir Working directory, whose contents are completely deleted.
 */
public static void setupWorkDir(JobConf conf, File workDir) throws IOException {
  if (LOG.isDebugEnabled()) {
    LOG.debug("Fully deleting contents of " + workDir);
  }

  // Delete only the contents of workDir, leaving the directory empty. We
  // can't delete workDir itself because it is the current working directory.
  FileUtil.fullyDeleteContents(workDir);

  if (DistributedCache.getSymlink(conf)) {
    URI[] archives = DistributedCache.getCacheArchives(conf);
    URI[] files = DistributedCache.getCacheFiles(conf);
    Path[] localArchives = DistributedCache.getLocalCacheArchives(conf);
    Path[] localFiles = DistributedCache.getLocalCacheFiles(conf);
    if (archives != null) {
      for (int i = 0; i < archives.length; i++) {
        String link = archives[i].getFragment();
        String target = localArchives[i].toString();
        symlink(workDir, target, link);
      }
    }
    if (files != null) {
      for (int i = 0; i < files.length; i++) {
        String link = files[i].getFragment();
        String target = localFiles[i].toString();
        symlink(workDir, target, link);
      }
    }
  }

  if (conf.getJar() != null) {
    File jobCacheDir = new File(new Path(conf.getJar()).getParent().toString());
    // Create symlinks in the current working dir for all the files in the
    // job cache dir, for streaming.
    try {
      TrackerDistributedCacheManager.createAllSymlink(conf, jobCacheDir, workDir);
    } catch (IOException ie) {
      // Do not exit even if symlinks have not been created.
      LOG.warn(StringUtils.stringifyException(ie));
    }
  }

  createChildTmpDir(workDir, conf, true);
}
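The symlink(workDir, target, link) helper called above is defined elsewhere in TaskRunner. A hedged sketch of a plausible implementation, assuming FileUtil.symLink semantics and that a null link fragment means there is nothing to do:

// Hedged sketch; not necessarily TaskRunner's exact code.
private static void symlink(File workDir, String target, String link)
    throws IOException {
  if (link != null) {
    // The link is created inside the task's working directory.
    link = workDir.toString() + Path.SEPARATOR + link;
    File flink = new File(link);
    if (!flink.exists()) {
      LOG.info(String.format("Creating symlink: %s <- %s", target, link));
      if (0 != FileUtil.symLink(target, link)) {
        throw new IOException("Failed to create symlink: " + link);
      }
    }
  }
}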
/**
 * Localize a file. After localization is complete, create a file, "myFile",
 * under the directory where the file is localized and ensure that it has
 * permissions different from what is set by default. Then, localize another
 * file. Verify that "myFile" has the right permissions.
 *
 * @throws Exception
 */
public void testCustomPermissions() throws Exception {
  if (!canRun()) {
    return;
  }
  String userName = getJobOwnerName();
  conf.set("user.name", userName);
  TrackerDistributedCacheManager manager =
      new TrackerDistributedCacheManager(conf, taskController);
  FileSystem localfs = FileSystem.getLocal(conf);
  long now = System.currentTimeMillis();

  Path[] localCache = new Path[2];
  FileStatus stat = fs.getFileStatus(firstCacheFilePublic);
  CacheFile file = new CacheFile(firstCacheFilePublic.toUri(),
      CacheFile.FileType.REGULAR, true, stat.getModificationTime(), false);
  localCache[0] = manager.getLocalCache(firstCacheFilePublic.toUri(), conf,
      TaskTracker.getPrivateDistributedCacheDir(userName),
      fs.getFileStatus(firstCacheFilePublic), false,
      fs.getFileStatus(firstCacheFilePublic).getModificationTime(), true,
      file);
  FsPermission myPermission = new FsPermission((short) 0600);
  Path myFile = new Path(localCache[0].getParent(), "myfile.txt");
  if (FileSystem.create(localfs, myFile, myPermission) == null) {
    throw new IOException("Could not create " + myFile);
  }
  try {
    stat = fs.getFileStatus(secondCacheFilePublic);
    file = new CacheFile(secondCacheFilePublic.toUri(),
        CacheFile.FileType.REGULAR, true, stat.getModificationTime(), false);
    localCache[1] = manager.getLocalCache(secondCacheFilePublic.toUri(), conf,
        TaskTracker.getPrivateDistributedCacheDir(userName),
        fs.getFileStatus(secondCacheFilePublic), false,
        fs.getFileStatus(secondCacheFilePublic).getModificationTime(), true,
        file);
    stat = localfs.getFileStatus(myFile);
    assertTrue(stat.getPermission().equals(myPermission));
    // Validate permissions of localized files.
    checkFilePermissions(localCache);
  } finally {
    localfs.delete(myFile, false);
  }
}
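checkFilePermissions, used here and in several other tests, is defined elsewhere in this class. The sketch below is a hedged guess at its shape; in particular, the expected permission bits (owner read/write only) are an assumption chosen to match the private-cache semantics being tested, and the real helper may check different bits.

// Hedged sketch; the asserted bits are illustrative assumptions.
private void checkFilePermissions(Path[] localCacheFiles) throws IOException {
  FileSystem localFs = FileSystem.getLocal(conf);
  for (Path p : localCacheFiles) {
    FsPermission perm = localFs.getFileStatus(p).getPermission();
    assertEquals("Bad permissions for " + p,
        new FsPermission((short) 0600), perm);
  }
}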
public void testFreshness() throws Exception {
  if (!canRun()) {
    return;
  }
  Configuration myConf = new Configuration(conf);
  myConf.set("fs.default.name", "refresh:///");
  myConf.setClass("fs.refresh.impl", FakeFileSystem.class, FileSystem.class);
  String userName = getJobOwnerName();

  TrackerDistributedCacheManager manager =
      new TrackerDistributedCacheManager(myConf, taskController);
  // ****** Imitate JobClient code
  // Configures a task/job with both a regular file and a "classpath" file.
  Configuration subConf = new Configuration(myConf);
  subConf.set("user.name", userName);
  DistributedCache.addCacheFile(firstCacheFile.toUri(), subConf);
  TrackerDistributedCacheManager.determineTimestamps(subConf);
  TrackerDistributedCacheManager.determineCacheVisibilities(subConf);
  // ****** End of imitating JobClient code

  // ****** Imitate TaskRunner code.
  TaskDistributedCacheManager handle =
      manager.newTaskDistributedCacheManager(new JobID("jt", 1), subConf);
  assertNull("Local cache files should not be set before setup",
      DistributedCache.getLocalCacheFiles(subConf));
  File workDir = new File(new Path(TEST_ROOT_DIR, "workdir").toString());
  handle.setupCache(subConf, TaskTracker.getPublicDistributedCacheDir(),
      TaskTracker.getPrivateDistributedCacheDir(userName));
  // TODO this doesn't really happen in the TaskRunner
  // handle.setupPrivateCache(localDirAllocator, TaskTracker
  //     .getPrivateDistributedCacheDir(userName));
  // ****** End of imitating TaskRunner code

  Path[] localCacheFiles = DistributedCache.getLocalCacheFiles(subConf);
  assertNotNull("Local cache files should be set after setup",
      localCacheFiles);
  assertEquals(1, localCacheFiles.length);
  Path cachedFirstFile = localCacheFiles[0];
  assertFileLengthEquals(firstCacheFile, cachedFirstFile);
  assertFalse("Paths should be different.",
      firstCacheFile.equals(cachedFirstFile));
  // release
  handle.release();

  // change the file timestamp
  FileSystem fs = FileSystem.get(myConf);
  ((FakeFileSystem) fs).advanceClock(1);

  // running a task of the same job
  Throwable th = null;
  try {
    handle.setupCache(subConf, TaskTracker.getPublicDistributedCacheDir(),
        TaskTracker.getPrivateDistributedCacheDir(userName));
    // handle.setupPrivateCache(localDirAllocator, TaskTracker
    //     .getPrivateDistributedCacheDir(userName));
  } catch (IOException ie) {
    th = ie;
  }
  assertNotNull("Throwable is null", th);
  assertTrue("Exception message does not match",
      th.getMessage().contains("has changed on HDFS since job started"));
  // release
  handle.release();

  // submit another job
  Configuration subConf2 = new Configuration(myConf);
  subConf2.set("user.name", userName);
  DistributedCache.addCacheFile(firstCacheFile.toUri(), subConf2);
  TrackerDistributedCacheManager.determineTimestamps(subConf2);
  TrackerDistributedCacheManager.determineCacheVisibilities(subConf2);

  handle = manager.newTaskDistributedCacheManager(new JobID("jt", 2), subConf2);
  handle.setupCache(subConf2, TaskTracker.getPublicDistributedCacheDir(),
      TaskTracker.getPrivateDistributedCacheDir(userName));
  Path[] localCacheFiles2 = DistributedCache.getLocalCacheFiles(subConf2);
  assertNotNull("Local cache files should be set for the second job",
      localCacheFiles2);
  assertEquals(1, localCacheFiles2.length);
  Path cachedFirstFile2 = localCacheFiles2[0];
  assertFileLengthEquals(firstCacheFile, cachedFirstFile2);
  assertFalse("Paths should be different.",
      firstCacheFile.equals(cachedFirstFile2));

  // assert that the two localizations point to different paths
  assertFalse("two jobs with different timestamps did not localize"
      + " in different paths", cachedFirstFile.equals(cachedFirstFile2));
  // release
  handle.release();
}
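FakeFileSystem, registered above under the refresh:/// scheme, is defined elsewhere in this test. A hedged sketch of the idea: a local filesystem whose reported modification times can be skewed forward, so already-localized files appear to have changed upstream:

// Hedged sketch; not the test's exact FakeFileSystem implementation.
static class FakeFileSystem extends RawLocalFileSystem {
  private long increment = 0;

  // Skew all subsequently reported modification times forward.
  public void advanceClock(long millis) {
    increment += millis;
  }

  @Override
  public FileStatus getFileStatus(Path p) throws IOException {
    FileStatus stat = super.getFileStatus(p);
    return new FileStatus(stat.getLen(), stat.isDir(), stat.getReplication(),
        stat.getBlockSize(), stat.getModificationTime() + increment,
        stat.getAccessTime(), stat.getPermission(), stat.getOwner(),
        stat.getGroup(), stat.getPath());
  }
}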
/** test delete cache */
public void testDeleteCache() throws Exception {
  if (!canRun()) {
    return;
  }
  // This test needs mapred.local.dir to be a single directory instead of
  // four, because it assumes that both firstcachefile and secondcachefile
  // will be localized in the same directory, so that the second
  // localization triggers deleteCache. If mapred.local.dir were four
  // directories, the second localization might not trigger deleteCache,
  // because it could land in a different directory.
  Configuration conf2 = new Configuration(conf);
  conf2.set("mapred.local.dir", ROOT_MAPRED_LOCAL_DIR.toString());
  conf2.setLong("local.cache.size", LOCAL_CACHE_LIMIT);
  refreshConf(conf2);
  TrackerDistributedCacheManager manager =
      new TrackerDistributedCacheManager(conf2, taskController);
  FileSystem localfs = FileSystem.getLocal(conf2);
  long now = System.currentTimeMillis();
  String userName = getJobOwnerName();
  conf2.set("user.name", userName);

  // We first test the size limit
  FileStatus stat = fs.getFileStatus(firstCacheFilePublic);
  CacheFile cfile1 = new CacheFile(firstCacheFilePublic.toUri(),
      CacheFile.FileType.REGULAR, true, stat.getModificationTime(), true);
  Path firstLocalCache = manager.getLocalCache(firstCacheFilePublic.toUri(),
      conf2, TaskTracker.getPrivateDistributedCacheDir(userName),
      fs.getFileStatus(firstCacheFilePublic), false,
      fs.getFileStatus(firstCacheFilePublic).getModificationTime(), true,
      cfile1);
  manager.releaseCache(cfile1.getStatus());
  // The code above localized a file of size 4K and released the cache;
  // the released entry becomes eligible for deletion once the cache size
  // limit is exceeded. The code below localizes another file, which is
  // designed to sweep away the first one.
  stat = fs.getFileStatus(secondCacheFilePublic);
  CacheFile cfile2 = new CacheFile(secondCacheFilePublic.toUri(),
      CacheFile.FileType.REGULAR, true, stat.getModificationTime(), true);
  assertTrue("DistributedCache currently doesn't have cached file",
      localfs.exists(firstLocalCache));
  Path secondLocalCache = manager.getLocalCache(secondCacheFilePublic.toUri(),
      conf2, TaskTracker.getPrivateDistributedCacheDir(userName),
      fs.getFileStatus(secondCacheFilePublic), false,
      fs.getFileStatus(secondCacheFilePublic).getModificationTime(), true,
      cfile2);
  assertFalse("DistributedCache failed deleting old"
      + " cache when the cache store is full.",
      localfs.exists(firstLocalCache));

  // find the root directory of distributed caches
  Path firstCursor = firstLocalCache;
  Path secondCursor = secondLocalCache;
  while (!firstCursor.equals(secondCursor)) {
    // Debug code, to see what these things look like
    System.err.println("cursors: " + firstCursor);
    System.err.println(" and " + secondCursor);
    firstCursor = firstCursor.getParent();
    secondCursor = secondCursor.getParent();
  }
  System.err.println("The final cursor is " + firstCursor);
  System.err.println("That directory ends up with "
      + localfs.listStatus(firstCursor).length + " subdirectories");
  Path cachesBase = firstCursor;
  assertFalse("DistributedCache did not delete the gensym'ed distcache "
      + "directory names when it deleted the files they contained "
      + "because they collectively exceeded the size limit.",
      localfs.listStatus(cachesBase).length > 1);

  conf2.setLong("local.cache.size", LOCAL_CACHE_LIMIT * 10);
  conf2.setLong("mapreduce.tasktracker.local.cache.numberdirectories",
      LOCAL_CACHE_SUBDIR_LIMIT);
  manager = new TrackerDistributedCacheManager(conf2, taskController);

  // Now we test the number of sub directories limit.
  // Create the temporary cache files to be used in the tests.
  Path thirdCacheFile = new Path(TEST_ROOT_DIR, "thirdcachefile");
  Path fourthCacheFile = new Path(TEST_ROOT_DIR, "fourthcachefile");
  // Adding two more small files, so it triggers the number of sub directory
  // limit but does not trigger the file size limit.
  createPrivateTempFile(thirdCacheFile);
  createPrivateTempFile(fourthCacheFile);
  DistributedCache.setCacheFiles(new URI[]{thirdCacheFile.toUri()}, conf2);
  TrackerDistributedCacheManager.determineCacheVisibilities(conf2);
  TrackerDistributedCacheManager.determineTimestamps(conf2);
  stat = fs.getFileStatus(thirdCacheFile);
  CacheFile cfile3 = new CacheFile(thirdCacheFile.toUri(),
      CacheFile.FileType.REGULAR, false, stat.getModificationTime(), true);
  Path thirdLocalCache = manager.getLocalCache(thirdCacheFile.toUri(), conf2,
      TaskTracker.getPrivateDistributedCacheDir(userName),
      fs.getFileStatus(thirdCacheFile), false,
      fs.getFileStatus(thirdCacheFile).getModificationTime(), false, cfile3);
  DistributedCache.setLocalFiles(conf2, thirdLocalCache.toString());
  JobLocalizer.downloadPrivateCache(conf2);
  // Release the third cache so that it can be deleted while sweeping
  manager.releaseCache(cfile3.getStatus());
  // Getting the fourth cache will bring the number of sub directories to 3,
  // which is greater than 2. So the released cache will be deleted.
  stat = fs.getFileStatus(fourthCacheFile);
  CacheFile cfile4 = new CacheFile(fourthCacheFile.toUri(),
      CacheFile.FileType.REGULAR, false, stat.getModificationTime(), true);
  assertTrue("DistributedCache currently doesn't have cached file",
      localfs.exists(thirdLocalCache));

  DistributedCache.setCacheFiles(new URI[]{fourthCacheFile.toUri()}, conf2);
  DistributedCache.setLocalFiles(conf2, thirdCacheFile.toUri().toString());
  TrackerDistributedCacheManager.determineCacheVisibilities(conf2);
  TrackerDistributedCacheManager.determineTimestamps(conf2);
  Path fourthLocalCache = manager.getLocalCache(fourthCacheFile.toUri(), conf2,
      TaskTracker.getPrivateDistributedCacheDir(userName),
      fs.getFileStatus(fourthCacheFile), false,
      fs.getFileStatus(fourthCacheFile).getModificationTime(), false, cfile4);
  assertFalse("DistributedCache failed deleting old"
      + " cache when the cache exceeds the number of sub directories limit.",
      localfs.exists(thirdLocalCache));

  assertFalse("DistributedCache did not delete the gensym'ed distcache "
      + "directory names when it deleted the files they contained "
      + "because there were too many.",
      localfs.listStatus(cachesBase).length > LOCAL_CACHE_SUBDIR_LIMIT);

  // Clean up the files created in this test
  new File(thirdCacheFile.toString()).delete();
  new File(fourthCacheFile.toString()).delete();
}
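Both eviction limits exercised by this test are plain configuration knobs, set the same way the test sets them. A minimal sketch; the values here are illustrative:

// Evict released cache entries once total localized bytes exceed the limit.
Configuration c = new Configuration();
c.setLong("local.cache.size", 4 * 1024);
// Evict released entries once the number of localized cache subdirectories
// exceeds the limit.
c.setLong("mapreduce.tasktracker.local.cache.numberdirectories", 2);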
public void testReferenceCount()
    throws IOException, LoginException, URISyntaxException,
    InterruptedException {
  if (!canRun()) {
    return;
  }
  TrackerDistributedCacheManager manager =
      new FakeTrackerDistributedCacheManager(conf);

  String userName = getJobOwnerName();
  File workDir = new File(new Path(TEST_ROOT_DIR, "workdir").toString());

  // Configures a job with a regular file
  Job job1 = new Job(conf);
  Configuration conf1 = job1.getConfiguration();
  conf1.set("user.name", userName);
  DistributedCache.addCacheFile(secondCacheFile.toUri(), conf1);

  TrackerDistributedCacheManager.determineTimestamps(conf1);
  TrackerDistributedCacheManager.determineCacheVisibilities(conf1);

  // Task localizing for first job
  TaskDistributedCacheManager handle =
      manager.newTaskDistributedCacheManager(new JobID("jt", 1), conf1);
  handle.setupCache(conf1, TaskTracker.getPublicDistributedCacheDir(),
      TaskTracker.getPrivateDistributedCacheDir(userName));
  JobLocalizer.downloadPrivateCache(conf1);
  handle.release();
  for (TaskDistributedCacheManager.CacheFile c : handle.getCacheFiles()) {
    assertEquals(0, manager.getReferenceCount(c.getStatus()));
  }

  Path thirdCacheFile = new Path(TEST_ROOT_DIR, "thirdcachefile");
  createPrivateTempFile(thirdCacheFile);

  // Configures another job with three regular files.
  Job job2 = new Job(conf);
  Configuration conf2 = job2.getConfiguration();
  conf2.set("user.name", userName);
  // add a file that will fail to localize
  DistributedCache.addCacheFile(firstCacheFilePublic.toUri(), conf2);
  // add a file that is already localized by a different job
  DistributedCache.addCacheFile(secondCacheFile.toUri(), conf2);
  // add a file that is never localized
  DistributedCache.addCacheFile(thirdCacheFile.toUri(), conf2);

  TrackerDistributedCacheManager.determineTimestamps(conf2);
  TrackerDistributedCacheManager.determineCacheVisibilities(conf2);

  // Task localizing for second job
  // localization for the "firstCacheFile" will fail.
  handle = manager.newTaskDistributedCacheManager(new JobID("jt", 2), conf2);
  Throwable th = null;
  try {
    handle.setupCache(conf2, TaskTracker.getPublicDistributedCacheDir(),
        TaskTracker.getPrivateDistributedCacheDir(userName));
    JobLocalizer.downloadPrivateCache(conf2);
  } catch (IOException e) {
    th = e;
    LOG.info("Exception during setup", e);
  }
  assertNotNull(th);
  assertTrue(th.getMessage().contains("fake fail"));
  handle.release();
  th = null;
  for (TaskDistributedCacheManager.CacheFile c : handle.getCacheFiles()) {
    try {
      int refcount = manager.getReferenceCount(c.getStatus());
      LOG.info("checking refcount " + c.uri + " of " + refcount);
      assertEquals(0, refcount);
    } catch (NullPointerException ie) {
      th = ie;
      LOG.info("Exception getting reference count for " + c.uri, ie);
    }
  }
  assertNotNull(th);
  fs.delete(thirdCacheFile, false);
}
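FakeTrackerDistributedCacheManager, used above, forces localization of firstCacheFilePublic to fail with the message "fake fail" so the test can verify that reference counts are rolled back on failure. A hedged sketch; the overridden method, its modifiers, and its signature are assumptions inferred from the getLocalCache call sites in this document:

// Hedged sketch; not the test's exact subclass.
class FakeTrackerDistributedCacheManager extends TrackerDistributedCacheManager {
  FakeTrackerDistributedCacheManager(Configuration conf) throws IOException {
    super(conf, taskController);
  }

  @Override
  Path getLocalCache(URI cache, Configuration conf, String subDir,
      FileStatus fileStatus, boolean isArchive, long confFileStamp,
      boolean isPublic, CacheFile file) throws IOException {
    // Fail localization for this one file; localize everything else normally.
    if (cache.equals(firstCacheFilePublic.toUri())) {
      throw new IOException("fake fail");
    }
    return super.getLocalCache(cache, conf, subDir, fileStatus, isArchive,
        confFileStamp, isPublic, file);
  }
}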
@Override
public void updatePrivateDistributedCacheSizes(
    org.apache.hadoop.mapreduce.JobID jobId, long[] sizes) throws IOException {
  trackerDistributedCacheManager.setArchiveSizes(jobId, sizes);
}
@SuppressWarnings("unchecked") @Override public void run() { JobID jobId = profile.getJobID(); JobContext jContext = new JobContextImpl(conf, jobId); org.apache.hadoop.mapreduce.OutputCommitter outputCommitter = null; try { outputCommitter = createOutputCommitter(conf.getUseNewMapper(), jobId, conf); } catch (Exception e) { LOG.info("Failed to createOutputCommitter", e); return; } try { TaskSplitMetaInfo[] taskSplitMetaInfos = SplitMetaInfoReader.readSplitMetaInfo(jobId, localFs, conf, systemJobDir); int numReduceTasks = job.getNumReduceTasks(); if (numReduceTasks > 1 || numReduceTasks < 0) { // we only allow 0 or 1 reducer in local mode numReduceTasks = 1; job.setNumReduceTasks(1); } outputCommitter.setupJob(jContext); status.setSetupProgress(1.0f); Map<TaskAttemptID, MapOutputFile> mapOutputFiles = Collections.synchronizedMap(new HashMap<TaskAttemptID, MapOutputFile>()); List<MapTaskRunnable> taskRunnables = getMapTaskRunnables(taskSplitMetaInfos, jobId, mapOutputFiles); ExecutorService mapService = createMapExecutor(taskRunnables.size()); // Start populating the executor with work units. // They may begin running immediately (in other threads). for (Runnable r : taskRunnables) { mapService.submit(r); } try { mapService.shutdown(); // Instructs queue to drain. // Wait for tasks to finish; do not use a time-based timeout. // (See http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6179024) LOG.info("Waiting for map tasks"); mapService.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS); } catch (InterruptedException ie) { // Cancel all threads. mapService.shutdownNow(); throw ie; } LOG.info("Map task executor complete."); // After waiting for the map tasks to complete, if any of these // have thrown an exception, rethrow it now in the main thread context. 
for (MapTaskRunnable r : taskRunnables) { if (r.storedException != null) { throw new Exception(r.storedException); } } TaskAttemptID reduceId = new TaskAttemptID(new TaskID(jobId, false, 0), 0); try { if (numReduceTasks > 0) { ReduceTask reduce = new ReduceTask(systemJobFile.toString(), reduceId, 0, mapIds.size(), 1); reduce.setUser(UserGroupInformation.getCurrentUser().getShortUserName()); JobConf localConf = new JobConf(job); localConf.set("mapreduce.jobtracker.address", "local"); TaskRunner.setupChildMapredLocalDirs(reduce, localConf); // move map output to reduce input for (int i = 0; i < mapIds.size(); i++) { if (!this.isInterrupted()) { TaskAttemptID mapId = mapIds.get(i); Path mapOut = mapOutputFiles.get(mapId).getOutputFile(); MapOutputFile localOutputFile = new MapOutputFile(); localOutputFile.setConf(localConf); Path reduceIn = localOutputFile.getInputFileForWrite( mapId.getTaskID(), localFs.getFileStatus(mapOut).getLen()); if (!localFs.mkdirs(reduceIn.getParent())) { throw new IOException( "Mkdirs failed to create " + reduceIn.getParent().toString()); } if (!localFs.rename(mapOut, reduceIn)) throw new IOException("Couldn't rename " + mapOut); } else { throw new InterruptedException(); } } if (!this.isInterrupted()) { reduce.setJobFile(localJobFile.toString()); localConf.setUser(reduce.getUser()); reduce.localizeConfiguration(localConf); reduce.setConf(localConf); reduce_tasks += 1; myMetrics.launchReduce(reduce.getTaskID()); reduce.run(localConf, this); myMetrics.completeReduce(reduce.getTaskID()); reduce_tasks -= 1; } else { throw new InterruptedException(); } } } finally { for (MapOutputFile output : mapOutputFiles.values()) { output.removeAll(); } } // delete the temporary directory in output directory outputCommitter.commitJob(jContext); status.setCleanupProgress(1.0f); if (killed) { this.status.setRunState(JobStatus.KILLED); } else { this.status.setRunState(JobStatus.SUCCEEDED); } JobEndNotifier.localRunnerNotification(job, status); } catch (Throwable t) { try { outputCommitter.abortJob(jContext, org.apache.hadoop.mapreduce.JobStatus.State.FAILED); } catch (IOException ioe) { LOG.info("Error cleaning up job:" + id); } status.setCleanupProgress(1.0f); if (killed) { this.status.setRunState(JobStatus.KILLED); } else { this.status.setRunState(JobStatus.FAILED); } LOG.warn(id, t); JobEndNotifier.localRunnerNotification(job, status); } finally { try { fs.delete(systemJobFile.getParent(), true); // delete submit dir localFs.delete(localJobFile, true); // delete local copy // Cleanup distributed cache taskDistributedCacheManager.release(); trackerDistributedCacheManager.purgeCache(); } catch (IOException e) { LOG.warn("Error cleaning up " + id + ": " + e); } } }