void removeTaskEntry(String taskid) {
  // taskid --> tracker
  String tracker = (String) taskidToTrackerMap.remove(taskid);

  // tracker --> taskid
  TreeSet trackerSet = (TreeSet) trackerToTaskMap.get(tracker);
  if (trackerSet != null) {
    trackerSet.remove(taskid);
  }

  // taskid --> TIP
  taskidToTIPMap.remove(taskid);
}
/**
 * We lost the task tracker! All task-tracker structures have already been updated. Just process
 * the contained tasks and any jobs that might be affected.
 */
void lostTaskTracker(String trackerName) {
  LOG.info("Lost tracker '" + trackerName + "'");
  TreeSet lostTasks = (TreeSet) trackerToTaskMap.get(trackerName);
  trackerToTaskMap.remove(trackerName);

  if (lostTasks != null) {
    for (Iterator it = lostTasks.iterator(); it.hasNext(); ) {
      String taskId = (String) it.next();
      TaskInProgress tip = (TaskInProgress) taskidToTIPMap.get(taskId);
      if (tip == null) {
        // guard against a taskid with no TIP entry, mirroring updateTaskStatuses()
        LOG.info("Lost tracker '" + trackerName + "' had unknown taskid " + taskId);
        continue;
      }

      // Tell the job to fail the relevant task
      JobInProgress job = tip.getJob();
      job.failedTask(tip, taskId, trackerName);
    }
  }
}
/**
 * JobTracker.submitJob() kicks off a new job.
 *
 * <p>Create a 'JobInProgress' object, which contains both JobProfile and JobStatus. Those two
 * sub-objects are sometimes shipped outside of the JobTracker. But JobInProgress adds info that's
 * useful for the JobTracker alone.
 *
 * <p>We add the JIP to the jobInitQueue, which is processed asynchronously to handle
 * split-computation and build up the right TaskTracker/Block mapping.
 */
public synchronized JobStatus submitJob(String jobFile) throws IOException {
  totalSubmissions++;
  JobInProgress job = new JobInProgress(jobFile, this, this.conf);
  synchronized (jobs) {
    synchronized (jobsByArrival) {
      synchronized (jobInitQueue) {
        jobs.put(job.getProfile().getJobId(), job);
        jobsByArrival.add(job);
        jobInitQueue.add(job);
        jobInitQueue.notifyAll();
      }
    }
  }
  return job.getStatus();
}
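// A minimal sketch of the consumer side of jobInitQueue (illustrative only; the
// actual initialization thread is defined elsewhere, and 'initTasks' is an
// assumed name). It shows the wait/notifyAll pairing with submitJob() above:
//
//   while (running) {
//     JobInProgress job;
//     synchronized (jobInitQueue) {
//       while (jobInitQueue.isEmpty()) {
//         try {
//           jobInitQueue.wait();        // woken by notifyAll() in submitJob()
//         } catch (InterruptedException e) {
//         }
//       }
//       job = (JobInProgress) jobInitQueue.remove(0);
//     }
//     job.initTasks();                  // compute splits, build task lists
//   }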
/**
 * Clear the entire contents of the cache and delete the backing files. This should only be used
 * when the server is reinitializing, because the users are going to lose their files.
 */
public static void purgeCache(Configuration conf, MRAsyncDiskService service) throws IOException {
  synchronized (cachedArchives) {
    LocalFileSystem localFs = FileSystem.getLocal(conf);
    for (Map.Entry<String, CacheStatus> f : cachedArchives.entrySet()) {
      try {
        deleteLocalPath(service, localFs, f.getValue().localizedLoadPath);
      } catch (IOException ie) {
        LOG.debug("Error cleaning up cache", ie);
      }
    }
    cachedArchives.clear();
  }
}
/**
 * Update the last recorded status for the given task tracker. It assumes that the taskTrackers
 * are locked on entry.
 *
 * @author Owen O'Malley
 * @param trackerName The name of the tracker
 * @param status The new status for the task tracker
 * @return Was an old status found?
 */
private boolean updateTaskTrackerStatus(String trackerName, TaskTrackerStatus status) {
  TaskTrackerStatus oldStatus = (TaskTrackerStatus) taskTrackers.get(trackerName);
  if (oldStatus != null) {
    totalMaps -= oldStatus.countMapTasks();
    totalReduces -= oldStatus.countReduceTasks();
    if (status == null) {
      taskTrackers.remove(trackerName);
    }
  }
  if (status != null) {
    totalMaps += status.countMapTasks();
    totalReduces += status.countReduceTasks();
    taskTrackers.put(trackerName, status);
  }
  return oldStatus != null;
}
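// Caller sketch (illustrative): since this method assumes the taskTrackers
// lock is already held, a heartbeat handler would wrap the call like this:
//
//   synchronized (taskTrackers) {
//     boolean seenBefore = updateTaskTrackerStatus(trackerName, trackerStatus);
//     // a first-time tracker (seenBefore == false) may need registration work
//   }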
public synchronized JobStatus getJobStatus(String jobid) {
  JobInProgress job = (JobInProgress) jobs.get(jobid);
  if (job != null) {
    return job.getStatus();
  } else {
    return null;
  }
}

public synchronized JobProfile getJobProfile(String jobid) {
  JobInProgress job = (JobInProgress) jobs.get(jobid);
  if (job != null) {
    return job.getProfile();
  } else {
    return null;
  }
}
/**
 * A tracker wants to know if any of its Tasks have been closed (because the job completed,
 * whether successfully or not).
 */
public synchronized String pollForTaskWithClosedJob(String taskTracker) {
  TreeSet taskIds = (TreeSet) trackerToTaskMap.get(taskTracker);
  if (taskIds != null) {
    for (Iterator it = taskIds.iterator(); it.hasNext(); ) {
      String taskId = (String) it.next();
      TaskInProgress tip = (TaskInProgress) taskidToTIPMap.get(taskId);
      if (tip.shouldCloseForClosedJob(taskId)) {
        //
        // This is how the JobTracker ends a task at the TaskTracker.
        // It may be successfully completed, or may be killed in
        // mid-execution.
        //
        return taskId;
      }
    }
  }
  return null;
}
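// Tracker-side sketch (hypothetical; the real polling loop lives in the
// TaskTracker class): during a heartbeat, a tracker can ask for tasks whose
// job has finished and close them locally:
//
//   String closedTaskId = jobTracker.pollForTaskWithClosedJob(myTrackerName);
//   if (closedTaskId != null) {
//     // stop the named task and discard its local state
//   }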
/**
 * Update the maps baseDirSize and baseDirNumberSubDir when deleting a cache.
 *
 * @param cacheStatus cache status of the cache being deleted
 */
private static void deleteCacheInfoUpdate(CacheStatus cacheStatus) {
  if (!cacheStatus.isInited()) {
    // if it is not created yet, do nothing.
    return;
  }
  synchronized (baseDirSize) {
    Long dirSize = baseDirSize.get(cacheStatus.getBaseDir());
    if (dirSize != null) {
      dirSize -= cacheStatus.size;
      baseDirSize.put(cacheStatus.getBaseDir(), dirSize);
    }
  }
  synchronized (baseDirNumberSubDir) {
    Integer dirSubDir = baseDirNumberSubDir.get(cacheStatus.getBaseDir());
    if (dirSubDir != null) {
      dirSubDir--;
      baseDirNumberSubDir.put(cacheStatus.getBaseDir(), dirSubDir);
    }
  }
}
public Vector completedJobs() {
  Vector v = new Vector();
  for (Iterator it = jobs.values().iterator(); it.hasNext(); ) {
    JobInProgress jip = (JobInProgress) it.next();
    JobStatus status = jip.getStatus();
    if (status.getRunState() == JobStatus.SUCCEEDED) {
      v.add(jip);
    }
  }
  return v;
}

public Vector runningJobs() {
  Vector v = new Vector();
  for (Iterator it = jobs.values().iterator(); it.hasNext(); ) {
    JobInProgress jip = (JobInProgress) it.next();
    JobStatus status = jip.getStatus();
    if (status.getRunState() == JobStatus.RUNNING) {
      v.add(jip);
    }
  }
  return v;
}
/**
 * Update the maps baseDirSize and baseDirNumberSubDir when adding a cache.
 *
 * @param cacheStatus cache status of the cache being added
 */
private static void addCacheInfoUpdate(CacheStatus cacheStatus) {
  long cacheSize = cacheStatus.size;
  synchronized (baseDirSize) {
    Long dirSize = baseDirSize.get(cacheStatus.getBaseDir());
    if (dirSize == null) {
      dirSize = Long.valueOf(cacheSize);
    } else {
      dirSize += cacheSize;
    }
    baseDirSize.put(cacheStatus.getBaseDir(), dirSize);
  }
  synchronized (baseDirNumberSubDir) {
    Integer dirSubDir = baseDirNumberSubDir.get(cacheStatus.getBaseDir());
    if (dirSubDir == null) {
      dirSubDir = 1;
    } else {
      dirSubDir += 1;
    }
    baseDirNumberSubDir.put(cacheStatus.getBaseDir(), dirSubDir);
  }
}
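// Bookkeeping invariant (informal): addCacheInfoUpdate() and
// deleteCacheInfoUpdate() must each run exactly once over a cache's
// lifetime, so that for every base directory:
//
//   baseDirSize(base)         == sum of CacheStatus.size for live caches under base
//   baseDirNumberSubDir(base) == number of live cache subdirectories under base
//
// getLocalCache() compares these totals against local.cache.size and
// local.cache.numbersubdir to decide when to trigger deleteCache().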
private static void deleteCache(Configuration conf, MRAsyncDiskService asyncDiskService)
    throws IOException {
  List<CacheStatus> deleteSet = new LinkedList<CacheStatus>();
  // try deleting cacheStatus entries with a refcount of zero
  synchronized (cachedArchives) {
    for (Iterator<String> it = cachedArchives.keySet().iterator(); it.hasNext(); ) {
      String cacheId = it.next();
      CacheStatus lcacheStatus = cachedArchives.get(cacheId);
      if (lcacheStatus.refcount == 0) {
        // delete this cache entry from the global list
        // and mark the localized file for deletion
        deleteSet.add(lcacheStatus);
        it.remove();
      }
    }
  }
  // do the deletion asynchronously, after releasing the global lock
  Thread cacheFileCleaner =
      new Thread(new CacheFileCleanTask(asyncDiskService, FileSystem.getLocal(conf), deleteSet));
  cacheFileCleaner.start();
}
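// Sketch of the cleanup task handed to the thread above (assumption: the real
// CacheFileCleanTask is defined elsewhere in this file; this only illustrates
// the shape it would need):
//
//   private static class CacheFileCleanTask implements Runnable {
//     private final MRAsyncDiskService service;
//     private final LocalFileSystem fs;
//     private final List<CacheStatus> toBeDeleted;
//
//     CacheFileCleanTask(MRAsyncDiskService service, LocalFileSystem fs,
//                        List<CacheStatus> toBeDeleted) {
//       this.service = service;
//       this.fs = fs;
//       this.toBeDeleted = toBeDeleted;
//     }
//
//     public void run() {
//       for (CacheStatus status : toBeDeleted) {
//         try {
//           deleteLocalPath(service, fs, status.localizedLoadPath);
//           deleteCacheInfoUpdate(status);  // keep baseDir bookkeeping in sync
//         } catch (IOException e) {
//           LOG.info("Failed to delete cache at " + status.localizedLoadPath, e);
//         }
//       }
//     }
//   }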
/**
 * A TaskTracker wants to know the physical locations of completed, but not yet closed, tasks.
 * This exists so the reduce task thread can locate map task outputs.
 */
public synchronized MapOutputLocation[] locateMapOutputs(
    String taskId, String[][] mapTasksNeeded) {
  ArrayList v = new ArrayList();
  for (int i = 0; i < mapTasksNeeded.length; i++) {
    for (int j = 0; j < mapTasksNeeded[i].length; j++) {
      TaskInProgress tip = (TaskInProgress) taskidToTIPMap.get(mapTasksNeeded[i][j]);
      if (tip != null && tip.isComplete(mapTasksNeeded[i][j])) {
        String trackerId = (String) taskidToTrackerMap.get(mapTasksNeeded[i][j]);
        TaskTrackerStatus tracker;
        synchronized (taskTrackers) {
          tracker = (TaskTrackerStatus) taskTrackers.get(trackerId);
        }
        if (tracker == null) {
          // the tracker that ran this map has been lost; try another replica
          continue;
        }
        v.add(new MapOutputLocation(mapTasksNeeded[i][j], tracker.getHost(), tracker.getPort()));
        break;
      }
    }
  }
  // randomly shuffle results to load-balance map output requests
  Collections.shuffle(v);
  return (MapOutputLocation[]) v.toArray(new MapOutputLocation[v.size()]);
}
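// Reduce-side caller sketch (hypothetical use): a reduce task's copier asks
// where the finished map outputs live, then fetches from each host/port:
//
//   MapOutputLocation[] locs =
//       jobTracker.locateMapOutputs(reduceTaskId, neededMapTaskIds);
//   for (int k = 0; k < locs.length; k++) {
//     // fetch the map output from locs[k].getHost() : locs[k].getPort()
//   }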
/**
 * This is the opposite of getLocalCache(). When you are done using the cache, you must release
 * it.
 *
 * @param cache The cache URI to be released
 * @param conf configuration which contains the filesystem the cache is contained in.
 * @throws IOException
 */
public static void releaseCache(URI cache, Configuration conf, long timeStamp)
    throws IOException {
  String cacheId = getKey(cache, conf, timeStamp);
  synchronized (cachedArchives) {
    CacheStatus lcacheStatus = cachedArchives.get(cacheId);
    if (lcacheStatus == null) {
      LOG.warn(
          "Cannot find localized cache: " + cache + " (key: " + cacheId + ") in releaseCache!");
      return;
    }
    lcacheStatus.refcount--;
  }
}
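// Usage sketch (illustrative): the getLocalCache() path increments the
// refcount, so each successful localization should be paired with a
// releaseCache() in a finally block, so the entry becomes eligible for
// eviction once unused:
//
//   Path localized = /* obtained via the getLocalCache() path */ null;
//   try {
//     // use the localized file or archive
//   } finally {
//     releaseCache(cacheUri, conf, timeStamp);
//   }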
///////////////////////////////////////////////////////
// Maintain lookup tables; called by JobInProgress
// and TaskInProgress
///////////////////////////////////////////////////////
void createTaskEntry(String taskid, String taskTracker, TaskInProgress tip) {
  LOG.info(
      "Adding task '" + taskid + "' to tip " + tip.getTIPId()
          + ", for tracker '" + taskTracker + "'");

  // taskid --> tracker
  taskidToTrackerMap.put(taskid, taskTracker);

  // tracker --> taskid
  TreeSet taskset = (TreeSet) trackerToTaskMap.get(taskTracker);
  if (taskset == null) {
    taskset = new TreeSet();
    trackerToTaskMap.put(taskTracker, taskset);
  }
  taskset.add(taskid);

  // taskid --> TIP
  taskidToTIPMap.put(taskid, tip);
}
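// The three lookup tables maintained here (informal invariant):
//
//   taskidToTrackerMap : taskid  --> name of the tracker running it
//   trackerToTaskMap   : tracker --> TreeSet of the taskids it runs
//   taskidToTIPMap     : taskid  --> its TaskInProgress
//
// createTaskEntry() and removeTaskEntry() must keep all three in step, so a
// taskid is either present in all of them or in none.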
public synchronized TaskReport[] getReduceTaskReports(String jobid) {
  JobInProgress job = (JobInProgress) jobs.get(jobid);
  if (job == null) {
    return new TaskReport[0];
  } else {
    Vector reports = new Vector();

    Vector completeReduceTasks = job.reportTasksInProgress(false, true);
    for (Iterator it = completeReduceTasks.iterator(); it.hasNext(); ) {
      TaskInProgress tip = (TaskInProgress) it.next();
      reports.add(tip.generateSingleReport());
    }

    Vector incompleteReduceTasks = job.reportTasksInProgress(false, false);
    for (Iterator it = incompleteReduceTasks.iterator(); it.hasNext(); ) {
      TaskInProgress tip = (TaskInProgress) it.next();
      reports.add(tip.generateSingleReport());
    }

    return (TaskReport[]) reports.toArray(new TaskReport[reports.size()]);
  }
}
/**
 * Accept and process a new TaskTracker profile. We might have known about the TaskTracker
 * previously, or it might be brand-new. All task-tracker structures have already been updated.
 * Just process the contained tasks and any jobs that might be affected.
 */
void updateTaskStatuses(TaskTrackerStatus status) {
  for (Iterator it = status.taskReports(); it.hasNext(); ) {
    TaskStatus report = (TaskStatus) it.next();
    TaskInProgress tip = (TaskInProgress) taskidToTIPMap.get(report.getTaskId());
    if (tip == null) {
      LOG.info(
          "Serious problem. While updating status, cannot find taskid " + report.getTaskId());
    } else {
      JobInProgress job = tip.getJob();
      job.updateTaskStatus(tip, report);
      if (report.getRunState() == TaskStatus.SUCCEEDED) {
        job.completedTask(tip, report.getTaskId());
      } else if (report.getRunState() == TaskStatus.FAILED) {
        // Tell the job to fail the relevant task
        job.failedTask(tip, report.getTaskId(), status.getTrackerName());
      }
    }
  }
}
public synchronized void killJob(String jobid) {
  JobInProgress job = (JobInProgress) jobs.get(jobid);
  if (job != null) {
    // guard against an unknown or already-retired jobid
    job.kill();
  }
}
public Collection taskTrackers() {
  synchronized (taskTrackers) {
    return taskTrackers.values();
  }
}
/**
 * Get the locally cached file or archive; it could either be previously cached (and valid) or
 * copy it from the {@link FileSystem} now.
 *
 * @param cache the cache to be localized, this should be specified as new
 *     URI(hdfs://hostname:port/absolute_path_to_file#LINKNAME). If no schema or hostname:port is
 *     provided the file is assumed to be in the filesystem being used in the Configuration
 * @param conf The Configuration file which contains the filesystem
 * @param subDir The sub cache Dir where you want to localize the files/archives
 * @param fileStatus The file status on the dfs.
 * @param isArchive if the cache is an archive or a file. In case it is an archive with a .zip or
 *     .jar or .tar or .tgz or .tar.gz extension it will be unzipped/unjarred/untarred
 *     automatically and the directory where the archive is unzipped/unjarred/untarred is
 *     returned as the Path. In case of a file, the path to the file is returned
 * @param confFileStamp this is the hdfs file modification timestamp to verify that the file to
 *     be cached hasn't changed since the job started
 * @param fileLength this is the length of the cache file
 * @param currentWorkDir this is the directory where you would want to create symlinks for the
 *     locally cached files/archives
 * @param honorSymLinkConf if this is false, then the symlinks are not created even if conf says
 *     so (this is required for an optimization in task launches)
 * @param lDirAllocator LocalDirAllocator of the tracker
 * @return the path to directory where the archives are unjarred in case of archives, the path
 *     to the file where the file is copied locally
 * @throws IOException
 */
private static Path getLocalCache(
    URI cache,
    Configuration conf,
    Path subDir,
    FileStatus fileStatus,
    boolean isArchive,
    long confFileStamp,
    long fileLength,
    Path currentWorkDir,
    boolean honorSymLinkConf,
    MRAsyncDiskService asyncDiskService,
    LocalDirAllocator lDirAllocator)
    throws IOException {
  String key = getKey(cache, conf, confFileStamp);
  CacheStatus lcacheStatus;
  Path localizedPath;
  synchronized (cachedArchives) {
    lcacheStatus = cachedArchives.get(key);
    if (lcacheStatus == null) {
      // was never localized
      Path uniqueParentDir = new Path(subDir, String.valueOf(random.nextLong()));
      String cachePath = new Path(uniqueParentDir, makeRelative(cache, conf)).toString();
      Path localPath = lDirAllocator.getLocalPathForWrite(cachePath, fileLength, conf);
      lcacheStatus =
          new CacheStatus(
              new Path(localPath.toString().replace(cachePath, "")), localPath, uniqueParentDir);
      cachedArchives.put(key, lcacheStatus);
    }
    lcacheStatus.refcount++;
  }

  boolean initSuccessful = false;
  try {
    synchronized (lcacheStatus) {
      if (!lcacheStatus.isInited()) {
        localizedPath = localizeCache(conf, cache, confFileStamp, lcacheStatus, isArchive);
        lcacheStatus.initComplete();
      } else {
        if (fileStatus != null) {
          localizedPath =
              checkCacheStatusValidity(
                  conf, cache, confFileStamp, lcacheStatus, fileStatus, isArchive);
        } else {
          // if fileStatus is null, then the md5 must be correct
          // so there is no need to check for cache validity
          localizedPath = lcacheStatus.localizedLoadPath;
        }
      }
      createSymlink(conf, cache, lcacheStatus, isArchive, currentWorkDir, honorSymLinkConf);
    }

    // try deleting stuff if you can
    long size = 0;
    int numberSubDir = 0;
    synchronized (lcacheStatus) {
      synchronized (baseDirSize) {
        Long get = baseDirSize.get(lcacheStatus.getBaseDir());
        if (get != null) {
          size = get.longValue();
        } else {
          LOG.warn("Cannot find size of baseDir: " + lcacheStatus.getBaseDir());
        }
      }
      synchronized (baseDirNumberSubDir) {
        Integer get = baseDirNumberSubDir.get(lcacheStatus.getBaseDir());
        if (get != null) {
          numberSubDir = get.intValue();
        } else {
          LOG.warn("Cannot find subdirectory count of baseDir: " + lcacheStatus.getBaseDir());
        }
      }
    }

    // the cache size defaults to 10GB
    long allowedSize = conf.getLong("local.cache.size", DEFAULT_CACHE_SIZE);
    long allowedNumberSubDir =
        conf.getLong("local.cache.numbersubdir", DEFAULT_CACHE_SUBDIR_LIMIT);
    if (allowedSize < size || allowedNumberSubDir < numberSubDir) {
      // try some cache deletions
      LOG.debug(
          "Start deleting released cache because"
              + " [size, allowedSize, numberSubDir, allowedNumberSubDir] ="
              + " [" + size + ", " + allowedSize + ", " + numberSubDir + ", "
              + allowedNumberSubDir + "]");
      deleteCache(conf, asyncDiskService);
    }
    initSuccessful = true;
    return localizedPath;
  } finally {
    if (!initSuccessful) {
      synchronized (cachedArchives) {
        lcacheStatus.refcount--;
      }
    }
  }
}
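// Worked example of the eviction check above (numbers are illustrative): with
// local.cache.size left at its 10GB default, a baseDir holding size = 12GB
// across numberSubDir = 40 subdirectories fails the allowedSize < size test
// (10GB < 12GB), so deleteCache() runs and evicts every entry whose refcount
// has dropped to zero.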
public TaskTrackerStatus getTaskTracker(String trackerID) {
  synchronized (taskTrackers) {
    return (TaskTrackerStatus) taskTrackers.get(trackerID);
  }
}
/**
 * A tracker wants to know if there's a Task to run. Returns a task we'd like the TaskTracker to
 * execute right now.
 *
 * <p>Eventually this function should compute load on the various TaskTrackers, and incorporate
 * knowledge of DFS file placement. But for right now, it just grabs a single item out of the
 * pending task list and hands it back.
 */
public synchronized Task pollForNewTask(String taskTracker) {
  //
  // Compute average map and reduce task numbers across pool
  //
  int avgMaps = 0;
  int avgReduces = 0;
  int numTaskTrackers;
  TaskTrackerStatus tts;
  synchronized (taskTrackers) {
    numTaskTrackers = taskTrackers.size();
    tts = (TaskTrackerStatus) taskTrackers.get(taskTracker);
  }
  if (numTaskTrackers > 0) {
    avgMaps = totalMaps / numTaskTrackers;
    avgReduces = totalReduces / numTaskTrackers;
  }
  int totalCapacity = numTaskTrackers * maxCurrentTasks;

  //
  // Get map + reduce counts for the current tracker.
  //
  if (tts == null) {
    LOG.warning("Unknown task tracker polling; ignoring: " + taskTracker);
    return null;
  }
  int numMaps = tts.countMapTasks();
  int numReduces = tts.countReduceTasks();

  //
  // In the below steps, we allocate first a map task (if appropriate),
  // and then a reduce task if appropriate. We go through all jobs
  // in order of job arrival; jobs only get serviced if their
  // predecessors are serviced, too.
  //

  //
  // We hand a task to the current taskTracker if the given machine
  // has a workload that's equal to or less than the averageMaps
  // +/- TASK_ALLOC_EPSILON. (That epsilon is in place in case
  // there is an odd machine that is failing for some reason but
  // has not yet been removed from the pool, making capacity seem
  // larger than it really is.)
  //
  synchronized (jobsByArrival) {
    if ((numMaps < maxCurrentTasks) && (numMaps <= (avgMaps + TASK_ALLOC_EPSILON))) {
      int totalNeededMaps = 0;
      for (Iterator it = jobsByArrival.iterator(); it.hasNext(); ) {
        JobInProgress job = (JobInProgress) it.next();
        if (job.getStatus().getRunState() != JobStatus.RUNNING) {
          continue;
        }

        Task t = job.obtainNewMapTask(taskTracker, tts);
        if (t != null) {
          return t;
        }

        //
        // Beyond the highest-priority task, reserve a little
        // room for failures and speculative executions; don't
        // schedule tasks to the hilt.
        //
        totalNeededMaps += job.desiredMaps();
        double padding = 0;
        if (totalCapacity > MIN_SLOTS_FOR_PADDING) {
          padding = Math.min(maxCurrentTasks, totalNeededMaps * PAD_FRACTION);
        }
        if (totalNeededMaps + padding >= totalCapacity) {
          break;
        }
      }
    }

    //
    // Same thing, but for reduce tasks
    //
    if ((numReduces < maxCurrentTasks) && (numReduces <= (avgReduces + TASK_ALLOC_EPSILON))) {
      int totalNeededReduces = 0;
      for (Iterator it = jobsByArrival.iterator(); it.hasNext(); ) {
        JobInProgress job = (JobInProgress) it.next();
        if (job.getStatus().getRunState() != JobStatus.RUNNING) {
          continue;
        }

        Task t = job.obtainNewReduceTask(taskTracker, tts);
        if (t != null) {
          return t;
        }

        //
        // Beyond the highest-priority task, reserve a little
        // room for failures and speculative executions; don't
        // schedule tasks to the hilt.
        //
        totalNeededReduces += job.desiredReduces();
        double padding = 0;
        if (totalCapacity > MIN_SLOTS_FOR_PADDING) {
          padding = Math.min(maxCurrentTasks, totalNeededReduces * PAD_FRACTION);
        }
        if (totalNeededReduces + padding >= totalCapacity) {
          break;
        }
      }
    }
  }
  return null;
}
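// Worked example of the map-allocation test above (illustrative numbers,
// assuming a small TASK_ALLOC_EPSILON such as 0.01): with 10 trackers and
// totalMaps = 42, avgMaps = 42 / 10 = 4 (integer division). A tracker running
// numMaps = 3 passes 3 <= 4 + 0.01 and is offered a map task (provided it is
// also under maxCurrentTasks); a tracker already running 5 maps is skipped
// until the rest of the pool catches up.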
public synchronized ClusterStatus getClusterStatus() {
  synchronized (taskTrackers) {
    return new ClusterStatus(taskTrackers.size(), totalMaps, totalReduces, maxCurrentTasks);
  }
}

///////////////////////////////////////////////////////////////
// JobTracker methods
///////////////////////////////////////////////////////////////
public JobInProgress getJob(String jobid) {
  return (JobInProgress) jobs.get(jobid);
}