/** Process incoming heartbeat messages from the task trackers. */ public synchronized int emitHeartbeat(TaskTrackerStatus trackerStatus, boolean initialContact) { String trackerName = trackerStatus.getTrackerName(); trackerStatus.setLastSeen(System.currentTimeMillis()); synchronized (taskTrackers) { synchronized (trackerExpiryQueue) { boolean seenBefore = updateTaskTrackerStatus(trackerName, trackerStatus); if (initialContact) { // If it's first contact, then clear out any state hanging around if (seenBefore) { lostTaskTracker(trackerName); } } else { // If not first contact, there should be some record of the tracker if (!seenBefore) { return InterTrackerProtocol.UNKNOWN_TASKTRACKER; } } if (initialContact) { trackerExpiryQueue.add(trackerStatus); } } } updateTaskStatuses(trackerStatus); // LOG.info("Got heartbeat from "+trackerName); return InterTrackerProtocol.TRACKERS_OK; }
/**
 * Add up the slot capacity of one task type over every tracker the manager reports.
 *
 * @param type the kind of slot to count; {@code TaskType.MAP} selects map capacity, any other
 *     value selects reduce capacity
 * @return the summed capacity
 */
public int getTotalSlots(TaskType type) {
  final boolean countingMaps = (type == TaskType.MAP);
  int total = 0;
  for (TaskTrackerStatus tracker : taskTrackerManager.taskTrackers()) {
    if (countingMaps) {
      total += tracker.getMaxMapTasks();
    } else {
      total += tracker.getMaxReduceTasks();
    }
  }
  return total;
}
/**
 * Replace (or drop) the recorded status of a task tracker and keep the running map/reduce task
 * totals in step. Callers are expected to hold the lock on {@code taskTrackers}.
 *
 * <p>The task counts of any previous status are subtracted from the totals. A non-null
 * {@code status} is then added to the totals and stored; a null {@code status} removes the
 * tracker's entry when one existed.
 *
 * @author Owen O'Malley
 * @param trackerName The name of the tracker
 * @param status The new status for the task tracker, or null to forget the tracker
 * @return true if a status was already recorded for this tracker
 */
private boolean updateTaskTrackerStatus(String trackerName, TaskTrackerStatus status) {
  TaskTrackerStatus previous = (TaskTrackerStatus) taskTrackers.get(trackerName);
  final boolean hadPrevious = (previous != null);

  if (hadPrevious) {
    totalMaps -= previous.countMapTasks();
    totalReduces -= previous.countReduceTasks();
  }

  if (status == null) {
    if (hadPrevious) {
      taskTrackers.remove(trackerName);
    }
  } else {
    totalMaps += status.countMapTasks();
    totalReduces += status.countReduceTasks();
    taskTrackers.put(trackerName, status);
  }

  return hadPrevious;
}
/**
 * Order two {@code TaskTrackerStatus} objects by last-seen time, oldest first, falling back to
 * the tracker name when the times are equal.
 *
 * @param o1 first status (must be a {@code TaskTrackerStatus})
 * @param o2 second status (must be a {@code TaskTrackerStatus})
 * @return -1 or 1 when the last-seen times differ, otherwise the result of comparing the names
 */
public int compare(Object o1, Object o2) {
  TaskTrackerStatus left = (TaskTrackerStatus) o1;
  TaskTrackerStatus right = (TaskTrackerStatus) o2;

  if (left.getLastSeen() != right.getLastSeen()) {
    return (left.getLastSeen() < right.getLastSeen()) ? -1 : 1;
  }
  return left.getTrackerName().compareTo(right.getTrackerName());
}
/** * Accept and process a new TaskTracker profile. We might have known about the TaskTracker * previously, or it might be brand-new. All task-tracker structures have already been updated. * Just process the contained tasks and any jobs that might be affected. */ void updateTaskStatuses(TaskTrackerStatus status) { for (Iterator it = status.taskReports(); it.hasNext(); ) { TaskStatus report = (TaskStatus) it.next(); TaskInProgress tip = (TaskInProgress) taskidToTIPMap.get(report.getTaskId()); if (tip == null) { LOG.info( "Serious problem. While updating status, cannot find taskid " + report.getTaskId()); } else { JobInProgress job = tip.getJob(); job.updateTaskStatus(tip, report); if (report.getRunState() == TaskStatus.SUCCEEDED) { job.completedTask(tip, report.getTaskId()); } else if (report.getRunState() == TaskStatus.FAILED) { // Tell the job to fail the relevant task job.failedTask(tip, report.getTaskId(), status.getTrackerName()); } } } }
/** * A TaskTracker wants to know the physical locations of completed, but not yet closed, tasks. * This exists so the reduce task thread can locate map task outputs. */ public synchronized MapOutputLocation[] locateMapOutputs( String taskId, String[][] mapTasksNeeded) { ArrayList v = new ArrayList(); for (int i = 0; i < mapTasksNeeded.length; i++) { for (int j = 0; j < mapTasksNeeded[i].length; j++) { TaskInProgress tip = (TaskInProgress) taskidToTIPMap.get(mapTasksNeeded[i][j]); if (tip != null && tip.isComplete(mapTasksNeeded[i][j])) { String trackerId = (String) taskidToTrackerMap.get(mapTasksNeeded[i][j]); TaskTrackerStatus tracker; synchronized (taskTrackers) { tracker = (TaskTrackerStatus) taskTrackers.get(trackerId); } v.add(new MapOutputLocation(mapTasksNeeded[i][j], tracker.getHost(), tracker.getPort())); break; } } } // randomly shuffle results to load-balance map output requests Collections.shuffle(v); return (MapOutputLocation[]) v.toArray(new MapOutputLocation[v.size()]); }
/** * The run method lives for the life of the JobTracker, and removes TaskTrackers that have not * checked in for some time. */ public void run() { while (shouldRun) { // // Thread runs periodically to check whether trackers should be expired. // The sleep interval must be no more than half the maximum expiry time // for a task tracker. // try { Thread.sleep(TASKTRACKER_EXPIRY_INTERVAL / 3); } catch (InterruptedException ie) { } // // Loop through all expired items in the queue // synchronized (taskTrackers) { synchronized (trackerExpiryQueue) { long now = System.currentTimeMillis(); TaskTrackerStatus leastRecent = null; while ((trackerExpiryQueue.size() > 0) && ((leastRecent = (TaskTrackerStatus) trackerExpiryQueue.first()) != null) && (now - leastRecent.getLastSeen() > TASKTRACKER_EXPIRY_INTERVAL)) { // Remove profile from head of queue trackerExpiryQueue.remove(leastRecent); String trackerName = leastRecent.getTrackerName(); // Figure out if last-seen time should be updated, or if tracker is dead TaskTrackerStatus newProfile = (TaskTrackerStatus) taskTrackers.get(leastRecent.getTrackerName()); // Items might leave the taskTracker set through other means; the // status stored in 'taskTrackers' might be null, which means the // tracker has already been destroyed. if (newProfile != null) { if (now - newProfile.getLastSeen() > TASKTRACKER_EXPIRY_INTERVAL) { // Remove completely updateTaskTrackerStatus(trackerName, null); lostTaskTracker(leastRecent.getTrackerName()); } else { // Update time by inserting latest profile trackerExpiryQueue.add(newProfile); } } } } } } }
/**
 * Choose the tasks to hand to a task tracker in response to its heartbeat.
 *
 * <p>The method first sums the remaining map and reduce work over all RUNNING jobs in the job
 * queue and turns that into per-type load factors, which cap how many of the tracker's slots
 * may be used. It then makes two passes over the job queue:
 *
 * <ul>
 *   <li><b>Maps</b>: for every available map slot the queue is scanned in order. Jobs whose
 *       configuration reports {@code isIterative()} are scheduled with the help of per-algorithm
 *       bookkeeping maps keyed by the iterative algorithm ID and steered by the
 *       {@code mapred.iterative.jointype} setting ("one2one", "one2mul", anything else is
 *       treated as the remaining case). The first job seen for an algorithm ID records which
 *       task ids ran on which tracker; later jobs of the same algorithm are given copies of
 *       that record and are asked for those specific task ids. Non-iterative jobs get a
 *       node/rack-local task if possible, otherwise one non-local task.
 *   <li><b>Reduces</b>: at most one reduce task is assigned per call, again with special
 *       handling for iterative jobs.
 * </ul>
 *
 * @param taskTracker the tracker asking for work
 * @return the tasks assigned in this call (maps first, then at most one reduce); possibly empty
 * @throws IOException if an iterative job reports the algorithm ID "none", if a "one2mul" job's
 *     reduce count is not a multiple of the number of trackers, or if a first-iteration
 *     "one2one" job has no recorded map assignment for this algorithm or tracker
 */
@Override
public synchronized List<Task> assignTasks(TaskTracker taskTracker) throws IOException {
  TaskTrackerStatus taskTrackerStatus = taskTracker.getStatus();
  ClusterStatus clusterStatus = taskTrackerManager.getClusterStatus();
  final int numTaskTrackers = clusterStatus.getTaskTrackers();
  final int clusterMapCapacity = clusterStatus.getMaxMapTasks();
  final int clusterReduceCapacity = clusterStatus.getMaxReduceTasks();

  Collection<JobInProgress> jobQueue = jobQueueJobInProgressListener.getJobQueue();

  //
  // Get map + reduce counts for the current tracker.
  //
  final int trackerMapCapacity = taskTrackerStatus.getMaxMapSlots();
  final int trackerReduceCapacity = taskTrackerStatus.getMaxReduceSlots();
  final int trackerRunningMaps = taskTrackerStatus.countMapTasks();
  final int trackerRunningReduces = taskTrackerStatus.countReduceTasks();

  // Assigned tasks
  List<Task> assignedTasks = new ArrayList<Task>();

  //
  // Compute (running + pending) map and reduce task numbers across pool
  //
  int remainingReduceLoad = 0;
  int remainingMapLoad = 0;
  synchronized (jobQueue) {
    for (JobInProgress job : jobQueue) {
      if (job.getStatus().getRunState() == JobStatus.RUNNING) {
        remainingMapLoad += (job.desiredMaps() - job.finishedMaps());
        if (job.scheduleReduces()) {
          remainingReduceLoad += (job.desiredReduces() - job.finishedReduces());
        }
      }
    }
  }

  // Compute the 'load factor' for maps and reduces
  double mapLoadFactor = 0.0;
  if (clusterMapCapacity > 0) {
    mapLoadFactor = (double) remainingMapLoad / clusterMapCapacity;
  }
  double reduceLoadFactor = 0.0;
  if (clusterReduceCapacity > 0) {
    reduceLoadFactor = (double) remainingReduceLoad / clusterReduceCapacity;
  }

  //
  // In the below steps, we allocate first map tasks (if appropriate),
  // and then reduce tasks if appropriate. We go through all jobs
  // in order of job arrival; jobs only get serviced if their
  // predecessors are serviced, too.
  //

  //
  // We assign tasks to the current taskTracker if the given machine
  // has a workload that's less than the maximum load of that kind of
  // task.
  // However, if the cluster is close to getting loaded i.e. we don't
  // have enough _padding_ for speculative executions etc., we only
  // schedule the "highest priority" task i.e. the task from the job
  // with the highest priority.
  //
  final int trackerCurrentMapCapacity =
      Math.min((int) Math.ceil(mapLoadFactor * trackerMapCapacity), trackerMapCapacity);
  int availableMapSlots = trackerCurrentMapCapacity - trackerRunningMaps;
  boolean exceededMapPadding = false;
  if (availableMapSlots > 0) {
    exceededMapPadding = exceededPadding(true, clusterStatus, trackerMapCapacity);
  }

  int numLocalMaps = 0;
  int numNonLocalMaps = 0;
  // NOTE(review): this flag is only ever set to true below and is never reset, neither between
  // jobs in the queue, nor between map slots, nor before the reduce pass. Once a first-iteration
  // job has been seen, every later iterative job in this call is treated as a first-iteration
  // job as well. Confirm whether that is intended.
  boolean newIterationJob = false;
  scheduleMaps:
  for (int i = 0; i < availableMapSlots; ++i) {
    synchronized (jobQueue) {
      for (JobInProgress job : jobQueue) {
        if (job.getStatus().getRunState() != JobStatus.RUNNING) {
          continue;
        }

        if (job.getJobConf().isIterative()) {
          String iterativeAppID = job.getJobConf().getIterativeAlgorithmID();
          if (iterativeAppID.equals("none")) {
            throw new IOException("please specify the iteration ID!");
          }

          // NOTE(review): jointype is dereferenced below without a null check; presumably the
          // property is always set for iterative jobs. Verify against the job configuration.
          String jointype = job.getJobConf().get("mapred.iterative.jointype");

          // Prepare the per-algorithm bookkeeping the first time this algorithm ID is seen.
          if (!this.tracker_mtask_map.containsKey(iterativeAppID)) {
            // a new iterative algorithm
            Map<String, LinkedList<Integer>> new_tracker_task_map =
                new HashMap<String, LinkedList<Integer>>();
            this.tracker_mtask_map.put(iterativeAppID, new_tracker_task_map);

            Map<String, LinkedList<Integer>> new_tracker_rtask_map =
                new HashMap<String, LinkedList<Integer>>();
            this.tracker_rtask_map.put(iterativeAppID, new_tracker_rtask_map);

            // record the first job of the series of jobs in the iterations
            this.first_job_map.put(iterativeAppID, job.getJobID());

            // record the set of jobs belonging to this iterative algorithm
            HashSet<JobID> jobs = new HashSet<JobID>();
            jobs.add(job.getJobID());
            this.iteration_jobs_map.put(iterativeAppID, jobs);
          }

          // This job is the first one recorded for its algorithm ID.
          if (this.first_job_map.get(iterativeAppID).equals(job.getJobID())
              && job.getJobConf().isIterative()) {
            LOG.info(job.getJobID() + " is the first iteration job");
            newIterationJob = true;
          }

          // This is one of the following jobs: build its per-tracker assignment lists (once per
          // job) as copies of what was recorded for the algorithm.
          if (!newIterationJob) {
            LOG.info(job.getJobID() + " is not the first iteration job");
            this.iteration_jobs_map.get(iterativeAppID).add(job.getJobID());

            if (this.mtask_assign_map.get(job.getJobID()) == null) {
              // prepare the map task assignment list
              LOG.info("for job " + job.getJobID() + "'s assignment:");
              Map<String, LinkedList<Integer>> map_task_assign =
                  new HashMap<String, LinkedList<Integer>>();
              for (Map.Entry<String, LinkedList<Integer>> entry :
                  this.tracker_mtask_map.get(iterativeAppID).entrySet()) {
                String tracker = entry.getKey();
                LinkedList<Integer> taskids = entry.getValue();
                LinkedList<Integer> copytaskids = new LinkedList<Integer>();
                LOG.info("assign on tracker " + tracker);
                for (int taskid : taskids) {
                  copytaskids.add(taskid);
                  LOG.info("task id " + taskid);
                }
                map_task_assign.put(tracker, copytaskids);
              }
              this.mtask_assign_map.put(job.getJobID(), map_task_assign);

              // For one2one, the reduce assignment is the same mapping as the map assignment,
              // so the algorithm's reduce record is overwritten with a copy of the map record.
              if (jointype.equals("one2one")) {
                // prepare the reduce task assignment list
                Map<String, LinkedList<Integer>> reduce_task_assign =
                    new HashMap<String, LinkedList<Integer>>();
                for (Map.Entry<String, LinkedList<Integer>> entry :
                    this.tracker_mtask_map.get(iterativeAppID).entrySet()) {
                  String tracker = entry.getKey();
                  LinkedList<Integer> taskids = entry.getValue();
                  LinkedList<Integer> copytaskids = new LinkedList<Integer>();
                  for (int taskid : taskids) {
                    copytaskids.add(taskid);
                  }
                  reduce_task_assign.put(tracker, copytaskids);
                }
                this.tracker_rtask_map.put(iterativeAppID, reduce_task_assign);
              }

              // prepare the reduce task assignment list for all cases
              Map<String, LinkedList<Integer>> reduce_task_assign =
                  new HashMap<String, LinkedList<Integer>>();
              for (Map.Entry<String, LinkedList<Integer>> entry :
                  this.tracker_rtask_map.get(iterativeAppID).entrySet()) {
                String tracker = entry.getKey();
                LinkedList<Integer> taskids = entry.getValue();
                LinkedList<Integer> copytaskids = new LinkedList<Integer>();
                for (int taskid : taskids) {
                  copytaskids.add(taskid);
                }
                reduce_task_assign.put(tracker, copytaskids);
              }
              this.rtask_assign_map.put(job.getJobID(), reduce_task_assign);
            }
          }

          Task t = null;

          // First iteration: let the job pick the task id by default (except for the one2mul
          // case, where ids are assigned starting from 0,...,n).
          // Following iterations: request the task id recorded during the first iteration.
          if (newIterationJob) {
            /*
             * The one2mul case needs care: we want to assign e.g. map0, map1, map2 and reduce0
             * to one tracker, and map3, map4, map5 and reduce1 to another tracker.
             */
            if (jointype.equals("one2mul")
                && !tracker_rtask_map
                    .get(iterativeAppID)
                    .containsKey(taskTracker.getTrackerName())) {
              // If the reduce record already contained this tracker, its tasks would have been
              // laid out on an earlier pass; this branch runs only the first time.
              int scala = job.getJobConf().getInt("mapred.iterative.data.scala", 1);
              // int mapsEachTracker = job.getJobConf().getNumMapTasks() / numTaskTrackers;
              int reducersEachTracker = job.getJobConf().getNumReduceTasks() / numTaskTrackers;
              if (job.getJobConf().getNumReduceTasks() % numTaskTrackers != 0)
                throw new IOException(
                    "job.getJobConf().getNumReduceTasks() % numTaskTrackers != 0");

              if (!this.tracker_mtask_map
                  .get(iterativeAppID)
                  .containsKey(taskTracker.getTrackerName())) {
                LinkedList<Integer> tasklist = new LinkedList<Integer>();
                this.tracker_mtask_map
                    .get(iterativeAppID)
                    .put(taskTracker.getTrackerName(), tasklist);
              }

              if (!this.tracker_rtask_map
                  .get(iterativeAppID)
                  .containsKey(taskTracker.getTrackerName())) {
                LinkedList<Integer> tasklist = new LinkedList<Integer>();
                this.tracker_rtask_map
                    .get(iterativeAppID)
                    .put(taskTracker.getTrackerName(), tasklist);
              }

              // for debugging
              String debugout1 = "maps: ";
              String debugout2 = "reduces: ";

              // The start reduce id for this tracker: derived from how many trackers are
              // already present in the reduce record (this one included).
              int reduceOffsetId =
                  (tracker_rtask_map.get(iterativeAppID).size() - 1) * reducersEachTracker;
              for (int count = 0; count < reducersEachTracker; count++) {
                int reducepartitionid = reduceOffsetId + count;
                debugout2 += reducepartitionid + " ";
                tracker_rtask_map
                    .get(iterativeAppID)
                    .get(taskTracker.getTrackerName())
                    .add(reducepartitionid);

                // Each reduce partition is paired with 'scala' consecutive map partitions.
                for (int count2 = 0; count2 < scala; count2++) {
                  int mappartitionid = reducepartitionid * scala + count2;
                  // int mapid = job.splitTaskMap.get(mappartitionid);
                  debugout1 += mappartitionid + " ";
                  this.tracker_mtask_map
                      .get(iterativeAppID)
                      .get(taskTracker.getTrackerName())
                      .add(mappartitionid);
                }
              }

              // print out for debug
              LOG.info(
                  "tracker "
                      + taskTracker.getTrackerName()
                      + " assigned tasks "
                      + debugout1
                      + " and "
                      + debugout2);

              // Make this job's map assignment list for the tracker (a copy of the record).
              String tracker = taskTracker.getTrackerName();
              LinkedList<Integer> mtaskids =
                  this.tracker_mtask_map.get(iterativeAppID).get(taskTracker.getTrackerName());
              LinkedList<Integer> mcopytaskids = new LinkedList<Integer>();
              for (int taskid : mtaskids) {
                mcopytaskids.add(taskid);
              }
              if (!mtask_assign_map.containsKey(job.getJobID())) {
                Map<String, LinkedList<Integer>> map_task_assign =
                    new HashMap<String, LinkedList<Integer>>();
                this.mtask_assign_map.put(job.getJobID(), map_task_assign);
              }
              this.mtask_assign_map.get(job.getJobID()).put(tracker, mcopytaskids);

              // prepare the reduce task assignment list
              LinkedList<Integer> rtaskids =
                  this.tracker_rtask_map.get(iterativeAppID).get(taskTracker.getTrackerName());
              LinkedList<Integer> rcopytaskids = new LinkedList<Integer>();
              for (int taskid : rtaskids) {
                rcopytaskids.add(taskid);
              }
              if (!rtask_assign_map.containsKey(job.getJobID())) {
                Map<String, LinkedList<Integer>> reduce_task_assign =
                    new HashMap<String, LinkedList<Integer>>();
                this.rtask_assign_map.put(job.getJobID(), reduce_task_assign);
              }
              this.rtask_assign_map.get(job.getJobID()).put(tracker, rcopytaskids);

              // assign a map task for this tracker
              Integer target = null;
              // NOTE(review): any exception here (e.g. a missing map entry) is only printed,
              // leaving target null, which is then reported as "all ... processed".
              try {
                target =
                    this.mtask_assign_map
                        .get(job.getJobID())
                        .get(taskTracker.getTrackerName())
                        .peekFirst();
              } catch (Exception e) {
                e.printStackTrace();
              }

              if (target == null) {
                // All have been assigned, no more work; maybe it should help others to process.
                // The unlabeled break leaves the job loop and moves on to the next map slot.
                LOG.info(
                    "all map tasks on tasktracker "
                        + taskTracker.getTrackerName()
                        + " have been processed");
                break;
              } else {
                t =
                    job.obtainNewNodeOrRackLocalMapTask(
                        taskTrackerStatus,
                        numTaskTrackers,
                        taskTrackerManager.getNumberOfUniqueHosts(),
                        target);
              }
            } else {
              // NOTE(review): for one2mul this branch is also taken on every pass after the
              // tracker's lists were built, and it requests a task without a target id even
              // though the head of the assignment list is polled further below. Confirm.
              t =
                  job.obtainNewNodeOrRackLocalMapTask(
                      taskTrackerStatus,
                      numTaskTrackers,
                      taskTrackerManager.getNumberOfUniqueHosts());
            }
          } else {
            Integer target = null;
            // NOTE(review): exceptions are swallowed here as well; target stays null.
            try {
              target =
                  this.mtask_assign_map
                      .get(job.getJobID())
                      .get(taskTracker.getTrackerName())
                      .peekFirst();
            } catch (Exception e) {
              e.printStackTrace();
            }

            if (target == null) {
              // all have been assigned, no more work, maybe it should help others to process
              LOG.info(
                  "all map tasks on tasktracker "
                      + taskTracker.getTrackerName()
                      + " have been processed");
              break;
            } else {
              t =
                  job.obtainNewNodeOrRackLocalMapTask(
                      taskTrackerStatus,
                      numTaskTrackers,
                      taskTrackerManager.getNumberOfUniqueHosts(),
                      target);
            }
          }

          if (t != null) {
            assignedTasks.add(t);
            ++numLocalMaps;

            // Following iterations, and the one2mul case, consume the head of the prepared
            // assignment list; for one2mul nothing needs recording because the assignment list
            // was made beforehand. Otherwise (first iteration) record what was assigned.
            if (!newIterationJob || jointype.equals("one2mul")) {
              // poll, remove
              this.mtask_assign_map
                  .get(job.getJobID())
                  .get(taskTracker.getTrackerName())
                  .pollFirst();
              LOG.info("assigning task " + t.getTaskID() + " on " + taskTracker.getTrackerName());
            } else {
              // record the assignment list for map tasks
              if (!this.tracker_mtask_map
                  .get(iterativeAppID)
                  .containsKey(taskTracker.getTrackerName())) {
                LinkedList<Integer> tasklist = new LinkedList<Integer>();
                this.tracker_mtask_map
                    .get(iterativeAppID)
                    .put(taskTracker.getTrackerName(), tasklist);
              }
              this.tracker_mtask_map
                  .get(iterativeAppID)
                  .get(taskTracker.getTrackerName())
                  .add(t.getTaskID().getTaskID().getId());

              // For one2one, also remember the map task id per tracker so the reduce pass can
              // hand out the matching reduce id.
              if (jointype.equals("one2one")) {
                if (!first_job_reduces_map.containsKey(iterativeAppID)) {
                  Map<String, LinkedList<Integer>> tracker_reduce_map =
                      new HashMap<String, LinkedList<Integer>>();
                  first_job_reduces_map.put(iterativeAppID, tracker_reduce_map);
                }
                if (!first_job_reduces_map
                    .get(iterativeAppID)
                    .containsKey(taskTracker.getTrackerName())) {
                  LinkedList<Integer> reduces = new LinkedList<Integer>();
                  first_job_reduces_map
                      .get(iterativeAppID)
                      .put(taskTracker.getTrackerName(), reduces);
                }
                first_job_reduces_map
                    .get(iterativeAppID)
                    .get(taskTracker.getTrackerName())
                    .add(t.getTaskID().getTaskID().getId());
              }

              LOG.info("assigning task " + t.getTaskID() + " on " + taskTracker.getTrackerName());
            }

            // Don't assign map tasks to the hilt!
            // Leave some free slots in the cluster for future task-failures,
            // speculative tasks etc. beyond the highest priority job
            if (exceededMapPadding) {
              break scheduleMaps;
            }

            // Try all jobs again for the next Map task
            break;
          }

          // NOTE(review): logged at error level every time no local task was obtained, which
          // includes the ordinary case of the job simply having no local work for this tracker.
          LOG.error("New Node Or Rack Local Map Task failed!");

          if (newIterationJob) {
            // No node-local or rack-local task: fall back to a non-local map task.
            t =
                job.obtainNewNonLocalMapTask(
                    taskTrackerStatus,
                    numTaskTrackers,
                    taskTrackerManager.getNumberOfUniqueHosts());
          } else {
            // NOTE(review): unlike the lookups above this one is not wrapped in try/catch, so a
            // missing job or tracker entry would surface as a NullPointerException.
            Integer target =
                this.mtask_assign_map
                    .get(job.getJobID())
                    .get(taskTracker.getTrackerName())
                    .peekFirst();
            if (target == null) {
              // all have been assigned, no more work, maybe it should help others to process
              LOG.info(
                  "all map tasks on tasktracker "
                      + taskTracker.getTrackerName()
                      + " have been processed");
              break;
            } else {
              t =
                  job.obtainNewNonLocalMapTask(
                      taskTrackerStatus,
                      numTaskTrackers,
                      taskTrackerManager.getNumberOfUniqueHosts(),
                      target);
            }
          }

          if (t != null) {
            assignedTasks.add(t);
            ++numNonLocalMaps;

            // First iteration: record the assignment. Following iterations: consume the head of
            // the prepared list.
            if (newIterationJob) {
              if (!this.tracker_mtask_map
                  .get(iterativeAppID)
                  .containsKey(taskTracker.getTrackerName())) {
                LinkedList<Integer> tasklist = new LinkedList<Integer>();
                this.tracker_mtask_map
                    .get(iterativeAppID)
                    .put(taskTracker.getTrackerName(), tasklist);
              }
              this.tracker_mtask_map
                  .get(iterativeAppID)
                  .get(taskTracker.getTrackerName())
                  .add(t.getTaskID().getTaskID().getId());
            } else {
              // poll, remove
              this.mtask_assign_map
                  .get(job.getJobID())
                  .get(taskTracker.getTrackerName())
                  .pollFirst();
            }

            // We assign at most 1 off-switch or speculative task
            // This is to prevent TaskTrackers from stealing local-tasks
            // from other TaskTrackers.
            break scheduleMaps;
          }
        } else {
          // not an iterative algorithm, normal schedule
          Task t = null;

          // Try to schedule a node-local or rack-local Map task
          t =
              job.obtainNewNodeOrRackLocalMapTask(
                  taskTrackerStatus,
                  numTaskTrackers,
                  taskTrackerManager.getNumberOfUniqueHosts());
          if (t != null) {
            assignedTasks.add(t);
            ++numLocalMaps;

            // Don't assign map tasks to the hilt!
            // Leave some free slots in the cluster for future task-failures,
            // speculative tasks etc. beyond the highest priority job
            if (exceededMapPadding) {
              break scheduleMaps;
            }

            // Try all jobs again for the next Map task
            break;
          }

          // No local task available: try to schedule a non-local Map task
          t =
              job.obtainNewNonLocalMapTask(
                  taskTrackerStatus,
                  numTaskTrackers,
                  taskTrackerManager.getNumberOfUniqueHosts());
          if (t != null) {
            assignedTasks.add(t);
            ++numNonLocalMaps;

            // We assign at most 1 off-switch or speculative task
            // This is to prevent TaskTrackers from stealing local-tasks
            // from other TaskTrackers.
            break scheduleMaps;
          }
        }
      }
    }
  }
  int assignedMaps = assignedTasks.size();

  //
  // Same thing, but for reduce tasks
  // However we _never_ assign more than 1 reduce task per heartbeat
  //
  // The reduce task location should be maintained for the termination check.
  final int trackerCurrentReduceCapacity =
      Math.min((int) Math.ceil(reduceLoadFactor * trackerReduceCapacity), trackerReduceCapacity);
  final int availableReduceSlots =
      Math.min((trackerCurrentReduceCapacity - trackerRunningReduces), 1);
  boolean exceededReducePadding = false;
  // LOG.info("availableReduceSlots " + availableReduceSlots);
  if (availableReduceSlots > 0) {
    exceededReducePadding = exceededPadding(false, clusterStatus, trackerReduceCapacity);
    synchronized (jobQueue) {
      for (JobInProgress job : jobQueue) {
        LOG.info("job " + job.getJobID());
        if (job.getStatus().getRunState() != JobStatus.RUNNING || job.numReduceTasks == 0) {
          LOG.info("have to continue " + job.getStatus().getRunState());
          continue;
        }

        Task t = null;
        if (job.getJobConf().isIterative()) {
          String iterativeAppID = job.getJobConf().getIterativeAlgorithmID();
          if (iterativeAppID.equals("none")) {
            throw new IOException("please specify the iteration ID!");
          }

          // NOTE(review): dereferenced without a null check, as in the map pass.
          String jointype = job.getJobConf().get("mapred.iterative.jointype");

          if (jointype.equals("one2one")) {
            // one-to-one jobs
            // NOTE(review): first_job_map.get(...) is assumed non-null here, i.e. the map pass
            // is assumed to have registered this algorithm ID already. Confirm.
            if (this.first_job_map.get(iterativeAppID).equals(job.getJobID())
                && job.getJobConf().isIterative()) {
              LOG.info(job.getJobID() + " is the first iteration job for reduce");
              newIterationJob = true;
            }

            Integer target = null;
            if (newIterationJob) {
              if (first_job_reduces_map.get(iterativeAppID) == null) {
                throw new IOException(
                    "I think something is wrong since the tasktracker never receive "
                        + "a map task with iterativeapp id "
                        + iterativeAppID);
              }
              if (first_job_reduces_map.get(iterativeAppID).get(taskTracker.getTrackerName())
                  == null) {
                throw new IOException(
                    "I think something is wrong since the tasktracker never receive "
                        + "a map task with iterativeapp id "
                        + iterativeAppID
                        + " from "
                        + taskTracker.getTrackerName());
              }

              // NOTE(review): the id is removed from the list before the task is obtained; if
              // obtainNewReduceTask returns null below, the id appears to be lost.
              target =
                  this.first_job_reduces_map
                      .get(iterativeAppID)
                      .get(taskTracker.getTrackerName())
                      .pollFirst();
            } else {
              // The task assignment has already been prepared during the map task assignment,
              // so tracker_rtask_map is never used here.
              // NOTE(review): polled before the task is obtained, same concern as above.
              target =
                  this.rtask_assign_map
                      .get(job.getJobID())
                      .get(taskTracker.getTrackerName())
                      .pollFirst();
            }

            if (target == null) {
              // All have been assigned, no more work; maybe it should help others to process.
              // This break ends the scan of the job queue for this call.
              LOG.info(
                  "all reduce tasks on tasktracker "
                      + taskTracker.getTrackerName()
                      + " have been processed");
              break;
            } else {
              t =
                  job.obtainNewReduceTask(
                      taskTrackerStatus,
                      numTaskTrackers,
                      taskTrackerManager.getNumberOfUniqueHosts(),
                      target);
            }
          } else if (jointype.equals("one2mul")) {
            // one-to-mul jobs: take the next reduce id prepared for this tracker in the map pass
            // NOTE(review): polled before the task is obtained, same concern as above.
            Integer target =
                this.rtask_assign_map
                    .get(job.getJobID())
                    .get(taskTracker.getTrackerName())
                    .pollFirst();

            if (target == null) {
              // all have been assigned, no more work, maybe it should help others to process
              LOG.info(
                  "all reduce tasks on tasktracker "
                      + taskTracker.getTrackerName()
                      + " have been processed");
              break;
            } else {
              t =
                  job.obtainNewReduceTask(
                      taskTrackerStatus,
                      numTaskTrackers,
                      taskTrackerManager.getNumberOfUniqueHosts(),
                      target);
            }
          } else {
            // one-to-all case, assign tasks in the first iteration job, and remember this mapping

            // this is the first job of the series of jobs
            if (this.first_job_map.get(iterativeAppID).equals(job.getJobID())) {
              LOG.info(job.getJobID() + " is the first iteration job for reduce");
              newIterationJob = true;
            }
            /*
            //this is one of the following jobs, and prepare a assignment list for the assignment
            else{
              LOG.info(job.getJobID() + " is not the first iteration job for reduce");
              if(this.rtask_assign_map.get(job.getJobID()) == null){
                //prepare the map task assignment list
                Map<String, LinkedList<Integer>> reduce_task_assign = new HashMap<String, LinkedList<Integer>>();
                for(Map.Entry<String, LinkedList<Integer>> entry : this.tracker_rtask_map.get(iterativeAppID).entrySet()){
                  String tracker = entry.getKey();
                  LinkedList<Integer> taskids = entry.getValue();
                  LinkedList<Integer> copytaskids = new LinkedList<Integer>();
                  for(int taskid : taskids){
                    copytaskids.add(taskid);
                  }
                  reduce_task_assign.put(tracker, copytaskids);
                }
                this.rtask_assign_map.put(job.getJobID(), reduce_task_assign);
              }
            }
            */

            // First iteration: let the job pick the reduce task and record its id per tracker.
            // Following iterations: request the id recorded during the first iteration.
            if (newIterationJob) {
              t =
                  job.obtainNewReduceTask(
                      taskTrackerStatus,
                      numTaskTrackers,
                      taskTrackerManager.getNumberOfUniqueHosts());

              if (t != null) {
                if (!this.tracker_rtask_map
                    .get(iterativeAppID)
                    .containsKey(taskTracker.getTrackerName())) {
                  LinkedList<Integer> tasklist = new LinkedList<Integer>();
                  this.tracker_rtask_map
                      .get(iterativeAppID)
                      .put(taskTracker.getTrackerName(), tasklist);
                }
                this.tracker_rtask_map
                    .get(iterativeAppID)
                    .get(taskTracker.getTrackerName())
                    .add(t.getTaskID().getTaskID().getId());
                LOG.info(
                    "assigning reduce task "
                        + t.getTaskID()
                        + " on "
                        + taskTracker.getTrackerName());
              }
            } else {
              // Here the id is only peeked; it is removed further below once a task was
              // actually obtained.
              Integer target =
                  this.rtask_assign_map
                      .get(job.getJobID())
                      .get(taskTracker.getTrackerName())
                      .peekFirst();

              if (target == null) {
                // All have been assigned, no more work; maybe it should help others to process.
                // (The message below says "map tasks" although this is the reduce pass.)
                LOG.info(
                    "all map tasks on tasktracker "
                        + taskTracker.getTrackerName()
                        + " have been processed");
                break;
              } else {
                t =
                    job.obtainNewReduceTask(
                        taskTrackerStatus,
                        numTaskTrackers,
                        taskTrackerManager.getNumberOfUniqueHosts(),
                        target);
              }

              if (t != null) {
                // poll, remove
                this.rtask_assign_map
                    .get(job.getJobID())
                    .get(taskTracker.getTrackerName())
                    .pollFirst();
                LOG.info(
                    "assigning reduce task "
                        + t.getTaskID()
                        + " on "
                        + taskTracker.getTrackerName());
              }
            }
          }
        } else {
          // not an iterative job: let the job pick any reduce task
          t =
              job.obtainNewReduceTask(
                  taskTrackerStatus,
                  numTaskTrackers,
                  taskTrackerManager.getNumberOfUniqueHosts());
        }

        LOG.info("try to assign new task " + t);
        if (t != null) {
          assignedTasks.add(t);
          break;
        }

        // Don't assign reduce tasks to the hilt!
        // Leave some free slots in the cluster for future task-failures,
        // speculative tasks etc. beyond the highest priority job
        if (exceededReducePadding) {
          break;
        }
      }
    }
  }

  if (LOG.isDebugEnabled()) {
    LOG.debug(
        "Task assignments for "
            + taskTrackerStatus.getTrackerName()
            + " --> "
            + "["
            + mapLoadFactor
            + ", "
            + trackerMapCapacity
            + ", "
            + trackerCurrentMapCapacity
            + ", "
            + trackerRunningMaps
            + "] -> ["
            + (trackerCurrentMapCapacity - trackerRunningMaps)
            + ", "
            + assignedMaps
            + " ("
            + numLocalMaps
            + ", "
            + numNonLocalMaps
            + ")] ["
            + reduceLoadFactor
            + ", "
            + trackerReduceCapacity
            + ", "
            + trackerCurrentReduceCapacity
            + ","
            + trackerRunningReduces
            + "] -> ["
            + (trackerCurrentReduceCapacity - trackerRunningReduces)
            + ", "
            + (assignedTasks.size() - assignedMaps)
            + "]");
  }

  return assignedTasks;
}
/* * TODO: * For Elf: need to change the major schedule logic, scheduling need * to be *datacenter-aware* * */ @Override public synchronized List<Task> assignTasks(TaskTracker taskTracker) throws IOException { TaskTrackerStatus taskTrackerStatus = taskTracker.getStatus(); ClusterStatus clusterStatus = taskTrackerManager.getClusterStatus(); final int numTaskTrackers = clusterStatus.getTaskTrackers(); final int clusterMapCapacity = clusterStatus.getMaxMapTasks(); final int clusterReduceCapacity = clusterStatus.getMaxReduceTasks(); Collection<JobInProgress> jobQueue = jobQueueJobInProgressListener.getJobQueue(); // // Get map + reduce counts for the current tracker. // final int trackerMapCapacity = taskTrackerStatus.getMaxMapSlots(); final int trackerReduceCapacity = taskTrackerStatus.getMaxReduceSlots(); final int trackerRunningMaps = taskTrackerStatus.countMapTasks(); final int trackerRunningReduces = taskTrackerStatus.countReduceTasks(); // Assigned tasks List<Task> assignedTasks = new ArrayList<Task>(); // // Compute (running + pending) map and reduce task numbers across pool // int remainingReduceLoad = 0; int remainingMapLoad = 0; synchronized (jobQueue) { for (JobInProgress job : jobQueue) { if (job.getStatus().getRunState() == JobStatus.RUNNING) { remainingMapLoad += (job.desiredMaps() - job.finishedMaps()); if (job.scheduleReduces()) { remainingReduceLoad += (job.desiredReduces() - job.finishedReduces()); } } } } // Compute the 'load factor' for maps and reduces double mapLoadFactor = 0.0; if (clusterMapCapacity > 0) { mapLoadFactor = (double) remainingMapLoad / clusterMapCapacity; } double reduceLoadFactor = 0.0; if (clusterReduceCapacity > 0) { reduceLoadFactor = (double) remainingReduceLoad / clusterReduceCapacity; } // // In the below steps, we allocate first map tasks (if appropriate), // and then reduce tasks if appropriate. We go through all jobs // in order of job arrival; jobs only get serviced if their // predecessors are serviced, too. 
// // // We assign tasks to the current taskTracker if the given machine // has a workload that's less than the maximum load of that kind of // task. // However, if the cluster is close to getting loaded i.e. we don't // have enough _padding_ for speculative executions etc., we only // schedule the "highest priority" task i.e. the task from the job // with the highest priority. // final int trackerCurrentMapCapacity = Math.min((int) Math.ceil(mapLoadFactor * trackerMapCapacity), trackerMapCapacity); int availableMapSlots = trackerCurrentMapCapacity - trackerRunningMaps; boolean exceededMapPadding = false; if (availableMapSlots > 0) { exceededMapPadding = exceededPadding(true, clusterStatus, trackerMapCapacity); } int numLocalMaps = 0; int numNonLocalMaps = 0; scheduleMaps: // TODO: for Elf // The main schedule logic here, outer for loop is for every slot, inner loop is for each job for (int i = 0; i < availableMapSlots; ++i) { synchronized (jobQueue) { for (JobInProgress job : jobQueue) { if (job.getStatus().getRunState() != JobStatus.RUNNING) { continue; } Task t = null; // Try to schedule a node-local or rack-local Map task t = job.obtainNewLocalMapTask( taskTrackerStatus, numTaskTrackers, taskTrackerManager.getNumberOfUniqueHosts()); if (t != null) { assignedTasks.add(t); ++numLocalMaps; // Don't assign map tasks to the hilt! // Leave some free slots in the cluster for future task-failures, // speculative tasks etc. beyond the highest priority job if (exceededMapPadding) { break scheduleMaps; } // Try all jobs again for the next Map task // Note: it's FIFO here: next time in the inner for loop the head-of-queue // will still be chosen break; } // If no locality for this job, try launching non-local // Try to schedule a node-local or rack-local Map task --> original comments // FIXME: is the above comment correct? 
seems should be non-local task t = job.obtainNewNonLocalMapTask( taskTrackerStatus, numTaskTrackers, taskTrackerManager.getNumberOfUniqueHosts()); if (t != null) { assignedTasks.add(t); ++numNonLocalMaps; // We assign at most 1 off-switch or speculative task // This is to prevent TaskTrackers from stealing local-tasks // from other TaskTrackers. break scheduleMaps; } } } } int assignedMaps = assignedTasks.size(); // // Same thing, but for reduce tasks // However we _never_ assign more than 1 reduce task per heartbeat // final int trackerCurrentReduceCapacity = Math.min((int) Math.ceil(reduceLoadFactor * trackerReduceCapacity), trackerReduceCapacity); final int availableReduceSlots = Math.min((trackerCurrentReduceCapacity - trackerRunningReduces), 1); boolean exceededReducePadding = false; if (availableReduceSlots > 0) { exceededReducePadding = exceededPadding(false, clusterStatus, trackerReduceCapacity); synchronized (jobQueue) { for (JobInProgress job : jobQueue) { if (job.getStatus().getRunState() != JobStatus.RUNNING || job.numReduceTasks == 0) { continue; } Task t = job.obtainNewReduceTask( taskTrackerStatus, numTaskTrackers, taskTrackerManager.getNumberOfUniqueHosts()); if (t != null) { assignedTasks.add(t); break; } // Don't assign reduce tasks to the hilt! // Leave some free slots in the cluster for future task-failures, // speculative tasks etc. 
beyond the highest priority job if (exceededReducePadding) { break; } } } } if (LOG.isDebugEnabled()) { LOG.debug( "Task assignments for " + taskTrackerStatus.getTrackerName() + " --> " + "[" + mapLoadFactor + ", " + trackerMapCapacity + ", " + trackerCurrentMapCapacity + ", " + trackerRunningMaps + "] -> [" + (trackerCurrentMapCapacity - trackerRunningMaps) + ", " + assignedMaps + " (" + numLocalMaps + ", " + numNonLocalMaps + ")] [" + reduceLoadFactor + ", " + trackerReduceCapacity + ", " + trackerCurrentReduceCapacity + "," + trackerRunningReduces + "] -> [" + (trackerCurrentReduceCapacity - trackerRunningReduces) + ", " + (assignedTasks.size() - assignedMaps) + "]"); } return assignedTasks; }
/** Check refreshNodes for decommissioning blacklisted nodes. */ public void testBlacklistedNodeDecommissioning() throws Exception { LOG.info("Testing blacklisted node decommissioning"); MiniMRCluster mr = null; JobTracker jt = null; try { // start mini mr JobConf jtConf = new JobConf(); jtConf.set("mapred.max.tracker.blacklists", "1"); mr = new MiniMRCluster(0, 0, 2, "file:///", 1, null, null, null, jtConf); jt = mr.getJobTrackerRunner().getJobTracker(); assertEquals("Trackers not up", 2, jt.taskTrackers().size()); // validate the total tracker count assertEquals( "Active tracker count mismatch", 2, jt.getClusterStatus(false).getTaskTrackers()); // validate blacklisted count assertEquals( "Blacklisted tracker count mismatch", 0, jt.getClusterStatus(false).getBlacklistedTrackers()); // run a failing job to blacklist the tracker JobConf jConf = mr.createJobConf(); jConf.set("mapred.max.tracker.failures", "1"); jConf.setJobName("test-job-fail-once"); jConf.setMapperClass(FailOnceMapper.class); jConf.setReducerClass(IdentityReducer.class); jConf.setNumMapTasks(1); jConf.setNumReduceTasks(0); RunningJob job = UtilsForTests.runJob(jConf, new Path(TEST_DIR, "in"), new Path(TEST_DIR, "out")); job.waitForCompletion(); // validate the total tracker count assertEquals( "Active tracker count mismatch", 1, jt.getClusterStatus(false).getTaskTrackers()); // validate blacklisted count assertEquals( "Blacklisted tracker count mismatch", 1, jt.getClusterStatus(false).getBlacklistedTrackers()); // find the blacklisted tracker String trackerName = null; for (TaskTrackerStatus status : jt.taskTrackers()) { if (jt.isBlacklisted(status.getTrackerName())) { trackerName = status.getTrackerName(); break; } } // get the hostname String hostToDecommission = JobInProgress.convertTrackerNameToHostName(trackerName); LOG.info("Decommissioning tracker " + hostToDecommission); // decommission the node HashSet<String> decom = new HashSet<String>(1); decom.add(hostToDecommission); 
jt.decommissionNodes(decom); // validate // check the cluster status and tracker size assertEquals( "Tracker is not lost upon host decommissioning", 1, jt.getClusterStatus(false).getTaskTrackers()); assertEquals( "Blacklisted tracker count incorrect in cluster status " + "after decommissioning", 0, jt.getClusterStatus(false).getBlacklistedTrackers()); assertEquals("Tracker is not lost upon host decommissioning", 1, jt.taskTrackers().size()); } finally { if (mr != null) { mr.shutdown(); mr = null; jt = null; FileUtil.fullyDelete(new File(TEST_DIR.toString())); } } }
private void printFailedAttempts( JspWriter out, JobTracker tracker, JobID jobId, TaskInProgress tip, TaskStatus.State failState) throws IOException { TaskStatus[] statuses = tip.getTaskStatuses(); TaskID tipId = tip.getTIPId(); for (int i = 0; i < statuses.length; ++i) { TaskStatus.State taskState = statuses[i].getRunState(); if ((failState == null && (taskState == TaskStatus.State.FAILED || taskState == TaskStatus.State.KILLED)) || taskState == failState) { String taskTrackerName = statuses[i].getTaskTracker(); TaskTrackerStatus taskTracker = tracker.getTaskTrackerStatus(taskTrackerName); out.print( "<tr><td>" + statuses[i].getTaskID() + "</td><td><a href=\"taskdetails.jsp?jobid=" + jobId + "&tipid=" + tipId + "\">" + tipId + "</a></td>"); if (taskTracker == null) { out.print("<td>" + taskTrackerName + "</td>"); } else { out.print( "<td><a href=\"http://" + taskTracker.getHost() + ":" + taskTracker.getHttpPort() + "\">" + taskTracker.getHost() + "</a></td>"); } out.print("<td>" + taskState + "</td>"); out.print("<td><pre>"); String[] failures = tracker.getTaskDiagnostics(statuses[i].getTaskID()); if (failures == null) { out.print(" "); } else { for (int j = 0; j < failures.length; j++) { out.print(failures[j]); if (j < (failures.length - 1)) { out.print("\n-------\n"); } } } out.print("</pre></td>"); out.print("<td>"); String taskLogUrl = null; if (taskTracker != null) { taskLogUrl = TaskLogServlet.getTaskLogUrl( taskTracker.getHost(), String.valueOf(taskTracker.getHttpPort()), statuses[i].getTaskID().toString()); } if (taskLogUrl != null) { String tailFourKBUrl = taskLogUrl + "&start=-4097"; String tailEightKBUrl = taskLogUrl + "&start=-8193"; String entireLogUrl = taskLogUrl; out.print("<a href=\"" + tailFourKBUrl + "\">Last 4KB</a><br/>"); out.print("<a href=\"" + tailEightKBUrl + "\">Last 8KB</a><br/>"); out.print("<a href=\"" + entireLogUrl + "\">All</a><br/>"); } else { out.print("n/a"); // task tracker was lost } out.print("</td>"); out.print("</tr>\n"); } 
} }
/** * A tracker wants to know if there's a Task to run. Returns a task we'd like the TaskTracker to * execute right now. * * <p>Eventually this function should compute load on the various TaskTrackers, and incorporate * knowledge of DFS file placement. But for right now, it just grabs a single item out of the * pending task list and hands it back. */ public synchronized Task pollForNewTask(String taskTracker) { // // Compute average map and reduce task numbers across pool // int avgMaps = 0; int avgReduces = 0; int numTaskTrackers; TaskTrackerStatus tts; synchronized (taskTrackers) { numTaskTrackers = taskTrackers.size(); tts = (TaskTrackerStatus) taskTrackers.get(taskTracker); } if (numTaskTrackers > 0) { avgMaps = totalMaps / numTaskTrackers; avgReduces = totalReduces / numTaskTrackers; } int totalCapacity = numTaskTrackers * maxCurrentTasks; // // Get map + reduce counts for the current tracker. // if (tts == null) { LOG.warning("Unknown task tracker polling; ignoring: " + taskTracker); return null; } int numMaps = tts.countMapTasks(); int numReduces = tts.countReduceTasks(); // // In the below steps, we allocate first a map task (if appropriate), // and then a reduce task if appropriate. We go through all jobs // in order of job arrival; jobs only get serviced if their // predecessors are serviced, too. // // // We hand a task to the current taskTracker if the given machine // has a workload that's equal to or less than the averageMaps // +/- TASK_ALLOC_EPSILON. (That epsilon is in place in case // there is an odd machine that is failing for some reason but // has not yet been removed from the pool, making capacity seem // larger than it really is.) 
// synchronized (jobsByArrival) { if ((numMaps < maxCurrentTasks) && (numMaps <= (avgMaps + TASK_ALLOC_EPSILON))) { int totalNeededMaps = 0; for (Iterator it = jobsByArrival.iterator(); it.hasNext(); ) { JobInProgress job = (JobInProgress) it.next(); if (job.getStatus().getRunState() != JobStatus.RUNNING) { continue; } Task t = job.obtainNewMapTask(taskTracker, tts); if (t != null) { return t; } // // Beyond the highest-priority task, reserve a little // room for failures and speculative executions; don't // schedule tasks to the hilt. // totalNeededMaps += job.desiredMaps(); double padding = 0; if (totalCapacity > MIN_SLOTS_FOR_PADDING) { padding = Math.min(maxCurrentTasks, totalNeededMaps * PAD_FRACTION); } if (totalNeededMaps + padding >= totalCapacity) { break; } } } // // Same thing, but for reduce tasks // if ((numReduces < maxCurrentTasks) && (numReduces <= (avgReduces + TASK_ALLOC_EPSILON))) { int totalNeededReduces = 0; for (Iterator it = jobsByArrival.iterator(); it.hasNext(); ) { JobInProgress job = (JobInProgress) it.next(); if (job.getStatus().getRunState() != JobStatus.RUNNING) { continue; } Task t = job.obtainNewReduceTask(taskTracker, tts); if (t != null) { return t; } // // Beyond the highest-priority task, reserve a little // room for failures and speculative executions; don't // schedule tasks to the hilt. // totalNeededReduces += job.desiredReduces(); double padding = 0; if (totalCapacity > MIN_SLOTS_FOR_PADDING) { padding = Math.min(maxCurrentTasks, totalNeededReduces * PAD_FRACTION); } if (totalNeededReduces + padding >= totalCapacity) { break; } } } } return null; }