/** * We lost the task tracker! All task-tracker structures have already been updated. Just process * the contained tasks and any jobs that might be affected. */ void lostTaskTracker(String trackerName) { LOG.info("Lost tracker '" + trackerName + "'"); TreeSet lostTasks = (TreeSet) trackerToTaskMap.get(trackerName); trackerToTaskMap.remove(trackerName); if (lostTasks != null) { for (Iterator it = lostTasks.iterator(); it.hasNext(); ) { String taskId = (String) it.next(); TaskInProgress tip = (TaskInProgress) taskidToTIPMap.get(taskId); // Tell the job to fail the relevant task JobInProgress job = tip.getJob(); job.failedTask(tip, taskId, trackerName); } } }
@Override public void updateDemand() { demand = 0; if (isRunnable()) { // For reduces, make sure enough maps are done that reduces can launch if (taskType == TaskType.REDUCE && !job.scheduleReduces()) return; // Add up demand from each TaskInProgress; each TIP can either // - have no attempts running, in which case it demands 1 slot // - have N attempts running, in which case it demands N slots, and may // potentially demand one more slot if it needs to be speculated TaskInProgress[] tips = (taskType == TaskType.MAP ? job.getTasks(TaskType.MAP) : job.getTasks(TaskType.REDUCE)); boolean speculationEnabled = (taskType == TaskType.MAP ? job.hasSpeculativeMaps() : job.hasSpeculativeReduces()); long time = scheduler.getClock().getTime(); for (TaskInProgress tip : tips) { if (!tip.isComplete()) { if (tip.isRunning()) { // Count active tasks and any speculative task we want to launch demand += tip.getActiveTasks().size(); if (speculationEnabled && tip.hasSpeculativeTask(time, job.getStatus().mapProgress())) demand += 1; } else { // Need to launch 1 task demand += 1; } } } } }
// create a new Task for the given tip and make it running private TaskAttemptID createAndAddAttempt(TaskInProgress tip, int attemptId) { // create a fake attempt for this fake task TaskAttemptID taskid = new TaskAttemptID(tip.getTIPId(), attemptId); // insert this fake task into the jobtracker by making it running tip.addRunningTask(taskid, "test-tt"); return taskid; }
/** * A tracker wants to know if any of its Tasks have been closed (because the job completed, * whether successfully or not) */ public synchronized String pollForTaskWithClosedJob(String taskTracker) { TreeSet taskIds = (TreeSet) trackerToTaskMap.get(taskTracker); if (taskIds != null) { for (Iterator it = taskIds.iterator(); it.hasNext(); ) { String taskId = (String) it.next(); TaskInProgress tip = (TaskInProgress) taskidToTIPMap.get(taskId); if (tip.shouldCloseForClosedJob(taskId)) { // // This is how the JobTracker ends a task at the TaskTracker. // It may be successfully completed, or may be killed in // mid-execution. // return taskId; } } } return null; }
/**
 * Collects one report per task-in-progress returned by the job's
 * {@code reportTasksInProgress(false, ...)} calls: first the ones requested with the second
 * flag set to {@code true} (the complete reduce tasks, going by the original naming), then the
 * ones requested with it set to {@code false} (the incomplete ones).
 *
 * @param jobid id of the job to report on
 * @return the reports in that order; an empty array when the job id is unknown
 */
public synchronized TaskReport[] getReduceTaskReports(String jobid) {
  JobInProgress job = (JobInProgress) jobs.get(jobid);
  if (job == null) {
    return new TaskReport[0];
  }

  Vector reports = new Vector();

  // complete tasks are reported before incomplete ones
  boolean[] completionStates = {true, false};
  for (int s = 0; s < completionStates.length; s++) {
    Vector reduceTips = job.reportTasksInProgress(false, completionStates[s]);
    Iterator tipIt = reduceTips.iterator();
    while (tipIt.hasNext()) {
      TaskInProgress tip = (TaskInProgress) tipIt.next();
      reports.add(tip.generateSingleReport());
    }
  }

  TaskReport[] result = new TaskReport[reports.size()];
  return (TaskReport[]) reports.toArray(result);
}
/** * Accept and process a new TaskTracker profile. We might have known about the TaskTracker * previously, or it might be brand-new. All task-tracker structures have already been updated. * Just process the contained tasks and any jobs that might be affected. */ void updateTaskStatuses(TaskTrackerStatus status) { for (Iterator it = status.taskReports(); it.hasNext(); ) { TaskStatus report = (TaskStatus) it.next(); TaskInProgress tip = (TaskInProgress) taskidToTIPMap.get(report.getTaskId()); if (tip == null) { LOG.info( "Serious problem. While updating status, cannot find taskid " + report.getTaskId()); } else { JobInProgress job = tip.getJob(); job.updateTaskStatus(tip, report); if (report.getRunState() == TaskStatus.SUCCEEDED) { job.completedTask(tip, report.getTaskId()); } else if (report.getRunState() == TaskStatus.FAILED) { // Tell the job to fail the relevant task job.failedTask(tip, report.getTaskId(), status.getTrackerName()); } } } }
// Mock a job run such that the jobtracker is in a state similar to that // resulting from an actual job run. // Steps : // - generate a new job-id // - create and add a JobInProgress object using the fake job-id // - create and add a fake tip of the passed type 't' under the fake job // Note that t can be a MAP or a REDUCE or a JOB_SETUP or a JOB_CLEANUP. // - create and add a fake attempt under the fake tip // - remove the job from the jobtracker // - check if the fake attempt is removed from the jobtracker private void testRemoveJobTasks(JobTracker jobtracker, JobConf conf, TaskType type) { // create and submit a job JobInProgress jip = createAndAddJob(jobtracker, conf); // create and add a tip TaskInProgress tip = createAndAddTIP(jobtracker, jip, type); // create and add an attempt TaskAttemptID taskid = createAndAddAttempt(tip, 0); // this fake attempt should not have any status assertNull(tip.getTaskStatus(taskid)); // remove the job tasks for this fake job from the jobtracker jobtracker.removeJobTasks(jip); // check the taskidToTIPMap for (TaskAttemptID tid : jobtracker.taskidToTIPMap.keySet()) { LOG.info("TaskidToTIP : " + tid); } // check if the fake attempt is removed from the jobtracker assertEquals("'taskid' to TIP mapping still exists", 0, jobtracker.taskidToTIPMap.size()); }
/** * A TaskTracker wants to know the physical locations of completed, but not yet closed, tasks. * This exists so the reduce task thread can locate map task outputs. */ public synchronized MapOutputLocation[] locateMapOutputs( String taskId, String[][] mapTasksNeeded) { ArrayList v = new ArrayList(); for (int i = 0; i < mapTasksNeeded.length; i++) { for (int j = 0; j < mapTasksNeeded[i].length; j++) { TaskInProgress tip = (TaskInProgress) taskidToTIPMap.get(mapTasksNeeded[i][j]); if (tip != null && tip.isComplete(mapTasksNeeded[i][j])) { String trackerId = (String) taskidToTrackerMap.get(mapTasksNeeded[i][j]); TaskTrackerStatus tracker; synchronized (taskTrackers) { tracker = (TaskTrackerStatus) taskTrackers.get(trackerId); } v.add(new MapOutputLocation(mapTasksNeeded[i][j], tracker.getHost(), tracker.getPort())); break; } } } // randomly shuffle results to load-balance map output requests Collections.shuffle(v); return (MapOutputLocation[]) v.toArray(new MapOutputLocation[v.size()]); }
private void updateTaskCounts() { for (Map.Entry<JobInProgress, JobInfo> entry : infos.entrySet()) { JobInProgress job = entry.getKey(); JobInfo info = entry.getValue(); if (job.getStatus().getRunState() != JobStatus.RUNNING) continue; // Job is still in PREP state and tasks aren't initialized // Count maps int totalMaps = job.numMapTasks; int finishedMaps = 0; int runningMaps = 0; for (TaskInProgress tip : job.getMapTasks()) { if (tip.isComplete()) { finishedMaps += 1; } else if (tip.isRunning()) { runningMaps += tip.getActiveTasks().size(); } } info.runningMaps = runningMaps; info.neededMaps = (totalMaps - runningMaps - finishedMaps + taskSelector.neededSpeculativeMaps(job)); // Count reduces int totalReduces = job.numReduceTasks; int finishedReduces = 0; int runningReduces = 0; for (TaskInProgress tip : job.getReduceTasks()) { if (tip.isComplete()) { finishedReduces += 1; } else if (tip.isRunning()) { runningReduces += tip.getActiveTasks().size(); } } info.runningReduces = runningReduces; info.neededReduces = (totalReduces - runningReduces - finishedReduces + taskSelector.neededSpeculativeReduces(job)); // If the job was marked as not runnable due to its user or pool having // too many active jobs, set the neededMaps/neededReduces to 0. We still // count runningMaps/runningReduces however so we can give it a deficit. if (!info.runnable) { info.neededMaps = 0; info.neededReduces = 0; } } }
/////////////////////////////////////////////////////// // Maintain lookup tables; called by JobInProgress // and TaskInProgress /////////////////////////////////////////////////////// void createTaskEntry(String taskid, String taskTracker, TaskInProgress tip) { LOG.info( "Adding task '" + taskid + "' to tip " + tip.getTIPId() + ", for tracker '" + taskTracker + "'"); // taskid --> tracker taskidToTrackerMap.put(taskid, taskTracker); // tracker --> taskid TreeSet taskset = (TreeSet) trackerToTaskMap.get(taskTracker); if (taskset == null) { taskset = new TreeSet(); trackerToTaskMap.put(taskTracker, taskset); } taskset.add(taskid); // taskid --> TIP taskidToTIPMap.put(taskid, tip); }
/** * Test job retire with tasks that report their *first* status only after the job retires. Steps : * - Start a mini-mr cluster with 1 task-tracker having only map slots. Note that this * task-tracker will take care of setup/cleanup and the map tasks. - Submit a job with 1 map task * and 1 reduce task - Wait for the job to finish the map task - Start a 2nd tracker that waits * for a long time after contacting the JT. - Wait for the 2nd tracker to get stuck - Kill the job * - Wait for the job to retire - Check if the tip mappings are cleaned up. */ public void testJobRetireWithUnreportedTasks() throws Exception { MiniMRCluster mr = null; try { JobConf conf = new JobConf(); // set the num-map-slots to 1 so that no reduce tasks but setup/cleanup // can run on it conf.setInt("mapred.tasktracker.map.tasks.maximum", 1); conf.setInt("mapred.tasktracker.reduce.tasks.maximum", 0); mr = startCluster(conf, 1); JobTracker jobtracker = mr.getJobTrackerRunner().getJobTracker(); RunningJob job = UtilsForTests.runJob( mr.createJobConf(), new Path(testDir, "in-1"), new Path(testDir, "out-1"), 1, 1); JobID id = JobID.downgrade(job.getID()); JobInProgress jip = jobtracker.getJob(id); // wait 100 secs for the job to complete its map task for (int i = 0; i < 1000 && jip.finishedMaps() < 1; i++) { UtilsForTests.waitFor(100); } assertEquals(jip.finishedMaps(), 1); // start a tracker that will wait LOG.info("Adding a waiting tracker"); TaskTrackerRunner testTrackerRunner = mr.new TaskTrackerRunner(1, 1, null, mr.createJobConf()) { @Override TaskTracker createTaskTracker(JobConf conf) throws InterruptedException, IOException { return new WaitingTaskTracker(conf); } }; mr.addTaskTracker(testTrackerRunner); LOG.info("Waiting tracker added"); WaitingTaskTracker testTT = (WaitingTaskTracker) testTrackerRunner.getTaskTracker(); // wait 100 secs for the newly started task-tracker to join for (int i = 0; i < 1000 && jobtracker.taskTrackers().size() < 2; i++) { UtilsForTests.waitFor(100); } 
assertEquals(jobtracker.taskTrackers().size(), 2); LOG.info("Cluster is now up with 2 trackers"); // stop the test-tt as its no longer required mr.stopTaskTracker(mr.getTaskTrackerID(testTT.getName())); // 1 reduce task should be scheduled assertEquals("TestTT contacted but no reduce task scheduled on it", 1, jip.runningReduces()); // kill the job LOG.info("Killing job " + id); job.killJob(); // check if the reduce task attempt status is missing TaskInProgress tip = jip.getTasks(TaskType.REDUCE)[0]; assertNull(tip.getTaskStatus(tip.getAllTaskAttemptIDs()[0])); // wait for the job to retire waitTillRetire(id, jobtracker); // check the taskidToTIPMap for (TaskAttemptID tid : jobtracker.taskidToTIPMap.keySet()) { LOG.info("TaskidToTIP : " + tid); } assertEquals("'taskid' to TIP mapping still exists", 0, jobtracker.taskidToTIPMap.size()); } finally { if (mr != null) { mr.shutdown(); } // cleanup FileUtil.fullyDelete(new File(testDir.toString())); } }
private void printFailedAttempts( JspWriter out, JobTracker tracker, JobID jobId, TaskInProgress tip, TaskStatus.State failState) throws IOException { TaskStatus[] statuses = tip.getTaskStatuses(); TaskID tipId = tip.getTIPId(); for (int i = 0; i < statuses.length; ++i) { TaskStatus.State taskState = statuses[i].getRunState(); if ((failState == null && (taskState == TaskStatus.State.FAILED || taskState == TaskStatus.State.KILLED)) || taskState == failState) { String taskTrackerName = statuses[i].getTaskTracker(); TaskTrackerStatus taskTracker = tracker.getTaskTrackerStatus(taskTrackerName); out.print( "<tr><td>" + statuses[i].getTaskID() + "</td><td><a href=\"taskdetails.jsp?jobid=" + jobId + "&tipid=" + tipId + "\">" + tipId + "</a></td>"); if (taskTracker == null) { out.print("<td>" + taskTrackerName + "</td>"); } else { out.print( "<td><a href=\"http://" + taskTracker.getHost() + ":" + taskTracker.getHttpPort() + "\">" + taskTracker.getHost() + "</a></td>"); } out.print("<td>" + taskState + "</td>"); out.print("<td><pre>"); String[] failures = tracker.getTaskDiagnostics(statuses[i].getTaskID()); if (failures == null) { out.print(" "); } else { for (int j = 0; j < failures.length; j++) { out.print(failures[j]); if (j < (failures.length - 1)) { out.print("\n-------\n"); } } } out.print("</pre></td>"); out.print("<td>"); String taskLogUrl = null; if (taskTracker != null) { taskLogUrl = TaskLogServlet.getTaskLogUrl( taskTracker.getHost(), String.valueOf(taskTracker.getHttpPort()), statuses[i].getTaskID().toString()); } if (taskLogUrl != null) { String tailFourKBUrl = taskLogUrl + "&start=-4097"; String tailEightKBUrl = taskLogUrl + "&start=-8193"; String entireLogUrl = taskLogUrl; out.print("<a href=\"" + tailFourKBUrl + "\">Last 4KB</a><br/>"); out.print("<a href=\"" + tailEightKBUrl + "\">Last 8KB</a><br/>"); out.print("<a href=\"" + entireLogUrl + "\">All</a><br/>"); } else { out.print("n/a"); // task tracker was lost } out.print("</td>"); out.print("</tr>\n"); } 
} }