/**
 * JobTracker.submitJob() kicks off a new job.
 *
 * <p>Create a 'JobInProgress' object, which contains both JobProfile and JobStatus. Those two
 * sub-objects are sometimes shipped outside of the JobTracker. But JobInProgress adds info that's
 * useful for the JobTracker alone.
 *
 * <p>We add the JIP to the jobInitQueue, which is processed asynchronously to handle
 * split-computation and build up the right TaskTracker/Block mapping.
 */
public synchronized JobStatus submitJob(String jobFile) throws IOException {
  totalSubmissions++;
  JobInProgress job = new JobInProgress(jobFile, this, this.conf);
  synchronized (jobs) {
    synchronized (jobsByArrival) {
      synchronized (jobInitQueue) {
        jobs.put(job.getProfile().getJobId(), job);
        jobsByArrival.add(job);
        jobInitQueue.add(job);
        jobInitQueue.notifyAll();
      }
    }
  }
  return job.getStatus();
}
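/*
 * A minimal caller sketch, not part of the original source: submit a staged job
 * file and poll until the job leaves PREP (i.e. until the JobInitThread has run
 * initTasks()). The helper name and the one-second poll are made up, and
 * JobStatus.getJobId() is assumed to return the same id that submitJob() keyed
 * the job under via job.getProfile().getJobId().
 */
static JobStatus submitAndWaitForInit(JobTracker tracker, String jobFile)
    throws IOException, InterruptedException {
  JobStatus status = tracker.submitJob(jobFile);
  while (status != null && status.getRunState() == JobStatus.PREP) {
    Thread.sleep(1000); // give the async init queue time to process the job
    status = tracker.getJobStatus(status.getJobId());
  }
  return status;
}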
/**
 * We lost the task tracker! All task-tracker structures have already been updated. Just process
 * the contained tasks and any jobs that might be affected.
 */
void lostTaskTracker(String trackerName) {
  LOG.info("Lost tracker '" + trackerName + "'");
  TreeSet lostTasks = (TreeSet) trackerToTaskMap.get(trackerName);
  trackerToTaskMap.remove(trackerName);
  if (lostTasks != null) {
    for (Iterator it = lostTasks.iterator(); it.hasNext(); ) {
      String taskId = (String) it.next();
      TaskInProgress tip = (TaskInProgress) taskidToTIPMap.get(taskId);

      // Tell the job to fail the relevant task
      JobInProgress job = tip.getJob();
      job.failedTask(tip, taskId, trackerName);
    }
  }
}
/**
 * The run method lives for the life of the JobTracker, and removes Jobs that are not still
 * running, but which finished a long time ago.
 */
public void run() {
  while (shouldRun) {
    try {
      Thread.sleep(RETIRE_JOB_CHECK_INTERVAL);
    } catch (InterruptedException ie) {
    }
    // Acquire the three locks in the same order as submitJob()
    // (jobs -> jobsByArrival -> jobInitQueue); inconsistent ordering
    // between the two paths would risk deadlock.
    synchronized (jobs) {
      synchronized (jobsByArrival) {
        synchronized (jobInitQueue) {
          for (Iterator it = jobs.keySet().iterator(); it.hasNext(); ) {
            String jobid = (String) it.next();
            JobInProgress job = (JobInProgress) jobs.get(jobid);
            if (job.getStatus().getRunState() != JobStatus.RUNNING
                && job.getStatus().getRunState() != JobStatus.PREP
                && (job.getFinishTime() + RETIRE_JOB_INTERVAL < System.currentTimeMillis())) {
              it.remove();
              jobInitQueue.remove(job);
              jobsByArrival.remove(job);
            }
          }
        }
      }
    }
  }
}
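/*
 * A pure-function sketch of the retirement test above (not in the original
 * source), pulled out so the condition is easy to read and unit-test: a job
 * retires only once it is neither RUNNING nor PREP and its finish time lies
 * more than retireIntervalMillis in the past.
 */
static boolean shouldRetire(JobInProgress job, long nowMillis, long retireIntervalMillis) {
  int runState = job.getStatus().getRunState();
  return runState != JobStatus.RUNNING
      && runState != JobStatus.PREP
      && job.getFinishTime() + retireIntervalMillis < nowMillis;
}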
public synchronized JobStatus getJobStatus(String jobid) {
  JobInProgress job = (JobInProgress) jobs.get(jobid);
  if (job != null) {
    return job.getStatus();
  } else {
    return null;
  }
}
public synchronized JobProfile getJobProfile(String jobid) {
  JobInProgress job = (JobInProgress) jobs.get(jobid);
  if (job != null) {
    return job.getProfile();
  } else {
    return null;
  }
}
private void printFailures(
    JspWriter out, JobTracker tracker, JobID jobId, String kind, String cause)
    throws IOException {
  JobInProgress job = (JobInProgress) tracker.getJob(jobId);
  if (job == null) {
    out.print("<b>Job " + jobId + " not found.</b><br>\n");
    return;
  }

  boolean includeMap = false;
  boolean includeReduce = false;
  if (kind == null) {
    includeMap = true;
    includeReduce = true;
  } else if ("map".equals(kind)) {
    includeMap = true;
  } else if ("reduce".equals(kind)) {
    includeReduce = true;
  } else if ("all".equals(kind)) {
    includeMap = true;
    includeReduce = true;
  } else {
    out.print("<b>Kind " + kind + " not supported.</b><br>\n");
    return;
  }

  TaskStatus.State state = null;
  try {
    if (cause != null) {
      state = TaskStatus.State.valueOf(cause.toUpperCase());
      if (state != TaskStatus.State.FAILED && state != TaskStatus.State.KILLED) {
        out.print("<b>Cause '" + cause + "' is not an 'unsuccessful' state.</b><br>\n");
        return;
      }
    }
  } catch (IllegalArgumentException e) {
    out.print("<b>Cause '" + cause + "' not supported.</b><br>\n");
    return;
  }

  out.print("<table border=2 cellpadding=\"5\" cellspacing=\"2\">");
  out.print(
      "<tr><th>Attempt</th><th>Task</th><th>Machine</th><th>State</th>"
          + "<th>Error</th><th>Logs</th></tr>\n");
  if (includeMap) {
    TaskInProgress[] tips = job.getTasks(TaskType.MAP);
    for (int i = 0; i < tips.length; ++i) {
      printFailedAttempts(out, tracker, jobId, tips[i], state);
    }
  }
  if (includeReduce) {
    TaskInProgress[] tips = job.getTasks(TaskType.REDUCE);
    for (int i = 0; i < tips.length; ++i) {
      printFailedAttempts(out, tracker, jobId, tips[i], state);
    }
  }
  out.print("</table>\n");
}
public Vector completedJobs() {
  Vector v = new Vector();
  for (Iterator it = jobs.values().iterator(); it.hasNext(); ) {
    JobInProgress jip = (JobInProgress) it.next();
    JobStatus status = jip.getStatus();
    if (status.getRunState() == JobStatus.SUCCEEDED) {
      v.add(jip);
    }
  }
  return v;
}
public Vector runningJobs() {
  Vector v = new Vector();
  for (Iterator it = jobs.values().iterator(); it.hasNext(); ) {
    JobInProgress jip = (JobInProgress) it.next();
    JobStatus status = jip.getStatus();
    if (status.getRunState() == JobStatus.RUNNING) {
      v.add(jip);
    }
  }
  return v;
}
public synchronized TaskReport[] getReduceTaskReports(String jobid) {
  JobInProgress job = (JobInProgress) jobs.get(jobid);
  if (job == null) {
    return new TaskReport[0];
  } else {
    Vector reports = new Vector();
    Vector completeReduceTasks = job.reportTasksInProgress(false, true);
    for (Iterator it = completeReduceTasks.iterator(); it.hasNext(); ) {
      TaskInProgress tip = (TaskInProgress) it.next();
      reports.add(tip.generateSingleReport());
    }
    Vector incompleteReduceTasks = job.reportTasksInProgress(false, false);
    for (Iterator it = incompleteReduceTasks.iterator(); it.hasNext(); ) {
      TaskInProgress tip = (TaskInProgress) it.next();
      reports.add(tip.generateSingleReport());
    }
    return (TaskReport[]) reports.toArray(new TaskReport[reports.size()]);
  }
}
/**
 * Accept and process a new TaskTracker profile. We might have known about the TaskTracker
 * previously, or it might be brand-new. All task-tracker structures have already been updated.
 * Just process the contained tasks and any jobs that might be affected.
 */
void updateTaskStatuses(TaskTrackerStatus status) {
  for (Iterator it = status.taskReports(); it.hasNext(); ) {
    TaskStatus report = (TaskStatus) it.next();
    TaskInProgress tip = (TaskInProgress) taskidToTIPMap.get(report.getTaskId());
    if (tip == null) {
      LOG.info(
          "Serious problem. While updating status, cannot find taskid " + report.getTaskId());
    } else {
      JobInProgress job = tip.getJob();
      job.updateTaskStatus(tip, report);
      if (report.getRunState() == TaskStatus.SUCCEEDED) {
        job.completedTask(tip, report.getTaskId());
      } else if (report.getRunState() == TaskStatus.FAILED) {
        // Tell the job to fail the relevant task
        job.failedTask(tip, report.getTaskId(), status.getTrackerName());
      }
    }
  }
}
public void run() {
  while (shouldRun) {
    JobInProgress job = null;
    synchronized (jobInitQueue) {
      if (jobInitQueue.size() > 0) {
        job = (JobInProgress) jobInitQueue.elementAt(0);
        jobInitQueue.remove(job);
      } else {
        try {
          jobInitQueue.wait(JOBINIT_SLEEP_INTERVAL);
        } catch (InterruptedException iex) {
        }
      }
    }
    try {
      if (job != null) {
        job.initTasks();
      }
    } catch (Exception e) {
      LOG.log(Level.WARNING, "job init failed", e);
      job.kill();
    }
  }
}
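/*
 * A minimal, self-contained sketch (not from the original source) of the
 * wait/notifyAll handshake between submitJob() and the init loop above: the
 * producer adds under the queue's lock and wakes waiters; the consumer uses a
 * timed wait() and re-checks the queue on waking, because the wait can end on
 * a timeout or a spurious wakeup with nothing queued. Fully-qualified
 * java.util.Vector is used to keep the sketch import-free and to match the
 * raw collections used above.
 */
class InitQueueSketch {
  private final java.util.Vector queue = new java.util.Vector();

  void produce(Object job) { // cf. submitJob()
    synchronized (queue) {
      queue.add(job);
      queue.notifyAll();
    }
  }

  Object poll(long timeoutMillis) throws InterruptedException { // cf. run()
    synchronized (queue) {
      if (queue.isEmpty()) {
        queue.wait(timeoutMillis); // may return with the queue still empty
      }
      return queue.isEmpty() ? null : queue.remove(0);
    }
  }
}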
public void _jspService(HttpServletRequest request, HttpServletResponse response)
    throws java.io.IOException, ServletException {
  PageContext pageContext = null;
  HttpSession session = null;
  ServletContext application = null;
  ServletConfig config = null;
  JspWriter out = null;
  Object page = this;
  JspWriter _jspx_out = null;
  PageContext _jspx_page_context = null;

  try {
    response.setContentType("text/html; charset=UTF-8");
    pageContext = _jspxFactory.getPageContext(this, request, response, null, true, 8192, true);
    _jspx_page_context = pageContext;
    application = pageContext.getServletContext();
    config = pageContext.getServletConfig();
    session = pageContext.getSession();
    out = pageContext.getOut();
    _jspx_out = out;
    _jspx_resourceInjector =
        (org.apache.jasper.runtime.ResourceInjector)
            application.getAttribute("com.sun.appserv.jsp.resource.injector");

    out.write('\n');

    JobTracker tracker = (JobTracker) application.getAttribute("job.tracker");
    ClusterStatus status = tracker.getClusterStatus();
    String trackerName = StringUtils.simpleHostname(tracker.getJobTrackerMachine());

    out.write("\n<html>\n<head>\n<title>");
    out.print(trackerName);
    out.write(
        " Hadoop Locality Statistics</title>\n<link rel=\"stylesheet\" type=\"text/css\" href=\"/static/hadoop.css\">\n</head>\n<body>\n<h1>");
    out.print(trackerName);
    out.write(" Hadoop Locality Statistics</h1>\n\n<b>State:</b> ");
    out.print(status.getJobTrackerState());
    out.write("<br>\n<b>Started:</b> ");
    out.print(new Date(tracker.getStartTime()));
    out.write("<br>\n<b>Version:</b> ");
    out.print(VersionInfo.getVersion());
    out.write(",\n r");
    out.print(VersionInfo.getRevision());
    out.write("<br>\n<b>Compiled:</b> ");
    out.print(VersionInfo.getDate());
    out.write(" by\n ");
    out.print(VersionInfo.getUser());
    out.write("<br>\n<b>Identifier:</b> ");
    out.print(tracker.getTrackerIdentifier());
    out.write("<br>\n\n<hr>\n\n");

    Collection<JobInProgress> jobs = new ArrayList<JobInProgress>();
    jobs.addAll(tracker.completedJobs());
    jobs.addAll(tracker.runningJobs());
    jobs.addAll(tracker.failedJobs());

    int dataLocalMaps = 0;
    int rackLocalMaps = 0;
    int totalMaps = 0;
    int totalReduces = 0;

    for (JobInProgress job : jobs) {
      Counters counters = job.getCounters();
      dataLocalMaps += counters.getCounter(JobInProgress.Counter.DATA_LOCAL_MAPS);
      rackLocalMaps += counters.getCounter(JobInProgress.Counter.RACK_LOCAL_MAPS);
      totalMaps += counters.getCounter(JobInProgress.Counter.TOTAL_LAUNCHED_MAPS);
      totalReduces += counters.getCounter(JobInProgress.Counter.TOTAL_LAUNCHED_REDUCES);
    }

    int dataLocalMapPct = totalMaps == 0 ? 0 : (100 * dataLocalMaps) / totalMaps;
    int rackLocalMapPct = totalMaps == 0 ? 0 : (100 * rackLocalMaps) / totalMaps;
    int dataRackLocalMapPct =
        totalMaps == 0 ? 0 : (100 * (dataLocalMaps + rackLocalMaps)) / totalMaps;

    out.write("\n<p>\n<b>Data Local Maps:</b> ");
    out.print(dataLocalMaps);
    out.write(' ');
    out.write('(');
    out.print(dataLocalMapPct);
    out.write("%) <br>\n<b>Rack Local Maps:</b> ");
    out.print(rackLocalMaps);
    out.write(' ');
    out.write('(');
    out.print(rackLocalMapPct);
    out.write("%) <br>\n<b>Data or Rack Local:</b> ");
    out.print(dataLocalMaps + rackLocalMaps);
    out.write(' ');
    out.write('(');
    out.print(dataRackLocalMapPct);
    out.write("%) <br>\n<b>Total Maps:</b> ");
    out.print(totalMaps);
    out.write(" <br>\n<b>Total Reduces:</b> ");
    out.print(totalReduces);
    out.write(" <br>\n</p>\n\n");
    out.println(ServletUtil.htmlFooter());
    out.write('\n');
  } catch (Throwable t) {
    if (!(t instanceof SkipPageException)) {
      out = _jspx_out;
      if (out != null && out.getBufferSize() != 0) out.clearBuffer();
      if (_jspx_page_context != null) _jspx_page_context.handlePageException(t);
    }
  } finally {
    _jspxFactory.releasePageContext(_jspx_page_context);
  }
}
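/*
 * Worked example of the integer percentage math above, with made-up counts
 * (60 data-local and 25 rack-local maps out of 100 launched). Note the math
 * truncates: a single non-local map out of a very large total still rounds
 * the page's figure down to 99%.
 */
int dataLocalMaps = 60, rackLocalMaps = 25, totalMaps = 100;
int dataLocalMapPct = (100 * dataLocalMaps) / totalMaps;                       // 60
int rackLocalMapPct = (100 * rackLocalMaps) / totalMaps;                       // 25
int dataRackLocalMapPct = (100 * (dataLocalMaps + rackLocalMaps)) / totalMaps; // 85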
public synchronized void killJob(String jobid) {
  JobInProgress job = (JobInProgress) jobs.get(jobid);
  if (job != null) { // guard against an unknown or already-retired job id
    job.kill();
  }
}
/**
 * A tracker wants to know if there's a Task to run. Returns a task we'd like the TaskTracker to
 * execute right now.
 *
 * <p>Eventually this function should compute load on the various TaskTrackers, and incorporate
 * knowledge of DFS file placement. But for right now, it just grabs a single item out of the
 * pending task list and hands it back.
 */
public synchronized Task pollForNewTask(String taskTracker) {
  //
  // Compute average map and reduce task numbers across pool
  //
  int avgMaps = 0;
  int avgReduces = 0;
  int numTaskTrackers;
  TaskTrackerStatus tts;
  synchronized (taskTrackers) {
    numTaskTrackers = taskTrackers.size();
    tts = (TaskTrackerStatus) taskTrackers.get(taskTracker);
  }
  if (numTaskTrackers > 0) {
    avgMaps = totalMaps / numTaskTrackers;
    avgReduces = totalReduces / numTaskTrackers;
  }
  int totalCapacity = numTaskTrackers * maxCurrentTasks;

  //
  // Get map + reduce counts for the current tracker.
  //
  if (tts == null) {
    LOG.warning("Unknown task tracker polling; ignoring: " + taskTracker);
    return null;
  }
  int numMaps = tts.countMapTasks();
  int numReduces = tts.countReduceTasks();

  //
  // In the below steps, we allocate first a map task (if appropriate),
  // and then a reduce task if appropriate. We go through all jobs
  // in order of job arrival; jobs only get serviced if their
  // predecessors are serviced, too.
  //

  //
  // We hand a task to the current taskTracker if the given machine
  // has a workload that's equal to or less than the averageMaps
  // +/- TASK_ALLOC_EPSILON. (That epsilon is in place in case
  // there is an odd machine that is failing for some reason but
  // has not yet been removed from the pool, making capacity seem
  // larger than it really is.)
  //
  synchronized (jobsByArrival) {
    if ((numMaps < maxCurrentTasks) && (numMaps <= (avgMaps + TASK_ALLOC_EPSILON))) {
      int totalNeededMaps = 0;
      for (Iterator it = jobsByArrival.iterator(); it.hasNext(); ) {
        JobInProgress job = (JobInProgress) it.next();
        if (job.getStatus().getRunState() != JobStatus.RUNNING) {
          continue;
        }

        Task t = job.obtainNewMapTask(taskTracker, tts);
        if (t != null) {
          return t;
        }

        //
        // Beyond the highest-priority task, reserve a little
        // room for failures and speculative executions; don't
        // schedule tasks to the hilt.
        //
        totalNeededMaps += job.desiredMaps();
        double padding = 0;
        if (totalCapacity > MIN_SLOTS_FOR_PADDING) {
          padding = Math.min(maxCurrentTasks, totalNeededMaps * PAD_FRACTION);
        }
        if (totalNeededMaps + padding >= totalCapacity) {
          break;
        }
      }
    }

    //
    // Same thing, but for reduce tasks
    //
    if ((numReduces < maxCurrentTasks) && (numReduces <= (avgReduces + TASK_ALLOC_EPSILON))) {
      int totalNeededReduces = 0;
      for (Iterator it = jobsByArrival.iterator(); it.hasNext(); ) {
        JobInProgress job = (JobInProgress) it.next();
        if (job.getStatus().getRunState() != JobStatus.RUNNING) {
          continue;
        }

        Task t = job.obtainNewReduceTask(taskTracker, tts);
        if (t != null) {
          return t;
        }

        //
        // Beyond the highest-priority task, reserve a little
        // room for failures and speculative executions; don't
        // schedule tasks to the hilt.
        //
        totalNeededReduces += job.desiredReduces();
        double padding = 0;
        if (totalCapacity > MIN_SLOTS_FOR_PADDING) {
          padding = Math.min(maxCurrentTasks, totalNeededReduces * PAD_FRACTION);
        }
        if (totalNeededReduces + padding >= totalCapacity) {
          break;
        }
      }
    }
  }
  return null;
}
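/*
 * Worked example of the padding heuristic above, with made-up values (none
 * of these numbers come from the source): 10 trackers at maxCurrentTasks = 2
 * give totalCapacity = 20 slots; assume PAD_FRACTION = 0.01 and
 * MIN_SLOTS_FOR_PADDING below 20. With 18 map slots of demand accumulated,
 * padding = min(2, 18 * 0.01) = 0.18, and 18 + 0.18 < 20, so the scheduler
 * still considers the next job in arrival order; once demand plus padding
 * reaches capacity, the loop breaks and later jobs wait.
 */
int totalCapacity = 10 * 2;                           // 10 trackers x 2 slots
int totalNeededMaps = 18;                             // demand accumulated so far
double padding = Math.min(2, totalNeededMaps * 0.01); // 0.18
boolean keepScheduling = totalNeededMaps + padding < totalCapacity; // true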