/** Return a Task that can be sent to a TaskTracker for execution. */
public Task getTaskToRun(String taskTracker) {
  // Create the 'taskid'; do not count the 'killed' tasks against the job!
  TaskAttemptID taskid = null;
  if (nextTaskId < (MAX_TASK_EXECS + maxTaskAttempts + numKilledTasks)) {
    // Make sure that the attempts are unique across restarts
    int attemptId = job.getNumRestarts() * NUM_ATTEMPTS_PER_RESTART + nextTaskId;
    taskid = new TaskAttemptID(id, attemptId);
    ++nextTaskId;
  } else {
    LOG.warn(
        "Exceeded limit of " + (MAX_TASK_EXECS + maxTaskAttempts)
            + " (plus " + numKilledTasks + " killed)"
            + " attempts for the tip '" + getTIPId() + "'");
    return null;
  }

  // Keep track of the last time we started an attempt at this TIP;
  // used to calculate the progress rate of this TIP.
  setDispatchTime(taskid, JobTracker.getClock().getTime());

  if (0 == execStartTime) {
    // assume task starts running now
    execStartTime = JobTracker.getClock().getTime();
  }
  return addRunningTask(taskid, taskTracker);
}
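// The restart-offset arithmetic above is easy to sanity-check in isolation.
// A minimal, hypothetical sketch (not part of the Hadoop source; the constant
// value is an assumption for illustration): each JobTracker restart shifts new
// attempt ids into a fresh, disjoint range, so ids handed out before a restart
// can never collide with ids handed out after it.
class AttemptIdSketch {
  // assumed illustrative value; must exceed any per-run attempt count
  static final int NUM_ATTEMPTS_PER_RESTART = 1000;

  static int attemptId(int numRestarts, int nextTaskId) {
    return numRestarts * NUM_ATTEMPTS_PER_RESTART + nextTaskId;
  }

  public static void main(String[] args) {
    System.out.println(attemptId(0, 0)); // 0    : first attempt before any restart
    System.out.println(attemptId(0, 3)); // 3    : fourth attempt before any restart
    System.out.println(attemptId(1, 0)); // 1000 : first attempt after one restart,
                                         //        disjoint from the [0, 999] range
  }
}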
public void testJobRetire() throws Exception {
  MiniMRCluster mr = null;
  try {
    JobConf conf = new JobConf();
    mr = startCluster(conf, 1);

    JobConf jobConf = mr.createJobConf();
    JobTracker jobtracker = mr.getJobTrackerRunner().getJobTracker();

    Path inDir = new Path(testDir, "input1");
    Path outDir = new Path(testDir, "output1");
    JobID id1 = validateJobRetire(jobConf, inDir, outDir, jobtracker);

    outDir = new Path(testDir, "output2");
    JobID id2 = validateJobRetire(jobConf, inDir, outDir, jobtracker);

    assertNull("Job not removed from cache", jobtracker.getJobStatus(id1));
    assertEquals("Total jobs in cache not correct", 1, jobtracker.getAllJobs().length);
  } finally {
    if (mr != null) {
      mr.shutdown();
    }
    FileUtil.fullyDelete(new File(testDir.toString()));
  }
}
// wait till the job retires
private void waitTillRetire(JobID id, JobTracker jobtracker) {
  JobInProgress job = jobtracker.getJob(id);
  for (int i = 0; i < 10 && job != null; i++) {
    UtilsForTests.waitFor(1000);
    job = jobtracker.getJob(id);
  }
  assertNull("Job did not retire", job);
}
private void printFailures(
    JspWriter out, JobTracker tracker, JobID jobId, String kind, String cause)
    throws IOException {
  JobInProgress job = (JobInProgress) tracker.getJob(jobId);
  if (job == null) {
    out.print("<b>Job " + jobId + " not found.</b><br>\n");
    return;
  }

  boolean includeMap = false;
  boolean includeReduce = false;
  if (kind == null) {
    includeMap = true;
    includeReduce = true;
  } else if ("map".equals(kind)) {
    includeMap = true;
  } else if ("reduce".equals(kind)) {
    includeReduce = true;
  } else if ("all".equals(kind)) {
    includeMap = true;
    includeReduce = true;
  } else {
    out.print("<b>Kind " + kind + " not supported.</b><br>\n");
    return;
  }

  TaskStatus.State state = null;
  try {
    if (cause != null) {
      state = TaskStatus.State.valueOf(cause.toUpperCase());
      if (state != TaskStatus.State.FAILED && state != TaskStatus.State.KILLED) {
        out.print("<b>Cause '" + cause + "' is not an 'unsuccessful' state.</b><br>\n");
        return;
      }
    }
  } catch (IllegalArgumentException e) {
    out.print("<b>Cause '" + cause + "' not supported.</b><br>\n");
    return;
  }

  out.print("<table border=2 cellpadding=\"5\" cellspacing=\"2\">");
  out.print(
      "<tr><th>Attempt</th><th>Task</th><th>Machine</th><th>State</th>"
          + "<th>Error</th><th>Logs</th></tr>\n");

  if (includeMap) {
    TaskInProgress[] tips = job.getTasks(TaskType.MAP);
    for (int i = 0; i < tips.length; ++i) {
      printFailedAttempts(out, tracker, jobId, tips[i], state);
    }
  }
  if (includeReduce) {
    TaskInProgress[] tips = job.getTasks(TaskType.REDUCE);
    for (int i = 0; i < tips.length; ++i) {
      printFailedAttempts(out, tracker, jobId, tips[i], state);
    }
  }
  out.print("</table>\n");
}
/** The TIP's been ordered kill()ed. */
public void kill() {
  if (isComplete() || failed) {
    return;
  }
  this.failed = true;
  killed = true;
  this.execFinishTime = JobTracker.getClock().getTime();
  recomputeProgress();
}
/** Shutdown the job tracker and wait for it to finish. */
public void shutdown() {
  try {
    if (tracker != null) {
      tracker.stopTracker();
    }
  } catch (Throwable e) {
    LOG.error("Problem shutting down job tracker", e);
  }
  isActive = false;
}
// create a new job and add it to the jobtracker
private JobInProgress createAndAddJob(JobTracker jobtracker, JobConf conf) {
  // simulate job submission: generate the new job-id directly
  JobID id = new JobID(jobtracker.getTrackerIdentifier(), jobtracker.jobs.size() + 1);
  // create a JobInProgress for this fake job
  JobInProgress jip = new JobInProgress(id, conf, jobtracker);
  // register the fake job with the jobtracker
  jobtracker.jobs.put(id, jip);
  return jip;
}
private JobID validateJobRetire(JobConf jobConf, Path inDir, Path outDir, JobTracker jobtracker)
    throws IOException {
  RunningJob rj = UtilsForTests.runJob(jobConf, inDir, outDir, 0, 0);
  rj.waitForCompletion();
  assertTrue(rj.isSuccessful());
  JobID id = rj.getID();

  // wait for the job to get retired
  waitTillRetire(id, jobtracker);

  RetireJobInfo retired = jobtracker.retireJobs.get(id);
  assertTrue(
      "History url not set",
      retired.getHistoryFile() != null && retired.getHistoryFile().length() > 0);
  assertNotNull("Job is not in cache", jobtracker.getJobStatus(id));

  // get the job conf filename
  String name = jobtracker.getLocalJobFilePath(id);
  File file = new File(name);
  assertFalse("JobConf file not deleted", file.exists());

  // test redirection
  URL jobUrl = new URL(rj.getTrackingURL());
  HttpURLConnection conn = (HttpURLConnection) jobUrl.openConnection();
  conn.setInstanceFollowRedirects(false);
  conn.connect();
  assertEquals(HttpURLConnection.HTTP_MOVED_TEMP, conn.getResponseCode());
  conn.disconnect();

  URL redirectedUrl = new URL(conn.getHeaderField("Location"));
  conn = (HttpURLConnection) redirectedUrl.openConnection();
  conn.connect();
  assertEquals(HttpURLConnection.HTTP_OK, conn.getResponseCode());
  conn.disconnect();

  return id;
}
public static void startTracker(Configuration conf) throws IOException {
  if (tracker != null) {
    throw new IOException("JobTracker already running.");
  }
  // Retry until the JobTracker can be constructed (e.g. while its port is
  // still in use), backing off for a second between attempts.
  while (true) {
    try {
      tracker = new JobTracker(conf);
      break;
    } catch (IOException e) {
      LOG.log(Level.WARNING, "Starting tracker", e);
    }
    try {
      Thread.sleep(1000);
    } catch (InterruptedException e) {
      // ignore and retry
    }
  }
  tracker.offerService();
}
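// startTracker() above retries the constructor forever. A hedged sketch of the
// same pattern with an explicit bound (hypothetical helper, not part of the
// Hadoop API; the retry count and delay are illustrative assumptions) for
// callers that would rather fail fast than spin indefinitely:
static JobTracker startTrackerWithRetries(Configuration conf, int maxRetries)
    throws IOException, InterruptedException {
  IOException last = null;
  for (int i = 0; i < maxRetries; i++) {
    try {
      return new JobTracker(conf); // may fail while, e.g., its port is still bound
    } catch (IOException e) {
      last = e;                    // remember the most recent failure
      Thread.sleep(1000);          // back off before the next attempt
    }
  }
  throw (last != null ? last : new IOException("maxRetries must be > 0"));
}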
/** Indicate that one of the taskids in this TaskInProgress has successfully completed! */
public void completed(TaskAttemptID taskid) {
  //
  // Record that this taskid is complete
  //
  completedTask(taskid, TaskStatus.State.SUCCEEDED);

  // Note the successful taskid
  setSuccessfulTaskid(taskid);

  //
  // Now that the TIP is complete, the other speculative
  // subtasks will be closed when the owning tasktracker
  // reports in and calls shouldClose() on this object.
  //
  this.completes++;
  this.execFinishTime = JobTracker.getClock().getTime();
  recomputeProgress();
}
// Mock a job run such that the jobtracker is in a state similar to that
// resulting from an actual job run.
// Steps:
//   - generate a new job-id
//   - create and add a JobInProgress object using the fake job-id
//   - create and add a fake tip of the passed type 't' under the fake job
//     (t can be a MAP, a REDUCE, a JOB_SETUP, or a JOB_CLEANUP)
//   - create and add a fake attempt under the fake tip
//   - remove the job from the jobtracker
//   - check if the fake attempt is removed from the jobtracker
private void testRemoveJobTasks(JobTracker jobtracker, JobConf conf, TaskType type) {
  // create and submit a job
  JobInProgress jip = createAndAddJob(jobtracker, conf);
  // create and add a tip
  TaskInProgress tip = createAndAddTIP(jobtracker, jip, type);
  // create and add an attempt
  TaskAttemptID taskid = createAndAddAttempt(tip, 0);

  // this fake attempt should not have any status
  assertNull(tip.getTaskStatus(taskid));

  // remove the job tasks for this fake job from the jobtracker
  jobtracker.removeJobTasks(jip);

  // check the taskidToTIPMap
  for (TaskAttemptID tid : jobtracker.taskidToTIPMap.keySet()) {
    LOG.info("TaskidToTIP : " + tid);
  }

  // check if the fake attempt is removed from the jobtracker
  assertEquals("'taskid' to TIP mapping still exists", 0, jobtracker.taskidToTIPMap.size());
}
/** Initialization common to Map and Reduce */
void init(JobID jobId) {
  this.startTime = JobTracker.getClock().getTime();
  this.id = new TaskID(jobId, isMapTask(), partition);
  this.skipping = startSkipping();

  long speculativeDuration;
  if (isMapTask()) {
    this.speculativeLag = conf.getMapSpeculativeLag();
    speculativeDuration = conf.getMapSpeculativeDuration();
  } else {
    this.speculativeLag = conf.getReduceSpeculativeLag();
    speculativeDuration = conf.getReduceSpeculativeDuration();
  }

  // speculate only if 1/(1000 * progress_rate) > speculativeDuration,
  // i.e. speculate only if progress_rate < 1/(1000 * speculativeDuration)
  if (speculativeDuration > 0) {
    this.maxProgressRateForSpeculation = 1.0 / (1000.0 * speculativeDuration);
  } else {
    // disable this check for durations <= 0
    this.maxProgressRateForSpeculation = -1.0;
  }
}
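// A worked example of the threshold above (all values are assumptions for
// illustration; the 1000 factor suggests speculativeDuration is configured in
// seconds while progress rates are measured per millisecond):
static boolean speculationThresholdExample() {
  long speculativeDuration = 60;                         // assumed: 60 s configured duration
  double maxRate = 1.0 / (1000.0 * speculativeDuration); // ~1.67e-5 progress units per ms
  double observedRate = 1.0 / 120000.0;                  // task pacing toward ~120 s total
  return observedRate < maxRate;                         // true: projected remaining work > 60 s,
                                                         // so the task is a speculation candidate
}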
/** Create the job tracker and run it. */
public void run() {
  try {
    jc = (jc == null) ? createJobConf() : createJobConf(jc);
    File f = new File("build/test/mapred/local").getAbsoluteFile();
    jc.set("mapred.local.dir", f.getAbsolutePath());
    jc.setClass(
        "topology.node.switch.mapping.impl", StaticMapping.class, DNSToSwitchMapping.class);
    final String id = new SimpleDateFormat("yyyyMMddHHmmssSSS").format(new Date());
    if (ugi == null) {
      ugi = UserGroupInformation.getCurrentUser();
    }
    tracker =
        ugi.doAs(
            new PrivilegedExceptionAction<JobTracker>() {
              public JobTracker run() throws InterruptedException, IOException {
                return JobTracker.startTracker(jc, id);
              }
            });
    tracker.offerService();
  } catch (Throwable e) {
    LOG.error("Job tracker crashed", e);
    isActive = false;
  }
}
/** Check refreshNodes for decommissioning blacklisted nodes. */
public void testBlacklistedNodeDecommissioning() throws Exception {
  LOG.info("Testing blacklisted node decommissioning");
  MiniMRCluster mr = null;
  JobTracker jt = null;
  try {
    // start mini mr
    JobConf jtConf = new JobConf();
    jtConf.set("mapred.max.tracker.blacklists", "1");
    mr = new MiniMRCluster(0, 0, 2, "file:///", 1, null, null, null, jtConf);
    jt = mr.getJobTrackerRunner().getJobTracker();

    assertEquals("Trackers not up", 2, jt.taskTrackers().size());
    // validate the total tracker count
    assertEquals(
        "Active tracker count mismatch", 2, jt.getClusterStatus(false).getTaskTrackers());
    // validate blacklisted count
    assertEquals(
        "Blacklisted tracker count mismatch",
        0,
        jt.getClusterStatus(false).getBlacklistedTrackers());

    // run a failing job to blacklist the tracker
    JobConf jConf = mr.createJobConf();
    jConf.set("mapred.max.tracker.failures", "1");
    jConf.setJobName("test-job-fail-once");
    jConf.setMapperClass(FailOnceMapper.class);
    jConf.setReducerClass(IdentityReducer.class);
    jConf.setNumMapTasks(1);
    jConf.setNumReduceTasks(0);
    RunningJob job =
        UtilsForTests.runJob(jConf, new Path(TEST_DIR, "in"), new Path(TEST_DIR, "out"));
    job.waitForCompletion();

    // validate the total tracker count
    assertEquals(
        "Active tracker count mismatch", 1, jt.getClusterStatus(false).getTaskTrackers());
    // validate blacklisted count
    assertEquals(
        "Blacklisted tracker count mismatch",
        1,
        jt.getClusterStatus(false).getBlacklistedTrackers());

    // find the blacklisted tracker
    String trackerName = null;
    for (TaskTrackerStatus status : jt.taskTrackers()) {
      if (jt.isBlacklisted(status.getTrackerName())) {
        trackerName = status.getTrackerName();
        break;
      }
    }
    // get the hostname
    String hostToDecommission = JobInProgress.convertTrackerNameToHostName(trackerName);
    LOG.info("Decommissioning tracker " + hostToDecommission);

    // decommission the node
    HashSet<String> decom = new HashSet<String>(1);
    decom.add(hostToDecommission);
    jt.decommissionNodes(decom);

    // validate: check the cluster status and tracker size
    assertEquals(
        "Tracker is not lost upon host decommissioning",
        1,
        jt.getClusterStatus(false).getTaskTrackers());
    assertEquals(
        "Blacklisted tracker count incorrect in cluster status after decommissioning",
        0,
        jt.getClusterStatus(false).getBlacklistedTrackers());
    assertEquals("Tracker is not lost upon host decommissioning", 1, jt.taskTrackers().size());
  } finally {
    if (mr != null) {
      mr.shutdown();
      mr = null;
      jt = null;
      FileUtil.fullyDelete(new File(TEST_DIR.toString()));
    }
  }
}
public void _jspService(HttpServletRequest request, HttpServletResponse response)
    throws java.io.IOException, ServletException {
  PageContext pageContext = null;
  HttpSession session = null;
  ServletContext application = null;
  ServletConfig config = null;
  JspWriter out = null;
  Object page = this;
  JspWriter _jspx_out = null;
  PageContext _jspx_page_context = null;

  try {
    response.setContentType("text/html; charset=UTF-8");
    pageContext = _jspxFactory.getPageContext(this, request, response, null, true, 8192, true);
    _jspx_page_context = pageContext;
    application = pageContext.getServletContext();
    config = pageContext.getServletConfig();
    session = pageContext.getSession();
    out = pageContext.getOut();
    _jspx_out = out;
    _jspx_resourceInjector =
        (org.apache.jasper.runtime.ResourceInjector)
            application.getAttribute("com.sun.appserv.jsp.resource.injector");

    out.write('\n');
    out.write('\n');

    JobTracker tracker = (JobTracker) application.getAttribute("job.tracker");
    String trackerName = StringUtils.simpleHostname(tracker.getJobTrackerMachine());

    out.write('\n');
    out.write('\n');
    out.write('\n');

    String jobId = request.getParameter("jobid");
    if (jobId == null) {
      out.println("<h2>Missing 'jobid'!</h2>");
      return;
    }
    JobID jobIdObj = JobID.forName(jobId);
    String kind = request.getParameter("kind");
    String cause = request.getParameter("cause");

    out.write("\n\n<html>\n<title>Hadoop ");
    out.print(jobId);
    out.write(" failures on ");
    out.print(trackerName);
    out.write("</title>\n<body>\n<h1>Hadoop <a href=\"jobdetails.jsp?jobid=");
    out.print(jobId);
    out.write('"');
    out.write('>');
    out.print(jobId);
    out.write("</a>\nfailures on <a href=\"jobtracker.jsp\">");
    out.print(trackerName);
    out.write("</a></h1>\n\n");

    printFailures(out, tracker, jobIdObj, kind, cause);

    out.write("\n\n<hr>\n<a href=\"jobtracker.jsp\">Go back to JobTracker</a><br>\n");
    out.println(ServletUtil.htmlFooter());
    out.write('\n');
  } catch (Throwable t) {
    if (!(t instanceof SkipPageException)) {
      out = _jspx_out;
      if (out != null && out.getBufferSize() != 0) out.clearBuffer();
      if (_jspx_page_context != null) _jspx_page_context.handlePageException(t);
    }
  } finally {
    _jspxFactory.releasePageContext(_jspx_page_context);
  }
}
/** Indicate that one of the taskids in this TaskInProgress has failed. */
public void incompleteSubTask(TaskAttemptID taskid, JobStatus jobStatus) {
  //
  // Note the failure and its location
  //
  TaskStatus status = taskStatuses.get(taskid);
  String trackerName;
  String trackerHostName = null;
  TaskStatus.State taskState = TaskStatus.State.FAILED;
  if (status != null) {
    trackerName = status.getTaskTracker();
    trackerHostName = JobInProgressTraits.convertTrackerNameToHostName(trackerName);
    // Check if the user manually KILLED/FAILED this task-attempt...
    Boolean shouldFail = tasksToKill.remove(taskid);
    if (shouldFail != null) {
      if (status.getRunState() == TaskStatus.State.FAILED
          || status.getRunState() == TaskStatus.State.KILLED) {
        taskState = (shouldFail) ? TaskStatus.State.FAILED : TaskStatus.State.KILLED;
      } else {
        taskState =
            (shouldFail) ? TaskStatus.State.FAILED_UNCLEAN : TaskStatus.State.KILLED_UNCLEAN;
      }
      status.setRunState(taskState);
      addDiagnosticInfo(taskid, "Task has been " + taskState + " by the user");
    }

    taskState = status.getRunState();
    if (taskState != TaskStatus.State.FAILED
        && taskState != TaskStatus.State.KILLED
        && taskState != TaskStatus.State.FAILED_UNCLEAN
        && taskState != TaskStatus.State.KILLED_UNCLEAN) {
      LOG.info(
          "Task '" + taskid + "' running on '" + trackerName + "' in state: '" + taskState
              + "' being failed!");
      status.setRunState(TaskStatus.State.FAILED);
      taskState = TaskStatus.State.FAILED;
    }

    // tasktracker went down and the failure time was not reported
    if (0 == status.getFinishTime()) {
      status.setFinishTime(JobTracker.getClock().getTime());
    }
  }

  this.activeTasks.remove(taskid);

  // Since we do not fail completed reduces (whose outputs go to hdfs), we
  // should note this failure only for completed maps, and only if this taskid
  // completed this map. However, if the job is done, there is no need to
  // manipulate completed maps.
  if (this.isMapTask()
      && !jobSetup
      && !jobCleanup
      && isComplete(taskid)
      && jobStatus.getRunState() != JobStatus.SUCCEEDED) {
    this.completes--;
    // Reset the successfulTaskId since we don't have a SUCCESSFUL task now
    resetSuccessfulTaskid();
  }

  // Note that there can be failures of tasks that are hosted on a machine
  // that has not yet registered with the restarted jobtracker;
  // recalculate the counts only if it's a genuine failure.
  if (tasks.contains(taskid)) {
    if (taskState == TaskStatus.State.FAILED) {
      numTaskFailures++;
      machinesWhereFailed.add(trackerHostName);
      if (maxSkipRecords > 0) {
        // skipping feature enabled
        LOG.debug("TaskInProgress adding " + status.getNextRecordRange());
        failedRanges.add(status.getNextRecordRange());
        skipping = startSkipping();
      }
    } else if (taskState == TaskStatus.State.KILLED) {
      numKilledTasks++;
    }
  }

  if (numTaskFailures >= maxTaskAttempts) {
    LOG.info("TaskInProgress " + getTIPId() + " has failed " + numTaskFailures + " times.");
    kill();
  }
}
/**
 * A status message from a client has arrived. It updates the status of a single
 * component-thread-task, which might result in an overall TaskInProgress status update.
 *
 * @return has the task changed its state noticeably?
 */
synchronized boolean updateStatus(TaskStatus status) {
  TaskAttemptID taskid = status.getTaskID();
  String taskTracker = status.getTaskTracker();
  String diagInfo = status.getDiagnosticInfo();
  TaskStatus oldStatus = taskStatuses.get(taskid);
  boolean changed = true;

  if (diagInfo != null && diagInfo.length() > 0) {
    long runTime = status.getRunTime();
    LOG.info(
        "Error from " + taskid + " on " + taskTracker + " runTime(msec) " + runTime + ": "
            + diagInfo);
    addDiagnosticInfo(taskid, diagInfo);
  }

  if (skipping) {
    failedRanges.updateState(status);
  }

  if (oldStatus != null) {
    TaskStatus.State oldState = oldStatus.getRunState();
    TaskStatus.State newState = status.getRunState();

    // We should never receive a duplicate success/failure/killed
    // status update for the same taskid! This is a safety check,
    // and is addressed better at the TaskTracker to ensure this.
    // @see {@link TaskTracker.transmitHeartbeat()}
    if ((newState != TaskStatus.State.RUNNING
            && newState != TaskStatus.State.COMMIT_PENDING
            && newState != TaskStatus.State.FAILED_UNCLEAN
            && newState != TaskStatus.State.KILLED_UNCLEAN
            && newState != TaskStatus.State.UNASSIGNED)
        && (oldState == newState)) {
      LOG.warn(
          "Received duplicate status update of '" + newState + "' for '" + taskid
              + "' of TIP '" + getTIPId() + "'"
              + " oldTT=" + oldStatus.getTaskTracker()
              + " while newTT=" + status.getTaskTracker());
      return false;
    }

    // The task is not allowed to move from completed back to running.
    // We have seen out-of-order status messages moving tasks from complete
    // to running. This is a spot fix, but it should be addressed more
    // globally.
    if ((newState == TaskStatus.State.RUNNING || newState == TaskStatus.State.UNASSIGNED)
        && (oldState == TaskStatus.State.FAILED
            || oldState == TaskStatus.State.KILLED
            || oldState == TaskStatus.State.FAILED_UNCLEAN
            || oldState == TaskStatus.State.KILLED_UNCLEAN
            || oldState == TaskStatus.State.SUCCEEDED
            || oldState == TaskStatus.State.COMMIT_PENDING)) {
      return false;
    }

    // Do not accept any status once the task is marked FAILED/KILLED.
    // This is to handle the case of the JobTracker timing out a task
    // due to launch delay, but the TT comes back with any state, or
    // the TT got expired.
    if (oldState == TaskStatus.State.FAILED || oldState == TaskStatus.State.KILLED) {
      tasksToKill.put(taskid, true);
      return false;
    }

    changed = oldState != newState;
  }

  // If the task is a cleanup attempt, do not replace the complete status;
  // update only specific fields. For example, startTime should not be
  // updated, but finishTime has to be updated.
  if (!isCleanupAttempt(taskid)) {
    taskStatuses.put(taskid, status);
    // we don't want to include setup tasks in the task execution stats
    if (!isJobSetupTask()
        && !isJobCleanupTask()
        && ((isMapTask() && job.hasSpeculativeMaps())
            || (!isMapTask() && job.hasSpeculativeReduces()))) {
      updateProgressRate(JobTracker.getClock().getTime());
    }
  } else {
    taskStatuses
        .get(taskid)
        .statusUpdate(
            status.getRunState(),
            status.getProgress(),
            status.getStateString(),
            status.getPhase(),
            status.getFinishTime());
  }

  // Recompute progress
  recomputeProgress();
  return changed;
}
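// The three rejection rules in updateStatus() can be read as a small state
// machine. A hedged sketch (hypothetical helper, not the actual Hadoop API)
// that mirrors them in the same order, returning false for reports the method
// above drops:
static boolean isAcceptableTransition(TaskStatus.State oldState, TaskStatus.State newState) {
  // 1. In-flight states may legitimately repeat; any other repeated state is
  //    a duplicate success/failure/killed report.
  boolean newIsInFlight =
      newState == TaskStatus.State.RUNNING
          || newState == TaskStatus.State.COMMIT_PENDING
          || newState == TaskStatus.State.FAILED_UNCLEAN
          || newState == TaskStatus.State.KILLED_UNCLEAN
          || newState == TaskStatus.State.UNASSIGNED;
  if (!newIsInFlight && oldState == newState) {
    return false; // duplicate terminal update for the same attempt
  }
  // 2. Never move a settled attempt back to RUNNING/UNASSIGNED
  //    (out-of-order report).
  boolean oldSettled =
      oldState == TaskStatus.State.SUCCEEDED
          || oldState == TaskStatus.State.FAILED
          || oldState == TaskStatus.State.KILLED
          || oldState == TaskStatus.State.FAILED_UNCLEAN
          || oldState == TaskStatus.State.KILLED_UNCLEAN
          || oldState == TaskStatus.State.COMMIT_PENDING;
  if ((newState == TaskStatus.State.RUNNING || newState == TaskStatus.State.UNASSIGNED)
      && oldSettled) {
    return false;
  }
  // 3. Once FAILED/KILLED, accept nothing further at all.
  if (oldState == TaskStatus.State.FAILED || oldState == TaskStatus.State.KILLED) {
    return false;
  }
  return true;
}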
/**
 * Test job retire with tasks that report their *first* status only after the job retires.
 *
 * Steps:
 *   - Start a mini-mr cluster with 1 task-tracker having only map slots. Note that this
 *     task-tracker will take care of setup/cleanup and the map tasks.
 *   - Submit a job with 1 map task and 1 reduce task.
 *   - Wait for the job to finish the map task.
 *   - Start a 2nd tracker that waits for a long time after contacting the JT.
 *   - Wait for the 2nd tracker to get stuck.
 *   - Kill the job.
 *   - Wait for the job to retire.
 *   - Check if the tip mappings are cleaned up.
 */
public void testJobRetireWithUnreportedTasks() throws Exception {
  MiniMRCluster mr = null;
  try {
    JobConf conf = new JobConf();
    // set the num-map-slots to 1 so that no reduce tasks but setup/cleanup
    // can run on it
    conf.setInt("mapred.tasktracker.map.tasks.maximum", 1);
    conf.setInt("mapred.tasktracker.reduce.tasks.maximum", 0);
    mr = startCluster(conf, 1);
    JobTracker jobtracker = mr.getJobTrackerRunner().getJobTracker();

    RunningJob job =
        UtilsForTests.runJob(
            mr.createJobConf(), new Path(testDir, "in-1"), new Path(testDir, "out-1"), 1, 1);
    JobID id = JobID.downgrade(job.getID());
    JobInProgress jip = jobtracker.getJob(id);

    // wait 100 secs for the job to complete its map task
    for (int i = 0; i < 1000 && jip.finishedMaps() < 1; i++) {
      UtilsForTests.waitFor(100);
    }
    assertEquals(1, jip.finishedMaps());

    // start a tracker that will wait
    LOG.info("Adding a waiting tracker");
    TaskTrackerRunner testTrackerRunner =
        mr.new TaskTrackerRunner(1, 1, null, mr.createJobConf()) {
          @Override
          TaskTracker createTaskTracker(JobConf conf) throws InterruptedException, IOException {
            return new WaitingTaskTracker(conf);
          }
        };
    mr.addTaskTracker(testTrackerRunner);
    LOG.info("Waiting tracker added");

    WaitingTaskTracker testTT = (WaitingTaskTracker) testTrackerRunner.getTaskTracker();

    // wait 100 secs for the newly started task-tracker to join
    for (int i = 0; i < 1000 && jobtracker.taskTrackers().size() < 2; i++) {
      UtilsForTests.waitFor(100);
    }
    assertEquals(2, jobtracker.taskTrackers().size());
    LOG.info("Cluster is now up with 2 trackers");

    // stop the test-tt as it's no longer required
    mr.stopTaskTracker(mr.getTaskTrackerID(testTT.getName()));

    // 1 reduce task should be scheduled
    assertEquals("TestTT contacted but no reduce task scheduled on it", 1, jip.runningReduces());

    // kill the job
    LOG.info("Killing job " + id);
    job.killJob();

    // check if the reduce task attempt status is missing
    TaskInProgress tip = jip.getTasks(TaskType.REDUCE)[0];
    assertNull(tip.getTaskStatus(tip.getAllTaskAttemptIDs()[0]));

    // wait for the job to retire
    waitTillRetire(id, jobtracker);

    // check the taskidToTIPMap
    for (TaskAttemptID tid : jobtracker.taskidToTIPMap.keySet()) {
      LOG.info("TaskidToTIP : " + tid);
    }
    assertEquals("'taskid' to TIP mapping still exists", 0, jobtracker.taskidToTIPMap.size());
  } finally {
    if (mr != null) {
      mr.shutdown();
    }
    // cleanup
    FileUtil.fullyDelete(new File(testDir.toString()));
  }
}
public int getJobTrackerInfoPort() { return tracker.getInfoPort(); }
private void printFailedAttempts(
    JspWriter out, JobTracker tracker, JobID jobId, TaskInProgress tip,
    TaskStatus.State failState)
    throws IOException {
  TaskStatus[] statuses = tip.getTaskStatuses();
  TaskID tipId = tip.getTIPId();
  for (int i = 0; i < statuses.length; ++i) {
    TaskStatus.State taskState = statuses[i].getRunState();
    if ((failState == null
            && (taskState == TaskStatus.State.FAILED || taskState == TaskStatus.State.KILLED))
        || taskState == failState) {
      String taskTrackerName = statuses[i].getTaskTracker();
      TaskTrackerStatus taskTracker = tracker.getTaskTrackerStatus(taskTrackerName);
      out.print(
          "<tr><td>" + statuses[i].getTaskID() + "</td><td><a href=\"taskdetails.jsp?jobid="
              + jobId + "&tipid=" + tipId + "\">" + tipId + "</a></td>");
      if (taskTracker == null) {
        out.print("<td>" + taskTrackerName + "</td>");
      } else {
        out.print(
            "<td><a href=\"http://" + taskTracker.getHost() + ":" + taskTracker.getHttpPort()
                + "\">" + taskTracker.getHost() + "</a></td>");
      }
      out.print("<td>" + taskState + "</td>");
      out.print("<td><pre>");
      String[] failures = tracker.getTaskDiagnostics(statuses[i].getTaskID());
      if (failures == null) {
        out.print(" ");
      } else {
        for (int j = 0; j < failures.length; j++) {
          out.print(failures[j]);
          if (j < (failures.length - 1)) {
            out.print("\n-------\n");
          }
        }
      }
      out.print("</pre></td>");
      out.print("<td>");
      String taskLogUrl = null;
      if (taskTracker != null) {
        taskLogUrl =
            TaskLogServlet.getTaskLogUrl(
                taskTracker.getHost(),
                String.valueOf(taskTracker.getHttpPort()),
                statuses[i].getTaskID().toString());
      }
      if (taskLogUrl != null) {
        String tailFourKBUrl = taskLogUrl + "&start=-4097";
        String tailEightKBUrl = taskLogUrl + "&start=-8193";
        String entireLogUrl = taskLogUrl;
        out.print("<a href=\"" + tailFourKBUrl + "\">Last 4KB</a><br/>");
        out.print("<a href=\"" + tailEightKBUrl + "\">Last 8KB</a><br/>");
        out.print("<a href=\"" + entireLogUrl + "\">All</a><br/>");
      } else {
        out.print("n/a"); // task tracker was lost
      }
      out.print("</td>");
      out.print("</tr>\n");
    }
  }
}
// for initTasks, update information from the JobStory object
@Override
public synchronized void initTasks() throws IOException {
  boolean loggingEnabled = LOG.isDebugEnabled();
  if (loggingEnabled) {
    LOG.debug("(initTasks@SJIP) Starting Initialization for " + jobId);
  }
  numMapTasks = jobStory.getNumberMaps();
  numReduceTasks = jobStory.getNumberReduces();

  JobHistory.JobInfo.logSubmitted(
      getJobID(), conf, jobFile.toString(), this.startTime, hasRestarted());
  if (loggingEnabled) {
    LOG.debug("(initTasks@SJIP) Logged to job history for " + jobId);
  }

  // checkTaskLimits();
  if (loggingEnabled) {
    LOG.debug("(initTasks@SJIP) Checked task limits for " + jobId);
  }

  final String jobFile = "default";
  splits = getRawSplits(jobStory.getInputSplits());
  if (loggingEnabled) {
    LOG.debug(
        "(initTasks@SJIP) Created splits for job = " + jobId
            + " number of splits = " + splits.length);
  }

  // createMapTasks(jobFile, splits);
  numMapTasks = splits.length;
  maps = new TaskInProgress[numMapTasks];
  for (int i = 0; i < numMapTasks; ++i) {
    inputLength += splits[i].getDataLength();
    maps[i] = new TaskInProgress(jobId, jobFile, splits[i], conf, this, i, numSlotsPerMap);
  }
  if (numMapTasks > 0) {
    nonRunningMapCache = createCache(splits, maxLevel);
    if (loggingEnabled) {
      LOG.debug(
          "initTasks: numMaps=" + numMapTasks
              + " size of nonRunningMapCache=" + nonRunningMapCache.size()
              + " for " + jobId);
    }
  }

  // set the launch time
  this.launchTime = JobTracker.getClock().getTime();

  // createReduceTasks(jobFile);
  //
  // Create reduce tasks
  //
  this.reduces = new TaskInProgress[numReduceTasks];
  for (int i = 0; i < numReduceTasks; i++) {
    reduces[i] =
        new TaskInProgress(jobId, jobFile, numMapTasks, i, conf, this, numSlotsPerReduce);
    nonRunningReduces.add(reduces[i]);
  }

  // Calculate the minimum number of maps to be complete before
  // we should start scheduling reduces
  completedMapsForReduceSlowstart =
      (int)
          Math.ceil(
              (conf.getFloat(
                      "mapred.reduce.slowstart." + "completed.maps",
                      DEFAULT_COMPLETED_MAPS_PERCENT_FOR_REDUCE_SLOWSTART)
                  * numMapTasks));

  tasksInited.set(true);
  if (loggingEnabled) {
    LOG.debug(
        "Initializing job, now status = "
            + JobStatus.getJobRunState(getStatus().getRunState()));
  }
  setupComplete();
  if (loggingEnabled) {
    LOG.debug(
        "Initializing job, inited-status = "
            + JobStatus.getJobRunState(getStatus().getRunState()));
  }
}
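// A worked example (assumed values) of the reduce slow-start threshold
// computed above: assuming the default completed-maps fraction is 0.05 and
// a job has 40 maps, reduces become schedulable once ceil(0.05 * 40) = 2
// maps have finished.
static int slowstartExample() {
  float completedMapsFraction = 0.05f; // assumed value of mapred.reduce.slowstart.completed.maps
  int numMapTasks = 40;                // assumed job size
  return (int) Math.ceil(completedMapsFraction * numMapTasks); // 2
}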
public void _jspService(HttpServletRequest request, HttpServletResponse response)
    throws java.io.IOException, ServletException {
  PageContext pageContext = null;
  HttpSession session = null;
  ServletContext application = null;
  ServletConfig config = null;
  JspWriter out = null;
  Object page = this;
  JspWriter _jspx_out = null;
  PageContext _jspx_page_context = null;

  try {
    response.setContentType("text/html; charset=UTF-8");
    pageContext = _jspxFactory.getPageContext(this, request, response, null, true, 8192, true);
    _jspx_page_context = pageContext;
    application = pageContext.getServletContext();
    config = pageContext.getServletConfig();
    session = pageContext.getSession();
    out = pageContext.getOut();
    _jspx_out = out;
    _jspx_resourceInjector =
        (org.apache.jasper.runtime.ResourceInjector)
            application.getAttribute("com.sun.appserv.jsp.resource.injector");

    out.write('\n');

    JobTracker tracker = (JobTracker) application.getAttribute("job.tracker");
    ClusterStatus status = tracker.getClusterStatus();
    String trackerName = StringUtils.simpleHostname(tracker.getJobTrackerMachine());

    out.write("\n<html>\n<head>\n<title>");
    out.print(trackerName);
    out.write(
        " Hadoop Locality Statistics</title>\n<link rel=\"stylesheet\" type=\"text/css\" href=\"/static/hadoop.css\">\n</head>\n<body>\n<h1>");
    out.print(trackerName);
    out.write(" Hadoop Locality Statistics</h1>\n\n<b>State:</b> ");
    out.print(status.getJobTrackerState());
    out.write("<br>\n<b>Started:</b> ");
    out.print(new Date(tracker.getStartTime()));
    out.write("<br>\n<b>Version:</b> ");
    out.print(VersionInfo.getVersion());
    out.write(",\n r");
    out.print(VersionInfo.getRevision());
    out.write("<br>\n<b>Compiled:</b> ");
    out.print(VersionInfo.getDate());
    out.write(" by\n ");
    out.print(VersionInfo.getUser());
    out.write("<br>\n<b>Identifier:</b> ");
    out.print(tracker.getTrackerIdentifier());
    out.write("<br>\n\n<hr>\n\n");

    Collection<JobInProgress> jobs = new ArrayList<JobInProgress>();
    jobs.addAll(tracker.completedJobs());
    jobs.addAll(tracker.runningJobs());
    jobs.addAll(tracker.failedJobs());

    int dataLocalMaps = 0;
    int rackLocalMaps = 0;
    int totalMaps = 0;
    int totalReduces = 0;
    for (JobInProgress job : jobs) {
      Counters counters = job.getCounters();
      dataLocalMaps += counters.getCounter(JobInProgress.Counter.DATA_LOCAL_MAPS);
      rackLocalMaps += counters.getCounter(JobInProgress.Counter.RACK_LOCAL_MAPS);
      totalMaps += counters.getCounter(JobInProgress.Counter.TOTAL_LAUNCHED_MAPS);
      totalReduces += counters.getCounter(JobInProgress.Counter.TOTAL_LAUNCHED_REDUCES);
    }

    int dataLocalMapPct = totalMaps == 0 ? 0 : (100 * dataLocalMaps) / totalMaps;
    int rackLocalMapPct = totalMaps == 0 ? 0 : (100 * rackLocalMaps) / totalMaps;
    int dataRackLocalMapPct =
        totalMaps == 0 ? 0 : (100 * (dataLocalMaps + rackLocalMaps)) / totalMaps;

    out.write("\n<p>\n<b>Data Local Maps:</b> ");
    out.print(dataLocalMaps);
    out.write(' ');
    out.write('(');
    out.print(dataLocalMapPct);
    out.write("%) <br>\n<b>Rack Local Maps:</b> ");
    out.print(rackLocalMaps);
    out.write(' ');
    out.write('(');
    out.print(rackLocalMapPct);
    out.write("%) <br>\n<b>Data or Rack Local:</b> ");
    out.print(dataLocalMaps + rackLocalMaps);
    out.write(' ');
    out.write('(');
    out.print(dataRackLocalMapPct);
    out.write("%) <br>\n<b>Total Maps:</b> ");
    out.print(totalMaps);
    out.write(" <br>\n<b>Total Reduces:</b> ");
    out.print(totalReduces);
    out.write(" <br>\n</p>\n\n");
    out.println(ServletUtil.htmlFooter());
    out.write('\n');
  } catch (Throwable t) {
    if (!(t instanceof SkipPageException)) {
      out = _jspx_out;
      if (out != null && out.getBufferSize() != 0) out.clearBuffer();
      if (_jspx_page_context != null) _jspx_page_context.handlePageException(t);
    }
  } finally {
    _jspxFactory.releasePageContext(_jspx_page_context);
  }
}
@SuppressWarnings("deprecation")
public SimulatorJobInProgress(
    JobID jobid, JobTracker jobtracker, JobConf default_conf, JobStory jobStory) {
  super(jobid, jobStory.getJobConf(), jobtracker);
  // jobSetupCleanupNeeded is set to false in the parent constructor,
  // though the default is true
  restartCount = 0;
  jobSetupCleanupNeeded = false;

  this.memoryPerMap = conf.getMemoryForMapTask();
  this.memoryPerReduce = conf.getMemoryForReduceTask();
  this.maxTaskFailuresPerTracker = conf.getMaxTaskFailuresPerTracker();

  this.jobId = jobid;
  String url =
      "http://" + jobtracker.getJobTrackerMachine() + ":" + jobtracker.getInfoPort()
          + "/jobdetails.jsp?jobid=" + jobid;
  this.jobtracker = jobtracker;
  this.conf = jobStory.getJobConf();
  this.priority = conf.getJobPriority();
  Path jobDir = jobtracker.getSystemDirectoryForJob(jobid);
  this.jobFile = new Path(jobDir, "job.xml");
  this.status =
      new JobStatus(jobid, 0.0f, 0.0f, 0.0f, 0.0f, JobStatus.PREP, priority, conf.getUser());
  this.profile =
      new JobProfile(
          jobStory.getUser(),
          jobid,
          this.jobFile.toString(),
          url,
          jobStory.getName(),
          conf.getQueueName());
  this.startTime = JobTracker.getClock().getTime();
  status.setStartTime(startTime);
  this.resourceEstimator = new ResourceEstimator(this);

  this.numMapTasks = jobStory.getNumberMaps();
  this.numReduceTasks = jobStory.getNumberReduces();
  this.taskCompletionEvents =
      new ArrayList<TaskCompletionEvent>(numMapTasks + numReduceTasks + 10);

  this.mapFailuresPercent = conf.getMaxMapTaskFailuresPercent();
  this.reduceFailuresPercent = conf.getMaxReduceTaskFailuresPercent();

  MetricsContext metricsContext = MetricsUtil.getContext("mapred");
  this.jobMetrics = MetricsUtil.createRecord(metricsContext, "job");
  this.jobMetrics.setTag("user", conf.getUser());
  this.jobMetrics.setTag("sessionId", conf.getSessionId());
  this.jobMetrics.setTag("jobName", conf.getJobName());
  this.jobMetrics.setTag("jobId", jobid.toString());

  this.maxLevel = jobtracker.getNumTaskCacheLevels();
  this.anyCacheLevel = this.maxLevel + 1;
  this.nonLocalMaps = new LinkedList<TaskInProgress>();
  this.nonLocalRunningMaps = new LinkedHashSet<TaskInProgress>();
  this.runningMapCache = new IdentityHashMap<Node, Set<TaskInProgress>>();
  this.nonRunningReduces = new LinkedList<TaskInProgress>();
  this.runningReduces = new LinkedHashSet<TaskInProgress>();

  this.slowTaskThreshold =
      Math.max(0.0f, conf.getFloat("mapred.speculative.execution.slowTaskThreshold", 1.0f));
  this.speculativeCap = conf.getFloat("mapred.speculative.execution.speculativeCap", 0.1f);
  this.slowNodeThreshold = conf.getFloat("mapred.speculative.execution.slowNodeThreshold", 1.0f);

  this.jobStory = jobStory;
  // this.jobHistory = this.jobtracker.getJobHistory();
}
public int getJobTrackerPort() { return tracker.getTrackerPort(); }