@Test
public void testTaskIdCountersDefault() throws JSONException, Exception {
  WebResource r = resource();
  Map<JobId, Job> jobsMap = appContext.getAllJobs();
  for (JobId id : jobsMap.keySet()) {
    String jobId = MRApps.toString(id);
    for (Task task : jobsMap.get(id).getTasks().values()) {
      String tid = MRApps.toString(task.getID());
      ClientResponse response = r.path("ws").path("v1").path("history")
          .path("mapreduce").path("jobs").path(jobId).path("tasks").path(tid)
          .path("counters").get(ClientResponse.class);
      assertEquals(MediaType.APPLICATION_JSON_TYPE, response.getType());
      JSONObject json = response.getEntity(JSONObject.class);
      assertEquals("incorrect number of elements", 1, json.length());
      JSONObject info = json.getJSONObject("jobTaskCounters");
      verifyHsJobTaskCounters(info, task);
    }
  }
}
public void verifyTaskGeneric(Task task, String id, String state, String type,
    String successfulAttempt, long startTime, long finishTime, long elapsedTime,
    float progress) {
  TaskId taskid = task.getID();
  String tid = MRApps.toString(taskid);
  TaskReport report = task.getReport();

  WebServicesTestUtils.checkStringMatch("id", tid, id);
  WebServicesTestUtils.checkStringMatch("type", task.getType().toString(), type);
  WebServicesTestUtils.checkStringMatch("state", report.getTaskState().toString(), state);
  // not easily checked without duplicating logic, just make sure it's there
  assertNotNull("successfulAttempt null", successfulAttempt);
  assertEquals("startTime wrong", report.getStartTime(), startTime);
  assertEquals("finishTime wrong", report.getFinishTime(), finishTime);
  assertEquals("elapsedTime wrong", finishTime - startTime, elapsedTime);
  assertEquals("progress wrong", report.getProgress() * 100, progress, 1e-3f);
}
@Test
// All Task attempts are timed out, leading to Job failure
public void testTimedOutTask() throws Exception {
  MRApp app = new TimeOutTaskMRApp(1, 0);
  Configuration conf = new Configuration();
  int maxAttempts = 2;
  conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, maxAttempts);
  // disable uberization (requires entire job to be reattempted, so max for
  // subtask attempts is overridden to 1)
  conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
  Job job = app.submit(conf);
  app.waitForState(job, JobState.FAILED);

  Map<TaskId, Task> tasks = job.getTasks();
  Assert.assertEquals("Num tasks is not correct", 1, tasks.size());
  Task task = tasks.values().iterator().next();
  Assert.assertEquals("Task state not correct", TaskState.FAILED,
      task.getReport().getTaskState());
  Map<TaskAttemptId, TaskAttempt> attempts =
      tasks.values().iterator().next().getAttempts();
  Assert.assertEquals("Num attempts is not correct", maxAttempts, attempts.size());
  for (TaskAttempt attempt : attempts.values()) {
    Assert.assertEquals("Attempt state not correct", TaskAttemptState.FAILED,
        attempt.getReport().getTaskAttemptState());
  }
}
@Test
// The first attempt fails and the second attempt succeeds, so the job succeeds.
public void testFailTask() throws Exception {
  MRApp app = new MockFirstFailingAttemptMRApp(1, 0);
  Configuration conf = new Configuration();
  // this test requires two task attempts, but uberization overrides max to 1
  conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
  Job job = app.submit(conf);
  app.waitForState(job, JobState.SUCCEEDED);

  Map<TaskId, Task> tasks = job.getTasks();
  Assert.assertEquals("Num tasks is not correct", 1, tasks.size());
  Task task = tasks.values().iterator().next();
  Assert.assertEquals("Task state not correct", TaskState.SUCCEEDED,
      task.getReport().getTaskState());
  Map<TaskAttemptId, TaskAttempt> attempts =
      tasks.values().iterator().next().getAttempts();
  Assert.assertEquals("Num attempts is not correct", 2, attempts.size());

  // one attempt must have failed and the other must have succeeded
  Iterator<TaskAttempt> it = attempts.values().iterator();
  Assert.assertEquals("Attempt state not correct", TaskAttemptState.FAILED,
      it.next().getReport().getTaskAttemptState());
  Assert.assertEquals("Attempt state not correct", TaskAttemptState.SUCCEEDED,
      it.next().getReport().getTaskAttemptState());
}
@Test
public void testCommitPending() throws Exception {
  MRApp app = new MRApp(1, 0, false, this.getClass().getName(), true);
  Job job = app.submit(new Configuration());
  app.waitForState(job, JobState.RUNNING);
  Assert.assertEquals("Num tasks not correct", 1, job.getTasks().size());
  Iterator<Task> it = job.getTasks().values().iterator();
  Task task = it.next();
  app.waitForState(task, TaskState.RUNNING);
  TaskAttempt attempt = task.getAttempts().values().iterator().next();
  app.waitForState(attempt, TaskAttemptState.RUNNING);

  // send the commit pending signal to the task
  app.getContext().getEventHandler().handle(
      new TaskAttemptEvent(attempt.getID(), TaskAttemptEventType.TA_COMMIT_PENDING));

  // wait for first attempt to commit pending
  app.waitForState(attempt, TaskAttemptState.COMMIT_PENDING);

  // send the done signal to the task
  app.getContext().getEventHandler().handle(
      new TaskAttemptEvent(
          task.getAttempts().values().iterator().next().getID(),
          TaskAttemptEventType.TA_DONE));

  app.waitForState(job, JobState.SUCCEEDED);
}
@Test
public void testJobTaskCountersXML() throws Exception {
  WebResource r = resource();
  Map<JobId, Job> jobsMap = appContext.getAllJobs();
  for (JobId id : jobsMap.keySet()) {
    String jobId = MRApps.toString(id);
    for (Task task : jobsMap.get(id).getTasks().values()) {
      String tid = MRApps.toString(task.getID());
      ClientResponse response = r.path("ws").path("v1").path("history")
          .path("mapreduce").path("jobs").path(jobId).path("tasks").path(tid)
          .path("counters").accept(MediaType.APPLICATION_XML)
          .get(ClientResponse.class);
      assertEquals(MediaType.APPLICATION_XML_TYPE, response.getType());
      String xml = response.getEntity(String.class);
      DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
      DocumentBuilder db = dbf.newDocumentBuilder();
      InputSource is = new InputSource();
      is.setCharacterStream(new StringReader(xml));
      Document dom = db.parse(is);
      NodeList info = dom.getElementsByTagName("jobTaskCounters");
      verifyHsTaskCountersXML(info, task);
    }
  }
}
@Override
protected void render(Block html) {
  if (app.getJob() == null) {
    html.h2($(TITLE));
    return;
  }
  TaskType type = null;
  String symbol = $(TASK_TYPE);
  if (!symbol.isEmpty()) {
    type = MRApps.taskType(symbol);
  }
  TBODY<TABLE<Hamlet>> tbody = html.
    table("#tasks").
      thead().
        tr().
          th("Task").
          th("Progress").
          th("State").
          th("Start Time").
          th("Finish Time").
          th("Elapsed Time")._()._().
      tbody();
  StringBuilder tasksTableData = new StringBuilder("[\n");
  for (Task task : app.getJob().getTasks().values()) {
    if (type != null && task.getType() != type) {
      continue;
    }
    TaskInfo info = new TaskInfo(task);
    String tid = info.getId();
    String pct = percent(info.getProgress() / 100);
    tasksTableData.append("[\"<a href='").append(url("task", tid))
      .append("'>").append(tid).append("</a>\",\"")
      //Progress bar
      .append("<br title='").append(pct)
      .append("'> <div class='").append(C_PROGRESSBAR).append("' title='")
      .append(join(pct, '%')).append("'> ").append("<div class='")
      .append(C_PROGRESSBAR_VALUE).append("' style='")
      .append(join("width:", pct, '%')).append("'> </div> </div>\",\"")
      .append(info.getState()).append("\",\"")
      .append(info.getStartTime()).append("\",\"")
      .append(info.getFinishTime()).append("\",\"")
      .append(info.getElapsedTime()).append("\"],\n");
  }
  //Remove the last comma and close off the array of arrays
  if (tasksTableData.charAt(tasksTableData.length() - 2) == ',') {
    tasksTableData.delete(tasksTableData.length() - 2, tasksTableData.length() - 1);
  }
  tasksTableData.append("]");
  html.script().$type("text/javascript").
    _("var tasksTableData=" + tasksTableData)._();
  tbody._()._();
}
@Override
public void updateAttempt(TaskAttemptStatus status, long timestamp) {
  TaskAttemptId attemptID = status.id;
  TaskId taskID = attemptID.getTaskId();
  JobId jobID = taskID.getJobId();
  Job job = context.getJob(jobID);

  if (job == null) {
    return;
  }

  Task task = job.getTask(taskID);

  if (task == null) {
    return;
  }

  Long boxedStart = startTimes.get(attemptID);
  long start = boxedStart == null ? Long.MIN_VALUE : boxedStart;

  TaskAttempt taskAttempt = task.getAttempt(attemptID);

  if (taskAttempt.getState() == TaskAttemptState.SUCCEEDED) {
    boolean isNew = false; // is this a new success?

    synchronized (doneTasks) {
      if (!doneTasks.contains(task)) {
        doneTasks.add(task);
        isNew = true;
      }
    }

    // It's a new completion
    // Note that if a task completes twice [because of a previous speculation
    // and a race, or a success followed by loss of the machine with the
    // local data] we only count the first one.
    if (isNew) {
      long finish = timestamp;
      if (start > 1L && finish > 1L && start <= finish) {
        long duration = finish - start;

        DataStatistics statistics = dataStatisticsForTask(taskID);
        if (statistics != null) {
          statistics.add(duration);
        }
      }
    }
  }
}
public void verifyHsTaskCountersXML(NodeList nodes, Task task) {
  for (int i = 0; i < nodes.getLength(); i++) {
    Element element = (Element) nodes.item(i);
    WebServicesTestUtils.checkStringMatch("id", MRApps.toString(task.getID()),
        WebServicesTestUtils.getXmlString(element, "id"));
    // just do simple verification that the fields are present - not that the
    // data in the fields is correct
    NodeList groups = element.getElementsByTagName("taskCounterGroup");
    for (int j = 0; j < groups.getLength(); j++) {
      Element counters = (Element) groups.item(j);
      assertNotNull("should have counters in the web service info", counters);
      String name = WebServicesTestUtils.getXmlString(counters, "counterGroupName");
      assertTrue("name not set", (name != null && !name.isEmpty()));
      NodeList counterArr = counters.getElementsByTagName("counter");
      for (int z = 0; z < counterArr.getLength(); z++) {
        Element counter = (Element) counterArr.item(z);
        String counterName = WebServicesTestUtils.getXmlString(counter, "name");
        assertTrue("counter name not set",
            (counterName != null && !counterName.isEmpty()));
        long value = WebServicesTestUtils.getXmlLong(counter, "value");
        assertTrue("value not >= 0", value >= 0);
      }
    }
  }
}
@Test
public void testJobError() throws Exception {
  MRApp app = new MRApp(1, 0, false, this.getClass().getName(), true);
  Job job = app.submit(new Configuration());
  app.waitForState(job, JobState.RUNNING);
  Assert.assertEquals("Num tasks not correct", 1, job.getTasks().size());
  Iterator<Task> it = job.getTasks().values().iterator();
  Task task = it.next();
  app.waitForState(task, TaskState.RUNNING);

  // send an invalid event on task at current state
  app.getContext().getEventHandler().handle(
      new TaskEvent(task.getID(), TaskEventType.T_SCHEDULE));

  // this must lead to job error
  app.waitForState(job, JobState.ERROR);
}
public void verifyHsTask(JSONArray arr, Job job, String type) throws JSONException {
  for (Task task : job.getTasks().values()) {
    TaskId id = task.getID();
    String tid = MRApps.toString(id);
    boolean found = false;
    if (type != null && task.getType() == MRApps.taskType(type)) {
      for (int i = 0; i < arr.length(); i++) {
        JSONObject info = arr.getJSONObject(i);
        if (tid.matches(info.getString("id"))) {
          found = true;
          verifyHsSingleTask(info, task);
        }
      }
      assertTrue("task with id: " + tid + " not in web service output", found);
    }
  }
}
protected DataStatistics dataStatisticsForTask(TaskId taskID) {
  JobId jobID = taskID.getJobId();
  Job job = context.getJob(jobID);

  if (job == null) {
    return null;
  }

  Task task = job.getTask(taskID);

  if (task == null) {
    return null;
  }

  return task.getType() == TaskType.MAP
      ? mapperStatistics.get(job)
      : task.getType() == TaskType.REDUCE
          ? reducerStatistics.get(job)
          : null;
}
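The two estimator methods above treat DataStatistics as a per-job, per-task-type accumulator of observed task durations: updateAttempt feeds each newly completed attempt's duration into it via statistics.add(duration), and dataStatisticsForTask picks the map or reduce accumulator for a job. As a rough, standalone illustration of the behavior that code relies on (this is NOT Hadoop's DataStatistics class; the class and method names below are illustrative assumptions), a running mean/variance accumulator could look like this:

// Illustrative sketch only: a thread-safe running mean/variance accumulator,
// assumed to approximate what the speculator's per-job statistics need.
public class RunningTaskStats {
  private long count = 0;
  private double sum = 0.0;
  private double sumSquares = 0.0;

  // record one completed attempt's duration in milliseconds
  public synchronized void add(double durationMillis) {
    count++;
    sum += durationMillis;
    sumSquares += durationMillis * durationMillis;
  }

  public synchronized double mean() {
    return count == 0 ? 0.0 : sum / count;
  }

  public synchronized double variance() {
    if (count == 0) {
      return 0.0;
    }
    double m = mean();
    return sumSquares / count - m * m;
  }
}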
public void verifyHsTaskXML(NodeList nodes, Job job) {
  assertEquals("incorrect number of elements", 2, nodes.getLength());

  for (Task task : job.getTasks().values()) {
    TaskId id = task.getID();
    String tid = MRApps.toString(id);
    boolean found = false;
    for (int i = 0; i < nodes.getLength(); i++) {
      Element element = (Element) nodes.item(i);
      if (tid.matches(WebServicesTestUtils.getXmlString(element, "id"))) {
        found = true;
        verifyHsSingleTaskXML(element, task);
      }
    }
    assertTrue("task with id: " + tid + " not in web service output", found);
  }
}
private long storedPerAttemptValue(Map<TaskAttempt, AtomicLong> data,
    TaskAttemptId attemptID) {
  TaskId taskID = attemptID.getTaskId();
  JobId jobID = taskID.getJobId();
  Job job = context.getJob(jobID);

  Task task = job.getTask(taskID);

  if (task == null) {
    return -1L;
  }

  TaskAttempt taskAttempt = task.getAttempt(attemptID);

  if (taskAttempt == null) {
    return -1L;
  }

  AtomicLong estimate = data.get(taskAttempt);

  return estimate == null ? -1L : estimate.get();
}
public void verifyHsJobTaskCounters(JSONObject info, Task task) throws JSONException {
  assertEquals("incorrect number of elements", 2, info.length());

  WebServicesTestUtils.checkStringMatch("id", MRApps.toString(task.getID()),
      info.getString("id"));
  // just do simple verification that the fields are present - not that the
  // data in the fields is correct
  JSONArray counterGroups = info.getJSONArray("taskCounterGroup");
  for (int i = 0; i < counterGroups.length(); i++) {
    JSONObject counterGroup = counterGroups.getJSONObject(i);
    String name = counterGroup.getString("counterGroupName");
    assertTrue("name not set", (name != null && !name.isEmpty()));
    JSONArray counters = counterGroup.getJSONArray("counter");
    for (int j = 0; j < counters.length(); j++) {
      JSONObject counter = counters.getJSONObject(j);
      String counterName = counter.getString("name");
      assertTrue("name not set", (counterName != null && !counterName.isEmpty()));
      long value = counter.getLong("value");
      assertTrue("value not >= 0", value >= 0);
    }
  }
}
//@Test
public void testCompletedMapsForReduceSlowstart() throws Exception {
  MRApp app = new MRApp(2, 1, false, this.getClass().getName(), true);
  Configuration conf = new Configuration();
  // reduces will start after half of the maps have completed
  conf.setFloat(MRJobConfig.COMPLETED_MAPS_FOR_REDUCE_SLOWSTART, 0.5f);
  // uberization forces full slowstart (1.0), so disable that
  conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
  Job job = app.submit(conf);
  app.waitForState(job, JobState.RUNNING);
  // all maps would be running
  Assert.assertEquals("Num tasks not correct", 3, job.getTasks().size());
  Iterator<Task> it = job.getTasks().values().iterator();
  Task mapTask1 = it.next();
  Task mapTask2 = it.next();
  Task reduceTask = it.next();

  // all maps must be running
  app.waitForState(mapTask1, TaskState.RUNNING);
  app.waitForState(mapTask2, TaskState.RUNNING);

  TaskAttempt task1Attempt = mapTask1.getAttempts().values().iterator().next();
  TaskAttempt task2Attempt = mapTask2.getAttempts().values().iterator().next();

  // before sending the TA_DONE event, make sure the attempts have reached the
  // RUNNING state
  app.waitForState(task1Attempt, TaskAttemptState.RUNNING);
  app.waitForState(task2Attempt, TaskAttemptState.RUNNING);

  // reduces must be in NEW state
  Assert.assertEquals("Reduce Task state not correct", TaskState.NEW,
      reduceTask.getReport().getTaskState());

  // send the done signal to the 1st map task
  app.getContext().getEventHandler().handle(
      new TaskAttemptEvent(
          mapTask1.getAttempts().values().iterator().next().getID(),
          TaskAttemptEventType.TA_DONE));

  // wait for the first map task to complete
  app.waitForState(mapTask1, TaskState.SUCCEEDED);

  // once the first map completes, the reduces are scheduled,
  // so the reduce must now be running
  app.waitForState(reduceTask, TaskState.RUNNING);

  // send the done signal to the 2nd map and the reduce to complete the job
  app.getContext().getEventHandler().handle(
      new TaskAttemptEvent(
          mapTask2.getAttempts().values().iterator().next().getID(),
          TaskAttemptEventType.TA_DONE));
  app.getContext().getEventHandler().handle(
      new TaskAttemptEvent(
          reduceTask.getAttempts().values().iterator().next().getID(),
          TaskAttemptEventType.TA_DONE));

  app.waitForState(job, JobState.SUCCEEDED);
}
@Test
public void testHistoryParsingForFailedAttempts() throws Exception {
  LOG.info("STARTING testHistoryParsingForFailedAttempts");
  try {
    Configuration conf = new Configuration();
    conf.setClass(
        CommonConfigurationKeysPublic.NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY,
        MyResolver.class, DNSToSwitchMapping.class);
    RackResolver.init(conf);
    MRApp app =
        new MRAppWithHistoryWithFailedAttempt(2, 1, true, this.getClass().getName(), true);
    app.submit(conf);
    Job job = app.getContext().getAllJobs().values().iterator().next();
    JobId jobId = job.getID();
    app.waitForState(job, JobState.SUCCEEDED);

    // make sure all events are flushed
    app.waitForState(Service.STATE.STOPPED);

    String jobhistoryDir = JobHistoryUtils.getHistoryIntermediateDoneDirForUser(conf);
    JobHistory jobHistory = new JobHistory();
    jobHistory.init(conf);

    JobIndexInfo jobIndexInfo = jobHistory.getJobFileInfo(jobId).getJobIndexInfo();
    String jobhistoryFileName = FileNameIndexUtils.getDoneFileName(jobIndexInfo);

    Path historyFilePath = new Path(jobhistoryDir, jobhistoryFileName);
    FSDataInputStream in = null;
    FileContext fc = null;
    try {
      fc = FileContext.getFileContext(conf);
      in = fc.open(fc.makeQualified(historyFilePath));
    } catch (IOException ioe) {
      LOG.info("Can not open history file: " + historyFilePath, ioe);
      throw (new Exception("Can not open History File"));
    }

    JobHistoryParser parser = new JobHistoryParser(in);
    JobInfo jobInfo = parser.parse();
    Exception parseException = parser.getParseException();
    Assert.assertNull("Caught an unexpected exception " + parseException,
        parseException);

    int noOffailedAttempts = 0;
    Map<TaskID, TaskInfo> allTasks = jobInfo.getAllTasks();
    for (Task task : job.getTasks().values()) {
      TaskInfo taskInfo = allTasks.get(TypeConverter.fromYarn(task.getID()));
      for (TaskAttempt taskAttempt : task.getAttempts().values()) {
        TaskAttemptInfo taskAttemptInfo =
            taskInfo.getAllTaskAttempts().get(TypeConverter.fromYarn((taskAttempt.getID())));
        // Verify rack-name for all task attempts
        Assert.assertEquals("rack-name is incorrect",
            taskAttemptInfo.getRackname(), RACK_NAME);
        if (taskAttemptInfo.getTaskStatus().equals("FAILED")) {
          noOffailedAttempts++;
        }
      }
    }
    Assert.assertEquals("Num of failed attempts doesn't match.", 2, noOffailedAttempts);
  } finally {
    LOG.info("FINISHED testHistoryParsingForFailedAttempts");
  }
}
private void checkHistoryParsing(final int numMaps, final int numReduces,
    final int numSuccessfulMaps) throws Exception {
  Configuration conf = new Configuration();
  conf.set(MRJobConfig.USER_NAME, System.getProperty("user.name"));
  long amStartTimeEst = System.currentTimeMillis();
  conf.setClass(
      CommonConfigurationKeysPublic.NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY,
      MyResolver.class, DNSToSwitchMapping.class);
  RackResolver.init(conf);
  MRApp app =
      new MRAppWithHistory(numMaps, numReduces, true, this.getClass().getName(), true);
  app.submit(conf);
  Job job = app.getContext().getAllJobs().values().iterator().next();
  JobId jobId = job.getID();
  LOG.info("JOBID is " + TypeConverter.fromYarn(jobId).toString());
  app.waitForState(job, JobState.SUCCEEDED);

  // make sure all events are flushed
  app.waitForState(Service.STATE.STOPPED);

  String jobhistoryDir = JobHistoryUtils.getHistoryIntermediateDoneDirForUser(conf);

  FileContext fc = null;
  try {
    fc = FileContext.getFileContext(conf);
  } catch (IOException ioe) {
    LOG.info("Can not get FileContext", ioe);
    throw (new Exception("Can not get File Context"));
  }

  if (numMaps == numSuccessfulMaps) {
    String summaryFileName = JobHistoryUtils.getIntermediateSummaryFileName(jobId);
    Path summaryFile = new Path(jobhistoryDir, summaryFileName);
    String jobSummaryString = getJobSummary(fc, summaryFile);
    Assert.assertNotNull(jobSummaryString);
    Assert.assertTrue(jobSummaryString.contains("resourcesPerMap=100"));
    Assert.assertTrue(jobSummaryString.contains("resourcesPerReduce=100"));

    Map<String, String> jobSummaryElements = new HashMap<String, String>();
    StringTokenizer strToken = new StringTokenizer(jobSummaryString, ",");
    while (strToken.hasMoreTokens()) {
      String keypair = strToken.nextToken();
      jobSummaryElements.put(keypair.split("=")[0], keypair.split("=")[1]);
    }

    Assert.assertEquals("JobId does not match", jobId.toString(),
        jobSummaryElements.get("jobId"));
    Assert.assertEquals("JobName does not match", "test",
        jobSummaryElements.get("jobName"));
    Assert.assertTrue("submitTime should not be 0",
        Long.parseLong(jobSummaryElements.get("submitTime")) != 0);
    Assert.assertTrue("launchTime should not be 0",
        Long.parseLong(jobSummaryElements.get("launchTime")) != 0);
    Assert.assertTrue("firstMapTaskLaunchTime should not be 0",
        Long.parseLong(jobSummaryElements.get("firstMapTaskLaunchTime")) != 0);
    Assert.assertTrue("firstReduceTaskLaunchTime should not be 0",
        Long.parseLong(jobSummaryElements.get("firstReduceTaskLaunchTime")) != 0);
    Assert.assertTrue("finishTime should not be 0",
        Long.parseLong(jobSummaryElements.get("finishTime")) != 0);
    Assert.assertEquals("Mismatch in num map slots", numSuccessfulMaps,
        Integer.parseInt(jobSummaryElements.get("numMaps")));
    Assert.assertEquals("Mismatch in num reduce slots", numReduces,
        Integer.parseInt(jobSummaryElements.get("numReduces")));
    Assert.assertEquals("User does not match", System.getProperty("user.name"),
        jobSummaryElements.get("user"));
    Assert.assertEquals("Queue does not match", "default",
        jobSummaryElements.get("queue"));
    Assert.assertEquals("Status does not match", "SUCCEEDED",
        jobSummaryElements.get("status"));
  }

  JobHistory jobHistory = new JobHistory();
  jobHistory.init(conf);
  HistoryFileInfo fileInfo = jobHistory.getJobFileInfo(jobId);
  JobInfo jobInfo;
  long numFinishedMaps;

  synchronized (fileInfo) {
    Path historyFilePath = fileInfo.getHistoryFile();
    FSDataInputStream in = null;
    LOG.info("JobHistoryFile is: " + historyFilePath);
    try {
      in = fc.open(fc.makeQualified(historyFilePath));
    } catch (IOException ioe) {
      LOG.info("Can not open history file: " + historyFilePath, ioe);
      throw (new Exception("Can not open History File"));
    }

    JobHistoryParser parser = new JobHistoryParser(in);
    final EventReader realReader = new EventReader(in);
    EventReader reader = Mockito.mock(EventReader.class);
    if (numMaps == numSuccessfulMaps) {
      reader = realReader;
    } else {
      final AtomicInteger numFinishedEvents = new AtomicInteger(0); // Hack!
      Mockito.when(reader.getNextEvent()).thenAnswer(
          new Answer<HistoryEvent>() {
            public HistoryEvent answer(InvocationOnMock invocation)
                throws IOException {
              HistoryEvent event = realReader.getNextEvent();
              if (event instanceof TaskFinishedEvent) {
                numFinishedEvents.incrementAndGet();
              }
              if (numFinishedEvents.get() <= numSuccessfulMaps) {
                return event;
              } else {
                throw new IOException("test");
              }
            }
          });
    }

    jobInfo = parser.parse(reader);

    numFinishedMaps = computeFinishedMaps(jobInfo, numMaps, numSuccessfulMaps);

    if (numFinishedMaps != numMaps) {
      Exception parseException = parser.getParseException();
      Assert.assertNotNull("Didn't get expected parse exception", parseException);
    }
  }

  Assert.assertEquals("Incorrect username ", System.getProperty("user.name"),
      jobInfo.getUsername());
  Assert.assertEquals("Incorrect jobName ", "test", jobInfo.getJobname());
  Assert.assertEquals("Incorrect queuename ", "default", jobInfo.getJobQueueName());
  Assert.assertEquals("incorrect conf path", "test", jobInfo.getJobConfPath());
  Assert.assertEquals("incorrect finishedMap ", numSuccessfulMaps, numFinishedMaps);
  Assert.assertEquals("incorrect finishedReduces ", numReduces,
      jobInfo.getFinishedReduces());
  Assert.assertEquals("incorrect uberized ", job.isUber(), jobInfo.getUberized());
  Map<TaskID, TaskInfo> allTasks = jobInfo.getAllTasks();
  int totalTasks = allTasks.size();
  Assert.assertEquals("total number of tasks is incorrect ",
      (numMaps + numReduces), totalTasks);

  // Verify aminfo
  Assert.assertEquals(1, jobInfo.getAMInfos().size());
  Assert.assertEquals(MRApp.NM_HOST, jobInfo.getAMInfos().get(0).getNodeManagerHost());
  AMInfo amInfo = jobInfo.getAMInfos().get(0);
  Assert.assertEquals(MRApp.NM_PORT, amInfo.getNodeManagerPort());
  Assert.assertEquals(MRApp.NM_HTTP_PORT, amInfo.getNodeManagerHttpPort());
  Assert.assertEquals(1, amInfo.getAppAttemptId().getAttemptId());
  Assert.assertEquals(amInfo.getAppAttemptId(),
      amInfo.getContainerId().getApplicationAttemptId());
  Assert.assertTrue(amInfo.getStartTime() <= System.currentTimeMillis()
      && amInfo.getStartTime() >= amStartTimeEst);

  ContainerId fakeCid = BuilderUtils.newContainerId(-1, -1, -1, -1);
  // Assert at taskAttempt level
  for (TaskInfo taskInfo : allTasks.values()) {
    int taskAttemptCount = taskInfo.getAllTaskAttempts().size();
    Assert.assertEquals("total number of task attempts ", 1, taskAttemptCount);
    TaskAttemptInfo taInfo = taskInfo.getAllTaskAttempts().values().iterator().next();
    Assert.assertNotNull(taInfo.getContainerId());
    // Verify the wrong ctor is not being used. Remove after mrv1 is removed.
    Assert.assertFalse(taInfo.getContainerId().equals(fakeCid));
  }

  // Deep compare Job and JobInfo
  for (Task task : job.getTasks().values()) {
    TaskInfo taskInfo = allTasks.get(TypeConverter.fromYarn(task.getID()));
    Assert.assertNotNull("TaskInfo not found", taskInfo);
    for (TaskAttempt taskAttempt : task.getAttempts().values()) {
      TaskAttemptInfo taskAttemptInfo =
          taskInfo.getAllTaskAttempts().get(TypeConverter.fromYarn((taskAttempt.getID())));
      Assert.assertNotNull("TaskAttemptInfo not found", taskAttemptInfo);
      Assert.assertEquals("Incorrect shuffle port for task attempt",
          taskAttempt.getShufflePort(), taskAttemptInfo.getShufflePort());
      if (numMaps == numSuccessfulMaps) {
        Assert.assertEquals(MRApp.NM_HOST, taskAttemptInfo.getHostname());
        Assert.assertEquals(MRApp.NM_PORT, taskAttemptInfo.getPort());

        // Verify rack-name
        Assert.assertEquals("rack-name is incorrect",
            taskAttemptInfo.getRackname(), RACK_NAME);
      }
    }
  }
}
/**
 * Go through a job and update the member variables with counts for
 * information to output in the page.
 *
 * @param job the job to get counts for.
 */
private void countTasksAndAttempts(Job job) {
  final Map<TaskId, Task> tasks = job.getTasks();
  if (tasks == null) {
    return;
  }
  for (Task task : tasks.values()) {
    switch (task.getType()) {
      case MAP:
        // Task counts
        switch (task.getState()) {
          case RUNNING:
            ++this.mapsRunning;
            break;
          case SCHEDULED:
            ++this.mapsPending;
            break;
          default:
            break;
        }
        break;
      case REDUCE:
        // Task counts
        switch (task.getState()) {
          case RUNNING:
            ++this.reducesRunning;
            break;
          case SCHEDULED:
            ++this.reducesPending;
            break;
          default:
            break;
        }
        break;
      default:
        throw new IllegalStateException(
            "Task type is neither map nor reduce: " + task.getType());
    }
    // Attempts counts
    Map<TaskAttemptId, TaskAttempt> attempts = task.getAttempts();
    int newAttempts, running, successful, failed, killed;
    for (TaskAttempt attempt : attempts.values()) {
      newAttempts = 0;
      running = 0;
      successful = 0;
      failed = 0;
      killed = 0;
      if (TaskAttemptStateUI.NEW.correspondsTo(attempt.getState())) {
        ++newAttempts;
      } else if (TaskAttemptStateUI.RUNNING.correspondsTo(attempt.getState())) {
        ++running;
      } else if (TaskAttemptStateUI.SUCCESSFUL.correspondsTo(attempt.getState())) {
        ++successful;
      } else if (TaskAttemptStateUI.FAILED.correspondsTo(attempt.getState())) {
        ++failed;
      } else if (TaskAttemptStateUI.KILLED.correspondsTo(attempt.getState())) {
        ++killed;
      }
      switch (task.getType()) {
        case MAP:
          this.newMapAttempts += newAttempts;
          this.runningMapAttempts += running;
          this.successfulMapAttempts += successful;
          this.failedMapAttempts += failed;
          this.killedMapAttempts += killed;
          break;
        case REDUCE:
          this.newReduceAttempts += newAttempts;
          this.runningReduceAttempts += running;
          this.successfulReduceAttempts += successful;
          this.failedReduceAttempts += failed;
          this.killedReduceAttempts += killed;
          break;
        default:
          throw new IllegalStateException(
              "Task type neither map nor reduce: " + task.getType());
      }
    }
  }
}
@Override
public void updateAttempt(TaskAttemptStatus status, long timestamp) {
  super.updateAttempt(status, timestamp);
  TaskAttemptId attemptID = status.id;
  TaskId taskID = attemptID.getTaskId();
  JobId jobID = taskID.getJobId();
  Job job = context.getJob(jobID);

  if (job == null) {
    return;
  }

  Task task = job.getTask(taskID);

  if (task == null) {
    return;
  }

  TaskAttempt taskAttempt = task.getAttempt(attemptID);

  if (taskAttempt == null) {
    return;
  }

  Long boxedStart = startTimes.get(attemptID);
  long start = boxedStart == null ? Long.MIN_VALUE : boxedStart;

  // We need to do two things.
  //  1: If this is a completion, we accumulate statistics in the superclass
  //  2: If this is not a completion, we learn more about it.

  // This is not a completion, but we're cooking.
  //
  if (taskAttempt.getState() == TaskAttemptState.RUNNING) {
    // See if this task is already in the registry
    AtomicLong estimateContainer = attemptRuntimeEstimates.get(taskAttempt);
    AtomicLong estimateVarianceContainer =
        attemptRuntimeEstimateVariances.get(taskAttempt);

    if (estimateContainer == null) {
      if (attemptRuntimeEstimates.get(taskAttempt) == null) {
        attemptRuntimeEstimates.put(taskAttempt, new AtomicLong());
        estimateContainer = attemptRuntimeEstimates.get(taskAttempt);
      }
    }

    if (estimateVarianceContainer == null) {
      attemptRuntimeEstimateVariances.putIfAbsent(taskAttempt, new AtomicLong());
      estimateVarianceContainer = attemptRuntimeEstimateVariances.get(taskAttempt);
    }

    long estimate = -1;
    long varianceEstimate = -1;

    // This code assumes that we'll never consider starting a third
    // speculative task attempt if two are already running for this task
    if (start > 0 && timestamp > start) {
      estimate = (long) ((timestamp - start) / Math.max(0.0001, status.progress));
      varianceEstimate = (long) (estimate * status.progress / 10);
    }
    if (estimateContainer != null) {
      estimateContainer.set(estimate);
    }
    if (estimateVarianceContainer != null) {
      estimateVarianceContainer.set(varianceEstimate);
    }
  }
}
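The core of the estimate above is a linear extrapolation: if a running attempt has been executing for (timestamp - start) milliseconds and reports a fractional progress, its projected total runtime is elapsed / progress, with the divisor clamped away from zero. The following minimal, standalone sketch illustrates just that arithmetic; the class and method names are illustrative assumptions and are not part of Hadoop's API.

// Illustrative sketch of the runtime extrapolation used above, assuming
// progress is reported as a fraction in [0, 1]. Not part of Hadoop.
public final class RuntimeExtrapolation {
  private RuntimeExtrapolation() {
  }

  /** Projected total runtime in ms: elapsed time scaled by 1/progress. */
  public static long estimateTotalRuntime(long startMillis, long nowMillis,
      float progress) {
    if (startMillis <= 0 || nowMillis <= startMillis) {
      return -1L; // mirrors the estimator's "no estimate" sentinel
    }
    long elapsed = nowMillis - startMillis;
    return (long) (elapsed / Math.max(0.0001, progress));
  }

  public static void main(String[] args) {
    // An attempt that has run for 30 s and is 25% done projects to ~120 s total.
    System.out.println(estimateTotalRuntime(1_000L, 31_000L, 0.25f));
  }
}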
/*
 * (non-Javadoc)
 * @see org.apache.hadoop.yarn.webapp.view.HtmlBlock#render(org.apache.hadoop.yarn.webapp.view.HtmlBlock.Block)
 */
@Override
protected void render(Block html) {
  if (app.getJob() == null) {
    html.h2($(TITLE));
    return;
  }
  TaskType type = null;
  String symbol = $(TASK_TYPE);
  if (!symbol.isEmpty()) {
    type = MRApps.taskType(symbol);
  }

  THEAD<TABLE<Hamlet>> thead;
  if (type != null) {
    thead = html.table("#" + app.getJob().getID() + type).$class("dt-tasks").thead();
  } else {
    thead = html.table("#tasks").thead();
  }

  //Create the spanning row
  int attemptColSpan = type == TaskType.REDUCE ? 8 : 3;
  thead.tr().
    th().$colspan(5).$class("ui-state-default")._("Task")._().
    th().$colspan(attemptColSpan).$class("ui-state-default").
      _("Successful Attempt")._().
  _();

  TR<THEAD<TABLE<Hamlet>>> theadRow = thead.
    tr().
      th("Name").
      th("State").
      th("Start Time").
      th("Finish Time").
      th("Elapsed Time").
      th("Start Time"); //Attempt

  if (type == TaskType.REDUCE) {
    theadRow.th("Shuffle Finish Time"); //Attempt
    theadRow.th("Merge Finish Time"); //Attempt
  }

  theadRow.th("Finish Time"); //Attempt

  if (type == TaskType.REDUCE) {
    theadRow.th("Elapsed Time Shuffle"); //Attempt
    theadRow.th("Elapsed Time Merge"); //Attempt
    theadRow.th("Elapsed Time Reduce"); //Attempt
  }
  theadRow.th("Elapsed Time"); //Attempt
  TBODY<TABLE<Hamlet>> tbody = theadRow._()._().tbody();

  // Write all the data into a JavaScript array of arrays for JQuery
  // DataTables to display
  StringBuilder tasksTableData = new StringBuilder("[\n");
  for (Task task : app.getJob().getTasks().values()) {
    if (type != null && task.getType() != type) {
      continue;
    }
    TaskInfo info = new TaskInfo(task);
    String tid = info.getId();

    long startTime = info.getStartTime();
    long finishTime = info.getFinishTime();
    long elapsed = info.getElapsedTime();

    long attemptStartTime = -1;
    long shuffleFinishTime = -1;
    long sortFinishTime = -1;
    long attemptFinishTime = -1;
    long elapsedShuffleTime = -1;
    long elapsedSortTime = -1;
    long elapsedReduceTime = -1;
    long attemptElapsed = -1;

    TaskAttempt successful = info.getSuccessful();
    if (successful != null) {
      TaskAttemptInfo ta;
      if (type == TaskType.REDUCE) {
        ReduceTaskAttemptInfo rta = new ReduceTaskAttemptInfo(successful, type);
        shuffleFinishTime = rta.getShuffleFinishTime();
        sortFinishTime = rta.getMergeFinishTime();
        elapsedShuffleTime = rta.getElapsedShuffleTime();
        elapsedSortTime = rta.getElapsedMergeTime();
        elapsedReduceTime = rta.getElapsedReduceTime();
        ta = rta;
      } else {
        ta = new TaskAttemptInfo(successful, type, false);
      }
      attemptStartTime = ta.getStartTime();
      attemptFinishTime = ta.getFinishTime();
      attemptElapsed = ta.getElapsedTime();
    }

    tasksTableData.append("[\"")
      .append("<a href='" + url("task", tid)).append("'>")
      .append(tid).append("</a>\",\"")
      .append(info.getState()).append("\",\"")
      .append(startTime).append("\",\"")
      .append(finishTime).append("\",\"")
      .append(elapsed).append("\",\"")
      .append(attemptStartTime).append("\",\"");
    if (type == TaskType.REDUCE) {
      tasksTableData.append(shuffleFinishTime).append("\",\"")
        .append(sortFinishTime).append("\",\"");
    }
    tasksTableData.append(attemptFinishTime).append("\",\"");
    if (type == TaskType.REDUCE) {
      tasksTableData.append(elapsedShuffleTime).append("\",\"")
        .append(elapsedSortTime).append("\",\"")
        .append(elapsedReduceTime).append("\",\"");
    }
    tasksTableData.append(attemptElapsed).append("\"],\n");
  }

  //Remove the last comma and close off the array of arrays
  if (tasksTableData.charAt(tasksTableData.length() - 2) == ',') {
    tasksTableData.delete(tasksTableData.length() - 2, tasksTableData.length() - 1);
  }
  tasksTableData.append("]");
  html.script().$type("text/javascript").
    _("var tasksTableData=" + tasksTableData)._();

  TR<TFOOT<TABLE<Hamlet>>> footRow = tbody._().tfoot().tr();
  footRow.th().input("search_init").$type(InputType.text).$name("task")
      .$value("ID")._()._().th().input("search_init").$type(InputType.text)
      .$name("state").$value("State")._()._().th().input("search_init")
      .$type(InputType.text).$name("start_time").$value("Start Time")._()._()
      .th().input("search_init").$type(InputType.text).$name("finish_time")
      .$value("Finish Time")._()._().th().input("search_init")
      .$type(InputType.text).$name("elapsed_time").$value("Elapsed Time")._()
      ._().th().input("search_init").$type(InputType.text)
      .$name("attempt_start_time").$value("Start Time")._()._();

  if (type == TaskType.REDUCE) {
    footRow.th().input("search_init").$type(InputType.text)
        .$name("shuffle_time").$value("Shuffle Time")._()._();
    footRow.th().input("search_init").$type(InputType.text)
        .$name("merge_time").$value("Merge Time")._()._();
  }

  footRow.th().input("search_init").$type(InputType.text)
      .$name("attempt_finish").$value("Finish Time")._()._();

  if (type == TaskType.REDUCE) {
    footRow.th().input("search_init").$type(InputType.text)
        .$name("elapsed_shuffle_time").$value("Elapsed Shuffle Time")._()._();
    footRow.th().input("search_init").$type(InputType.text)
        .$name("elapsed_merge_time").$value("Elapsed Merge Time")._()._();
    footRow.th().input("search_init").$type(InputType.text)
        .$name("elapsed_reduce_time").$value("Elapsed Reduce Time")._()._();
  }
  footRow.th().input("search_init").$type(InputType.text)
      .$name("attempt_elapsed").$value("Elapsed Time")._()._();

  footRow._()._()._();
}