예제 #1
0
  /**
   * Fetches the counters of every task of every known job without an explicit Accept header and
   * checks that the response is JSON wrapping exactly one "jobTaskCounters" object.
   */
  @Test
  public void testTaskIdCountersDefault() throws JSONException, Exception {
    WebResource r = resource();
    Map<JobId, Job> jobsMap = appContext.getAllJobs();
    for (Map.Entry<JobId, Job> jobEntry : jobsMap.entrySet()) {
      String jobId = MRApps.toString(jobEntry.getKey());
      for (Task task : jobEntry.getValue().getTasks().values()) {
        String tid = MRApps.toString(task.getID());

        // ws/v1/history/mapreduce/jobs/{jobId}/tasks/{tid}/counters
        WebResource countersResource =
            r.path("ws")
                .path("v1")
                .path("history")
                .path("mapreduce")
                .path("jobs")
                .path(jobId)
                .path("tasks")
                .path(tid)
                .path("counters");
        ClientResponse response = countersResource.get(ClientResponse.class);

        assertEquals(MediaType.APPLICATION_JSON_TYPE, response.getType());
        JSONObject json = response.getEntity(JSONObject.class);
        assertEquals("incorrect number of elements", 1, json.length());
        verifyHsJobTaskCounters(json.getJSONObject("jobTaskCounters"), task);
      }
    }
  }
예제 #2
0
  /**
   * Compares the task fields reported by the web service against the given task and its report.
   *
   * @param task the task the reported values are checked against
   * @param id reported task id
   * @param state reported task state
   * @param type reported task type
   * @param successfulAttempt reported successful attempt; only checked for being non-null
   * @param startTime reported start time
   * @param finishTime reported finish time
   * @param elapsedTime reported elapsed time; must equal {@code finishTime - startTime}
   * @param progress reported progress; must equal the report's progress times 100
   */
  public void verifyTaskGeneric(
      Task task,
      String id,
      String state,
      String type,
      String successfulAttempt,
      long startTime,
      long finishTime,
      long elapsedTime,
      float progress) {

    String expectedId = MRApps.toString(task.getID());
    TaskReport taskReport = task.getReport();

    WebServicesTestUtils.checkStringMatch("id", expectedId, id);
    WebServicesTestUtils.checkStringMatch("type", task.getType().toString(), type);
    WebServicesTestUtils.checkStringMatch("state", taskReport.getTaskState().toString(), state);

    // The successful attempt cannot be derived here without duplicating logic; presence is enough.
    assertNotNull("successfulAttempt null", successfulAttempt);

    assertEquals("startTime wrong", taskReport.getStartTime(), startTime);
    assertEquals("finishTime wrong", taskReport.getFinishTime(), finishTime);
    long expectedElapsed = finishTime - startTime;
    assertEquals("elapsedTime wrong", expectedElapsed, elapsedTime);
    assertEquals("progress wrong", taskReport.getProgress() * 100, progress, 1e-3f);
  }
 /**
  * Every attempt of the single map task times out, so the task, all of its attempts and the job
  * itself must end up FAILED.
  */
 @Test
 public void testTimedOutTask() throws Exception {
   MRApp app = new TimeOutTaskMRApp(1, 0);
   Configuration conf = new Configuration();
   int maxAttempts = 2;
   conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, maxAttempts);
   // Uberization is switched off: it requires the entire job to be reattempted and therefore
   // overrides the max for subtask attempts to 1.
   conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);

   Job job = app.submit(conf);
   app.waitForState(job, JobState.FAILED);

   Map<TaskId, Task> allTasks = job.getTasks();
   Assert.assertEquals("Num tasks is not correct", 1, allTasks.size());

   Task onlyTask = allTasks.values().iterator().next();
   Assert.assertEquals(
       "Task state not correct", TaskState.FAILED, onlyTask.getReport().getTaskState());

   Map<TaskAttemptId, TaskAttempt> allAttempts =
       allTasks.values().iterator().next().getAttempts();
   Assert.assertEquals("Num attempts is not correct", maxAttempts, allAttempts.size());

   for (TaskAttempt timedOut : allAttempts.values()) {
     Assert.assertEquals(
         "Attempt state not correct",
         TaskAttemptState.FAILED,
         timedOut.getReport().getTaskAttemptState());
   }
 }
 /**
  * The first attempt of the single map task fails and the second one passes, so both the task
  * and the job must succeed.
  */
 @Test
 public void testFailTask() throws Exception {
   MRApp app = new MockFirstFailingAttemptMRApp(1, 0);
   Configuration conf = new Configuration();
   // Two task attempts are needed, but uberization overrides the max to 1, so turn it off.
   conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);

   Job job = app.submit(conf);
   app.waitForState(job, JobState.SUCCEEDED);

   Map<TaskId, Task> allTasks = job.getTasks();
   Assert.assertEquals("Num tasks is not correct", 1, allTasks.size());

   Task onlyTask = allTasks.values().iterator().next();
   Assert.assertEquals(
       "Task state not correct", TaskState.SUCCEEDED, onlyTask.getReport().getTaskState());

   Map<TaskAttemptId, TaskAttempt> allAttempts =
       allTasks.values().iterator().next().getAttempts();
   Assert.assertEquals("Num attempts is not correct", 2, allAttempts.size());

   // In iteration order: the first attempt must have failed, the second must have succeeded.
   Iterator<TaskAttempt> attemptIterator = allAttempts.values().iterator();
   TaskAttempt firstAttempt = attemptIterator.next();
   Assert.assertEquals(
       "Attempt state not correct",
       TaskAttemptState.FAILED,
       firstAttempt.getReport().getTaskAttemptState());
   TaskAttempt secondAttempt = attemptIterator.next();
   Assert.assertEquals(
       "Attempt state not correct",
       TaskAttemptState.SUCCEEDED,
       secondAttempt.getReport().getTaskAttemptState());
 }
예제 #5
0
  /**
   * Drives the single task attempt through COMMIT_PENDING and then signals completion, expecting
   * the job to succeed.
   */
  @Test
  public void testCommitPending() throws Exception {
    MRApp app = new MRApp(1, 0, false, this.getClass().getName(), true);
    Job job = app.submit(new Configuration());
    app.waitForState(job, JobState.RUNNING);
    Assert.assertEquals("Num tasks not correct", 1, job.getTasks().size());

    Task task = job.getTasks().values().iterator().next();
    app.waitForState(task, TaskState.RUNNING);
    TaskAttempt attempt = task.getAttempts().values().iterator().next();
    app.waitForState(attempt, TaskAttemptState.RUNNING);

    // Signal commit-pending to the attempt and wait until it gets there.
    TaskAttemptEvent commitPending =
        new TaskAttemptEvent(attempt.getID(), TaskAttemptEventType.TA_COMMIT_PENDING);
    app.getContext().getEventHandler().handle(commitPending);
    app.waitForState(attempt, TaskAttemptState.COMMIT_PENDING);

    // Signal done to whichever attempt currently comes first in the task's attempt map.
    TaskAttemptEvent done =
        new TaskAttemptEvent(
            task.getAttempts().values().iterator().next().getID(), TaskAttemptEventType.TA_DONE);
    app.getContext().getEventHandler().handle(done);

    app.waitForState(job, JobState.SUCCEEDED);
  }
예제 #6
0
  /**
   * Fetches the counters of every task of every known job as XML and verifies each
   * "jobTaskCounters" element of the parsed document.
   */
  @Test
  public void testJobTaskCountersXML() throws Exception {
    WebResource r = resource();
    Map<JobId, Job> jobsMap = appContext.getAllJobs();
    for (Map.Entry<JobId, Job> jobEntry : jobsMap.entrySet()) {
      String jobId = MRApps.toString(jobEntry.getKey());
      for (Task task : jobEntry.getValue().getTasks().values()) {
        String tid = MRApps.toString(task.getID());

        // ws/v1/history/mapreduce/jobs/{jobId}/tasks/{tid}/counters
        WebResource countersResource =
            r.path("ws")
                .path("v1")
                .path("history")
                .path("mapreduce")
                .path("jobs")
                .path(jobId)
                .path("tasks")
                .path(tid)
                .path("counters");
        ClientResponse response =
            countersResource.accept(MediaType.APPLICATION_XML).get(ClientResponse.class);
        assertEquals(MediaType.APPLICATION_XML_TYPE, response.getType());

        // Parse the response body into a DOM and hand the counters elements to the verifier.
        String xml = response.getEntity(String.class);
        DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        InputSource source = new InputSource(new StringReader(xml));
        Document dom = builder.parse(source);
        verifyHsTaskCountersXML(dom.getElementsByTagName("jobTaskCounters"), task);
      }
    }
  }
예제 #7
0
  /**
   * Renders the tasks table of the current job.
   *
   * <p>When the app has no job, only an h2 with {@code $(TITLE)} is written. Otherwise a table
   * with id "tasks" and a header row is opened, one data row per task is collected into a
   * JavaScript array literal, and that literal is emitted in a script element as the variable
   * {@code tasksTableData}. If {@code $(TASK_TYPE)} is non-empty, only tasks of that type are
   * included.
   *
   * @param html the block the markup is written to
   */
  @Override protected void render(Block html) {
    // No job selected: show only the title.
    if (app.getJob() == null) {
      html.
        h2($(TITLE));
      return;
    }
    // Optional task-type filter; null means "all types".
    TaskType type = null;
    String symbol = $(TASK_TYPE);
    if (!symbol.isEmpty()) {
      type = MRApps.taskType(symbol);
    }
    // Open the table and write its header; the tbody is closed at the very end of the method.
    TBODY<TABLE<Hamlet>> tbody = html.
      table("#tasks").
        thead().
          tr().
            th("Task").
            th("Progress").
            th("State").
            th("Start Time").
            th("Finish Time").
            th("Elapsed Time")._()._().
        tbody();
    // Rows are accumulated as a JavaScript array of arrays, one inner array per task.
    StringBuilder tasksTableData = new StringBuilder("[\n");

    for (Task task : app.getJob().getTasks().values()) {
      if (type != null && task.getType() != type) {
        continue;
      }
      TaskInfo info = new TaskInfo(task);
      String tid = info.getId();
      // NOTE(review): the division by 100 presumably converts a 0-100 value into the fraction
      // that percent() expects - confirm against TaskInfo.getProgress() and percent().
      String pct = percent(info.getProgress() / 100);
      // First column: link to the task page, labelled with the task id.
      tasksTableData.append("[\"<a href='").append(url("task", tid))
      .append("'>").append(tid).append("</a>\",\"")
      //Progress bar
      .append("<br title='").append(pct)
      .append("'> <div class='").append(C_PROGRESSBAR).append("' title='")
      .append(join(pct, '%')).append("'> ").append("<div class='")
      .append(C_PROGRESSBAR_VALUE).append("' style='")
      .append(join("width:", pct, '%')).append("'> </div> </div>\",\"")

      // Remaining columns: state, start time, finish time, elapsed time.
      .append(info.getState()).append("\",\"")
      .append(info.getStartTime()).append("\",\"")
      .append(info.getFinishTime()).append("\",\"")
      .append(info.getElapsedTime()).append("\"],\n");
    }
    //Remove the last comma and close off the array of arrays
    // (each row ends in ",\n", so the comma sits at length - 2; with no rows that position
    // holds the opening '[' and nothing is deleted).
    if(tasksTableData.charAt(tasksTableData.length() - 2) == ',') {
      tasksTableData.delete(tasksTableData.length()-2, tasksTableData.length()-1);
    }
    tasksTableData.append("]");
    // Emit the collected rows as a JavaScript variable.
    html.script().$type("text/javascript").
    _("var tasksTableData=" + tasksTableData)._();

    // Close the tbody and the table opened above.
    tbody._()._();
  }
예제 #8
0
  /**
   * Folds the runtime of a newly succeeded task into the statistics kept for its job and type.
   *
   * <p>The update is ignored when the job, the task or the attempt named by {@code status} is
   * unknown, when the attempt has not succeeded, or when the task has already been recorded as
   * done. A duration is only recorded when both the stored start time and {@code timestamp} are
   * greater than 1 and the start does not lie after the finish.
   *
   * @param status status update whose {@code id} identifies the attempt
   * @param timestamp time of the update, used as the finish time of a succeeded attempt
   */
  @Override
  public void updateAttempt(TaskAttemptStatus status, long timestamp) {

    TaskAttemptId attemptID = status.id;
    TaskId taskID = attemptID.getTaskId();
    JobId jobID = taskID.getJobId();
    Job job = context.getJob(jobID);

    if (job == null) {
      return;
    }

    Task task = job.getTask(taskID);

    if (task == null) {
      return;
    }

    // No recorded start time is mapped to Long.MIN_VALUE, which fails the sanity check below.
    Long boxedStart = startTimes.get(attemptID);
    long start = boxedStart == null ? Long.MIN_VALUE : boxedStart;

    TaskAttempt taskAttempt = task.getAttempt(attemptID);

    // An update for an attempt the task does not know is ignored instead of raising a
    // NullPointerException, in line with the job and task checks above.
    if (taskAttempt == null) {
      return;
    }

    if (taskAttempt.getState() == TaskAttemptState.SUCCEEDED) {
      boolean isNew = false;
      // is this a new success?
      synchronized (doneTasks) {
        if (!doneTasks.contains(task)) {
          doneTasks.add(task);
          isNew = true;
        }
      }

      // It's a new completion
      // Note that if a task completes twice [because of a previous speculation
      //  and a race, or a success followed by loss of the machine with the
      //  local data] we only count the first one.
      if (isNew) {
        long finish = timestamp;
        if (start > 1L && finish > 1L && start <= finish) {
          long duration = finish - start;

          DataStatistics statistics = dataStatisticsForTask(taskID);

          if (statistics != null) {
            statistics.add(duration);
          }
        }
      }
    }
  }
예제 #9
0
  /**
   * Performs a shallow check of task counters returned as XML: each element must carry the task's
   * id, each counter group a non-empty name, and each counter a non-empty name and a
   * non-negative value. The counter values themselves are not compared with the task.
   *
   * @param nodes the "jobTaskCounters" elements of the response
   * @param task the task the counters were requested for
   */
  public void verifyHsTaskCountersXML(NodeList nodes, Task task) {

    for (int nodeIdx = 0; nodeIdx < nodes.getLength(); nodeIdx++) {
      Element countersElement = (Element) nodes.item(nodeIdx);
      String reportedId = WebServicesTestUtils.getXmlString(countersElement, "id");
      WebServicesTestUtils.checkStringMatch("id", MRApps.toString(task.getID()), reportedId);

      // Only the presence and basic sanity of the fields is verified, not the data in them.
      NodeList groupNodes = countersElement.getElementsByTagName("taskCounterGroup");
      for (int groupIdx = 0; groupIdx < groupNodes.getLength(); groupIdx++) {
        Element group = (Element) groupNodes.item(groupIdx);
        assertNotNull("should have counters in the web service info", group);

        String groupName = WebServicesTestUtils.getXmlString(group, "counterGroupName");
        assertTrue("name not set", (groupName != null && !groupName.isEmpty()));

        NodeList counterNodes = group.getElementsByTagName("counter");
        for (int counterIdx = 0; counterIdx < counterNodes.getLength(); counterIdx++) {
          Element counter = (Element) counterNodes.item(counterIdx);

          String counterName = WebServicesTestUtils.getXmlString(counter, "name");
          assertTrue("counter name not set", (counterName != null && !counterName.isEmpty()));

          long counterValue = WebServicesTestUtils.getXmlLong(counter, "value");
          assertTrue("value not >= 0", counterValue >= 0);
        }
      }
    }
  }
예제 #10
0
  /**
   * Sends an event that is invalid for a running task and expects the job to move to ERROR.
   */
  @Test
  public void testJobError() throws Exception {
    MRApp app = new MRApp(1, 0, false, this.getClass().getName(), true);
    Job job = app.submit(new Configuration());
    app.waitForState(job, JobState.RUNNING);
    Assert.assertEquals("Num tasks not correct", 1, job.getTasks().size());

    Task runningTask = job.getTasks().values().iterator().next();
    app.waitForState(runningTask, TaskState.RUNNING);

    // T_SCHEDULE is not a valid event for a task in its current (RUNNING) state.
    TaskEvent invalidEvent = new TaskEvent(runningTask.getID(), TaskEventType.T_SCHEDULE);
    app.getContext().getEventHandler().handle(invalidEvent);

    // The invalid transition must put the job into the error state.
    app.waitForState(job, JobState.ERROR);
  }
예제 #11
0
  /**
   * Verifies that every task of {@code job} with the given type appears in the JSON array
   * returned by the web service, and checks each matching entry against the task.
   *
   * <p>Tasks of other types are not looked at, and nothing is verified when {@code type} is null.
   *
   * @param arr JSON array of task objects returned by the web service
   * @param job job whose tasks are expected in the output
   * @param type task type symbol passed to {@code MRApps.taskType}, or null
   * @throws JSONException if an entry of {@code arr} cannot be read
   */
  public void verifyHsTask(JSONArray arr, Job job, String type) throws JSONException {
    for (Task task : job.getTasks().values()) {
      TaskId id = task.getID();
      String tid = MRApps.toString(id);
      boolean found = false;
      if (type != null && task.getType() == MRApps.taskType(type)) {

        for (int i = 0; i < arr.length(); i++) {
          JSONObject info = arr.getJSONObject(i);
          // Compare ids literally; String.matches would treat the reported id as a regex.
          if (tid.equals(info.getString("id"))) {
            found = true;
            verifyHsSingleTask(info, task);
          }
        }
        assertTrue("task with id: " + tid + " not in web service output", found);
      }
    }
  }
예제 #12
0
  /**
   * Looks up the statistics that apply to the given task.
   *
   * @param taskID id of the task
   * @return the job's mapper statistics for a MAP task, its reducer statistics for a REDUCE
   *     task, and null when the job or task is unknown or the task has any other type
   */
  protected DataStatistics dataStatisticsForTask(TaskId taskID) {
    Job job = context.getJob(taskID.getJobId());
    if (job == null) {
      return null;
    }

    Task task = job.getTask(taskID);
    if (task == null) {
      return null;
    }

    if (task.getType() == TaskType.MAP) {
      return mapperStatistics.get(job);
    }
    if (task.getType() == TaskType.REDUCE) {
      return reducerStatistics.get(job);
    }
    return null;
  }
예제 #13
0
  /**
   * Verifies that the XML output contains exactly two task elements and that every task of
   * {@code job} appears among them, checking each matching element against the task.
   *
   * @param nodes task elements returned by the web service
   * @param job job whose tasks are expected in the output
   */
  public void verifyHsTaskXML(NodeList nodes, Job job) {

    assertEquals("incorrect number of elements", 2, nodes.getLength());

    for (Task task : job.getTasks().values()) {
      TaskId id = task.getID();
      String tid = MRApps.toString(id);
      boolean found = false;
      for (int i = 0; i < nodes.getLength(); i++) {
        Element element = (Element) nodes.item(i);

        // Compare ids literally; String.matches would treat the reported id as a regex.
        if (tid.equals(WebServicesTestUtils.getXmlString(element, "id"))) {
          found = true;
          verifyHsSingleTaskXML(element, task);
        }
      }
      assertTrue("task with id: " + tid + " not in web service output", found);
    }
  }
  /**
   * Reads the value stored in {@code data} for the given attempt.
   *
   * @param data per-attempt values, keyed by the attempt object
   * @param attemptID id of the attempt to look up
   * @return the stored value, or -1 when the job, the task or the attempt cannot be resolved or
   *     no value has been stored for the attempt
   */
  private long storedPerAttemptValue(Map<TaskAttempt, AtomicLong> data, TaskAttemptId attemptID) {
    TaskId taskID = attemptID.getTaskId();
    JobId jobID = taskID.getJobId();
    Job job = context.getJob(jobID);

    // An unknown job is reported as "no value", like an unknown task or attempt, instead of
    // failing with a NullPointerException on the task lookup.
    if (job == null) {
      return -1L;
    }

    Task task = job.getTask(taskID);

    if (task == null) {
      return -1L;
    }

    TaskAttempt taskAttempt = task.getAttempt(attemptID);

    if (taskAttempt == null) {
      return -1L;
    }

    AtomicLong estimate = data.get(taskAttempt);

    return estimate == null ? -1L : estimate.get();
  }
예제 #15
0
  /**
   * Performs a shallow check of task counters returned as JSON: the object must have exactly two
   * entries and carry the task's id, each counter group needs a non-empty name, and each counter
   * a non-empty name and a non-negative value. The counter values are not compared with the task.
   *
   * @param info the "jobTaskCounters" object of the response
   * @param task the task the counters were requested for
   * @throws JSONException if an expected field is missing or has the wrong type
   */
  public void verifyHsJobTaskCounters(JSONObject info, Task task) throws JSONException {

    assertEquals("incorrect number of elements", 2, info.length());

    WebServicesTestUtils.checkStringMatch(
        "id", MRApps.toString(task.getID()), info.getString("id"));
    // just do simple verification of fields - not data is correct
    // in the fields
    JSONArray counterGroups = info.getJSONArray("taskCounterGroup");
    for (int i = 0; i < counterGroups.length(); i++) {
      JSONObject counterGroup = counterGroups.getJSONObject(i);
      String name = counterGroup.getString("counterGroupName");
      assertTrue("name not set", (name != null && !name.isEmpty()));
      JSONArray counters = counterGroup.getJSONArray("counter");
      for (int j = 0; j < counters.length(); j++) {
        JSONObject counter = counters.getJSONObject(j);
        String counterName = counter.getString("name");
        // Failure messages name the failing field and state the violated condition, matching
        // the wording used by the XML variant of this check.
        assertTrue("counter name not set", (counterName != null && !counterName.isEmpty()));
        long value = counter.getLong("value");
        assertTrue("value not >= 0", value >= 0);
      }
    }
  }
예제 #16
0
  /**
   * With reduce slowstart set to 0.5 and two maps, the reduce must stay NEW until the first map
   * succeeds and must be running afterwards. Currently not run: the annotation is commented out.
   */
  // @Test
  public void testCompletedMapsForReduceSlowstart() throws Exception {
    MRApp app = new MRApp(2, 1, false, this.getClass().getName(), true);
    Configuration conf = new Configuration();
    // Reduces may start once half of the maps have completed.
    conf.setFloat(MRJobConfig.COMPLETED_MAPS_FOR_REDUCE_SLOWSTART, 0.5f);
    // Uberization forces full slowstart (1.0), so it has to be disabled.
    conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
    Job job = app.submit(conf);
    app.waitForState(job, JobState.RUNNING);

    // Two maps plus one reduce, taken in the iteration order of the job's task map.
    Assert.assertEquals("Num tasks not correct", 3, job.getTasks().size());
    Iterator<Task> taskIterator = job.getTasks().values().iterator();
    Task firstMap = taskIterator.next();
    Task secondMap = taskIterator.next();
    Task reduce = taskIterator.next();

    // Both maps must be running ...
    app.waitForState(firstMap, TaskState.RUNNING);
    app.waitForState(secondMap, TaskState.RUNNING);

    // ... and so must their attempts before any TA_DONE is sent.
    TaskAttempt firstMapAttempt = firstMap.getAttempts().values().iterator().next();
    TaskAttempt secondMapAttempt = secondMap.getAttempts().values().iterator().next();
    app.waitForState(firstMapAttempt, TaskAttemptState.RUNNING);
    app.waitForState(secondMapAttempt, TaskAttemptState.RUNNING);

    // No map has finished yet, so the reduce must still be NEW.
    Assert.assertEquals(
        "Reduce Task state not correct", TaskState.NEW, reduce.getReport().getTaskState());

    // Finish the first map and wait for it to complete.
    TaskAttemptEvent firstMapDone =
        new TaskAttemptEvent(
            firstMap.getAttempts().values().iterator().next().getID(),
            TaskAttemptEventType.TA_DONE);
    app.getContext().getEventHandler().handle(firstMapDone);
    app.waitForState(firstMap, TaskState.SUCCEEDED);

    // Completing the first map schedules the reduces, so the reduce must now be running.
    app.waitForState(reduce, TaskState.RUNNING);

    // Finish the second map and then the reduce to complete the job.
    TaskAttemptEvent secondMapDone =
        new TaskAttemptEvent(
            secondMap.getAttempts().values().iterator().next().getID(),
            TaskAttemptEventType.TA_DONE);
    app.getContext().getEventHandler().handle(secondMapDone);
    TaskAttemptEvent reduceDone =
        new TaskAttemptEvent(
            reduce.getAttempts().values().iterator().next().getID(),
            TaskAttemptEventType.TA_DONE);
    app.getContext().getEventHandler().handle(reduceDone);

    app.waitForState(job, JobState.SUCCEEDED);
  }
예제 #17
0
  /**
   * Runs a job with failing attempts, parses its history file and checks that the file parses
   * without error, that every attempt carries the expected rack name and that exactly two
   * attempts are recorded as FAILED.
   */
  @Test
  public void testHistoryParsingForFailedAttempts() throws Exception {
    LOG.info("STARTING testHistoryParsingForFailedAttempts");
    try {
      Configuration conf = new Configuration();
      conf.setClass(
          CommonConfigurationKeysPublic.NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY,
          MyResolver.class,
          DNSToSwitchMapping.class);
      RackResolver.init(conf);
      MRApp app =
          new MRAppWithHistoryWithFailedAttempt(2, 1, true, this.getClass().getName(), true);
      app.submit(conf);
      Job job = app.getContext().getAllJobs().values().iterator().next();
      JobId jobId = job.getID();
      app.waitForState(job, JobState.SUCCEEDED);

      // make sure all events are flushed
      app.waitForState(Service.STATE.STOPPED);

      String jobhistoryDir = JobHistoryUtils.getHistoryIntermediateDoneDirForUser(conf);
      JobHistory jobHistory = new JobHistory();
      jobHistory.init(conf);

      JobIndexInfo jobIndexInfo = jobHistory.getJobFileInfo(jobId).getJobIndexInfo();
      String jobhistoryFileName = FileNameIndexUtils.getDoneFileName(jobIndexInfo);

      Path historyFilePath = new Path(jobhistoryDir, jobhistoryFileName);
      FSDataInputStream in = null;
      try {
        FileContext fc = FileContext.getFileContext(conf);
        in = fc.open(fc.makeQualified(historyFilePath));
      } catch (IOException ioe) {
        LOG.info("Can not open history file: " + historyFilePath, ioe);
        // Keep the original failure as the cause so the test report shows why the open failed.
        throw new Exception("Can not open History File", ioe);
      }

      JobHistoryParser parser = new JobHistoryParser(in);
      JobInfo jobInfo = parser.parse();
      Exception parseException = parser.getParseException();
      Assert.assertNull("Caught an unexpected exception " + parseException, parseException);
      int noOffailedAttempts = 0;
      Map<TaskID, TaskInfo> allTasks = jobInfo.getAllTasks();
      for (Task task : job.getTasks().values()) {
        TaskInfo taskInfo = allTasks.get(TypeConverter.fromYarn(task.getID()));
        // Fail with a readable message rather than a NullPointerException on a missing entry.
        Assert.assertNotNull("TaskInfo not found", taskInfo);
        for (TaskAttempt taskAttempt : task.getAttempts().values()) {
          TaskAttemptInfo taskAttemptInfo =
              taskInfo.getAllTaskAttempts().get(TypeConverter.fromYarn((taskAttempt.getID())));
          Assert.assertNotNull("TaskAttemptInfo not found", taskAttemptInfo);
          // Verify rack-name for all task attempts (expected value first, then actual)
          Assert.assertEquals("rack-name is incorrect", RACK_NAME, taskAttemptInfo.getRackname());
          if (taskAttemptInfo.getTaskStatus().equals("FAILED")) {
            noOffailedAttempts++;
          }
        }
      }
      Assert.assertEquals("No of Failed tasks doesn't match.", 2, noOffailedAttempts);
    } finally {
      LOG.info("FINISHED testHistoryParsingForFailedAttempts");
    }
  }
예제 #18
0
  /**
   * Runs a job with history enabled, then parses the written history file and compares the
   * parsed {@code JobInfo} with the live job.
   *
   * <p>When {@code numMaps == numSuccessfulMaps} the job summary file is checked as well and the
   * history file is read with a real event reader. Otherwise a mocked reader delegates to the
   * real one and throws an {@code IOException} once more than {@code numSuccessfulMaps}
   * task-finished events have been seen, and the parser is expected to report a parse exception.
   *
   * @param numMaps number of map tasks of the job
   * @param numReduces number of reduce tasks of the job
   * @param numSuccessfulMaps number of finished maps the parsed history is expected to report
   * @throws Exception if the job cannot be run or the history cannot be opened or parsed
   */
  private void checkHistoryParsing(
      final int numMaps, final int numReduces, final int numSuccessfulMaps) throws Exception {
    Configuration conf = new Configuration();
    conf.set(MRJobConfig.USER_NAME, System.getProperty("user.name"));
    // Lower bound for the AM start time asserted further down.
    long amStartTimeEst = System.currentTimeMillis();
    conf.setClass(
        CommonConfigurationKeysPublic.NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY,
        MyResolver.class,
        DNSToSwitchMapping.class);
    RackResolver.init(conf);
    MRApp app = new MRAppWithHistory(numMaps, numReduces, true, this.getClass().getName(), true);
    app.submit(conf);
    Job job = app.getContext().getAllJobs().values().iterator().next();
    JobId jobId = job.getID();
    LOG.info("JOBID is " + TypeConverter.fromYarn(jobId).toString());
    app.waitForState(job, JobState.SUCCEEDED);

    // make sure all events are flushed
    app.waitForState(Service.STATE.STOPPED);

    String jobhistoryDir = JobHistoryUtils.getHistoryIntermediateDoneDirForUser(conf);

    FileContext fc = null;
    try {
      fc = FileContext.getFileContext(conf);
    } catch (IOException ioe) {
      LOG.info("Can not get FileContext", ioe);
      // Keep the original failure as the cause so the test report shows what went wrong.
      throw new Exception("Can not get File Context", ioe);
    }

    if (numMaps == numSuccessfulMaps) {
      String summaryFileName = JobHistoryUtils.getIntermediateSummaryFileName(jobId);
      Path summaryFile = new Path(jobhistoryDir, summaryFileName);
      String jobSummaryString = getJobSummary(fc, summaryFile);
      Assert.assertNotNull(jobSummaryString);
      Assert.assertTrue(jobSummaryString.contains("resourcesPerMap=100"));
      Assert.assertTrue(jobSummaryString.contains("resourcesPerReduce=100"));

      // The summary is a comma-separated list of key=value pairs.
      Map<String, String> jobSummaryElements = new HashMap<String, String>();
      StringTokenizer strToken = new StringTokenizer(jobSummaryString, ",");
      while (strToken.hasMoreTokens()) {
        String keypair = strToken.nextToken();
        String[] keyAndValue = keypair.split("=");
        jobSummaryElements.put(keyAndValue[0], keyAndValue[1]);
      }

      Assert.assertEquals(
          "JobId does not match", jobId.toString(), jobSummaryElements.get("jobId"));
      Assert.assertEquals("JobName does not match", "test", jobSummaryElements.get("jobName"));
      Assert.assertTrue(
          "submitTime should not be 0", Long.parseLong(jobSummaryElements.get("submitTime")) != 0);
      Assert.assertTrue(
          "launchTime should not be 0", Long.parseLong(jobSummaryElements.get("launchTime")) != 0);
      Assert.assertTrue(
          "firstMapTaskLaunchTime should not be 0",
          Long.parseLong(jobSummaryElements.get("firstMapTaskLaunchTime")) != 0);
      Assert.assertTrue(
          "firstReduceTaskLaunchTime should not be 0",
          Long.parseLong(jobSummaryElements.get("firstReduceTaskLaunchTime")) != 0);
      Assert.assertTrue(
          "finishTime should not be 0", Long.parseLong(jobSummaryElements.get("finishTime")) != 0);
      Assert.assertEquals(
          "Mismatch in num map slots",
          numSuccessfulMaps,
          Integer.parseInt(jobSummaryElements.get("numMaps")));
      Assert.assertEquals(
          "Mismatch in num reduce slots",
          numReduces,
          Integer.parseInt(jobSummaryElements.get("numReduces")));
      Assert.assertEquals(
          "User does not match", System.getProperty("user.name"), jobSummaryElements.get("user"));
      Assert.assertEquals("Queue does not match", "default", jobSummaryElements.get("queue"));
      Assert.assertEquals("Status does not match", "SUCCEEDED", jobSummaryElements.get("status"));
    }

    JobHistory jobHistory = new JobHistory();
    jobHistory.init(conf);
    HistoryFileInfo fileInfo = jobHistory.getJobFileInfo(jobId);
    JobInfo jobInfo;
    long numFinishedMaps;

    synchronized (fileInfo) {
      Path historyFilePath = fileInfo.getHistoryFile();
      FSDataInputStream in = null;
      LOG.info("JobHistoryFile is: " + historyFilePath);
      try {
        in = fc.open(fc.makeQualified(historyFilePath));
      } catch (IOException ioe) {
        LOG.info("Can not open history file: " + historyFilePath, ioe);
        throw new Exception("Can not open History File", ioe);
      }

      JobHistoryParser parser = new JobHistoryParser(in);
      final EventReader realReader = new EventReader(in);
      EventReader reader;
      if (numMaps == numSuccessfulMaps) {
        reader = realReader;
      } else {
        // Simulate a truncated history: pass events through until more than numSuccessfulMaps
        // task-finished events have been read, then fail every further read.
        reader = Mockito.mock(EventReader.class);
        final AtomicInteger numFinishedEvents = new AtomicInteger(0); // Hack!
        Mockito.when(reader.getNextEvent())
            .thenAnswer(
                new Answer<HistoryEvent>() {
                  public HistoryEvent answer(InvocationOnMock invocation) throws IOException {
                    HistoryEvent event = realReader.getNextEvent();
                    if (event instanceof TaskFinishedEvent) {
                      numFinishedEvents.incrementAndGet();
                    }

                    if (numFinishedEvents.get() <= numSuccessfulMaps) {
                      return event;
                    } else {
                      throw new IOException("test");
                    }
                  }
                });
      }

      jobInfo = parser.parse(reader);

      numFinishedMaps = computeFinishedMaps(jobInfo, numMaps, numSuccessfulMaps);

      if (numFinishedMaps != numMaps) {
        Exception parseException = parser.getParseException();
        Assert.assertNotNull("Didn't get expected parse exception", parseException);
      }
    }

    Assert.assertEquals(
        "Incorrect username ", System.getProperty("user.name"), jobInfo.getUsername());
    Assert.assertEquals("Incorrect jobName ", "test", jobInfo.getJobname());
    Assert.assertEquals("Incorrect queuename ", "default", jobInfo.getJobQueueName());
    Assert.assertEquals("incorrect conf path", "test", jobInfo.getJobConfPath());
    Assert.assertEquals("incorrect finishedMap ", numSuccessfulMaps, numFinishedMaps);
    Assert.assertEquals("incorrect finishedReduces ", numReduces, jobInfo.getFinishedReduces());
    Assert.assertEquals("incorrect uberized ", job.isUber(), jobInfo.getUberized());
    Map<TaskID, TaskInfo> allTasks = jobInfo.getAllTasks();
    int totalTasks = allTasks.size();
    Assert.assertEquals("total number of tasks is incorrect  ", (numMaps + numReduces), totalTasks);

    // Verify aminfo
    Assert.assertEquals(1, jobInfo.getAMInfos().size());
    Assert.assertEquals(MRApp.NM_HOST, jobInfo.getAMInfos().get(0).getNodeManagerHost());
    AMInfo amInfo = jobInfo.getAMInfos().get(0);
    Assert.assertEquals(MRApp.NM_PORT, amInfo.getNodeManagerPort());
    Assert.assertEquals(MRApp.NM_HTTP_PORT, amInfo.getNodeManagerHttpPort());
    Assert.assertEquals(1, amInfo.getAppAttemptId().getAttemptId());
    Assert.assertEquals(
        amInfo.getAppAttemptId(), amInfo.getContainerId().getApplicationAttemptId());
    Assert.assertTrue(
        amInfo.getStartTime() <= System.currentTimeMillis()
            && amInfo.getStartTime() >= amStartTimeEst);

    ContainerId fakeCid = BuilderUtils.newContainerId(-1, -1, -1, -1);
    // Assert at taskAttempt level
    for (TaskInfo taskInfo : allTasks.values()) {
      int taskAttemptCount = taskInfo.getAllTaskAttempts().size();
      Assert.assertEquals("total number of task attempts ", 1, taskAttemptCount);
      TaskAttemptInfo taInfo = taskInfo.getAllTaskAttempts().values().iterator().next();
      Assert.assertNotNull(taInfo.getContainerId());
      // Verify the wrong ctor is not being used. Remove after mrv1 is removed.
      Assert.assertFalse(taInfo.getContainerId().equals(fakeCid));
    }

    // Deep compare Job and JobInfo
    for (Task task : job.getTasks().values()) {
      TaskInfo taskInfo = allTasks.get(TypeConverter.fromYarn(task.getID()));
      Assert.assertNotNull("TaskInfo not found", taskInfo);
      for (TaskAttempt taskAttempt : task.getAttempts().values()) {
        TaskAttemptInfo taskAttemptInfo =
            taskInfo.getAllTaskAttempts().get(TypeConverter.fromYarn((taskAttempt.getID())));
        Assert.assertNotNull("TaskAttemptInfo not found", taskAttemptInfo);
        Assert.assertEquals(
            "Incorrect shuffle port for task attempt",
            taskAttempt.getShufflePort(),
            taskAttemptInfo.getShufflePort());
        if (numMaps == numSuccessfulMaps) {
          Assert.assertEquals(MRApp.NM_HOST, taskAttemptInfo.getHostname());
          Assert.assertEquals(MRApp.NM_PORT, taskAttemptInfo.getPort());

          // Verify rack-name (expected value first, then actual)
          Assert.assertEquals("rack-name is incorrect", RACK_NAME, taskAttemptInfo.getRackname());
        }
      }
    }
  }
예제 #19
0
  /**
   * Walks over all tasks of a job and adds to the member counters shown on the page: running and
   * pending (scheduled) tasks per type, and task attempts per type and UI state.
   *
   * @param job the job to get counts for.
   */
  private void countTasksAndAttempts(Job job) {
    final Map<TaskId, Task> tasks = job.getTasks();
    if (tasks == null) {
      return;
    }
    for (Task task : tasks.values()) {
      // Task-level counts: only RUNNING and SCHEDULED tasks are tallied.
      switch (task.getType()) {
        case MAP:
          switch (task.getState()) {
            case RUNNING:
              ++this.mapsRunning;
              break;
            case SCHEDULED:
              ++this.mapsPending;
              break;
            default:
              break;
          }
          break;
        case REDUCE:
          switch (task.getState()) {
            case RUNNING:
              ++this.reducesRunning;
              break;
            case SCHEDULED:
              ++this.reducesPending;
              break;
            default:
              break;
          }
          break;
        default:
          throw new IllegalStateException("Task type is neither map nor reduce: " + task.getType());
      }

      // Attempt-level counts: each attempt contributes 1 to at most one of the five buckets.
      Map<TaskAttemptId, TaskAttempt> attempts = task.getAttempts();
      for (TaskAttempt attempt : attempts.values()) {
        int newDelta = 0;
        int runningDelta = 0;
        int successfulDelta = 0;
        int failedDelta = 0;
        int killedDelta = 0;

        if (TaskAttemptStateUI.NEW.correspondsTo(attempt.getState())) {
          newDelta = 1;
        } else if (TaskAttemptStateUI.RUNNING.correspondsTo(attempt.getState())) {
          runningDelta = 1;
        } else if (TaskAttemptStateUI.SUCCESSFUL.correspondsTo(attempt.getState())) {
          successfulDelta = 1;
        } else if (TaskAttemptStateUI.FAILED.correspondsTo(attempt.getState())) {
          failedDelta = 1;
        } else if (TaskAttemptStateUI.KILLED.correspondsTo(attempt.getState())) {
          killedDelta = 1;
        }

        switch (task.getType()) {
          case MAP:
            this.newMapAttempts += newDelta;
            this.runningMapAttempts += runningDelta;
            this.successfulMapAttempts += successfulDelta;
            this.failedMapAttempts += failedDelta;
            this.killedMapAttempts += killedDelta;
            break;
          case REDUCE:
            this.newReduceAttempts += newDelta;
            this.runningReduceAttempts += runningDelta;
            this.successfulReduceAttempts += successfulDelta;
            this.failedReduceAttempts += failedDelta;
            this.killedReduceAttempts += killedDelta;
            break;
          default:
            throw new IllegalStateException("Task type neither map nor reduce: " + task.getType());
        }
      }
    }
  }
  /**
   * Forwards the status update to the superclass and, while the attempt is
   * RUNNING, refreshes its estimated total runtime and the variance figure
   * that accompanies it.
   *
   * <p>The update is ignored (after the superclass call) when the job, task
   * or attempt named by {@code status.id} cannot be found.
   *
   * @param status    latest status of the attempt; {@code status.id}
   *                  identifies it and {@code status.progress} is used to
   *                  extrapolate the runtime
   * @param timestamp time of this update, compared against the recorded
   *                  start time of the attempt
   */
  @Override
  public void updateAttempt(TaskAttemptStatus status, long timestamp) {
    super.updateAttempt(status, timestamp);

    TaskAttemptId attemptID = status.id;
    TaskId taskID = attemptID.getTaskId();
    JobId jobID = taskID.getJobId();
    Job job = context.getJob(jobID);

    if (job == null) {
      return;
    }

    Task task = job.getTask(taskID);

    if (task == null) {
      return;
    }

    TaskAttempt taskAttempt = task.getAttempt(attemptID);

    if (taskAttempt == null) {
      return;
    }

    // An attempt with no recorded start time gets Long.MIN_VALUE, which fails
    // the "start > 0" test below and so yields the -1 placeholders.
    Long boxedStart = startTimes.get(attemptID);
    long start = boxedStart == null ? Long.MIN_VALUE : boxedStart;

    // We need to do two things.
    //  1: If this is a completion, we accumulate statistics in the superclass
    //  2: If this is not a completion, we learn more about it.

    // This is not a completion, but we're cooking.
    //
    if (taskAttempt.getState() == TaskAttemptState.RUNNING) {
      // See if this task is already in the registry
      AtomicLong estimateContainer = attemptRuntimeEstimates.get(taskAttempt);
      AtomicLong estimateVarianceContainer = attemptRuntimeEstimateVariances.get(taskAttempt);

      if (estimateContainer == null) {
        if (attemptRuntimeEstimates.get(taskAttempt) == null) {
          attemptRuntimeEstimates.put(taskAttempt, new AtomicLong());
        }
        // Always re-read: if the entry appeared between the two lookups above
        // we must still pick it up, otherwise this update's estimate would be
        // silently dropped. This mirrors the variance handling below.
        estimateContainer = attemptRuntimeEstimates.get(taskAttempt);
      }

      if (estimateVarianceContainer == null) {
        attemptRuntimeEstimateVariances.putIfAbsent(taskAttempt, new AtomicLong());
        estimateVarianceContainer = attemptRuntimeEstimateVariances.get(taskAttempt);
      }

      // -1 is stored when no estimate can be computed for this update.
      long estimate = -1;
      long varianceEstimate = -1;

      // This code assumes that we'll never consider starting a third
      //  speculative task attempt if two are already running for this task
      if (start > 0 && timestamp > start) {
        // Extrapolate total runtime from elapsed time and progress; progress
        // is floored at 0.0001 to avoid dividing by zero.
        estimate = (long) ((timestamp - start) / Math.max(0.0001, status.progress));
        varianceEstimate = (long) (estimate * status.progress / 10);
      }
      if (estimateContainer != null) {
        estimateContainer.set(estimate);
      }
      if (estimateVarianceContainer != null) {
        estimateVarianceContainer.set(varianceEstimate);
      }
    }
  }
예제 #21
0
  /*
   * (non-Javadoc)
   * @see org.apache.hadoop.yarn.webapp.view.HtmlBlock#render(org.apache.hadoop.yarn.webapp.view.HtmlBlock.Block)
   *
   * Renders the tasks table for the current job: a two-row header, a
   * JavaScript variable "tasksTableData" holding one row per task, and a
   * footer row of search inputs. When no job is available only an h2 with
   * $(TITLE) is emitted. When $(TASK_TYPE) is non-empty, only tasks of that
   * type are listed, and the reduce view gets extra shuffle/merge columns.
   */
  @Override protected void render(Block html) {
    // No job to show: emit just the title heading.
    if (app.getJob() == null) {
      html.
        h2($(TITLE));
      return;
    }
    // Optional task-type filter; stays null when TASK_TYPE is empty.
    TaskType type = null;
    String symbol = $(TASK_TYPE);
    if (!symbol.isEmpty()) {
      type = MRApps.taskType(symbol);
    }
    // The table id is built from the job id and type when filtering,
    // otherwise the fixed id "#tasks" is used.
    THEAD<TABLE<Hamlet>> thead;
    if(type != null)
      thead = html.table("#"+app.getJob().getID() 
        + type).$class("dt-tasks").thead();
    else
      thead = html.table("#tasks").thead();
    //Create the spanning row
    // 5 task columns, then 3 attempt columns (start, finish, elapsed) or 8
    // for reduces (adds 2 finish-time and 3 elapsed-time phase columns).
    int attemptColSpan = type == TaskType.REDUCE ? 8 : 3;
    thead.tr().
      th().$colspan(5).$class("ui-state-default")._("Task")._().
      th().$colspan(attemptColSpan).$class("ui-state-default").
        _("Successful Attempt")._().
    _();

    // Second header row: one heading per column. Headings marked "Attempt"
    // refer to the successful attempt rather than the task itself.
    TR<THEAD<TABLE<Hamlet>>> theadRow = thead.
          tr().
            th("Name").
            th("State").
            th("Start Time").
            th("Finish Time").
            th("Elapsed Time").
            th("Start Time"); //Attempt

    if(type == TaskType.REDUCE) {
      theadRow.th("Shuffle Finish Time"); //Attempt
      theadRow.th("Merge Finish Time"); //Attempt
    }

    theadRow.th("Finish Time"); //Attempt

    if(type == TaskType.REDUCE) {
      theadRow.th("Elapsed Time Shuffle"); //Attempt
      theadRow.th("Elapsed Time Merge"); //Attempt
      theadRow.th("Elapsed Time Reduce"); //Attempt
    }
    theadRow.th("Elapsed Time"); //Attempt

    // Close the header row and thead, then open the (markup-empty) tbody.
    TBODY<TABLE<Hamlet>> tbody = theadRow._()._().tbody();

    // Write all the data into a JavaScript array of arrays for JQuery
    // DataTables to display
    StringBuilder tasksTableData = new StringBuilder("[\n");
    for (Task task : app.getJob().getTasks().values()) {
      // Skip tasks that do not match the requested type, if any.
      if (type != null && task.getType() != type) {
        continue;
      }
      TaskInfo info = new TaskInfo(task);
      String tid = info.getId();

      long startTime = info.getStartTime();
      long finishTime = info.getFinishTime();
      long elapsed = info.getElapsedTime();

      // Attempt-level values default to -1 and keep that value when the task
      // has no successful attempt. The "sort" locals are filled from the
      // "merge" getters and are shown under the "Merge" headings.
      long attemptStartTime = -1;
      long shuffleFinishTime = -1;
      long sortFinishTime = -1;
      long attemptFinishTime = -1;
      long elapsedShuffleTime = -1;
      long elapsedSortTime = -1;;
      long elapsedReduceTime = -1;
      long attemptElapsed = -1;
      TaskAttempt successful = info.getSuccessful();
      if(successful != null) {
        TaskAttemptInfo ta;
        if(type == TaskType.REDUCE) {
          // Reduce view: also pull the shuffle/merge/reduce phase timings.
          ReduceTaskAttemptInfo rta = new ReduceTaskAttemptInfo(successful, type);
          shuffleFinishTime = rta.getShuffleFinishTime();
          sortFinishTime = rta.getMergeFinishTime();
          elapsedShuffleTime = rta.getElapsedShuffleTime();
          elapsedSortTime = rta.getElapsedMergeTime();
          elapsedReduceTime = rta.getElapsedReduceTime();
          ta = rta;
        } else {
          ta = new TaskAttemptInfo(successful, type, false);
        }
        attemptStartTime = ta.getStartTime();
        attemptFinishTime = ta.getFinishTime();
        attemptElapsed = ta.getElapsedTime();
      }

      // Emit one row as an array of quoted strings, in header column order.
      // The first cell is an anchor pointing at url("task", tid).
      // NOTE(review): values are appended without escaping - confirm that the
      // id and state can never contain quote characters.
      tasksTableData.append("[\"")
      .append("<a href='" + url("task", tid)).append("'>")
      .append(tid).append("</a>\",\"")
      .append(info.getState()).append("\",\"")
      .append(startTime).append("\",\"")
      .append(finishTime).append("\",\"")
      .append(elapsed).append("\",\"")
      .append(attemptStartTime).append("\",\"");

      if(type == TaskType.REDUCE) {
        tasksTableData.append(shuffleFinishTime).append("\",\"")
        .append(sortFinishTime).append("\",\"");
      }
      tasksTableData.append(attemptFinishTime).append("\",\"");
      if(type == TaskType.REDUCE) {
        tasksTableData.append(elapsedShuffleTime).append("\",\"")
        .append(elapsedSortTime).append("\",\"")
        .append(elapsedReduceTime).append("\",\"");
      }
      tasksTableData.append(attemptElapsed).append("\"],\n");
    }
    //Remove the last comma and close off the array of arrays
    // Every row ends with ",\n", so a trailing comma sits second from the
    // end; only the comma is deleted and the newline is kept. With no rows
    // the buffer is "[\n" and the check is false.
    if(tasksTableData.charAt(tasksTableData.length() - 2) == ',') {
      tasksTableData.delete(
        tasksTableData.length()-2, tasksTableData.length()-1);
    }
    tasksTableData.append("]");
    // Publish the data as a JavaScript variable inside a script element.
    html.script().$type("text/javascript").
    _("var tasksTableData=" + tasksTableData)._();
    
    // Footer: one text input of class "search_init" per column, in the same
    // order as the header row, with the reduce-only columns added
    // conditionally.
    TR<TFOOT<TABLE<Hamlet>>> footRow = tbody._().tfoot().tr();
    footRow.th().input("search_init").$type(InputType.text).$name("task")
        .$value("ID")._()._().th().input("search_init").$type(InputType.text)
        .$name("state").$value("State")._()._().th().input("search_init")
        .$type(InputType.text).$name("start_time").$value("Start Time")._()._()
        .th().input("search_init").$type(InputType.text).$name("finish_time")
        .$value("Finish Time")._()._().th().input("search_init")
        .$type(InputType.text).$name("elapsed_time").$value("Elapsed Time")._()
        ._().th().input("search_init").$type(InputType.text)
        .$name("attempt_start_time").$value("Start Time")._()._();

    if(type == TaskType.REDUCE) {
      footRow.th().input("search_init").$type(InputType.text)
          .$name("shuffle_time").$value("Shuffle Time")._()._();
      footRow.th().input("search_init").$type(InputType.text)
          .$name("merge_time").$value("Merge Time")._()._();
    }

    footRow.th().input("search_init").$type(InputType.text)
        .$name("attempt_finish").$value("Finish Time")._()._();

    if(type == TaskType.REDUCE) {
      footRow.th().input("search_init").$type(InputType.text)
          .$name("elapsed_shuffle_time").$value("Elapsed Shuffle Time")._()._();
      footRow.th().input("search_init").$type(InputType.text)
          .$name("elapsed_merge_time").$value("Elapsed Merge Time")._()._();
      footRow.th().input("search_init").$type(InputType.text)
          .$name("elapsed_reduce_time").$value("Elapsed Reduce Time")._()._();
    }

    footRow.th().input("search_init").$type(InputType.text)
        .$name("attempt_elapsed").$value("Elapsed Time")._()._();

    // Close the footer row, the tfoot and the table.
    footRow._()._()._();
  }