Exemplo n.º 1
0
 /**
  * Waits for the given job to retire, i.e. until {@code jobtracker.getJob(id)} returns
  * {@code null}.
  *
  * <p>The tracker is queried once up front and then re-queried after each of at most 10 pauses of
  * {@code UtilsForTests.waitFor(1000)} (presumably milliseconds). The test fails with "Job did not
  * retire" if the job is still known to the tracker afterwards.
  *
  * @param id id of the job expected to retire
  * @param jobtracker tracker that is polled for the job
  */
 private void waitTillRetire(JobID id, JobTracker jobtracker) {
   int pausesLeft = 10;
   JobInProgress pending = jobtracker.getJob(id);
   while (pending != null && pausesLeft > 0) {
     pausesLeft--;
     UtilsForTests.waitFor(1000);
     // look the job up again; a null result means it has retired
     pending = jobtracker.getJob(id);
   }
   assertNull("Job did not retire", pending);
 }
  /**
   * Writes an HTML table of the unsuccessful task attempts of a job to {@code out}.
   *
   * <p>If the job cannot be found, {@code kind} is not recognized, or {@code cause} does not name
   * the FAILED or KILLED task state, a short bold error message is written instead and no table
   * is emitted. The {@code kind} and {@code cause} values are HTML-escaped before being echoed in
   * those messages.
   *
   * @param out writer receiving the generated HTML
   * @param tracker job tracker used to look up the job
   * @param jobId id of the job whose attempts are listed
   * @param kind which tasks to include: {@code "map"}, {@code "reduce"}, {@code "all"}, or
   *     {@code null} (treated like {@code "all"})
   * @param cause case-insensitive name of the task state to pass on to
   *     {@code printFailedAttempts} ({@code FAILED} or {@code KILLED}); when {@code null}, a
   *     {@code null} state is passed on
   * @throws IOException propagated from writing to {@code out} or from
   *     {@code printFailedAttempts}
   */
  private void printFailures(
      JspWriter out, JobTracker tracker, JobID jobId, String kind, String cause)
      throws IOException {
    JobInProgress job = (JobInProgress) tracker.getJob(jobId);
    if (job == null) {
      out.print("<b>Job " + jobId + " not found.</b><br>\n");
      return;
    }

    // A missing kind means the same as "all".
    boolean everyKind = kind == null || "all".equals(kind);
    boolean includeMap = everyKind || "map".equals(kind);
    boolean includeReduce = everyKind || "reduce".equals(kind);
    if (!includeMap && !includeReduce) {
      // kind is caller-supplied text (presumably a request parameter); escape it so it cannot
      // inject markup into the page.
      out.print("<b>Kind " + escapeHtmlText(kind) + " not supported.</b><br>\n");
      return;
    }

    TaskStatus.State state = null;
    if (cause != null) {
      try {
        // Locale.ROOT keeps the upper-casing independent of the JVM default locale (e.g. the
        // Turkish dotted I would otherwise turn "failed" into a name valueOf() rejects).
        state = TaskStatus.State.valueOf(cause.toUpperCase(java.util.Locale.ROOT));
      } catch (IllegalArgumentException e) {
        // valueOf() found no enum constant with that name.
        out.print("<b>Cause '" + escapeHtmlText(cause) + "' not supported.</b><br>\n");
        return;
      }
      if (state != TaskStatus.State.FAILED && state != TaskStatus.State.KILLED) {
        out.print(
            "<b>Cause '" + escapeHtmlText(cause) + "' is not an 'unsuccessful' state.</b><br>\n");
        return;
      }
    }

    out.print("<table border=2 cellpadding=\"5\" cellspacing=\"2\">");
    out.print(
        "<tr><th>Attempt</th><th>Task</th><th>Machine</th><th>State</th>"
            + "<th>Error</th><th>Logs</th></tr>\n");
    if (includeMap) {
      for (TaskInProgress tip : job.getTasks(TaskType.MAP)) {
        printFailedAttempts(out, tracker, jobId, tip, state);
      }
    }
    if (includeReduce) {
      for (TaskInProgress tip : job.getTasks(TaskType.REDUCE)) {
        printFailedAttempts(out, tracker, jobId, tip, state);
      }
    }
    out.print("</table>\n");
  }

  /** Returns {@code text} with the HTML metacharacters (ampersand, angle brackets, quotes) replaced by entities. */
  private static String escapeHtmlText(String text) {
    StringBuilder escaped = new StringBuilder(text.length() + 16);
    for (int i = 0; i < text.length(); i++) {
      char c = text.charAt(i);
      switch (c) {
        case '&':
          escaped.append("&amp;");
          break;
        case '<':
          escaped.append("&lt;");
          break;
        case '>':
          escaped.append("&gt;");
          break;
        case '"':
          escaped.append("&quot;");
          break;
        case '\'':
          escaped.append("&#39;");
          break;
        default:
          escaped.append(c);
          break;
      }
    }
    return escaped.toString();
  }
Exemplo n.º 3
0
  /**
   * Test job retire with tasks that report their <em>first</em> status only after the job retires.
   *
   * <p>Steps:
   *
   * <ol>
   *   <li>Start a mini-mr cluster with 1 task-tracker having only map slots. Note that this
   *       task-tracker will take care of setup/cleanup and the map tasks.
   *   <li>Submit a job with 1 map task and 1 reduce task.
   *   <li>Wait for the job to finish the map task.
   *   <li>Start a 2nd tracker that waits for a long time after contacting the JT.
   *   <li>Wait for the 2nd tracker to get stuck.
   *   <li>Kill the job.
   *   <li>Wait for the job to retire.
   *   <li>Check if the tip mappings are cleaned up.
   * </ol>
   *
   * @throws Exception if the cluster cannot be started or any step of the scenario fails
   */
  public void testJobRetireWithUnreportedTasks() throws Exception {
    MiniMRCluster mr = null;
    try {
      JobConf conf = new JobConf();
      // set the num-map-slots to 1 so that no reduce tasks but setup/cleanup
      // can run on it
      conf.setInt("mapred.tasktracker.map.tasks.maximum", 1);
      conf.setInt("mapred.tasktracker.reduce.tasks.maximum", 0);

      mr = startCluster(conf, 1);
      JobTracker jobtracker = mr.getJobTrackerRunner().getJobTracker();

      RunningJob job =
          UtilsForTests.runJob(
              mr.createJobConf(), new Path(testDir, "in-1"), new Path(testDir, "out-1"), 1, 1);
      JobID id = JobID.downgrade(job.getID());
      JobInProgress jip = jobtracker.getJob(id);

      // wait 100 secs for the job to complete its map task
      for (int i = 0; i < 1000 && jip.finishedMaps() < 1; i++) {
        UtilsForTests.waitFor(100);
      }
      // expected value first, so a failure reports "expected:<1> but was:<n>"
      assertEquals("Map task did not finish in time", 1, jip.finishedMaps());

      // start a tracker that will wait
      LOG.info("Adding a waiting tracker");
      TaskTrackerRunner testTrackerRunner =
          mr.new TaskTrackerRunner(1, 1, null, mr.createJobConf()) {
            @Override
            TaskTracker createTaskTracker(JobConf conf) throws InterruptedException, IOException {
              return new WaitingTaskTracker(conf);
            }
          };
      mr.addTaskTracker(testTrackerRunner);
      LOG.info("Waiting tracker added");

      WaitingTaskTracker testTT = (WaitingTaskTracker) testTrackerRunner.getTaskTracker();

      // wait 100 secs for the newly started task-tracker to join
      for (int i = 0; i < 1000 && jobtracker.taskTrackers().size() < 2; i++) {
        UtilsForTests.waitFor(100);
      }
      assertEquals(
          "Waiting tracker did not join the cluster", 2, jobtracker.taskTrackers().size());
      LOG.info("Cluster is now up with 2 trackers");
      // stop the test-tt as it's no longer required
      mr.stopTaskTracker(mr.getTaskTrackerID(testTT.getName()));

      // 1 reduce task should be scheduled
      assertEquals("TestTT contacted but no reduce task scheduled on it", 1, jip.runningReduces());

      // kill the job
      LOG.info("Killing job " + id);
      job.killJob();

      // check if the reduce task attempt status is missing
      TaskInProgress tip = jip.getTasks(TaskType.REDUCE)[0];
      assertNull(tip.getTaskStatus(tip.getAllTaskAttemptIDs()[0]));

      // wait for the job to retire
      waitTillRetire(id, jobtracker);

      // check the taskidToTIPMap; log any leftovers to make a failure below easier to diagnose
      for (TaskAttemptID tid : jobtracker.taskidToTIPMap.keySet()) {
        LOG.info("TaskidToTIP : " + tid);
      }
      assertEquals("'taskid' to TIP mapping still exists", 0, jobtracker.taskidToTIPMap.size());
    } finally {
      if (mr != null) {
        mr.shutdown();
      }
      // cleanup
      FileUtil.fullyDelete(new File(testDir.toString()));
    }
  }