Example #1
1
  /**
   * We lost the task tracker! All task-tracker structures have already been updated. Just process
   * the contained tasks and any jobs that might be affected.
   */
  void lostTaskTracker(String trackerName) {
    LOG.info("Lost tracker '" + trackerName + "'");
    TreeSet lostTasks = (TreeSet) trackerToTaskMap.get(trackerName);
    trackerToTaskMap.remove(trackerName);

    if (lostTasks != null) {
      for (Iterator it = lostTasks.iterator(); it.hasNext(); ) {
        String taskId = (String) it.next();
        TaskInProgress tip = (TaskInProgress) taskidToTIPMap.get(taskId);

        // Tell the job to fail the relevant task
        JobInProgress job = tip.getJob();
        job.failedTask(tip, taskId, trackerName);
      }
    }
  }
Example #2
0
 @Override
 public void updateDemand() {
   demand = 0;
   if (isRunnable()) {
     // For reduces, make sure enough maps are done that reduces can launch
     if (taskType == TaskType.REDUCE && !job.scheduleReduces()) return;
     // Add up demand from each TaskInProgress; each TIP can either
     // - have no attempts running, in which case it demands 1 slot
     // - have N attempts running, in which case it demands N slots, and may
     //   potentially demand one more slot if it needs to be speculated
     TaskInProgress[] tips =
         (taskType == TaskType.MAP ? job.getTasks(TaskType.MAP) : job.getTasks(TaskType.REDUCE));
     boolean speculationEnabled =
         (taskType == TaskType.MAP ? job.hasSpeculativeMaps() : job.hasSpeculativeReduces());
     long time = scheduler.getClock().getTime();
     for (TaskInProgress tip : tips) {
       if (!tip.isComplete()) {
         if (tip.isRunning()) {
           // Count active tasks and any speculative task we want to launch
           demand += tip.getActiveTasks().size();
           if (speculationEnabled && tip.hasSpeculativeTask(time, job.getStatus().mapProgress()))
             demand += 1;
         } else {
           // Need to launch 1 task
           demand += 1;
         }
       }
     }
   }
 }
Example #3
0
  // create a new Task for the given tip and make it running
  private TaskAttemptID createAndAddAttempt(TaskInProgress tip, int attemptId) {
    // create a fake attempt for this fake task
    TaskAttemptID taskid = new TaskAttemptID(tip.getTIPId(), attemptId);

    // insert this fake task into the jobtracker by making it running
    tip.addRunningTask(taskid, "test-tt");

    return taskid;
  }
Example #4
0
 /**
  * A tracker wants to know if any of its Tasks have been closed (because the job completed,
  * whether successfully or not)
  */
 public synchronized String pollForTaskWithClosedJob(String taskTracker) {
   TreeSet taskIds = (TreeSet) trackerToTaskMap.get(taskTracker);
   if (taskIds != null) {
     for (Iterator it = taskIds.iterator(); it.hasNext(); ) {
       String taskId = (String) it.next();
       TaskInProgress tip = (TaskInProgress) taskidToTIPMap.get(taskId);
       if (tip.shouldCloseForClosedJob(taskId)) {
         //
         // This is how the JobTracker ends a task at the TaskTracker.
         // It may be successfully completed, or may be killed in
         // mid-execution.
         //
         return taskId;
       }
     }
   }
   return null;
 }
Example #5
0
 public synchronized TaskReport[] getReduceTaskReports(String jobid) {
   JobInProgress job = (JobInProgress) jobs.get(jobid);
   if (job == null) {
     return new TaskReport[0];
   } else {
     Vector reports = new Vector();
     Vector completeReduceTasks = job.reportTasksInProgress(false, true);
     for (Iterator it = completeReduceTasks.iterator(); it.hasNext(); ) {
       TaskInProgress tip = (TaskInProgress) it.next();
       reports.add(tip.generateSingleReport());
     }
     Vector incompleteReduceTasks = job.reportTasksInProgress(false, false);
     for (Iterator it = incompleteReduceTasks.iterator(); it.hasNext(); ) {
       TaskInProgress tip = (TaskInProgress) it.next();
       reports.add(tip.generateSingleReport());
     }
     return (TaskReport[]) reports.toArray(new TaskReport[reports.size()]);
   }
 }
Example #6
0
  /**
   * Accept and process a new TaskTracker profile. We might have known about the TaskTracker
   * previously, or it might be brand-new. All task-tracker structures have already been updated.
   * Just process the contained tasks and any jobs that might be affected.
   */
  void updateTaskStatuses(TaskTrackerStatus status) {
    for (Iterator it = status.taskReports(); it.hasNext(); ) {
      TaskStatus report = (TaskStatus) it.next();
      TaskInProgress tip = (TaskInProgress) taskidToTIPMap.get(report.getTaskId());
      if (tip == null) {
        LOG.info(
            "Serious problem.  While updating status, cannot find taskid " + report.getTaskId());
      } else {
        JobInProgress job = tip.getJob();
        job.updateTaskStatus(tip, report);

        if (report.getRunState() == TaskStatus.SUCCEEDED) {
          job.completedTask(tip, report.getTaskId());
        } else if (report.getRunState() == TaskStatus.FAILED) {
          // Tell the job to fail the relevant task
          job.failedTask(tip, report.getTaskId(), status.getTrackerName());
        }
      }
    }
  }
Example #7
0
  // Mock a job run such that the jobtracker is in a state similar to that
  // resulting from an actual job run.
  // Steps :
  //   - generate a new job-id
  //   - create and add a JobInProgress object using the fake job-id
  //   - create and add a fake tip of the passed type 't' under the fake job
  //     Note that t can be a MAP or a REDUCE or a JOB_SETUP or a JOB_CLEANUP.
  //   - create and add a fake attempt under the fake tip
  //   - remove the job from the jobtracker
  //   - check if the fake attempt is removed from the jobtracker
  private void testRemoveJobTasks(JobTracker jobtracker, JobConf conf, TaskType type) {
    // create and submit a job
    JobInProgress jip = createAndAddJob(jobtracker, conf);
    // create and add a tip
    TaskInProgress tip = createAndAddTIP(jobtracker, jip, type);
    // create and add an attempt
    TaskAttemptID taskid = createAndAddAttempt(tip, 0);

    // this fake attempt should not have any status
    assertNull(tip.getTaskStatus(taskid));

    // remove the job tasks for this fake job from the jobtracker
    jobtracker.removeJobTasks(jip);

    // check the taskidToTIPMap
    for (TaskAttemptID tid : jobtracker.taskidToTIPMap.keySet()) {
      LOG.info("TaskidToTIP : " + tid);
    }

    // check if the fake attempt is removed from the jobtracker
    assertEquals("'taskid' to TIP mapping still exists", 0, jobtracker.taskidToTIPMap.size());
  }
Example #8
0
  /**
   * A TaskTracker wants to know the physical locations of completed, but not yet closed, tasks.
   * This exists so the reduce task thread can locate map task outputs.
   */
  public synchronized MapOutputLocation[] locateMapOutputs(
      String taskId, String[][] mapTasksNeeded) {
    ArrayList v = new ArrayList();
    for (int i = 0; i < mapTasksNeeded.length; i++) {
      for (int j = 0; j < mapTasksNeeded[i].length; j++) {
        TaskInProgress tip = (TaskInProgress) taskidToTIPMap.get(mapTasksNeeded[i][j]);
        if (tip != null && tip.isComplete(mapTasksNeeded[i][j])) {
          String trackerId = (String) taskidToTrackerMap.get(mapTasksNeeded[i][j]);
          TaskTrackerStatus tracker;
          synchronized (taskTrackers) {
            tracker = (TaskTrackerStatus) taskTrackers.get(trackerId);
          }
          v.add(new MapOutputLocation(mapTasksNeeded[i][j], tracker.getHost(), tracker.getPort()));
          break;
        }
      }
    }
    // randomly shuffle results to load-balance map output requests
    Collections.shuffle(v);

    return (MapOutputLocation[]) v.toArray(new MapOutputLocation[v.size()]);
  }
 private void updateTaskCounts() {
   for (Map.Entry<JobInProgress, JobInfo> entry : infos.entrySet()) {
     JobInProgress job = entry.getKey();
     JobInfo info = entry.getValue();
     if (job.getStatus().getRunState() != JobStatus.RUNNING)
       continue; // Job is still in PREP state and tasks aren't initialized
     // Count maps
     int totalMaps = job.numMapTasks;
     int finishedMaps = 0;
     int runningMaps = 0;
     for (TaskInProgress tip : job.getMapTasks()) {
       if (tip.isComplete()) {
         finishedMaps += 1;
       } else if (tip.isRunning()) {
         runningMaps += tip.getActiveTasks().size();
       }
     }
     info.runningMaps = runningMaps;
     info.neededMaps =
         (totalMaps - runningMaps - finishedMaps + taskSelector.neededSpeculativeMaps(job));
     // Count reduces
     int totalReduces = job.numReduceTasks;
     int finishedReduces = 0;
     int runningReduces = 0;
     for (TaskInProgress tip : job.getReduceTasks()) {
       if (tip.isComplete()) {
         finishedReduces += 1;
       } else if (tip.isRunning()) {
         runningReduces += tip.getActiveTasks().size();
       }
     }
     info.runningReduces = runningReduces;
     info.neededReduces =
         (totalReduces
             - runningReduces
             - finishedReduces
             + taskSelector.neededSpeculativeReduces(job));
     // If the job was marked as not runnable due to its user or pool having
     // too many active jobs, set the neededMaps/neededReduces to 0. We still
     // count runningMaps/runningReduces however so we can give it a deficit.
     if (!info.runnable) {
       info.neededMaps = 0;
       info.neededReduces = 0;
     }
   }
 }
Example #10
0
  ///////////////////////////////////////////////////////
  // Maintain lookup tables; called by JobInProgress
  // and TaskInProgress
  ///////////////////////////////////////////////////////
  void createTaskEntry(String taskid, String taskTracker, TaskInProgress tip) {
    LOG.info(
        "Adding task '"
            + taskid
            + "' to tip "
            + tip.getTIPId()
            + ", for tracker '"
            + taskTracker
            + "'");

    // taskid --> tracker
    taskidToTrackerMap.put(taskid, taskTracker);

    // tracker --> taskid
    TreeSet taskset = (TreeSet) trackerToTaskMap.get(taskTracker);
    if (taskset == null) {
      taskset = new TreeSet();
      trackerToTaskMap.put(taskTracker, taskset);
    }
    taskset.add(taskid);

    // taskid --> TIP
    taskidToTIPMap.put(taskid, tip);
  }
Example #11
0
  /**
   * Test job retire with tasks that report their *first* status only after the job retires. Steps :
   * - Start a mini-mr cluster with 1 task-tracker having only map slots. Note that this
   * task-tracker will take care of setup/cleanup and the map tasks. - Submit a job with 1 map task
   * and 1 reduce task - Wait for the job to finish the map task - Start a 2nd tracker that waits
   * for a long time after contacting the JT. - Wait for the 2nd tracker to get stuck - Kill the job
   * - Wait for the job to retire - Check if the tip mappings are cleaned up.
   */
  public void testJobRetireWithUnreportedTasks() throws Exception {
    MiniMRCluster mr = null;
    try {
      JobConf conf = new JobConf();
      // set the num-map-slots to 1 so that no reduce tasks but setup/cleanup
      // can run on it
      conf.setInt("mapred.tasktracker.map.tasks.maximum", 1);
      conf.setInt("mapred.tasktracker.reduce.tasks.maximum", 0);

      mr = startCluster(conf, 1);
      JobTracker jobtracker = mr.getJobTrackerRunner().getJobTracker();

      RunningJob job =
          UtilsForTests.runJob(
              mr.createJobConf(), new Path(testDir, "in-1"), new Path(testDir, "out-1"), 1, 1);
      JobID id = JobID.downgrade(job.getID());
      JobInProgress jip = jobtracker.getJob(id);

      // wait 100 secs for the job to complete its map task
      for (int i = 0; i < 1000 && jip.finishedMaps() < 1; i++) {
        UtilsForTests.waitFor(100);
      }
      assertEquals(jip.finishedMaps(), 1);

      // start a tracker that will wait
      LOG.info("Adding a waiting tracker");
      TaskTrackerRunner testTrackerRunner =
          mr.new TaskTrackerRunner(1, 1, null, mr.createJobConf()) {
            @Override
            TaskTracker createTaskTracker(JobConf conf) throws InterruptedException, IOException {
              return new WaitingTaskTracker(conf);
            }
          };
      mr.addTaskTracker(testTrackerRunner);
      LOG.info("Waiting tracker added");

      WaitingTaskTracker testTT = (WaitingTaskTracker) testTrackerRunner.getTaskTracker();

      // wait 100 secs for the newly started task-tracker to join
      for (int i = 0; i < 1000 && jobtracker.taskTrackers().size() < 2; i++) {
        UtilsForTests.waitFor(100);
      }
      assertEquals(jobtracker.taskTrackers().size(), 2);
      LOG.info("Cluster is now up with 2 trackers");
      // stop the test-tt as its no longer required
      mr.stopTaskTracker(mr.getTaskTrackerID(testTT.getName()));

      // 1 reduce task should be scheduled
      assertEquals("TestTT contacted but no reduce task scheduled on it", 1, jip.runningReduces());

      // kill the job
      LOG.info("Killing job " + id);
      job.killJob();

      // check if the reduce task attempt status is missing
      TaskInProgress tip = jip.getTasks(TaskType.REDUCE)[0];
      assertNull(tip.getTaskStatus(tip.getAllTaskAttemptIDs()[0]));

      // wait for the job to retire
      waitTillRetire(id, jobtracker);

      // check the taskidToTIPMap
      for (TaskAttemptID tid : jobtracker.taskidToTIPMap.keySet()) {
        LOG.info("TaskidToTIP : " + tid);
      }
      assertEquals("'taskid' to TIP mapping still exists", 0, jobtracker.taskidToTIPMap.size());
    } finally {
      if (mr != null) {
        mr.shutdown();
      }
      // cleanup
      FileUtil.fullyDelete(new File(testDir.toString()));
    }
  }
  private void printFailedAttempts(
      JspWriter out,
      JobTracker tracker,
      JobID jobId,
      TaskInProgress tip,
      TaskStatus.State failState)
      throws IOException {
    TaskStatus[] statuses = tip.getTaskStatuses();
    TaskID tipId = tip.getTIPId();
    for (int i = 0; i < statuses.length; ++i) {
      TaskStatus.State taskState = statuses[i].getRunState();
      if ((failState == null
              && (taskState == TaskStatus.State.FAILED || taskState == TaskStatus.State.KILLED))
          || taskState == failState) {
        String taskTrackerName = statuses[i].getTaskTracker();
        TaskTrackerStatus taskTracker = tracker.getTaskTrackerStatus(taskTrackerName);
        out.print(
            "<tr><td>"
                + statuses[i].getTaskID()
                + "</td><td><a href=\"taskdetails.jsp?jobid="
                + jobId
                + "&tipid="
                + tipId
                + "\">"
                + tipId
                + "</a></td>");
        if (taskTracker == null) {
          out.print("<td>" + taskTrackerName + "</td>");
        } else {
          out.print(
              "<td><a href=\"http://"
                  + taskTracker.getHost()
                  + ":"
                  + taskTracker.getHttpPort()
                  + "\">"
                  + taskTracker.getHost()
                  + "</a></td>");
        }
        out.print("<td>" + taskState + "</td>");
        out.print("<td><pre>");
        String[] failures = tracker.getTaskDiagnostics(statuses[i].getTaskID());
        if (failures == null) {
          out.print("&nbsp;");
        } else {
          for (int j = 0; j < failures.length; j++) {
            out.print(failures[j]);
            if (j < (failures.length - 1)) {
              out.print("\n-------\n");
            }
          }
        }
        out.print("</pre></td>");

        out.print("<td>");
        String taskLogUrl = null;
        if (taskTracker != null) {
          taskLogUrl =
              TaskLogServlet.getTaskLogUrl(
                  taskTracker.getHost(),
                  String.valueOf(taskTracker.getHttpPort()),
                  statuses[i].getTaskID().toString());
        }
        if (taskLogUrl != null) {
          String tailFourKBUrl = taskLogUrl + "&start=-4097";
          String tailEightKBUrl = taskLogUrl + "&start=-8193";
          String entireLogUrl = taskLogUrl;
          out.print("<a href=\"" + tailFourKBUrl + "\">Last 4KB</a><br/>");
          out.print("<a href=\"" + tailEightKBUrl + "\">Last 8KB</a><br/>");
          out.print("<a href=\"" + entireLogUrl + "\">All</a><br/>");
        } else {
          out.print("n/a"); // task tracker was lost
        }
        out.print("</td>");

        out.print("</tr>\n");
      }
    }
  }