Example No. 1
5
  /** Process incoming heartbeat messages from the task trackers. */
  public synchronized int emitHeartbeat(TaskTrackerStatus trackerStatus, boolean initialContact) {
    String trackerName = trackerStatus.getTrackerName();
    trackerStatus.setLastSeen(System.currentTimeMillis());

    synchronized (taskTrackers) {
      synchronized (trackerExpiryQueue) {
        boolean seenBefore = updateTaskTrackerStatus(trackerName, trackerStatus);
        if (initialContact) {
          // If it's first contact, then clear out any state hanging around
          if (seenBefore) {
            lostTaskTracker(trackerName);
          }
        } else {
          // If not first contact, there should be some record of the tracker
          if (!seenBefore) {
            return InterTrackerProtocol.UNKNOWN_TASKTRACKER;
          }
        }

        if (initialContact) {
          trackerExpiryQueue.add(trackerStatus);
        }
      }
    }

    updateTaskStatuses(trackerStatus);
    // LOG.info("Got heartbeat from "+trackerName);
    return InterTrackerProtocol.TRACKERS_OK;
  }
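
The branching above boils down to a small decision table on (initialContact, seenBefore). Below is a minimal, self-contained sketch of that logic as a pure function; the names HeartbeatSketch and Action are illustrative only and not part of the original API.

class HeartbeatSketch {
  // Illustrative outcome names; the real method returns InterTrackerProtocol codes.
  enum Action { ACCEPT, CLEAR_OLD_STATE_THEN_ACCEPT, REJECT_UNKNOWN }

  static Action onHeartbeat(boolean initialContact, boolean seenBefore) {
    if (initialContact) {
      // First contact: any state left over from a previous incarnation must be cleared.
      return seenBefore ? Action.CLEAR_OLD_STATE_THEN_ACCEPT : Action.ACCEPT;
    }
    // Not first contact: there must already be a record, otherwise the tracker is unknown.
    return seenBefore ? Action.ACCEPT : Action.REJECT_UNKNOWN;
  }
}
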
Example No. 2
0
 public int compare(Object o1, Object o2) {
   TaskTrackerStatus p1 = (TaskTrackerStatus) o1;
   TaskTrackerStatus p2 = (TaskTrackerStatus) o2;
   if (p1.getLastSeen() < p2.getLastSeen()) {
     return -1;
   } else if (p1.getLastSeen() > p2.getLastSeen()) {
     return 1;
   } else {
     return (p1.getTrackerName().compareTo(p2.getTrackerName()));
   }
 }
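
The tie-break on the tracker name matters because the expiry queue in the next example is a sorted set: without it, two different trackers with the same last-seen timestamp would be treated as duplicates. A self-contained sketch of the same ordering with a plain TreeSet (the Status class is a simplified stand-in for TaskTrackerStatus):

import java.util.Comparator;
import java.util.TreeSet;

class ExpiryQueueSketch {
  // Minimal stand-in for TaskTrackerStatus: just a tracker name and a last-seen timestamp.
  static class Status {
    final String name;
    final long lastSeen;
    Status(String name, long lastSeen) { this.name = name; this.lastSeen = lastSeen; }
  }

  public static void main(String[] args) {
    // Order by lastSeen, then by name as a tie-breaker (mirrors the comparator above).
    Comparator<Status> byLastSeen = (a, b) -> {
      int c = Long.compare(a.lastSeen, b.lastSeen);
      return (c != 0) ? c : a.name.compareTo(b.name);
    };
    TreeSet<Status> expiryQueue = new TreeSet<Status>(byLastSeen);

    expiryQueue.add(new Status("tracker_a", 1000L));
    expiryQueue.add(new Status("tracker_b", 500L));

    // first() always yields the least recently seen tracker.
    System.out.println(expiryQueue.first().name);  // prints tracker_b
  }
}
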
Example No. 3
0
    /**
     * The run method lives for the life of the JobTracker, and removes TaskTrackers that have not
     * checked in for some time.
     */
    public void run() {
      while (shouldRun) {
        //
        // Thread runs periodically to check whether trackers should be expired.
        // The sleep interval must be no more than half the maximum expiry time
        // for a task tracker.
        //
        try {
          Thread.sleep(TASKTRACKER_EXPIRY_INTERVAL / 3);
        } catch (InterruptedException ie) {
          // Ignore the interrupt and fall through to the expiry check below.
        }

        //
        // Loop through all expired items in the queue
        //
        synchronized (taskTrackers) {
          synchronized (trackerExpiryQueue) {
            long now = System.currentTimeMillis();
            TaskTrackerStatus leastRecent = null;
            while ((trackerExpiryQueue.size() > 0)
                && ((leastRecent = (TaskTrackerStatus) trackerExpiryQueue.first()) != null)
                && (now - leastRecent.getLastSeen() > TASKTRACKER_EXPIRY_INTERVAL)) {

              // Remove profile from head of queue
              trackerExpiryQueue.remove(leastRecent);
              String trackerName = leastRecent.getTrackerName();

              // Figure out if last-seen time should be updated, or if tracker is dead
              TaskTrackerStatus newProfile =
                  (TaskTrackerStatus) taskTrackers.get(leastRecent.getTrackerName());
              // Items might leave the taskTracker set through other means; the
              // status stored in 'taskTrackers' might be null, which means the
              // tracker has already been destroyed.
              if (newProfile != null) {
                if (now - newProfile.getLastSeen() > TASKTRACKER_EXPIRY_INTERVAL) {
                  // Remove completely
                  updateTaskTrackerStatus(trackerName, null);
                  lostTaskTracker(leastRecent.getTrackerName());
                } else {
                  // Update time by inserting latest profile
                  trackerExpiryQueue.add(newProfile);
                }
              }
            }
          }
        }
      }
    }
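
Two details of the loop above are worth calling out: the check interval (TASKTRACKER_EXPIRY_INTERVAL / 3) stays below the required half of the expiry time, and because a TreeSet does not re-sort itself when a member's lastSeen changes, a stale entry is removed from the head and the latest profile re-inserted so it lands at its correct position. A self-contained sketch of that remove-and-reinsert pattern, with simplified types and an assumed expiry interval:

import java.util.TreeSet;

class ExpirySketch {
  static final long EXPIRY_INTERVAL = 10 * 60 * 1000L;     // assumed value: 10 minutes
  static final long CHECK_INTERVAL = EXPIRY_INTERVAL / 3;  // below the required half of the expiry time

  static class Status {
    String name;
    long lastSeen;
  }

  // The queue is assumed to be a TreeSet ordered as in Example No. 2 (by lastSeen, then name).
  static void expireOrRefreshHead(TreeSet<Status> queue, Status latestProfile, long now) {
    if (queue.isEmpty()) {
      return;
    }
    Status leastRecent = queue.first();
    if (now - leastRecent.lastSeen <= EXPIRY_INTERVAL) {
      return;                                  // the oldest entry is still within the expiry window
    }
    queue.remove(leastRecent);                 // its position in the TreeSet is stale; take it out
    if (latestProfile != null && now - latestProfile.lastSeen <= EXPIRY_INTERVAL) {
      queue.add(latestProfile);                // heartbeats arrived meanwhile: re-insert the fresh copy
    }                                          // otherwise the tracker is treated as lost
  }
}
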
Example No. 4
0
  /**
   * Accept and process a new TaskTracker profile. We might have known about the TaskTracker
   * previously, or it might be brand-new. All task-tracker structures have already been updated.
   * Just process the contained tasks and any jobs that might be affected.
   */
  void updateTaskStatuses(TaskTrackerStatus status) {
    for (Iterator it = status.taskReports(); it.hasNext(); ) {
      TaskStatus report = (TaskStatus) it.next();
      TaskInProgress tip = (TaskInProgress) taskidToTIPMap.get(report.getTaskId());
      if (tip == null) {
        LOG.info(
            "Serious problem.  While updating status, cannot find taskid " + report.getTaskId());
      } else {
        JobInProgress job = tip.getJob();
        job.updateTaskStatus(tip, report);

        if (report.getRunState() == TaskStatus.SUCCEEDED) {
          job.completedTask(tip, report.getTaskId());
        } else if (report.getRunState() == TaskStatus.FAILED) {
          // Tell the job to fail the relevant task
          job.failedTask(tip, report.getTaskId(), status.getTrackerName());
        }
      }
    }
  }
  @Override
  public synchronized List<Task> assignTasks(TaskTracker taskTracker) throws IOException {
    TaskTrackerStatus taskTrackerStatus = taskTracker.getStatus();
    ClusterStatus clusterStatus = taskTrackerManager.getClusterStatus();
    final int numTaskTrackers = clusterStatus.getTaskTrackers();
    final int clusterMapCapacity = clusterStatus.getMaxMapTasks();
    final int clusterReduceCapacity = clusterStatus.getMaxReduceTasks();

    Collection<JobInProgress> jobQueue = jobQueueJobInProgressListener.getJobQueue();

    //
    // Get map + reduce counts for the current tracker.
    //
    final int trackerMapCapacity = taskTrackerStatus.getMaxMapSlots();
    final int trackerReduceCapacity = taskTrackerStatus.getMaxReduceSlots();
    final int trackerRunningMaps = taskTrackerStatus.countMapTasks();
    final int trackerRunningReduces = taskTrackerStatus.countReduceTasks();

    // Assigned tasks
    List<Task> assignedTasks = new ArrayList<Task>();

    //
    // Compute (running + pending) map and reduce task numbers across pool
    //
    int remainingReduceLoad = 0;
    int remainingMapLoad = 0;
    synchronized (jobQueue) {
      for (JobInProgress job : jobQueue) {
        if (job.getStatus().getRunState() == JobStatus.RUNNING) {
          remainingMapLoad += (job.desiredMaps() - job.finishedMaps());
          if (job.scheduleReduces()) {
            remainingReduceLoad += (job.desiredReduces() - job.finishedReduces());
          }
        }
      }
    }

    // Compute the 'load factor' for maps and reduces
    double mapLoadFactor = 0.0;
    if (clusterMapCapacity > 0) {
      mapLoadFactor = (double) remainingMapLoad / clusterMapCapacity;
    }
    double reduceLoadFactor = 0.0;
    if (clusterReduceCapacity > 0) {
      reduceLoadFactor = (double) remainingReduceLoad / clusterReduceCapacity;
    }

    //
    // In the below steps, we allocate first map tasks (if appropriate),
    // and then reduce tasks if appropriate.  We go through all jobs
    // in order of job arrival; jobs only get serviced if their
    // predecessors are serviced, too.
    //

    //
    // We assign tasks to the current taskTracker if the given machine
    // has a workload that's less than the maximum load of that kind of
    // task.
    // However, if the cluster is close to getting loaded i.e. we don't
    // have enough _padding_ for speculative executions etc., we only
    // schedule the "highest priority" task i.e. the task from the job
    // with the highest priority.
    //

    final int trackerCurrentMapCapacity =
        Math.min((int) Math.ceil(mapLoadFactor * trackerMapCapacity), trackerMapCapacity);
    int availableMapSlots = trackerCurrentMapCapacity - trackerRunningMaps;
    boolean exceededMapPadding = false;
    if (availableMapSlots > 0) {
      exceededMapPadding = exceededPadding(true, clusterStatus, trackerMapCapacity);
    }
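    // Worked example (illustrative numbers): with remainingMapLoad = 40 and a cluster map
    // capacity of 100, mapLoadFactor is 0.4; a tracker with 10 map slots is then capped at
    // ceil(0.4 * 10) = 4 concurrent maps, so with 1 map already running it is offered at
    // most 3 new map tasks on this heartbeat.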

    int numLocalMaps = 0;
    int numNonLocalMaps = 0;
    boolean newIterationJob = false;
    scheduleMaps:
    for (int i = 0; i < availableMapSlots; ++i) {
      synchronized (jobQueue) {
        for (JobInProgress job : jobQueue) {
          if (job.getStatus().getRunState() != JobStatus.RUNNING) {
            continue;
          }

          if (job.getJobConf().isIterative()) {

            String iterativeAppID = job.getJobConf().getIterativeAlgorithmID();

            if (iterativeAppID.equals("none")) {
              throw new IOException("please specify the iteration ID!");
            }

            String jointype = job.getJobConf().get("mapred.iterative.jointype");

            // prepare the iterationid map and jobtask map
            if (!this.tracker_mtask_map.containsKey(iterativeAppID)) {
              // a new iterative algorithm
              Map<String, LinkedList<Integer>> new_tracker_task_map =
                  new HashMap<String, LinkedList<Integer>>();
              this.tracker_mtask_map.put(iterativeAppID, new_tracker_task_map);

              Map<String, LinkedList<Integer>> new_tracker_rtask_map =
                  new HashMap<String, LinkedList<Integer>>();
              this.tracker_rtask_map.put(iterativeAppID, new_tracker_rtask_map);

              // record the first job of the series of jobs in the iterations
              this.first_job_map.put(iterativeAppID, job.getJobID());

              // record the list of jobs for an iteration
              HashSet<JobID> jobs = new HashSet<JobID>();
              jobs.add(job.getJobID());
              this.iteration_jobs_map.put(iterativeAppID, jobs);
            }

            // this is the first job of the series of jobs
            if (this.first_job_map.get(iterativeAppID).equals(job.getJobID())
                && job.getJobConf().isIterative()) {
              LOG.info(job.getJobID() + " is the first iteration job");
              newIterationJob = true;
            }

            // this is one of the subsequent jobs; prepare an assignment list for the assignment
            if (!newIterationJob) {
              LOG.info(job.getJobID() + " is not the first iteration job");
              this.iteration_jobs_map.get(iterativeAppID).add(job.getJobID());

              if (this.mtask_assign_map.get(job.getJobID()) == null) {
                // prepare the map task assignment list
                LOG.info("for job " + job.getJobID() + "'s assignment:");
                Map<String, LinkedList<Integer>> map_task_assign =
                    new HashMap<String, LinkedList<Integer>>();
                for (Map.Entry<String, LinkedList<Integer>> entry :
                    this.tracker_mtask_map.get(iterativeAppID).entrySet()) {
                  String tracker = entry.getKey();
                  LinkedList<Integer> taskids = entry.getValue();
                  LinkedList<Integer> copytaskids = new LinkedList<Integer>();
                  LOG.info("assign on tracker " + tracker);
                  for (int taskid : taskids) {
                    copytaskids.add(taskid);
                    LOG.info("task id " + taskid);
                  }
                  map_task_assign.put(tracker, copytaskids);
                }
                this.mtask_assign_map.put(job.getJobID(), map_task_assign);

                // for one2one, copy the map assignment to the reduce assignment; they use the same mapping
                if (jointype.equals("one2one")) {
                  // prepare the reduce task assignment list
                  Map<String, LinkedList<Integer>> reduce_task_assign =
                      new HashMap<String, LinkedList<Integer>>();
                  for (Map.Entry<String, LinkedList<Integer>> entry :
                      this.tracker_mtask_map.get(iterativeAppID).entrySet()) {
                    String tracker = entry.getKey();
                    LinkedList<Integer> taskids = entry.getValue();
                    LinkedList<Integer> copytaskids = new LinkedList<Integer>();
                    for (int taskid : taskids) {
                      copytaskids.add(taskid);
                    }
                    reduce_task_assign.put(tracker, copytaskids);
                  }
                  this.tracker_rtask_map.put(iterativeAppID, reduce_task_assign);
                }

                // prepare the reduce task assignment list for all cases
                Map<String, LinkedList<Integer>> reduce_task_assign =
                    new HashMap<String, LinkedList<Integer>>();
                for (Map.Entry<String, LinkedList<Integer>> entry :
                    this.tracker_rtask_map.get(iterativeAppID).entrySet()) {
                  String tracker = entry.getKey();
                  LinkedList<Integer> taskids = entry.getValue();
                  LinkedList<Integer> copytaskids = new LinkedList<Integer>();
                  for (int taskid : taskids) {
                    copytaskids.add(taskid);
                  }
                  reduce_task_assign.put(tracker, copytaskids);
                }
                this.rtask_assign_map.put(job.getJobID(), reduce_task_assign);
              }
            }

            Task t = null;

            // First iteration or a following iteration?
            // If the first iteration: assign task ids by default (except for the one2mul case,
            // where we assign starting from 0,...,n).
            // For following iterations: assign task ids based on the first iteration's
            // assignment.
            if (newIterationJob) {

              /*
               * The one2mul case needs careful handling: we want to assign map0, map1, map2
               * and reduce0 to one tracker, and map3, map4, map5 and reduce1 to another tracker.
               */
              if (jointype.equals("one2mul")
                  && !tracker_rtask_map
                      .get(iterativeAppID)
                      .containsKey(taskTracker.getTrackerName())) {

                // if the map already contains this tracker, tasks have already been assigned for it

                int scala = job.getJobConf().getInt("mapred.iterative.data.scala", 1);
                // int mapsEachTracker = job.getJobConf().getNumMapTasks() / numTaskTrackers;
                int reducersEachTracker = job.getJobConf().getNumReduceTasks() / numTaskTrackers;
                if (job.getJobConf().getNumReduceTasks() % numTaskTrackers != 0)
                  throw new IOException(
                      "job.getJobConf().getNumReduceTasks() % numTaskTrackers != 0");

                if (!this.tracker_mtask_map
                    .get(iterativeAppID)
                    .containsKey(taskTracker.getTrackerName())) {
                  LinkedList<Integer> tasklist = new LinkedList<Integer>();
                  this.tracker_mtask_map
                      .get(iterativeAppID)
                      .put(taskTracker.getTrackerName(), tasklist);
                }
                if (!this.tracker_rtask_map
                    .get(iterativeAppID)
                    .containsKey(taskTracker.getTrackerName())) {
                  LinkedList<Integer> tasklist = new LinkedList<Integer>();
                  this.tracker_rtask_map
                      .get(iterativeAppID)
                      .put(taskTracker.getTrackerName(), tasklist);
                }

                // for debugging
                String debugout1 = "maps: ";
                String debugout2 = "reduces: ";

                int reduceOffsetId =
                    (tracker_rtask_map.get(iterativeAppID).size() - 1)
                        * reducersEachTracker; // the start reduce id

                for (int count = 0; count < reducersEachTracker; count++) {
                  int reducepartitionid = reduceOffsetId + count;
                  debugout2 += reducepartitionid + " ";
                  tracker_rtask_map
                      .get(iterativeAppID)
                      .get(taskTracker.getTrackerName())
                      .add(reducepartitionid);

                  for (int count2 = 0; count2 < scala; count2++) {
                    int mappartitionid = reducepartitionid * scala + count2;
                    // int mapid = job.splitTaskMap.get(mappartitionid);
                    debugout1 += mappartitionid + " ";
                    this.tracker_mtask_map
                        .get(iterativeAppID)
                        .get(taskTracker.getTrackerName())
                        .add(mappartitionid);
                  }
                }
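                // Worked example (illustrative numbers, assuming trackers are registered here
                // one at a time): with scala = 3 and reducersEachTracker = 2, the third tracker
                // gets reduceOffsetId = (3 - 1) * 2 = 4, i.e. reduce partitions 4 and 5, plus the
                // co-located map partitions 12..14 and 15..17 (reducepartitionid * scala + count2).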

                // print out for debug
                LOG.info(
                    "tracker "
                        + taskTracker.getTrackerName()
                        + " assigned tasks "
                        + debugout1
                        + " and "
                        + debugout2);

                // make the assignment list
                String tracker = taskTracker.getTrackerName();
                LinkedList<Integer> mtaskids =
                    this.tracker_mtask_map.get(iterativeAppID).get(taskTracker.getTrackerName());
                LinkedList<Integer> mcopytaskids = new LinkedList<Integer>();
                for (int taskid : mtaskids) {
                  mcopytaskids.add(taskid);
                }
                if (!mtask_assign_map.containsKey(job.getJobID())) {
                  Map<String, LinkedList<Integer>> map_task_assign =
                      new HashMap<String, LinkedList<Integer>>();
                  this.mtask_assign_map.put(job.getJobID(), map_task_assign);
                }
                this.mtask_assign_map.get(job.getJobID()).put(tracker, mcopytaskids);

                // prepare the reduce task assignment list
                LinkedList<Integer> rtaskids =
                    this.tracker_rtask_map.get(iterativeAppID).get(taskTracker.getTrackerName());
                LinkedList<Integer> rcopytaskids = new LinkedList<Integer>();
                for (int taskid : rtaskids) {
                  rcopytaskids.add(taskid);
                }
                if (!rtask_assign_map.containsKey(job.getJobID())) {
                  Map<String, LinkedList<Integer>> reduce_task_assign =
                      new HashMap<String, LinkedList<Integer>>();
                  this.rtask_assign_map.put(job.getJobID(), reduce_task_assign);
                }
                this.rtask_assign_map.get(job.getJobID()).put(tracker, rcopytaskids);

                // assign a map task for this tracker
                Integer target = null;
                try {
                  target =
                      this.mtask_assign_map
                          .get(job.getJobID())
                          .get(taskTracker.getTrackerName())
                          .peekFirst();
                } catch (Exception e) {
                  e.printStackTrace();
                }

                if (target == null) {
                  // all map tasks assigned to this tracker have been handed out; no more
                  // work here (it could perhaps help other trackers)
                  LOG.info(
                      "all map tasks on tasktracker "
                          + taskTracker.getTrackerName()
                          + " have been processed");
                  break;
                } else {
                  t =
                      job.obtainNewNodeOrRackLocalMapTask(
                          taskTrackerStatus,
                          numTaskTrackers,
                          taskTrackerManager.getNumberOfUniqueHosts(),
                          target);
                }

              } else {
                t =
                    job.obtainNewNodeOrRackLocalMapTask(
                        taskTrackerStatus,
                        numTaskTrackers,
                        taskTrackerManager.getNumberOfUniqueHosts());
              }

            } else {
              Integer target = null;
              try {
                target =
                    this.mtask_assign_map
                        .get(job.getJobID())
                        .get(taskTracker.getTrackerName())
                        .peekFirst();
              } catch (Exception e) {
                e.printStackTrace();
              }

              if (target == null) {
              // all map tasks assigned to this tracker have been handed out; no more
              // work here (it could perhaps help other trackers)
                LOG.info(
                    "all map tasks on tasktracker "
                        + taskTracker.getTrackerName()
                        + " have been processed");
                break;
              } else {
                t =
                    job.obtainNewNodeOrRackLocalMapTask(
                        taskTrackerStatus,
                        numTaskTrackers,
                        taskTrackerManager.getNumberOfUniqueHosts(),
                        target);
              }
            }

            if (t != null) {
              assignedTasks.add(t);
              ++numLocalMaps;

              // For a non-first iteration job, or for the one2mul case, we don't need to record
              // the assignment here, since the assignment list was already built beforehand.
              if (!newIterationJob || jointype.equals("one2mul")) {
                // poll, remove
                this.mtask_assign_map
                    .get(job.getJobID())
                    .get(taskTracker.getTrackerName())
                    .pollFirst();
                LOG.info("assigning task " + t.getTaskID() + " on " + taskTracker.getTrackerName());
              } else {
                // record the assignment list for map tasks
                if (!this.tracker_mtask_map
                    .get(iterativeAppID)
                    .containsKey(taskTracker.getTrackerName())) {
                  LinkedList<Integer> tasklist = new LinkedList<Integer>();
                  this.tracker_mtask_map
                      .get(iterativeAppID)
                      .put(taskTracker.getTrackerName(), tasklist);
                }

                this.tracker_mtask_map
                    .get(iterativeAppID)
                    .get(taskTracker.getTrackerName())
                    .add(t.getTaskID().getTaskID().getId());

                // prepare the reduce assignment so reduces are co-located with their maps
                if (jointype.equals("one2one")) {
                  if (!first_job_reduces_map.containsKey(iterativeAppID)) {
                    Map<String, LinkedList<Integer>> tracker_reduce_map =
                        new HashMap<String, LinkedList<Integer>>();
                    first_job_reduces_map.put(iterativeAppID, tracker_reduce_map);
                  }

                  if (!first_job_reduces_map
                      .get(iterativeAppID)
                      .containsKey(taskTracker.getTrackerName())) {
                    LinkedList<Integer> reduces = new LinkedList<Integer>();
                    first_job_reduces_map
                        .get(iterativeAppID)
                        .put(taskTracker.getTrackerName(), reduces);
                  }

                  first_job_reduces_map
                      .get(iterativeAppID)
                      .get(taskTracker.getTrackerName())
                      .add(t.getTaskID().getTaskID().getId());
                }

                LOG.info("assigning task " + t.getTaskID() + " on " + taskTracker.getTrackerName());
              }

              // Don't assign map tasks to the hilt!
              // Leave some free slots in the cluster for future task-failures,
              // speculative tasks etc. beyond the highest priority job
              if (exceededMapPadding) {
                break scheduleMaps;
              }

              // Try all jobs again for the next Map task
              break;
            }

            LOG.error("New Node Or Rack Local Map Task failed!");

            if (newIterationJob) {
              // Try to schedule a node-local or rack-local Map task
              t =
                  job.obtainNewNonLocalMapTask(
                      taskTrackerStatus,
                      numTaskTrackers,
                      taskTrackerManager.getNumberOfUniqueHosts());
            } else {
              Integer target =
                  this.mtask_assign_map
                      .get(job.getJobID())
                      .get(taskTracker.getTrackerName())
                      .peekFirst();

              if (target == null) {
                // all map tasks assigned to this tracker have been handed out; no more
                // work here (it could perhaps help other trackers)
                LOG.info(
                    "all map tasks on tasktracker "
                        + taskTracker.getTrackerName()
                        + " have been processed");
                break;
              } else {
                t =
                    job.obtainNewNonLocalMapTask(
                        taskTrackerStatus,
                        numTaskTrackers,
                        taskTrackerManager.getNumberOfUniqueHosts(),
                        target);
              }
            }

            if (t != null) {
              assignedTasks.add(t);
              ++numNonLocalMaps;

              // new iteration job and the first task for a tasktracker
              if (newIterationJob) {
                if (!this.tracker_mtask_map
                    .get(iterativeAppID)
                    .containsKey(taskTracker.getTrackerName())) {
                  LinkedList<Integer> tasklist = new LinkedList<Integer>();
                  this.tracker_mtask_map
                      .get(iterativeAppID)
                      .put(taskTracker.getTrackerName(), tasklist);
                }

                this.tracker_mtask_map
                    .get(iterativeAppID)
                    .get(taskTracker.getTrackerName())
                    .add(t.getTaskID().getTaskID().getId());
              } else {
                // poll, remove
                this.mtask_assign_map
                    .get(job.getJobID())
                    .get(taskTracker.getTrackerName())
                    .pollFirst();
              }

              // We assign at most 1 off-switch or speculative task
              // This is to prevent TaskTrackers from stealing local-tasks
              // from other TaskTrackers.
              break scheduleMaps;
            }
          } else {
            // not an iterative algorithm, normal schedule
            Task t = null;

            // Try to schedule a node-local or rack-local Map task
            t =
                job.obtainNewNodeOrRackLocalMapTask(
                    taskTrackerStatus,
                    numTaskTrackers,
                    taskTrackerManager.getNumberOfUniqueHosts());
            if (t != null) {
              assignedTasks.add(t);
              ++numLocalMaps;

              // Don't assign map tasks to the hilt!
              // Leave some free slots in the cluster for future task-failures,
              // speculative tasks etc. beyond the highest priority job
              if (exceededMapPadding) {
                break scheduleMaps;
              }

              // Try all jobs again for the next Map task
              break;
            }

            // Try to schedule a node-local or rack-local Map task
            t =
                job.obtainNewNonLocalMapTask(
                    taskTrackerStatus,
                    numTaskTrackers,
                    taskTrackerManager.getNumberOfUniqueHosts());

            if (t != null) {
              assignedTasks.add(t);
              ++numNonLocalMaps;

              // We assign at most 1 off-switch or speculative task
              // This is to prevent TaskTrackers from stealing local-tasks
              // from other TaskTrackers.
              break scheduleMaps;
            }
          }
        }
      }
    }
    int assignedMaps = assignedTasks.size();

    //
    // Same thing, but for reduce tasks
    // However we _never_ assign more than 1 reduce task per heartbeat
    //
    // Note: the reduce task location should be maintained for the termination check.
    final int trackerCurrentReduceCapacity =
        Math.min((int) Math.ceil(reduceLoadFactor * trackerReduceCapacity), trackerReduceCapacity);
    final int availableReduceSlots =
        Math.min((trackerCurrentReduceCapacity - trackerRunningReduces), 1);
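    // Worked example (illustrative numbers): with reduceLoadFactor = 0.5 and 4 reduce slots,
    // the current reduce capacity is ceil(0.5 * 4) = 2; the Math.min(..., 1) then caps the
    // offer at a single new reduce task per heartbeat, regardless of how many slots are free.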
    boolean exceededReducePadding = false;
    // LOG.info("availableReduceSlots " + availableReduceSlots);
    if (availableReduceSlots > 0) {

      exceededReducePadding = exceededPadding(false, clusterStatus, trackerReduceCapacity);
      synchronized (jobQueue) {
        for (JobInProgress job : jobQueue) {
          LOG.info("job " + job.getJobID());
          if (job.getStatus().getRunState() != JobStatus.RUNNING || job.numReduceTasks == 0) {
            LOG.info("have to continue " + job.getStatus().getRunState());
            continue;
          }

          Task t = null;

          if (job.getJobConf().isIterative()) {
            String iterativeAppID = job.getJobConf().getIterativeAlgorithmID();
            if (iterativeAppID.equals("none")) {
              throw new IOException("please specify the iteration ID!");
            }

            String jointype = job.getJobConf().get("mapred.iterative.jointype");

            if (jointype.equals("one2one")) {
              // one-to-one case

              if (this.first_job_map.get(iterativeAppID).equals(job.getJobID())
                  && job.getJobConf().isIterative()) {
                LOG.info(job.getJobID() + " is the first iteration job for reduce");
                newIterationJob = true;
              }

              Integer target = null;
              if (newIterationJob) {

                if (first_job_reduces_map.get(iterativeAppID) == null) {
                  throw new IOException(
                      "Something is wrong: this tasktracker never received "
                          + "a map task with iterative app id "
                          + iterativeAppID);
                }

                if (first_job_reduces_map.get(iterativeAppID).get(taskTracker.getTrackerName())
                    == null) {
                  throw new IOException(
                      "Something is wrong: this tasktracker never received "
                          + "a map task with iterative app id "
                          + iterativeAppID
                          + " from "
                          + taskTracker.getTrackerName());
                }

                target =
                    this.first_job_reduces_map
                        .get(iterativeAppID)
                        .get(taskTracker.getTrackerName())
                        .pollFirst();
              } else {
                // the task assignment has already been processed during the map task assignment, so
                // never use tracker_rtask_map
                target =
                    this.rtask_assign_map
                        .get(job.getJobID())
                        .get(taskTracker.getTrackerName())
                        .pollFirst();
              }

              if (target == null) {
                // all reduce tasks assigned to this tracker have been handed out; no more
                // work here (it could perhaps help other trackers)
                LOG.info(
                    "all reduce tasks on tasktracker "
                        + taskTracker.getTrackerName()
                        + " have been processed");
                break;
              } else {
                t =
                    job.obtainNewReduceTask(
                        taskTrackerStatus,
                        numTaskTrackers,
                        taskTrackerManager.getNumberOfUniqueHosts(),
                        target);
              }
            } else if (jointype.equals("one2mul")) {
              Integer target =
                  this.rtask_assign_map
                      .get(job.getJobID())
                      .get(taskTracker.getTrackerName())
                      .pollFirst();

              if (target == null) {
                // all reduce tasks assigned to this tracker have been handed out; no more
                // work here (it could perhaps help other trackers)
                LOG.info(
                    "all reduce tasks on tasktracker "
                        + taskTracker.getTrackerName()
                        + " have been processed");
                break;
              } else {
                t =
                    job.obtainNewReduceTask(
                        taskTrackerStatus,
                        numTaskTrackers,
                        taskTrackerManager.getNumberOfUniqueHosts(),
                        target);
              }
            } else {
              // one-to-all case, assign tasks in the first iteration job, and remember this mapping

              // this is the first job of the series of jobs
              if (this.first_job_map.get(iterativeAppID).equals(job.getJobID())) {
                LOG.info(job.getJobID() + " is the first iteration job for reduce");
                newIterationJob = true;
              }
              /*
              // this is one of the subsequent jobs; prepare an assignment list for the assignment
              else {
                LOG.info(job.getJobID() + " is not the first iteration job for reduce");
                if (this.rtask_assign_map.get(job.getJobID()) == null) {
                  // prepare the reduce task assignment list
                  Map<String, LinkedList<Integer>> reduce_task_assign =
                      new HashMap<String, LinkedList<Integer>>();
                  for (Map.Entry<String, LinkedList<Integer>> entry :
                      this.tracker_rtask_map.get(iterativeAppID).entrySet()) {
                    String tracker = entry.getKey();
                    LinkedList<Integer> taskids = entry.getValue();
                    LinkedList<Integer> copytaskids = new LinkedList<Integer>();
                    for (int taskid : taskids) {
                      copytaskids.add(taskid);
                    }
                    reduce_task_assign.put(tracker, copytaskids);
                  }
                  this.rtask_assign_map.put(job.getJobID(), reduce_task_assign);
                }
              }
              */

              // First iteration or a following iteration?
              // If the first iteration: assign task ids by default.
              // For following iterations: assign task ids based on the first iteration's
              // assignment.
              if (newIterationJob) {
                t =
                    job.obtainNewReduceTask(
                        taskTrackerStatus,
                        numTaskTrackers,
                        taskTrackerManager.getNumberOfUniqueHosts());

                if (t != null) {
                  if (!this.tracker_rtask_map
                      .get(iterativeAppID)
                      .containsKey(taskTracker.getTrackerName())) {
                    LinkedList<Integer> tasklist = new LinkedList<Integer>();
                    this.tracker_rtask_map
                        .get(iterativeAppID)
                        .put(taskTracker.getTrackerName(), tasklist);
                  }

                  this.tracker_rtask_map
                      .get(iterativeAppID)
                      .get(taskTracker.getTrackerName())
                      .add(t.getTaskID().getTaskID().getId());
                  LOG.info(
                      "assigning reduce task "
                          + t.getTaskID()
                          + " on "
                          + taskTracker.getTrackerName());
                }
              } else {
                Integer target =
                    this.rtask_assign_map
                        .get(job.getJobID())
                        .get(taskTracker.getTrackerName())
                        .peekFirst();

                if (target == null) {
                  // all reduce tasks assigned to this tracker have been handed out; no more
                  // work here (it could perhaps help other trackers)
                  LOG.info(
                      "all reduce tasks on tasktracker "
                          + taskTracker.getTrackerName()
                          + " have been processed");
                  break;
                } else {
                  t =
                      job.obtainNewReduceTask(
                          taskTrackerStatus,
                          numTaskTrackers,
                          taskTrackerManager.getNumberOfUniqueHosts(),
                          target);
                }

                if (t != null) {
                  // poll, remove
                  this.rtask_assign_map
                      .get(job.getJobID())
                      .get(taskTracker.getTrackerName())
                      .pollFirst();
                  LOG.info(
                      "assigning reduce task "
                          + t.getTaskID()
                          + " on "
                          + taskTracker.getTrackerName());
                }
              }
            }
          } else {
            t =
                job.obtainNewReduceTask(
                    taskTrackerStatus,
                    numTaskTrackers,
                    taskTrackerManager.getNumberOfUniqueHosts());
          }

          LOG.info("try to assign new task " + t);
          if (t != null) {
            assignedTasks.add(t);
            break;
          }

          // Don't assign reduce tasks to the hilt!
          // Leave some free slots in the cluster for future task-failures,
          // speculative tasks etc. beyond the highest priority job
          if (exceededReducePadding) {
            break;
          }
        }
      }
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug(
          "Task assignments for "
              + taskTrackerStatus.getTrackerName()
              + " --> "
              + "["
              + mapLoadFactor
              + ", "
              + trackerMapCapacity
              + ", "
              + trackerCurrentMapCapacity
              + ", "
              + trackerRunningMaps
              + "] -> ["
              + (trackerCurrentMapCapacity - trackerRunningMaps)
              + ", "
              + assignedMaps
              + " ("
              + numLocalMaps
              + ", "
              + numNonLocalMaps
              + ")] ["
              + reduceLoadFactor
              + ", "
              + trackerReduceCapacity
              + ", "
              + trackerCurrentReduceCapacity
              + ","
              + trackerRunningReduces
              + "] -> ["
              + (trackerCurrentReduceCapacity - trackerRunningReduces)
              + ", "
              + (assignedTasks.size() - assignedMaps)
              + "]");
    }

    return assignedTasks;
  }
  /*
   * TODO:
   * For Elf: the main scheduling logic needs to change; scheduling needs
   * to be *datacenter-aware*.
   */
  @Override
  public synchronized List<Task> assignTasks(TaskTracker taskTracker) throws IOException {
    TaskTrackerStatus taskTrackerStatus = taskTracker.getStatus();
    ClusterStatus clusterStatus = taskTrackerManager.getClusterStatus();
    final int numTaskTrackers = clusterStatus.getTaskTrackers();
    final int clusterMapCapacity = clusterStatus.getMaxMapTasks();
    final int clusterReduceCapacity = clusterStatus.getMaxReduceTasks();

    Collection<JobInProgress> jobQueue = jobQueueJobInProgressListener.getJobQueue();

    //
    // Get map + reduce counts for the current tracker.
    //
    final int trackerMapCapacity = taskTrackerStatus.getMaxMapSlots();
    final int trackerReduceCapacity = taskTrackerStatus.getMaxReduceSlots();
    final int trackerRunningMaps = taskTrackerStatus.countMapTasks();
    final int trackerRunningReduces = taskTrackerStatus.countReduceTasks();

    // Assigned tasks
    List<Task> assignedTasks = new ArrayList<Task>();

    //
    // Compute (running + pending) map and reduce task numbers across pool
    //
    int remainingReduceLoad = 0;
    int remainingMapLoad = 0;
    synchronized (jobQueue) {
      for (JobInProgress job : jobQueue) {
        if (job.getStatus().getRunState() == JobStatus.RUNNING) {
          remainingMapLoad += (job.desiredMaps() - job.finishedMaps());
          if (job.scheduleReduces()) {
            remainingReduceLoad += (job.desiredReduces() - job.finishedReduces());
          }
        }
      }
    }

    // Compute the 'load factor' for maps and reduces
    double mapLoadFactor = 0.0;
    if (clusterMapCapacity > 0) {
      mapLoadFactor = (double) remainingMapLoad / clusterMapCapacity;
    }
    double reduceLoadFactor = 0.0;
    if (clusterReduceCapacity > 0) {
      reduceLoadFactor = (double) remainingReduceLoad / clusterReduceCapacity;
    }

    //
    // In the below steps, we allocate first map tasks (if appropriate),
    // and then reduce tasks if appropriate.  We go through all jobs
    // in order of job arrival; jobs only get serviced if their
    // predecessors are serviced, too.
    //

    //
    // We assign tasks to the current taskTracker if the given machine
    // has a workload that's less than the maximum load of that kind of
    // task.
    // However, if the cluster is close to getting loaded i.e. we don't
    // have enough _padding_ for speculative executions etc., we only
    // schedule the "highest priority" task i.e. the task from the job
    // with the highest priority.
    //

    final int trackerCurrentMapCapacity =
        Math.min((int) Math.ceil(mapLoadFactor * trackerMapCapacity), trackerMapCapacity);
    int availableMapSlots = trackerCurrentMapCapacity - trackerRunningMaps;
    boolean exceededMapPadding = false;
    if (availableMapSlots > 0) {
      exceededMapPadding = exceededPadding(true, clusterStatus, trackerMapCapacity);
    }

    int numLocalMaps = 0;
    int numNonLocalMaps = 0;
    scheduleMaps:

    // TODO: for Elf
    // The main scheduling logic: the outer for loop iterates over the available map slots,
    // the inner loop over the jobs in the queue.
    for (int i = 0; i < availableMapSlots; ++i) {
      synchronized (jobQueue) {
        for (JobInProgress job : jobQueue) {
          if (job.getStatus().getRunState() != JobStatus.RUNNING) {
            continue;
          }

          Task t = null;

          // Try to schedule a node-local or rack-local Map task
          t =
              job.obtainNewLocalMapTask(
                  taskTrackerStatus, numTaskTrackers, taskTrackerManager.getNumberOfUniqueHosts());
          if (t != null) {
            assignedTasks.add(t);
            ++numLocalMaps;

            // Don't assign map tasks to the hilt!
            // Leave some free slots in the cluster for future task-failures,
            // speculative tasks etc. beyond the highest priority job
            if (exceededMapPadding) {
              break scheduleMaps;
            }

            // Try all jobs again for the next Map task
            // Note: it's FIFO here: next time in the inner for loop the head-of-queue
            //  will still be chosen
            break;
          }

          // No node-local or rack-local task for this job; try to schedule a non-local
          // (off-switch) Map task instead.
          t =
              job.obtainNewNonLocalMapTask(
                  taskTrackerStatus, numTaskTrackers, taskTrackerManager.getNumberOfUniqueHosts());

          if (t != null) {
            assignedTasks.add(t);
            ++numNonLocalMaps;

            // We assign at most 1 off-switch or speculative task
            // This is to prevent TaskTrackers from stealing local-tasks
            // from other TaskTrackers.
            break scheduleMaps;
          }
        }
      }
    }
    int assignedMaps = assignedTasks.size();

    //
    // Same thing, but for reduce tasks
    // However we _never_ assign more than 1 reduce task per heartbeat
    //
    final int trackerCurrentReduceCapacity =
        Math.min((int) Math.ceil(reduceLoadFactor * trackerReduceCapacity), trackerReduceCapacity);
    final int availableReduceSlots =
        Math.min((trackerCurrentReduceCapacity - trackerRunningReduces), 1);
    boolean exceededReducePadding = false;
    if (availableReduceSlots > 0) {
      exceededReducePadding = exceededPadding(false, clusterStatus, trackerReduceCapacity);
      synchronized (jobQueue) {
        for (JobInProgress job : jobQueue) {
          if (job.getStatus().getRunState() != JobStatus.RUNNING || job.numReduceTasks == 0) {
            continue;
          }

          Task t =
              job.obtainNewReduceTask(
                  taskTrackerStatus, numTaskTrackers, taskTrackerManager.getNumberOfUniqueHosts());
          if (t != null) {
            assignedTasks.add(t);
            break;
          }

          // Don't assign reduce tasks to the hilt!
          // Leave some free slots in the cluster for future task-failures,
          // speculative tasks etc. beyond the highest priority job
          if (exceededReducePadding) {
            break;
          }
        }
      }
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug(
          "Task assignments for "
              + taskTrackerStatus.getTrackerName()
              + " --> "
              + "["
              + mapLoadFactor
              + ", "
              + trackerMapCapacity
              + ", "
              + trackerCurrentMapCapacity
              + ", "
              + trackerRunningMaps
              + "] -> ["
              + (trackerCurrentMapCapacity - trackerRunningMaps)
              + ", "
              + assignedMaps
              + " ("
              + numLocalMaps
              + ", "
              + numNonLocalMaps
              + ")] ["
              + reduceLoadFactor
              + ", "
              + trackerReduceCapacity
              + ", "
              + trackerCurrentReduceCapacity
              + ","
              + trackerRunningReduces
              + "] -> ["
              + (trackerCurrentReduceCapacity - trackerRunningReduces)
              + ", "
              + (assignedTasks.size() - assignedMaps)
              + "]");
    }

    return assignedTasks;
  }
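
Stripped of the iterative-job bookkeeping and the padding check, both assignTasks variants above share the same skeleton: loop over the tracker's free map slots, walk the FIFO job queue, retry all jobs after a local assignment, and stop for the heartbeat after a single off-switch assignment. Below is a self-contained sketch of that skeleton; the Job interface and its methods are simplified stand-ins for the real JobInProgress API.

import java.util.ArrayList;
import java.util.List;

class FifoSchedulerSketch {
  interface Job {
    String obtainLocalTask();     // returns null if no node/rack-local work is available
    String obtainNonLocalTask();  // returns null if no off-switch work is available
  }

  static List<String> assign(List<Job> jobQueue, int availableMapSlots) {
    List<String> assigned = new ArrayList<String>();
    scheduleMaps:
    for (int slot = 0; slot < availableMapSlots; ++slot) {
      for (Job job : jobQueue) {                 // FIFO: earliest-submitted job is tried first
        String local = job.obtainLocalTask();
        if (local != null) {
          assigned.add(local);
          break;                                 // local task found: move on to the next slot
        }
        String nonLocal = job.obtainNonLocalTask();
        if (nonLocal != null) {
          assigned.add(nonLocal);
          break scheduleMaps;                    // at most one off-switch task per heartbeat
        }
      }
    }
    return assigned;
  }
}
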
  /** Check refreshNodes for decommissioning blacklisted nodes. */
  public void testBlacklistedNodeDecommissioning() throws Exception {
    LOG.info("Testing blacklisted node decommissioning");
    MiniMRCluster mr = null;
    JobTracker jt = null;

    try {
      // start mini mr
      JobConf jtConf = new JobConf();
      jtConf.set("mapred.max.tracker.blacklists", "1");
      mr = new MiniMRCluster(0, 0, 2, "file:///", 1, null, null, null, jtConf);
      jt = mr.getJobTrackerRunner().getJobTracker();

      assertEquals("Trackers not up", 2, jt.taskTrackers().size());
      // validate the total tracker count
      assertEquals(
          "Active tracker count mismatch", 2, jt.getClusterStatus(false).getTaskTrackers());
      // validate blacklisted count
      assertEquals(
          "Blacklisted tracker count mismatch",
          0,
          jt.getClusterStatus(false).getBlacklistedTrackers());

      // run a failing job to blacklist the tracker
      JobConf jConf = mr.createJobConf();
      jConf.set("mapred.max.tracker.failures", "1");
      jConf.setJobName("test-job-fail-once");
      jConf.setMapperClass(FailOnceMapper.class);
      jConf.setReducerClass(IdentityReducer.class);
      jConf.setNumMapTasks(1);
      jConf.setNumReduceTasks(0);

      RunningJob job =
          UtilsForTests.runJob(jConf, new Path(TEST_DIR, "in"), new Path(TEST_DIR, "out"));
      job.waitForCompletion();

      // validate the total tracker count
      assertEquals(
          "Active tracker count mismatch", 1, jt.getClusterStatus(false).getTaskTrackers());
      // validate blacklisted count
      assertEquals(
          "Blacklisted tracker count mismatch",
          1,
          jt.getClusterStatus(false).getBlacklistedTrackers());

      // find the blacklisted tracker
      String trackerName = null;
      for (TaskTrackerStatus status : jt.taskTrackers()) {
        if (jt.isBlacklisted(status.getTrackerName())) {
          trackerName = status.getTrackerName();
          break;
        }
      }
      // get the hostname
      String hostToDecommission = JobInProgress.convertTrackerNameToHostName(trackerName);
      LOG.info("Decommissioning tracker " + hostToDecommission);

      // decommission the node
      HashSet<String> decom = new HashSet<String>(1);
      decom.add(hostToDecommission);
      jt.decommissionNodes(decom);

      // validate
      // check the cluster status and tracker size
      assertEquals(
          "Tracker is not lost upon host decommissioning",
          1,
          jt.getClusterStatus(false).getTaskTrackers());
      assertEquals(
          "Blacklisted tracker count incorrect in cluster status " + "after decommissioning",
          0,
          jt.getClusterStatus(false).getBlacklistedTrackers());
      assertEquals("Tracker is not lost upon host decommissioning", 1, jt.taskTrackers().size());
    } finally {
      if (mr != null) {
        mr.shutdown();
        mr = null;
        jt = null;
        FileUtil.fullyDelete(new File(TEST_DIR.toString()));
      }
    }
  }