Java ClusterStatus.getMaxMapTasks Examples

Programming Language: Java

Namespace/Package Name: org.apache.hadoop.mapred

Class/Type: ClusterStatus

Method/Function: getMaxMapTasks

Examples at hotexamples.com: 5

Java ClusterStatus.getMaxMapTasks - 5 examples found. These are the top rated real world Java examples of org.apache.hadoop.mapred.ClusterStatus.getMaxMapTasks extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

getTaskTrackers(11)

getMaxReduceTasks(6)

getMaxMapTasks(5)

getJobTrackerState(3)

getJobTrackerStatus(2)

getMapTasks(2)

getReduceTasks(2)

getActiveTrackerNames(1)

Example #1

Show file

File: HiveServer.java Project: hirohanin/hive0.4.1hadoop21

    /** Return the status information about the Map-Reduce cluster */
    public HiveClusterStatus getClusterStatus() throws HiveServerException, TException {
      HiveClusterStatus hcs;
      try {
        ClusterStatus cs = driver.getClusterStatus();
        JobTracker.State jbs = cs.getJobTrackerState();

        // Convert the ClusterStatus to its Thrift equivalent: HiveClusterStatus
        int state;
        switch (jbs) {
          case INITIALIZING:
            state = JobTrackerState.INITIALIZING;
            break;
          case RUNNING:
            state = JobTrackerState.RUNNING;
            break;
          default:
            String errorMsg = "Unrecognized JobTracker state: " + jbs.toString();
            throw new Exception(errorMsg);
        }

        hcs =
            new HiveClusterStatus(
                cs.getTaskTrackers(),
                cs.getMapTasks(),
                cs.getReduceTasks(),
                cs.getMaxMapTasks(),
                cs.getMaxReduceTasks(),
                state);
      } catch (Exception e) {
        LOG.error(e.toString());
        e.printStackTrace();
        throw new HiveServerException("Unable to get cluster status: " + e.toString());
      }
      return hcs;
    }

Example #2

Show file

File: JoinableDataTaskScheduler.java Project: IMCG/incr-iter-hadoop

  private boolean exceededPadding(
      boolean isMapTask, ClusterStatus clusterStatus, int maxTaskTrackerSlots) {
    int numTaskTrackers = clusterStatus.getTaskTrackers();
    int totalTasks = (isMapTask) ? clusterStatus.getMapTasks() : clusterStatus.getReduceTasks();
    int totalTaskCapacity =
        isMapTask ? clusterStatus.getMaxMapTasks() : clusterStatus.getMaxReduceTasks();

    Collection<JobInProgress> jobQueue = jobQueueJobInProgressListener.getJobQueue();

    boolean exceededPadding = false;
    synchronized (jobQueue) {
      int totalNeededTasks = 0;
      for (JobInProgress job : jobQueue) {
        if (job.getStatus().getRunState() != JobStatus.RUNNING || job.numReduceTasks == 0) {
          continue;
        }

        //
        // Beyond the highest-priority task, reserve a little
        // room for failures and speculative executions; don't
        // schedule tasks to the hilt.
        //
        totalNeededTasks += isMapTask ? job.desiredMaps() : job.desiredReduces();
        int padding = 0;
        if (numTaskTrackers > MIN_CLUSTER_SIZE_FOR_PADDING) {
          padding = Math.min(maxTaskTrackerSlots, (int) (totalNeededTasks * padFraction));
        }
        if (totalTasks + padding >= totalTaskCapacity) {
          exceededPadding = true;
          break;
        }
      }
    }

    return exceededPadding;
  }

Example #3

Show file

File: JoinableDataTaskScheduler.java Project: IMCG/incr-iter-hadoop

  @Override
  public synchronized List<Task> assignTasks(TaskTracker taskTracker) throws IOException {
    TaskTrackerStatus taskTrackerStatus = taskTracker.getStatus();
    ClusterStatus clusterStatus = taskTrackerManager.getClusterStatus();
    final int numTaskTrackers = clusterStatus.getTaskTrackers();
    final int clusterMapCapacity = clusterStatus.getMaxMapTasks();
    final int clusterReduceCapacity = clusterStatus.getMaxReduceTasks();

    Collection<JobInProgress> jobQueue = jobQueueJobInProgressListener.getJobQueue();

    //
    // Get map + reduce counts for the current tracker.
    //
    final int trackerMapCapacity = taskTrackerStatus.getMaxMapSlots();
    final int trackerReduceCapacity = taskTrackerStatus.getMaxReduceSlots();
    final int trackerRunningMaps = taskTrackerStatus.countMapTasks();
    final int trackerRunningReduces = taskTrackerStatus.countReduceTasks();

    // Assigned tasks
    List<Task> assignedTasks = new ArrayList<Task>();

    //
    // Compute (running + pending) map and reduce task numbers across pool
    //
    int remainingReduceLoad = 0;
    int remainingMapLoad = 0;
    synchronized (jobQueue) {
      for (JobInProgress job : jobQueue) {
        if (job.getStatus().getRunState() == JobStatus.RUNNING) {
          remainingMapLoad += (job.desiredMaps() - job.finishedMaps());
          if (job.scheduleReduces()) {
            remainingReduceLoad += (job.desiredReduces() - job.finishedReduces());
          }
        }
      }
    }

    // Compute the 'load factor' for maps and reduces
    double mapLoadFactor = 0.0;
    if (clusterMapCapacity > 0) {
      mapLoadFactor = (double) remainingMapLoad / clusterMapCapacity;
    }
    double reduceLoadFactor = 0.0;
    if (clusterReduceCapacity > 0) {
      reduceLoadFactor = (double) remainingReduceLoad / clusterReduceCapacity;
    }

    //
    // In the below steps, we allocate first map tasks (if appropriate),
    // and then reduce tasks if appropriate.  We go through all jobs
    // in order of job arrival; jobs only get serviced if their
    // predecessors are serviced, too.
    //

    //
    // We assign tasks to the current taskTracker if the given machine
    // has a workload that's less than the maximum load of that kind of
    // task.
    // However, if the cluster is close to getting loaded i.e. we don't
    // have enough _padding_ for speculative executions etc., we only
    // schedule the "highest priority" task i.e. the task from the job
    // with the highest priority.
    //

    final int trackerCurrentMapCapacity =
        Math.min((int) Math.ceil(mapLoadFactor * trackerMapCapacity), trackerMapCapacity);
    int availableMapSlots = trackerCurrentMapCapacity - trackerRunningMaps;
    boolean exceededMapPadding = false;
    if (availableMapSlots > 0) {
      exceededMapPadding = exceededPadding(true, clusterStatus, trackerMapCapacity);
    }

    int numLocalMaps = 0;
    int numNonLocalMaps = 0;
    boolean newIterationJob = false;
    scheduleMaps:
    for (int i = 0; i < availableMapSlots; ++i) {
      synchronized (jobQueue) {
        for (JobInProgress job : jobQueue) {
          if (job.getStatus().getRunState() != JobStatus.RUNNING) {
            continue;
          }

          if (job.getJobConf().isIterative()) {

            String iterativeAppID = job.getJobConf().getIterativeAlgorithmID();

            if (iterativeAppID.equals("none")) {
              throw new IOException("please specify the iteration ID!");
            }

            String jointype = job.getJobConf().get("mapred.iterative.jointype");

            // prepare the iterationid map and jobtask map
            if (!this.tracker_mtask_map.containsKey(iterativeAppID)) {
              // a new iterative algorithm
              Map<String, LinkedList<Integer>> new_tracker_task_map =
                  new HashMap<String, LinkedList<Integer>>();
              this.tracker_mtask_map.put(iterativeAppID, new_tracker_task_map);

              Map<String, LinkedList<Integer>> new_tracker_rtask_map =
                  new HashMap<String, LinkedList<Integer>>();
              this.tracker_rtask_map.put(iterativeAppID, new_tracker_rtask_map);

              // record the first job of the series of jobs in the iterations
              this.first_job_map.put(iterativeAppID, job.getJobID());

              // record the list of jobs for a iteration
              HashSet<JobID> jobs = new HashSet<JobID>();
              jobs.add(job.getJobID());
              this.iteration_jobs_map.put(iterativeAppID, jobs);
            }

            // this is the first job of the series of jobs
            if (this.first_job_map.get(iterativeAppID).equals(job.getJobID())
                && job.getJobConf().isIterative()) {
              LOG.info(job.getJobID() + " is the first iteration job");
              newIterationJob = true;
            }

            // this is one of the following jobs, and prepare a assignment list for the assignment
            if (!newIterationJob) {
              LOG.info(job.getJobID() + " is not the first iteration job");
              this.iteration_jobs_map.get(iterativeAppID).add(job.getJobID());

              if (this.mtask_assign_map.get(job.getJobID()) == null) {
                // prepare the map task assignment list
                LOG.info("for job " + job.getJobID() + "'s assignment:");
                Map<String, LinkedList<Integer>> map_task_assign =
                    new HashMap<String, LinkedList<Integer>>();
                for (Map.Entry<String, LinkedList<Integer>> entry :
                    this.tracker_mtask_map.get(iterativeAppID).entrySet()) {
                  String tracker = entry.getKey();
                  LinkedList<Integer> taskids = entry.getValue();
                  LinkedList<Integer> copytaskids = new LinkedList<Integer>();
                  LOG.info("assign on tracker " + tracker);
                  for (int taskid : taskids) {
                    copytaskids.add(taskid);
                    LOG.info("task id " + taskid);
                  }
                  map_task_assign.put(tracker, copytaskids);
                }
                this.mtask_assign_map.put(job.getJobID(), map_task_assign);

                // if one2one copy the map assign to reduce assign, the are with the same mapping
                if (jointype.equals("one2one")) {
                  // prepare the reduce task assignment list
                  Map<String, LinkedList<Integer>> reduce_task_assign =
                      new HashMap<String, LinkedList<Integer>>();
                  for (Map.Entry<String, LinkedList<Integer>> entry :
                      this.tracker_mtask_map.get(iterativeAppID).entrySet()) {
                    String tracker = entry.getKey();
                    LinkedList<Integer> taskids = entry.getValue();
                    LinkedList<Integer> copytaskids = new LinkedList<Integer>();
                    for (int taskid : taskids) {
                      copytaskids.add(taskid);
                    }
                    reduce_task_assign.put(tracker, copytaskids);
                  }
                  this.tracker_rtask_map.put(iterativeAppID, reduce_task_assign);
                }

                // prepare the reduce task assignment list for all cases
                Map<String, LinkedList<Integer>> reduce_task_assign =
                    new HashMap<String, LinkedList<Integer>>();
                for (Map.Entry<String, LinkedList<Integer>> entry :
                    this.tracker_rtask_map.get(iterativeAppID).entrySet()) {
                  String tracker = entry.getKey();
                  LinkedList<Integer> taskids = entry.getValue();
                  LinkedList<Integer> copytaskids = new LinkedList<Integer>();
                  for (int taskid : taskids) {
                    copytaskids.add(taskid);
                  }
                  reduce_task_assign.put(tracker, copytaskids);
                }
                this.rtask_assign_map.put(job.getJobID(), reduce_task_assign);
              }
            }

            Task t = null;

            // the first iteration or following iteration
            // if the first iteration: assign taskid by default (exception for the one2mul case,
            // where we assign staring from 0,...,n)
            // else if the following iterations: assign taskid based on the first iteration
            // assignment
            if (newIterationJob) {

              /**
               * the one2mul case should be carefully taken care, we want to assgin map0,map1,map2
               * and reduce0 to a tracker, and assign map3,map4,map5 and reduce1 to another tracker
               */
              if (jointype.equals("one2mul")
                  && !tracker_rtask_map
                      .get(iterativeAppID)
                      .containsKey(taskTracker.getTrackerName())) {

                // if contain the tracker, that means we have assigned tasks for this tracker

                int scala = job.getJobConf().getInt("mapred.iterative.data.scala", 1);
                // int mapsEachTracker = job.getJobConf().getNumMapTasks() / numTaskTrackers;
                int reducersEachTracker = job.getJobConf().getNumReduceTasks() / numTaskTrackers;
                if (job.getJobConf().getNumReduceTasks() % numTaskTrackers != 0)
                  throw new IOException(
                      "job.getJobConf().getNumReduceTasks() % numTaskTrackers != 0");

                if (!this.tracker_mtask_map
                    .get(iterativeAppID)
                    .containsKey(taskTracker.getTrackerName())) {
                  LinkedList<Integer> tasklist = new LinkedList<Integer>();
                  this.tracker_mtask_map
                      .get(iterativeAppID)
                      .put(taskTracker.getTrackerName(), tasklist);
                }
                if (!this.tracker_rtask_map
                    .get(iterativeAppID)
                    .containsKey(taskTracker.getTrackerName())) {
                  LinkedList<Integer> tasklist = new LinkedList<Integer>();
                  this.tracker_rtask_map
                      .get(iterativeAppID)
                      .put(taskTracker.getTrackerName(), tasklist);
                }

                // for debugging
                String debugout1 = "maps: ";
                String debugout2 = "reduces: ";

                int reduceOffsetId =
                    (tracker_rtask_map.get(iterativeAppID).size() - 1)
                        * reducersEachTracker; // the start reduce id

                for (int count = 0; count < reducersEachTracker; count++) {
                  int reducepartitionid = reduceOffsetId + count;
                  debugout2 += reducepartitionid + " ";
                  tracker_rtask_map
                      .get(iterativeAppID)
                      .get(taskTracker.getTrackerName())
                      .add(reducepartitionid);

                  for (int count2 = 0; count2 < scala; count2++) {
                    int mappartitionid = reducepartitionid * scala + count2;
                    // int mapid = job.splitTaskMap.get(mappartitionid);
                    debugout1 += mappartitionid + " ";
                    this.tracker_mtask_map
                        .get(iterativeAppID)
                        .get(taskTracker.getTrackerName())
                        .add(mappartitionid);
                  }
                }

                // print out for debug
                LOG.info(
                    "tracker "
                        + taskTracker.getTrackerName()
                        + " assigned tasks "
                        + debugout1
                        + " and "
                        + debugout2);

                // make the assignment list
                String tracker = taskTracker.getTrackerName();
                LinkedList<Integer> mtaskids =
                    this.tracker_mtask_map.get(iterativeAppID).get(taskTracker.getTrackerName());
                LinkedList<Integer> mcopytaskids = new LinkedList<Integer>();
                for (int taskid : mtaskids) {
                  mcopytaskids.add(taskid);
                }
                if (!mtask_assign_map.containsKey(job.getJobID())) {
                  Map<String, LinkedList<Integer>> map_task_assign =
                      new HashMap<String, LinkedList<Integer>>();
                  this.mtask_assign_map.put(job.getJobID(), map_task_assign);
                }
                this.mtask_assign_map.get(job.getJobID()).put(tracker, mcopytaskids);

                // prepare the reduce task assignment list
                LinkedList<Integer> rtaskids =
                    this.tracker_rtask_map.get(iterativeAppID).get(taskTracker.getTrackerName());
                LinkedList<Integer> rcopytaskids = new LinkedList<Integer>();
                for (int taskid : rtaskids) {
                  rcopytaskids.add(taskid);
                }
                if (!rtask_assign_map.containsKey(job.getJobID())) {
                  Map<String, LinkedList<Integer>> reduce_task_assign =
                      new HashMap<String, LinkedList<Integer>>();
                  this.rtask_assign_map.put(job.getJobID(), reduce_task_assign);
                }
                this.rtask_assign_map.get(job.getJobID()).put(tracker, rcopytaskids);

                // assign a map task for this tracker
                Integer target = null;
                try {
                  target =
                      this.mtask_assign_map
                          .get(job.getJobID())
                          .get(taskTracker.getTrackerName())
                          .peekFirst();
                } catch (Exception e) {
                  e.printStackTrace();
                }

                if (target == null) {
                  // all have been assigned, no more work, maybe it should help others to process
                  LOG.info(
                      "all map tasks on tasktracker "
                          + taskTracker.getTrackerName()
                          + " have been processed");
                  break;
                } else {
                  t =
                      job.obtainNewNodeOrRackLocalMapTask(
                          taskTrackerStatus,
                          numTaskTrackers,
                          taskTrackerManager.getNumberOfUniqueHosts(),
                          target);
                }

              } else {
                t =
                    job.obtainNewNodeOrRackLocalMapTask(
                        taskTrackerStatus,
                        numTaskTrackers,
                        taskTrackerManager.getNumberOfUniqueHosts());
              }

            } else {
              Integer target = null;
              try {
                target =
                    this.mtask_assign_map
                        .get(job.getJobID())
                        .get(taskTracker.getTrackerName())
                        .peekFirst();
              } catch (Exception e) {
                e.printStackTrace();
              }

              if (target == null) {
                // all have been assigned, no more work, maybe it should help others to process
                LOG.info(
                    "all map tasks on tasktracker "
                        + taskTracker.getTrackerName()
                        + " have been processed");
                break;
              } else {
                t =
                    job.obtainNewNodeOrRackLocalMapTask(
                        taskTrackerStatus,
                        numTaskTrackers,
                        taskTrackerManager.getNumberOfUniqueHosts(),
                        target);
              }
            }

            if (t != null) {
              assignedTasks.add(t);
              ++numLocalMaps;

              // new iteration job and the first task for a tasktracker
              // for one2mul case, we don't need to record the assignment, since we already made the
              // assignment list beforehand
              if (!newIterationJob || jointype.equals("one2mul")) {
                // poll, remove
                this.mtask_assign_map
                    .get(job.getJobID())
                    .get(taskTracker.getTrackerName())
                    .pollFirst();
                LOG.info("assigning task " + t.getTaskID() + " on " + taskTracker.getTrackerName());
              } else {
                // record the assignment list for map tasks
                if (!this.tracker_mtask_map
                    .get(iterativeAppID)
                    .containsKey(taskTracker.getTrackerName())) {
                  LinkedList<Integer> tasklist = new LinkedList<Integer>();
                  this.tracker_mtask_map
                      .get(iterativeAppID)
                      .put(taskTracker.getTrackerName(), tasklist);
                }

                this.tracker_mtask_map
                    .get(iterativeAppID)
                    .get(taskTracker.getTrackerName())
                    .add(t.getTaskID().getTaskID().getId());

                // prepare the reduce assignment, for mapping with reduce
                if (jointype.equals("one2one")) {
                  // prepare the reduce assignment, for mapping with reduce
                  if (!first_job_reduces_map.containsKey(iterativeAppID)) {
                    Map<String, LinkedList<Integer>> tracker_reduce_map =
                        new HashMap<String, LinkedList<Integer>>();
                    first_job_reduces_map.put(iterativeAppID, tracker_reduce_map);
                  }

                  if (!first_job_reduces_map
                      .get(iterativeAppID)
                      .containsKey(taskTracker.getTrackerName())) {
                    LinkedList<Integer> reduces = new LinkedList<Integer>();
                    first_job_reduces_map
                        .get(iterativeAppID)
                        .put(taskTracker.getTrackerName(), reduces);
                  }

                  first_job_reduces_map
                      .get(iterativeAppID)
                      .get(taskTracker.getTrackerName())
                      .add(t.getTaskID().getTaskID().getId());
                }

                LOG.info("assigning task " + t.getTaskID() + " on " + taskTracker.getTrackerName());
              }

              // Don't assign map tasks to the hilt!
              // Leave some free slots in the cluster for future task-failures,
              // speculative tasks etc. beyond the highest priority job
              if (exceededMapPadding) {
                break scheduleMaps;
              }

              // Try all jobs again for the next Map task
              break;
            }

            LOG.error("New Node Or Rack Local Map Task failed!");

            if (newIterationJob) {
              // Try to schedule a node-local or rack-local Map task
              t =
                  job.obtainNewNonLocalMapTask(
                      taskTrackerStatus,
                      numTaskTrackers,
                      taskTrackerManager.getNumberOfUniqueHosts());
            } else {
              Integer target =
                  this.mtask_assign_map
                      .get(job.getJobID())
                      .get(taskTracker.getTrackerName())
                      .peekFirst();

              if (target == null) {
                // all have been assigned, no more work, maybe it should help others to process
                LOG.info(
                    "all map tasks on tasktracker "
                        + taskTracker.getTrackerName()
                        + " have been processed");
                break;
              } else {
                t =
                    job.obtainNewNonLocalMapTask(
                        taskTrackerStatus,
                        numTaskTrackers,
                        taskTrackerManager.getNumberOfUniqueHosts(),
                        target);
              }
            }

            if (t != null) {
              assignedTasks.add(t);
              ++numNonLocalMaps;

              // new iteration job and the first task for a tasktracker
              if (newIterationJob) {
                if (!this.tracker_mtask_map
                    .get(iterativeAppID)
                    .containsKey(taskTracker.getTrackerName())) {
                  LinkedList<Integer> tasklist = new LinkedList<Integer>();
                  this.tracker_mtask_map
                      .get(iterativeAppID)
                      .put(taskTracker.getTrackerName(), tasklist);
                }

                this.tracker_mtask_map
                    .get(iterativeAppID)
                    .get(taskTracker.getTrackerName())
                    .add(t.getTaskID().getTaskID().getId());
              } else {
                // poll, remove
                this.mtask_assign_map
                    .get(job.getJobID())
                    .get(taskTracker.getTrackerName())
                    .pollFirst();
              }

              // We assign at most 1 off-switch or speculative task
              // This is to prevent TaskTrackers from stealing local-tasks
              // from other TaskTrackers.
              break scheduleMaps;
            }
          } else {
            // not an iterative algorithm, normal schedule
            Task t = null;

            // Try to schedule a node-local or rack-local Map task
            t =
                job.obtainNewNodeOrRackLocalMapTask(
                    taskTrackerStatus,
                    numTaskTrackers,
                    taskTrackerManager.getNumberOfUniqueHosts());
            if (t != null) {
              assignedTasks.add(t);
              ++numLocalMaps;

              // Don't assign map tasks to the hilt!
              // Leave some free slots in the cluster for future task-failures,
              // speculative tasks etc. beyond the highest priority job
              if (exceededMapPadding) {
                break scheduleMaps;
              }

              // Try all jobs again for the next Map task
              break;
            }

            // Try to schedule a node-local or rack-local Map task
            t =
                job.obtainNewNonLocalMapTask(
                    taskTrackerStatus,
                    numTaskTrackers,
                    taskTrackerManager.getNumberOfUniqueHosts());

            if (t != null) {
              assignedTasks.add(t);
              ++numNonLocalMaps;

              // We assign at most 1 off-switch or speculative task
              // This is to prevent TaskTrackers from stealing local-tasks
              // from other TaskTrackers.
              break scheduleMaps;
            }
          }
        }
      }
    }
    int assignedMaps = assignedTasks.size();

    //
    // Same thing, but for reduce tasks
    // However we _never_ assign more than 1 reduce task per heartbeat
    //
    /** should maintain the reduce task location for the termination check */
    final int trackerCurrentReduceCapacity =
        Math.min((int) Math.ceil(reduceLoadFactor * trackerReduceCapacity), trackerReduceCapacity);
    final int availableReduceSlots =
        Math.min((trackerCurrentReduceCapacity - trackerRunningReduces), 1);
    boolean exceededReducePadding = false;
    // LOG.info("availableReduceSlots " + availableReduceSlots);
    if (availableReduceSlots > 0) {

      exceededReducePadding = exceededPadding(false, clusterStatus, trackerReduceCapacity);
      synchronized (jobQueue) {
        for (JobInProgress job : jobQueue) {
          LOG.info("job " + job.getJobID());
          if (job.getStatus().getRunState() != JobStatus.RUNNING || job.numReduceTasks == 0) {
            LOG.info("have to continue " + job.getStatus().getRunState());
            continue;
          }

          Task t = null;

          if (job.getJobConf().isIterative()) {
            String iterativeAppID = job.getJobConf().getIterativeAlgorithmID();
            if (iterativeAppID.equals("none")) {
              throw new IOException("please specify the iteration ID!");
            }

            String jointype = job.getJobConf().get("mapred.iterative.jointype");

            if (jointype.equals("one2one")) {
              // one-to-one or one-to-mul jobs

              if (this.first_job_map.get(iterativeAppID).equals(job.getJobID())
                  && job.getJobConf().isIterative()) {
                LOG.info(job.getJobID() + " is the first iteration job for reduce");
                newIterationJob = true;
              }

              Integer target = null;
              if (newIterationJob) {

                if (first_job_reduces_map.get(iterativeAppID) == null) {
                  throw new IOException(
                      "I think something is wrong since the tasktracker never receive "
                          + "a map task with iterativeapp id "
                          + iterativeAppID);
                }

                if (first_job_reduces_map.get(iterativeAppID).get(taskTracker.getTrackerName())
                    == null) {
                  throw new IOException(
                      "I think something is wrong since the tasktracker never receive "
                          + "a map task with iterativeapp id "
                          + iterativeAppID
                          + " from "
                          + taskTracker.getTrackerName());
                }

                target =
                    this.first_job_reduces_map
                        .get(iterativeAppID)
                        .get(taskTracker.getTrackerName())
                        .pollFirst();
              } else {
                // the task assignment has already been processed during the map task assignment, so
                // never use tracker_rtask_map
                target =
                    this.rtask_assign_map
                        .get(job.getJobID())
                        .get(taskTracker.getTrackerName())
                        .pollFirst();
              }

              if (target == null) {
                // all have been assigned, no more work, maybe it should help others to process
                LOG.info(
                    "all reduce tasks on tasktracker "
                        + taskTracker.getTrackerName()
                        + " have been processed");
                break;
              } else {
                t =
                    job.obtainNewReduceTask(
                        taskTrackerStatus,
                        numTaskTrackers,
                        taskTrackerManager.getNumberOfUniqueHosts(),
                        target);
              }
            } else if (jointype.equals("one2mul")) {
              Integer target =
                  this.rtask_assign_map
                      .get(job.getJobID())
                      .get(taskTracker.getTrackerName())
                      .pollFirst();

              if (target == null) {
                // all have been assigned, no more work, maybe it should help others to process
                LOG.info(
                    "all reduce tasks on tasktracker "
                        + taskTracker.getTrackerName()
                        + " have been processed");
                break;
              } else {
                t =
                    job.obtainNewReduceTask(
                        taskTrackerStatus,
                        numTaskTrackers,
                        taskTrackerManager.getNumberOfUniqueHosts(),
                        target);
              }
            } else {
              // one-to-all case, assign tasks in the first iteration job, and remember this mapping

              // this is the first job of the series of jobs
              if (this.first_job_map.get(iterativeAppID).equals(job.getJobID())) {
                LOG.info(job.getJobID() + " is the first iteration job for reduce");
                newIterationJob = true;
              }
              /*
              //this is one of the following jobs, and prepare a assignment list for the assignment
              else{
               LOG.info(job.getJobID() + " is not the first iteration job for reduce");
               if(this.rtask_assign_map.get(job.getJobID()) == null){
                //prepare the map task assignment list
                Map<String, LinkedList<Integer>> reduce_task_assign = new HashMap<String, LinkedList<Integer>>();
                for(Map.Entry<String, LinkedList<Integer>> entry : this.tracker_rtask_map.get(iterativeAppID).entrySet()){
              	  String tracker = entry.getKey();
              	  LinkedList<Integer> taskids = entry.getValue();
              	  LinkedList<Integer> copytaskids = new LinkedList<Integer>();
              	  for(int taskid : taskids){
              		  copytaskids.add(taskid);
              	  }
              	  reduce_task_assign.put(tracker, copytaskids);
                }
                this.rtask_assign_map.put(job.getJobID(), reduce_task_assign);
               }

              }
              */

              // the first iteration or following iteration
              // if the first iteration: assign taskid by default
              // else if the following iterations: assign taskid based on the first iteration
              // assignment
              if (newIterationJob) {
                t =
                    job.obtainNewReduceTask(
                        taskTrackerStatus,
                        numTaskTrackers,
                        taskTrackerManager.getNumberOfUniqueHosts());

                if (t != null) {
                  if (!this.tracker_rtask_map
                      .get(iterativeAppID)
                      .containsKey(taskTracker.getTrackerName())) {
                    LinkedList<Integer> tasklist = new LinkedList<Integer>();
                    this.tracker_rtask_map
                        .get(iterativeAppID)
                        .put(taskTracker.getTrackerName(), tasklist);
                  }

                  this.tracker_rtask_map
                      .get(iterativeAppID)
                      .get(taskTracker.getTrackerName())
                      .add(t.getTaskID().getTaskID().getId());
                  LOG.info(
                      "assigning reduce task "
                          + t.getTaskID()
                          + " on "
                          + taskTracker.getTrackerName());
                }
              } else {
                Integer target =
                    this.rtask_assign_map
                        .get(job.getJobID())
                        .get(taskTracker.getTrackerName())
                        .peekFirst();

                if (target == null) {
                  // all have been assigned, no more work, maybe it should help others to process
                  LOG.info(
                      "all map tasks on tasktracker "
                          + taskTracker.getTrackerName()
                          + " have been processed");
                  break;
                } else {
                  t =
                      job.obtainNewReduceTask(
                          taskTrackerStatus,
                          numTaskTrackers,
                          taskTrackerManager.getNumberOfUniqueHosts(),
                          target);
                }

                if (t != null) {
                  // poll, remove
                  this.rtask_assign_map
                      .get(job.getJobID())
                      .get(taskTracker.getTrackerName())
                      .pollFirst();
                  LOG.info(
                      "assigning reduce task "
                          + t.getTaskID()
                          + " on "
                          + taskTracker.getTrackerName());
                }
              }
            }
          } else {
            t =
                job.obtainNewReduceTask(
                    taskTrackerStatus,
                    numTaskTrackers,
                    taskTrackerManager.getNumberOfUniqueHosts());
          }

          LOG.info("try to assign new task " + t);
          if (t != null) {
            assignedTasks.add(t);
            break;
          }

          // Don't assign reduce tasks to the hilt!
          // Leave some free slots in the cluster for future task-failures,
          // speculative tasks etc. beyond the highest priority job
          if (exceededReducePadding) {
            break;
          }
        }
      }
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug(
          "Task assignments for "
              + taskTrackerStatus.getTrackerName()
              + " --> "
              + "["
              + mapLoadFactor
              + ", "
              + trackerMapCapacity
              + ", "
              + trackerCurrentMapCapacity
              + ", "
              + trackerRunningMaps
              + "] -> ["
              + (trackerCurrentMapCapacity - trackerRunningMaps)
              + ", "
              + assignedMaps
              + " ("
              + numLocalMaps
              + ", "
              + numNonLocalMaps
              + ")] ["
              + reduceLoadFactor
              + ", "
              + trackerReduceCapacity
              + ", "
              + trackerCurrentReduceCapacity
              + ","
              + trackerRunningReduces
              + "] -> ["
              + (trackerCurrentReduceCapacity - trackerRunningReduces)
              + ", "
              + (assignedTasks.size() - assignedMaps)
              + "]");
    }

    return assignedTasks;
  }

Example #4

Show file

File: DatacenterAwareTaskScheduler.java Project: Ahmed-Azri/palantir

  /*
   * TODO:
   * For Elf: need to change the major schedule logic, scheduling need
   * to be *datacenter-aware*
   * */
  @Override
  public synchronized List<Task> assignTasks(TaskTracker taskTracker) throws IOException {
    TaskTrackerStatus taskTrackerStatus = taskTracker.getStatus();
    ClusterStatus clusterStatus = taskTrackerManager.getClusterStatus();
    final int numTaskTrackers = clusterStatus.getTaskTrackers();
    final int clusterMapCapacity = clusterStatus.getMaxMapTasks();
    final int clusterReduceCapacity = clusterStatus.getMaxReduceTasks();

    Collection<JobInProgress> jobQueue = jobQueueJobInProgressListener.getJobQueue();

    //
    // Get map + reduce counts for the current tracker.
    //
    final int trackerMapCapacity = taskTrackerStatus.getMaxMapSlots();
    final int trackerReduceCapacity = taskTrackerStatus.getMaxReduceSlots();
    final int trackerRunningMaps = taskTrackerStatus.countMapTasks();
    final int trackerRunningReduces = taskTrackerStatus.countReduceTasks();

    // Assigned tasks
    List<Task> assignedTasks = new ArrayList<Task>();

    //
    // Compute (running + pending) map and reduce task numbers across pool
    //
    int remainingReduceLoad = 0;
    int remainingMapLoad = 0;
    synchronized (jobQueue) {
      for (JobInProgress job : jobQueue) {
        if (job.getStatus().getRunState() == JobStatus.RUNNING) {
          remainingMapLoad += (job.desiredMaps() - job.finishedMaps());
          if (job.scheduleReduces()) {
            remainingReduceLoad += (job.desiredReduces() - job.finishedReduces());
          }
        }
      }
    }

    // Compute the 'load factor' for maps and reduces
    double mapLoadFactor = 0.0;
    if (clusterMapCapacity > 0) {
      mapLoadFactor = (double) remainingMapLoad / clusterMapCapacity;
    }
    double reduceLoadFactor = 0.0;
    if (clusterReduceCapacity > 0) {
      reduceLoadFactor = (double) remainingReduceLoad / clusterReduceCapacity;
    }

    //
    // In the below steps, we allocate first map tasks (if appropriate),
    // and then reduce tasks if appropriate.  We go through all jobs
    // in order of job arrival; jobs only get serviced if their
    // predecessors are serviced, too.
    //

    //
    // We assign tasks to the current taskTracker if the given machine
    // has a workload that's less than the maximum load of that kind of
    // task.
    // However, if the cluster is close to getting loaded i.e. we don't
    // have enough _padding_ for speculative executions etc., we only
    // schedule the "highest priority" task i.e. the task from the job
    // with the highest priority.
    //

    final int trackerCurrentMapCapacity =
        Math.min((int) Math.ceil(mapLoadFactor * trackerMapCapacity), trackerMapCapacity);
    int availableMapSlots = trackerCurrentMapCapacity - trackerRunningMaps;
    boolean exceededMapPadding = false;
    if (availableMapSlots > 0) {
      exceededMapPadding = exceededPadding(true, clusterStatus, trackerMapCapacity);
    }

    int numLocalMaps = 0;
    int numNonLocalMaps = 0;
    scheduleMaps:

    // TODO: for Elf
    // The main schedule logic here, outer for loop is for every slot, inner loop is for each job
    for (int i = 0; i < availableMapSlots; ++i) {
      synchronized (jobQueue) {
        for (JobInProgress job : jobQueue) {
          if (job.getStatus().getRunState() != JobStatus.RUNNING) {
            continue;
          }

          Task t = null;

          // Try to schedule a node-local or rack-local Map task
          t =
              job.obtainNewLocalMapTask(
                  taskTrackerStatus, numTaskTrackers, taskTrackerManager.getNumberOfUniqueHosts());
          if (t != null) {
            assignedTasks.add(t);
            ++numLocalMaps;

            // Don't assign map tasks to the hilt!
            // Leave some free slots in the cluster for future task-failures,
            // speculative tasks etc. beyond the highest priority job
            if (exceededMapPadding) {
              break scheduleMaps;
            }

            // Try all jobs again for the next Map task
            // Note: it's FIFO here: next time in the inner for loop the head-of-queue
            //  will still be chosen
            break;
          }

          // If no locality for this job, try launching non-local
          // Try to schedule a node-local or rack-local Map task --> original comments
          // FIXME: is the above comment correct? seems should be non-local task
          t =
              job.obtainNewNonLocalMapTask(
                  taskTrackerStatus, numTaskTrackers, taskTrackerManager.getNumberOfUniqueHosts());

          if (t != null) {
            assignedTasks.add(t);
            ++numNonLocalMaps;

            // We assign at most 1 off-switch or speculative task
            // This is to prevent TaskTrackers from stealing local-tasks
            // from other TaskTrackers.
            break scheduleMaps;
          }
        }
      }
    }
    int assignedMaps = assignedTasks.size();

    //
    // Same thing, but for reduce tasks
    // However we _never_ assign more than 1 reduce task per heartbeat
    //
    final int trackerCurrentReduceCapacity =
        Math.min((int) Math.ceil(reduceLoadFactor * trackerReduceCapacity), trackerReduceCapacity);
    final int availableReduceSlots =
        Math.min((trackerCurrentReduceCapacity - trackerRunningReduces), 1);
    boolean exceededReducePadding = false;
    if (availableReduceSlots > 0) {
      exceededReducePadding = exceededPadding(false, clusterStatus, trackerReduceCapacity);
      synchronized (jobQueue) {
        for (JobInProgress job : jobQueue) {
          if (job.getStatus().getRunState() != JobStatus.RUNNING || job.numReduceTasks == 0) {
            continue;
          }

          Task t =
              job.obtainNewReduceTask(
                  taskTrackerStatus, numTaskTrackers, taskTrackerManager.getNumberOfUniqueHosts());
          if (t != null) {
            assignedTasks.add(t);
            break;
          }

          // Don't assign reduce tasks to the hilt!
          // Leave some free slots in the cluster for future task-failures,
          // speculative tasks etc. beyond the highest priority job
          if (exceededReducePadding) {
            break;
          }
        }
      }
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug(
          "Task assignments for "
              + taskTrackerStatus.getTrackerName()
              + " --> "
              + "["
              + mapLoadFactor
              + ", "
              + trackerMapCapacity
              + ", "
              + trackerCurrentMapCapacity
              + ", "
              + trackerRunningMaps
              + "] -> ["
              + (trackerCurrentMapCapacity - trackerRunningMaps)
              + ", "
              + assignedMaps
              + " ("
              + numLocalMaps
              + ", "
              + numNonLocalMaps
              + ")] ["
              + reduceLoadFactor
              + ", "
              + trackerReduceCapacity
              + ", "
              + trackerCurrentReduceCapacity
              + ","
              + trackerRunningReduces
              + "] -> ["
              + (trackerCurrentReduceCapacity - trackerRunningReduces)
              + ", "
              + (assignedTasks.size() - assignedMaps)
              + "]");
    }

    return assignedTasks;
  }

Example #5

Show file

File: RangeQuery.java Project: easysg/spatedb

  /**
   * Performs a range query using MapReduce
   *
   * @param fs
   * @param inputFile
   * @param queryRange
   * @param shape
   * @param output
   * @return
   * @throws IOException
   */
  public static long rangeQueryMapReduce(
      FileSystem fs,
      Path inputFile,
      Path userOutputPath,
      Shape queryShape,
      Shape shape,
      boolean overwrite,
      boolean background,
      QueryInput query)
      throws IOException {
    JobConf job = new JobConf(FileMBR.class);

    FileSystem outFs = inputFile.getFileSystem(job);
    Path outputPath = userOutputPath;
    if (outputPath == null) {
      do {
        outputPath =
            new Path(
                inputFile.toUri().getPath() + ".rangequery_" + (int) (Math.random() * 1000000));
      } while (outFs.exists(outputPath));
    } else {
      if (outFs.exists(outputPath)) {
        if (overwrite) {
          outFs.delete(outputPath, true);
        } else {
          throw new RuntimeException("Output path already exists and -overwrite flag is not set");
        }
      }
    }

    job.setJobName("RangeQuery");
    job.setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class);
    RangeFilter.setQueryRange(job, queryShape); // Set query range for
    // filter

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);
    job.setNumReduceTasks(3);

    // Decide which map function to use depending on how blocks are indexed
    // And also which input format to use
    if (SpatialSite.isRTree(fs, inputFile)) {
      // RTree indexed file
      LOG.info("Searching an RTree indexed file");
      job.setInputFormat(RTreeInputFormat.class);
    } else {
      // A file with no local index
      LOG.info("Searching a non local-indexed file");
      job.setInputFormat(ShapeInputFormat.class);
    }

    GlobalIndex<Partition> gIndex = SpatialSite.getGlobalIndex(fs, inputFile);
    // if (gIndex != null && gIndex.isReplicated()){
    // job.setMapperClass(RangeQueryMap.class);

    Class<?> OutputKey = NullWritable.class;
    try {
      Class<?> c = shape.getClass();
      Field f = c.getDeclaredField(query.field);
      f.setAccessible(true);
      if (f.getType().equals(Integer.TYPE)) {
        OutputKey = IntWritable.class;
      } else if (f.getType().equals(Double.TYPE)) {
        OutputKey = DoubleWritable.class;
      } else if (f.getType().equals(Long.TYPE)) {
        OutputKey = LongWritable.class;
      }
    } catch (SecurityException e) {
      e.printStackTrace();
    } catch (NoSuchFieldException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    }

    job.setMapOutputKeyClass(OutputKey);
    switch (query.type) {
      case Distinct:
        job.setMapperClass(DistinctQueryMap.class);
        job.setReducerClass(DistinctQueryReduce.class);
        job.setMapOutputValueClass(NullWritable.class);
        break;
      case Distribution:
        job.setMapperClass(DistributionQueryMap.class);
        job.setReducerClass(DistributionQueryReduce.class);
        job.setMapOutputValueClass(IntWritable.class);
        break;
      default:
        break;
    }
    // }
    // else
    // job.setMapperClass(RangeQueryMapNoDupAvoidance.class);

    // Set query range for the map function
    job.set(QUERY_SHAPE_CLASS, queryShape.getClass().getName());
    job.set(QUERY_SHAPE, queryShape.toText(new Text()).toString());
    job.set(QUERY_FIELD, query.field);

    // Set shape class for the SpatialInputFormat
    SpatialSite.setShapeClass(job, shape.getClass());

    job.setOutputFormat(TextOutputFormat.class);

    ShapeInputFormat.setInputPaths(job, inputFile);
    TextOutputFormat.setOutputPath(job, outputPath);

    // Submit the job
    if (!background) {
      RunningJob runningJob = JobClient.runJob(job);
      Counters counters = runningJob.getCounters();
      Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
      final long resultCount = outputRecordCounter.getValue();

      // If outputPath not set by user, automatically delete it
      if (userOutputPath == null) outFs.delete(outputPath, true);

      return resultCount;
    } else {
      JobClient jc = new JobClient(job);
      lastRunningJob = jc.submitJob(job);
      return -1;
    }
  }