Example No. 1
  private String[] getActiveServersList(JobContext context) {

    String[] servers = null;
    try {
      JobClient jc = new JobClient((JobConf) context.getConfiguration());
      ClusterStatus status = jc.getClusterStatus(true);
      Collection<String> atc = status.getActiveTrackerNames();
      servers = new String[atc.size()];
      int s = 0;
      for (String serverInfo : atc) {
        // System.out.println("serverInfo:" + serverInfo);
        StringTokenizer st = new StringTokenizer(serverInfo, ":");
        String trackerName = st.nextToken();
        // System.out.println("trackerName:" + trackerName);
        StringTokenizer st1 = new StringTokenizer(trackerName, "_");
        st1.nextToken();
        servers[s++] = st1.nextToken();
      }

    } catch (IOException e) {
      e.printStackTrace();
    }

    return servers;
  }
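The tokenizing above assumes each entry returned by getActiveTrackerNames() looks roughly like "tracker_<hostname>:localhost/127.0.0.1:<port>" (the exact suffix varies by Hadoop version). A minimal stand-alone sketch of the same extraction, using a made-up tracker string:

    // Hypothetical tracker entry, only to illustrate the parsing above.
    String serverInfo = "tracker_node01.example.com:localhost/127.0.0.1:54311";
    // Keep everything before the first ':' -> "tracker_node01.example.com".
    String trackerName = new StringTokenizer(serverInfo, ":").nextToken();
    // Skip the "tracker" prefix and keep the host name.
    StringTokenizer st = new StringTokenizer(trackerName, "_");
    st.nextToken();                   // "tracker"
    String host = st.nextToken();     // "node01.example.com"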
Example No. 2
 /**
  * Generate the requested number of file splits, with the filename set to the filename of the
  * output file.
  */
 public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    /** Determine the number of input splits. */
   JobClient client = new JobClient(job);
   ClusterStatus cluster = client.getClusterStatus();
    /** If the property is not set, the default value is returned. */
   int numMapsPerHost = job.getInt("test.randomwriter.maps_per_host", 10);
   long numBytesToWritePerMap =
       job.getLong("test.randomwrite.bytes_per_map", 1 * 1024 * 1024 * 1024);
    if (numBytesToWritePerMap == 0) {
      throw new IOException("Cannot have test.randomwrite.bytes_per_map set to 0");
    }
   long totalBytesToWrite =
       job.getLong(
           "test.randomwrite.total_bytes",
           numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
   int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
   if (numMaps == 0 && totalBytesToWrite > 0) {
     numMaps = 1;
   }
   System.out.println("numMaps-------" + numMaps);
   InputSplit[] result = new InputSplit[numMaps];
   Path outDir = FileOutputFormat.getOutputPath(job);
   for (int i = 0; i < result.length; ++i) {
     result[i] = new FileSplit(new Path(outDir, "dummy-split-" + i), 0, 1, (String[]) null);
   }
   return result;
 }
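To make the split-count arithmetic concrete, here is a small worked example using the defaults above and a hypothetical cluster size:

    // Defaults from the code above, plus a made-up 4-tracker cluster.
    long bytesPerMap = 1L * 1024 * 1024 * 1024;                  // 1 GB per map
    int mapsPerHost = 10;
    int taskTrackers = 4;                                        // hypothetical
    long totalBytes = mapsPerHost * bytesPerMap * taskTrackers;  // 40 GB
    int numMaps = (int) (totalBytes / bytesPerMap);              // 40 dummy splits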
Example No. 3
  /**
   * This is the main routine for launching a distributed random write job. It runs 10 maps/node and
   * each node writes 1 gig of data to a DFS file. The reduce doesn't do anything.
   *
   * @throws IOException
   */
  public int run(String[] args) throws Exception {
    if (args.length == 0) {
      System.out.println("Usage: writer <out-dir>");
      ToolRunner.printGenericCommandUsage(System.out);
      return -1;
    }

    Path outDir = new Path(args[0]);
    JobConf job = new JobConf(getConf());

    job.setJarByClass(RandomWriter.class);
    job.setJobName("random-writer");
    FileOutputFormat.setOutputPath(job, outDir);

    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(BytesWritable.class);

    job.setInputFormat(RandomInputFormat.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(IdentityReducer.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);

    JobClient client = new JobClient(job);
    ClusterStatus cluster = client.getClusterStatus();
    /** If the property is not set, the default value is returned. */
    int numMapsPerHost = job.getInt("test.randomwriter.maps_per_host", 10);
    long numBytesToWritePerMap =
        job.getLong("test.randomwrite.bytes_per_map", 1 * 1024 * 1024 * 1024);
    if (numBytesToWritePerMap == 0) {
      System.err.println("Cannot have test.randomwrite.bytes_per_map set to 0");
      return -2;
    }
    long totalBytesToWrite =
        job.getLong(
            "test.randomwrite.total_bytes",
            numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
    int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
    if (numMaps == 0 && totalBytesToWrite > 0) {
      numMaps = 1;
      job.setLong("test.randomwrite.bytes_per_map", totalBytesToWrite);
    }

    job.setNumMapTasks(numMaps);
    /** Advisory only: the requested number of map tasks is a hint to the framework. */
    System.out.println("Running " + numMaps + " maps.");

    // reducer NONE
    job.setNumReduceTasks(0);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    JobClient.runJob(job);
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println(
        "The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");

    return 0;
  }
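For completeness, a driver like this is normally launched through ToolRunner; a minimal sketch, assuming the enclosing RandomWriter class implements the Tool interface (which the run() signature suggests):

  public static void main(String[] args) throws Exception {
    int res = ToolRunner.run(new Configuration(), new RandomWriter(), args);
    System.exit(res);
  }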
Example No. 4
 @Override
 public JobTrackerState getJobTrackerState(ClusterStatus clusterStatus) throws Exception {
   switch (clusterStatus.getJobTrackerStatus()) {
     case INITIALIZING:
       return JobTrackerState.INITIALIZING;
     case RUNNING:
       return JobTrackerState.RUNNING;
     default:
       String errorMsg = "Unrecognized JobTracker state: " + clusterStatus.getJobTrackerStatus();
       throw new Exception(errorMsg);
   }
 }
Example No. 5
    /** Return the status information about the Map-Reduce cluster */
    public HiveClusterStatus getClusterStatus() throws HiveServerException, TException {
      HiveClusterStatus hcs;
      try {
        ClusterStatus cs = driver.getClusterStatus();
        JobTracker.State jbs = cs.getJobTrackerState();

        // Convert the ClusterStatus to its Thrift equivalent: HiveClusterStatus
        int state;
        switch (jbs) {
          case INITIALIZING:
            state = JobTrackerState.INITIALIZING;
            break;
          case RUNNING:
            state = JobTrackerState.RUNNING;
            break;
          default:
            String errorMsg = "Unrecognized JobTracker state: " + jbs.toString();
            throw new Exception(errorMsg);
        }

        hcs =
            new HiveClusterStatus(
                cs.getTaskTrackers(),
                cs.getMapTasks(),
                cs.getReduceTasks(),
                cs.getMaxMapTasks(),
                cs.getMaxReduceTasks(),
                state);
      } catch (Exception e) {
        LOG.error(e.toString());
        e.printStackTrace();
        throw new HiveServerException("Unable to get cluster status: " + e.toString());
      }
      return hcs;
    }
Example No. 6
  void startJobTracker(boolean wait) {
    //  Create the JobTracker
    jobTracker = new JobTrackerRunner(conf);
    jobTrackerThread = new Thread(jobTracker);

    jobTrackerThread.start();

    if (!wait) {
      return;
    }

    while (jobTracker.isActive() && !jobTracker.isUp()) {
      try { // let daemons get started
        Thread.sleep(1000);
      } catch (InterruptedException e) {
      }
    }

    // if the jobtracker has started, wait for it to finish initializing
    ClusterStatus status = null;
    if (jobTracker.isUp()) {
      status = jobTracker.getJobTracker().getClusterStatus(false);
      while (jobTracker.isActive()
          && status.getJobTrackerState() == JobTracker.State.INITIALIZING) {
        try {
          LOG.info("JobTracker still initializing. Waiting.");
          Thread.sleep(1000);
        } catch (InterruptedException e) {
        }
        status = jobTracker.getJobTracker().getClusterStatus(false);
      }
    }

    if (!jobTracker.isActive()) {
      // return if jobtracker has crashed
      return;
    }

    // Set the configuration for the task-trackers
    this.jobTrackerPort = jobTracker.getJobTrackerPort();
    this.jobTrackerInfoPort = jobTracker.getJobTrackerInfoPort();
  }
Example No. 7
 @Override
 public Task assignTask(TaskTrackerStatus tts, long currentTime, Collection<JobInProgress> visited)
     throws IOException {
   if (isRunnable()) {
     visited.add(job);
     TaskTrackerManager ttm = scheduler.taskTrackerManager;
     ClusterStatus clusterStatus = ttm.getClusterStatus();
     int numTaskTrackers = clusterStatus.getTaskTrackers();
     if (taskType == TaskType.MAP) {
       LocalityLevel localityLevel = scheduler.getAllowedLocalityLevel(job, currentTime);
       scheduler.getEventLog().log("ALLOWED_LOC_LEVEL", job.getJobID(), localityLevel);
       // obtainNewMapTask needs to be passed 1 + the desired locality level
       return job.obtainNewMapTask(
           tts, numTaskTrackers, ttm.getNumberOfUniqueHosts(), localityLevel.toCacheLevelCap());
     } else {
       return job.obtainNewReduceTask(tts, numTaskTrackers, ttm.getNumberOfUniqueHosts());
     }
   } else {
     return null;
   }
 }
Example No. 8
  /** Wait until the system is idle. */
  public void waitUntilIdle() {
    waitTaskTrackers();

    JobClient client;
    try {
      client = new JobClient(job);
      ClusterStatus status = client.getClusterStatus();
      while (status.getTaskTrackers() + numTrackerToExclude < taskTrackerList.size()) {
        for (TaskTrackerRunner runner : taskTrackerList) {
          if (runner.isDead) {
            throw new RuntimeException("TaskTracker is dead");
          }
        }
        Thread.sleep(1000);
        status = client.getClusterStatus();
      }
    } catch (IOException ex) {
      throw new RuntimeException(ex);
    } catch (InterruptedException ex) {
      throw new RuntimeException(ex);
    }
  }
  /** When no input dir is specified, generate random data. */
  protected static void confRandom(JobConf job) throws IOException {
    // from RandomWriter
    job.setInputFormat(RandomInputFormat.class);
    job.setMapperClass(RandomMapOutput.class);

    final ClusterStatus cluster = new JobClient(job).getClusterStatus();
    int numMapsPerHost = job.getInt(RandomTextWriter.MAPS_PER_HOST, 10);
    long numBytesToWritePerMap =
        job.getLong(RandomTextWriter.BYTES_PER_MAP, 1 * 1024 * 1024 * 1024);
    if (numBytesToWritePerMap == 0) {
      throw new IOException("Cannot have " + RandomTextWriter.BYTES_PER_MAP + " set to 0");
    }
    long totalBytesToWrite =
        job.getLong(
            RandomTextWriter.TOTAL_BYTES,
            numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
    int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
    if (numMaps == 0 && totalBytesToWrite > 0) {
      numMaps = 1;
      job.setLong(RandomTextWriter.BYTES_PER_MAP, totalBytesToWrite);
    }
    job.setNumMapTasks(numMaps);
  }
  private boolean exceededPadding(
      boolean isMapTask, ClusterStatus clusterStatus, int maxTaskTrackerSlots) {
    int numTaskTrackers = clusterStatus.getTaskTrackers();
    int totalTasks = (isMapTask) ? clusterStatus.getMapTasks() : clusterStatus.getReduceTasks();
    int totalTaskCapacity =
        isMapTask ? clusterStatus.getMaxMapTasks() : clusterStatus.getMaxReduceTasks();

    Collection<JobInProgress> jobQueue = jobQueueJobInProgressListener.getJobQueue();

    boolean exceededPadding = false;
    synchronized (jobQueue) {
      int totalNeededTasks = 0;
      for (JobInProgress job : jobQueue) {
        if (job.getStatus().getRunState() != JobStatus.RUNNING || job.numReduceTasks == 0) {
          continue;
        }

        //
        // Beyond the highest-priority task, reserve a little
        // room for failures and speculative executions; don't
        // schedule tasks to the hilt.
        //
        totalNeededTasks += isMapTask ? job.desiredMaps() : job.desiredReduces();
        int padding = 0;
        if (numTaskTrackers > MIN_CLUSTER_SIZE_FOR_PADDING) {
          padding = Math.min(maxTaskTrackerSlots, (int) (totalNeededTasks * padFraction));
        }
        if (totalTasks + padding >= totalTaskCapacity) {
          exceededPadding = true;
          break;
        }
      }
    }

    return exceededPadding;
  }
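To illustrate the padding rule above, a small calculation with purely hypothetical numbers:

    // Hypothetical values for one scheduling pass.
    int numTaskTrackers = 20;            // large enough that padding applies
    int maxTaskTrackerSlots = 4;
    double padFraction = 0.01;
    int totalNeededTasks = 300;
    int padding = Math.min(maxTaskTrackerSlots, (int) (totalNeededTasks * padFraction)); // = 3
    int totalTasks = 98;
    int totalTaskCapacity = 100;
    boolean exceeded = totalTasks + padding >= totalTaskCapacity;  // 98 + 3 >= 100 -> true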
  @Override
  public synchronized List<Task> assignTasks(TaskTracker taskTracker) throws IOException {
    TaskTrackerStatus taskTrackerStatus = taskTracker.getStatus();
    ClusterStatus clusterStatus = taskTrackerManager.getClusterStatus();
    final int numTaskTrackers = clusterStatus.getTaskTrackers();
    final int clusterMapCapacity = clusterStatus.getMaxMapTasks();
    final int clusterReduceCapacity = clusterStatus.getMaxReduceTasks();

    Collection<JobInProgress> jobQueue = jobQueueJobInProgressListener.getJobQueue();

    //
    // Get map + reduce counts for the current tracker.
    //
    final int trackerMapCapacity = taskTrackerStatus.getMaxMapSlots();
    final int trackerReduceCapacity = taskTrackerStatus.getMaxReduceSlots();
    final int trackerRunningMaps = taskTrackerStatus.countMapTasks();
    final int trackerRunningReduces = taskTrackerStatus.countReduceTasks();

    // Assigned tasks
    List<Task> assignedTasks = new ArrayList<Task>();

    //
    // Compute (running + pending) map and reduce task numbers across pool
    //
    int remainingReduceLoad = 0;
    int remainingMapLoad = 0;
    synchronized (jobQueue) {
      for (JobInProgress job : jobQueue) {
        if (job.getStatus().getRunState() == JobStatus.RUNNING) {
          remainingMapLoad += (job.desiredMaps() - job.finishedMaps());
          if (job.scheduleReduces()) {
            remainingReduceLoad += (job.desiredReduces() - job.finishedReduces());
          }
        }
      }
    }

    // Compute the 'load factor' for maps and reduces
    double mapLoadFactor = 0.0;
    if (clusterMapCapacity > 0) {
      mapLoadFactor = (double) remainingMapLoad / clusterMapCapacity;
    }
    double reduceLoadFactor = 0.0;
    if (clusterReduceCapacity > 0) {
      reduceLoadFactor = (double) remainingReduceLoad / clusterReduceCapacity;
    }

    //
    // In the below steps, we allocate first map tasks (if appropriate),
    // and then reduce tasks if appropriate.  We go through all jobs
    // in order of job arrival; jobs only get serviced if their
    // predecessors are serviced, too.
    //

    //
    // We assign tasks to the current taskTracker if the given machine
    // has a workload that's less than the maximum load of that kind of
    // task.
    // However, if the cluster is close to getting loaded i.e. we don't
    // have enough _padding_ for speculative executions etc., we only
    // schedule the "highest priority" task i.e. the task from the job
    // with the highest priority.
    //

    final int trackerCurrentMapCapacity =
        Math.min((int) Math.ceil(mapLoadFactor * trackerMapCapacity), trackerMapCapacity);
    int availableMapSlots = trackerCurrentMapCapacity - trackerRunningMaps;
    boolean exceededMapPadding = false;
    if (availableMapSlots > 0) {
      exceededMapPadding = exceededPadding(true, clusterStatus, trackerMapCapacity);
    }

    int numLocalMaps = 0;
    int numNonLocalMaps = 0;
    boolean newIterationJob = false;
    scheduleMaps:
    for (int i = 0; i < availableMapSlots; ++i) {
      synchronized (jobQueue) {
        for (JobInProgress job : jobQueue) {
          if (job.getStatus().getRunState() != JobStatus.RUNNING) {
            continue;
          }

          if (job.getJobConf().isIterative()) {

            String iterativeAppID = job.getJobConf().getIterativeAlgorithmID();

            if (iterativeAppID.equals("none")) {
              throw new IOException("please specify the iteration ID!");
            }

            String jointype = job.getJobConf().get("mapred.iterative.jointype");

            // prepare the iterationid map and jobtask map
            if (!this.tracker_mtask_map.containsKey(iterativeAppID)) {
              // a new iterative algorithm
              Map<String, LinkedList<Integer>> new_tracker_task_map =
                  new HashMap<String, LinkedList<Integer>>();
              this.tracker_mtask_map.put(iterativeAppID, new_tracker_task_map);

              Map<String, LinkedList<Integer>> new_tracker_rtask_map =
                  new HashMap<String, LinkedList<Integer>>();
              this.tracker_rtask_map.put(iterativeAppID, new_tracker_rtask_map);

              // record the first job of the series of jobs in the iterations
              this.first_job_map.put(iterativeAppID, job.getJobID());

              // record the list of jobs for an iteration
              HashSet<JobID> jobs = new HashSet<JobID>();
              jobs.add(job.getJobID());
              this.iteration_jobs_map.put(iterativeAppID, jobs);
            }

            // this is the first job of the series of jobs
            if (this.first_job_map.get(iterativeAppID).equals(job.getJobID())
                && job.getJobConf().isIterative()) {
              LOG.info(job.getJobID() + " is the first iteration job");
              newIterationJob = true;
            }

            // this is one of the subsequent jobs, so prepare an assignment list for it
            if (!newIterationJob) {
              LOG.info(job.getJobID() + " is not the first iteration job");
              this.iteration_jobs_map.get(iterativeAppID).add(job.getJobID());

              if (this.mtask_assign_map.get(job.getJobID()) == null) {
                // prepare the map task assignment list
                LOG.info("for job " + job.getJobID() + "'s assignment:");
                Map<String, LinkedList<Integer>> map_task_assign =
                    new HashMap<String, LinkedList<Integer>>();
                for (Map.Entry<String, LinkedList<Integer>> entry :
                    this.tracker_mtask_map.get(iterativeAppID).entrySet()) {
                  String tracker = entry.getKey();
                  LinkedList<Integer> taskids = entry.getValue();
                  LinkedList<Integer> copytaskids = new LinkedList<Integer>();
                  LOG.info("assign on tracker " + tracker);
                  for (int taskid : taskids) {
                    copytaskids.add(taskid);
                    LOG.info("task id " + taskid);
                  }
                  map_task_assign.put(tracker, copytaskids);
                }
                this.mtask_assign_map.put(job.getJobID(), map_task_assign);

                // if one2one, copy the map assignment to the reduce assignment; they share the same mapping
                if (jointype.equals("one2one")) {
                  // prepare the reduce task assignment list
                  Map<String, LinkedList<Integer>> reduce_task_assign =
                      new HashMap<String, LinkedList<Integer>>();
                  for (Map.Entry<String, LinkedList<Integer>> entry :
                      this.tracker_mtask_map.get(iterativeAppID).entrySet()) {
                    String tracker = entry.getKey();
                    LinkedList<Integer> taskids = entry.getValue();
                    LinkedList<Integer> copytaskids = new LinkedList<Integer>();
                    for (int taskid : taskids) {
                      copytaskids.add(taskid);
                    }
                    reduce_task_assign.put(tracker, copytaskids);
                  }
                  this.tracker_rtask_map.put(iterativeAppID, reduce_task_assign);
                }

                // prepare the reduce task assignment list for all cases
                Map<String, LinkedList<Integer>> reduce_task_assign =
                    new HashMap<String, LinkedList<Integer>>();
                for (Map.Entry<String, LinkedList<Integer>> entry :
                    this.tracker_rtask_map.get(iterativeAppID).entrySet()) {
                  String tracker = entry.getKey();
                  LinkedList<Integer> taskids = entry.getValue();
                  LinkedList<Integer> copytaskids = new LinkedList<Integer>();
                  for (int taskid : taskids) {
                    copytaskids.add(taskid);
                  }
                  reduce_task_assign.put(tracker, copytaskids);
                }
                this.rtask_assign_map.put(job.getJobID(), reduce_task_assign);
              }
            }

            Task t = null;

            // the first iteration or following iteration
            // if the first iteration: assign taskid by default (exception for the one2mul case,
            // where we assign starting from 0,...,n)
            // else if the following iterations: assign taskid based on the first iteration
            // assignment
            if (newIterationJob) {

              /**
               * The one2mul case must be handled carefully: we want to assign map0,map1,map2
               * and reduce0 to one tracker, and map3,map4,map5 and reduce1 to another tracker
               */
              if (jointype.equals("one2mul")
                  && !tracker_rtask_map
                      .get(iterativeAppID)
                      .containsKey(taskTracker.getTrackerName())) {

                // if the tracker is already present, tasks have already been assigned to it

                int scala = job.getJobConf().getInt("mapred.iterative.data.scala", 1);
                // int mapsEachTracker = job.getJobConf().getNumMapTasks() / numTaskTrackers;
                int reducersEachTracker = job.getJobConf().getNumReduceTasks() / numTaskTrackers;
                if (job.getJobConf().getNumReduceTasks() % numTaskTrackers != 0)
                  throw new IOException(
                      "job.getJobConf().getNumReduceTasks() % numTaskTrackers != 0");

                if (!this.tracker_mtask_map
                    .get(iterativeAppID)
                    .containsKey(taskTracker.getTrackerName())) {
                  LinkedList<Integer> tasklist = new LinkedList<Integer>();
                  this.tracker_mtask_map
                      .get(iterativeAppID)
                      .put(taskTracker.getTrackerName(), tasklist);
                }
                if (!this.tracker_rtask_map
                    .get(iterativeAppID)
                    .containsKey(taskTracker.getTrackerName())) {
                  LinkedList<Integer> tasklist = new LinkedList<Integer>();
                  this.tracker_rtask_map
                      .get(iterativeAppID)
                      .put(taskTracker.getTrackerName(), tasklist);
                }

                // for debugging
                String debugout1 = "maps: ";
                String debugout2 = "reduces: ";

                int reduceOffsetId =
                    (tracker_rtask_map.get(iterativeAppID).size() - 1)
                        * reducersEachTracker; // the start reduce id

                for (int count = 0; count < reducersEachTracker; count++) {
                  int reducepartitionid = reduceOffsetId + count;
                  debugout2 += reducepartitionid + " ";
                  tracker_rtask_map
                      .get(iterativeAppID)
                      .get(taskTracker.getTrackerName())
                      .add(reducepartitionid);

                  for (int count2 = 0; count2 < scala; count2++) {
                    int mappartitionid = reducepartitionid * scala + count2;
                    // int mapid = job.splitTaskMap.get(mappartitionid);
                    debugout1 += mappartitionid + " ";
                    this.tracker_mtask_map
                        .get(iterativeAppID)
                        .get(taskTracker.getTrackerName())
                        .add(mappartitionid);
                  }
                }

                // print out for debug
                LOG.info(
                    "tracker "
                        + taskTracker.getTrackerName()
                        + " assigned tasks "
                        + debugout1
                        + " and "
                        + debugout2);

                // make the assignment list
                String tracker = taskTracker.getTrackerName();
                LinkedList<Integer> mtaskids =
                    this.tracker_mtask_map.get(iterativeAppID).get(taskTracker.getTrackerName());
                LinkedList<Integer> mcopytaskids = new LinkedList<Integer>();
                for (int taskid : mtaskids) {
                  mcopytaskids.add(taskid);
                }
                if (!mtask_assign_map.containsKey(job.getJobID())) {
                  Map<String, LinkedList<Integer>> map_task_assign =
                      new HashMap<String, LinkedList<Integer>>();
                  this.mtask_assign_map.put(job.getJobID(), map_task_assign);
                }
                this.mtask_assign_map.get(job.getJobID()).put(tracker, mcopytaskids);

                // prepare the reduce task assignment list
                LinkedList<Integer> rtaskids =
                    this.tracker_rtask_map.get(iterativeAppID).get(taskTracker.getTrackerName());
                LinkedList<Integer> rcopytaskids = new LinkedList<Integer>();
                for (int taskid : rtaskids) {
                  rcopytaskids.add(taskid);
                }
                if (!rtask_assign_map.containsKey(job.getJobID())) {
                  Map<String, LinkedList<Integer>> reduce_task_assign =
                      new HashMap<String, LinkedList<Integer>>();
                  this.rtask_assign_map.put(job.getJobID(), reduce_task_assign);
                }
                this.rtask_assign_map.get(job.getJobID()).put(tracker, rcopytaskids);

                // assign a map task for this tracker
                Integer target = null;
                try {
                  target =
                      this.mtask_assign_map
                          .get(job.getJobID())
                          .get(taskTracker.getTrackerName())
                          .peekFirst();
                } catch (Exception e) {
                  e.printStackTrace();
                }

                if (target == null) {
                  // all have been assigned, no more work, maybe it should help others to process
                  LOG.info(
                      "all map tasks on tasktracker "
                          + taskTracker.getTrackerName()
                          + " have been processed");
                  break;
                } else {
                  t =
                      job.obtainNewNodeOrRackLocalMapTask(
                          taskTrackerStatus,
                          numTaskTrackers,
                          taskTrackerManager.getNumberOfUniqueHosts(),
                          target);
                }

              } else {
                t =
                    job.obtainNewNodeOrRackLocalMapTask(
                        taskTrackerStatus,
                        numTaskTrackers,
                        taskTrackerManager.getNumberOfUniqueHosts());
              }

            } else {
              Integer target = null;
              try {
                target =
                    this.mtask_assign_map
                        .get(job.getJobID())
                        .get(taskTracker.getTrackerName())
                        .peekFirst();
              } catch (Exception e) {
                e.printStackTrace();
              }

              if (target == null) {
                // all have been assigned, no more work, maybe it should help others to process
                LOG.info(
                    "all map tasks on tasktracker "
                        + taskTracker.getTrackerName()
                        + " have been processed");
                break;
              } else {
                t =
                    job.obtainNewNodeOrRackLocalMapTask(
                        taskTrackerStatus,
                        numTaskTrackers,
                        taskTrackerManager.getNumberOfUniqueHosts(),
                        target);
              }
            }

            if (t != null) {
              assignedTasks.add(t);
              ++numLocalMaps;

              // new iteration job and the first task for a tasktracker
              // for one2mul case, we don't need to record the assignment, since we already made the
              // assignment list beforehand
              if (!newIterationJob || jointype.equals("one2mul")) {
                // poll, remove
                this.mtask_assign_map
                    .get(job.getJobID())
                    .get(taskTracker.getTrackerName())
                    .pollFirst();
                LOG.info("assigning task " + t.getTaskID() + " on " + taskTracker.getTrackerName());
              } else {
                // record the assignment list for map tasks
                if (!this.tracker_mtask_map
                    .get(iterativeAppID)
                    .containsKey(taskTracker.getTrackerName())) {
                  LinkedList<Integer> tasklist = new LinkedList<Integer>();
                  this.tracker_mtask_map
                      .get(iterativeAppID)
                      .put(taskTracker.getTrackerName(), tasklist);
                }

                this.tracker_mtask_map
                    .get(iterativeAppID)
                    .get(taskTracker.getTrackerName())
                    .add(t.getTaskID().getTaskID().getId());

                // prepare the reduce assignment, for mapping with reduce
                if (jointype.equals("one2one")) {
                  // prepare the reduce assignment, for mapping with reduce
                  if (!first_job_reduces_map.containsKey(iterativeAppID)) {
                    Map<String, LinkedList<Integer>> tracker_reduce_map =
                        new HashMap<String, LinkedList<Integer>>();
                    first_job_reduces_map.put(iterativeAppID, tracker_reduce_map);
                  }

                  if (!first_job_reduces_map
                      .get(iterativeAppID)
                      .containsKey(taskTracker.getTrackerName())) {
                    LinkedList<Integer> reduces = new LinkedList<Integer>();
                    first_job_reduces_map
                        .get(iterativeAppID)
                        .put(taskTracker.getTrackerName(), reduces);
                  }

                  first_job_reduces_map
                      .get(iterativeAppID)
                      .get(taskTracker.getTrackerName())
                      .add(t.getTaskID().getTaskID().getId());
                }

                LOG.info("assigning task " + t.getTaskID() + " on " + taskTracker.getTrackerName());
              }

              // Don't assign map tasks to the hilt!
              // Leave some free slots in the cluster for future task-failures,
              // speculative tasks etc. beyond the highest priority job
              if (exceededMapPadding) {
                break scheduleMaps;
              }

              // Try all jobs again for the next Map task
              break;
            }

            LOG.error("New Node Or Rack Local Map Task failed!");

            if (newIterationJob) {
              // Try to schedule a node-local or rack-local Map task
              t =
                  job.obtainNewNonLocalMapTask(
                      taskTrackerStatus,
                      numTaskTrackers,
                      taskTrackerManager.getNumberOfUniqueHosts());
            } else {
              Integer target =
                  this.mtask_assign_map
                      .get(job.getJobID())
                      .get(taskTracker.getTrackerName())
                      .peekFirst();

              if (target == null) {
                // all have been assigned, no more work, maybe it should help others to process
                LOG.info(
                    "all map tasks on tasktracker "
                        + taskTracker.getTrackerName()
                        + " have been processed");
                break;
              } else {
                t =
                    job.obtainNewNonLocalMapTask(
                        taskTrackerStatus,
                        numTaskTrackers,
                        taskTrackerManager.getNumberOfUniqueHosts(),
                        target);
              }
            }

            if (t != null) {
              assignedTasks.add(t);
              ++numNonLocalMaps;

              // new iteration job and the first task for a tasktracker
              if (newIterationJob) {
                if (!this.tracker_mtask_map
                    .get(iterativeAppID)
                    .containsKey(taskTracker.getTrackerName())) {
                  LinkedList<Integer> tasklist = new LinkedList<Integer>();
                  this.tracker_mtask_map
                      .get(iterativeAppID)
                      .put(taskTracker.getTrackerName(), tasklist);
                }

                this.tracker_mtask_map
                    .get(iterativeAppID)
                    .get(taskTracker.getTrackerName())
                    .add(t.getTaskID().getTaskID().getId());
              } else {
                // poll, remove
                this.mtask_assign_map
                    .get(job.getJobID())
                    .get(taskTracker.getTrackerName())
                    .pollFirst();
              }

              // We assign at most 1 off-switch or speculative task
              // This is to prevent TaskTrackers from stealing local-tasks
              // from other TaskTrackers.
              break scheduleMaps;
            }
          } else {
            // not an iterative algorithm, normal schedule
            Task t = null;

            // Try to schedule a node-local or rack-local Map task
            t =
                job.obtainNewNodeOrRackLocalMapTask(
                    taskTrackerStatus,
                    numTaskTrackers,
                    taskTrackerManager.getNumberOfUniqueHosts());
            if (t != null) {
              assignedTasks.add(t);
              ++numLocalMaps;

              // Don't assign map tasks to the hilt!
              // Leave some free slots in the cluster for future task-failures,
              // speculative tasks etc. beyond the highest priority job
              if (exceededMapPadding) {
                break scheduleMaps;
              }

              // Try all jobs again for the next Map task
              break;
            }

            // Try to schedule a node-local or rack-local Map task
            t =
                job.obtainNewNonLocalMapTask(
                    taskTrackerStatus,
                    numTaskTrackers,
                    taskTrackerManager.getNumberOfUniqueHosts());

            if (t != null) {
              assignedTasks.add(t);
              ++numNonLocalMaps;

              // We assign at most 1 off-switch or speculative task
              // This is to prevent TaskTrackers from stealing local-tasks
              // from other TaskTrackers.
              break scheduleMaps;
            }
          }
        }
      }
    }
    int assignedMaps = assignedTasks.size();

    //
    // Same thing, but for reduce tasks
    // However we _never_ assign more than 1 reduce task per heartbeat
    //
    /** should maintain the reduce task location for the termination check */
    final int trackerCurrentReduceCapacity =
        Math.min((int) Math.ceil(reduceLoadFactor * trackerReduceCapacity), trackerReduceCapacity);
    final int availableReduceSlots =
        Math.min((trackerCurrentReduceCapacity - trackerRunningReduces), 1);
    boolean exceededReducePadding = false;
    // LOG.info("availableReduceSlots " + availableReduceSlots);
    if (availableReduceSlots > 0) {

      exceededReducePadding = exceededPadding(false, clusterStatus, trackerReduceCapacity);
      synchronized (jobQueue) {
        for (JobInProgress job : jobQueue) {
          LOG.info("job " + job.getJobID());
          if (job.getStatus().getRunState() != JobStatus.RUNNING || job.numReduceTasks == 0) {
            LOG.info("have to continue " + job.getStatus().getRunState());
            continue;
          }

          Task t = null;

          if (job.getJobConf().isIterative()) {
            String iterativeAppID = job.getJobConf().getIterativeAlgorithmID();
            if (iterativeAppID.equals("none")) {
              throw new IOException("please specify the iteration ID!");
            }

            String jointype = job.getJobConf().get("mapred.iterative.jointype");

            if (jointype.equals("one2one")) {
              // one-to-one or one-to-mul jobs

              if (this.first_job_map.get(iterativeAppID).equals(job.getJobID())
                  && job.getJobConf().isIterative()) {
                LOG.info(job.getJobID() + " is the first iteration job for reduce");
                newIterationJob = true;
              }

              Integer target = null;
              if (newIterationJob) {

                if (first_job_reduces_map.get(iterativeAppID) == null) {
                  throw new IOException(
                      "I think something is wrong since the tasktracker never receive "
                          + "a map task with iterativeapp id "
                          + iterativeAppID);
                }

                if (first_job_reduces_map.get(iterativeAppID).get(taskTracker.getTrackerName())
                    == null) {
                  throw new IOException(
                      "I think something is wrong since the tasktracker never receive "
                          + "a map task with iterativeapp id "
                          + iterativeAppID
                          + " from "
                          + taskTracker.getTrackerName());
                }

                target =
                    this.first_job_reduces_map
                        .get(iterativeAppID)
                        .get(taskTracker.getTrackerName())
                        .pollFirst();
              } else {
                // the task assignment has already been processed during the map task assignment, so
                // never use tracker_rtask_map
                target =
                    this.rtask_assign_map
                        .get(job.getJobID())
                        .get(taskTracker.getTrackerName())
                        .pollFirst();
              }

              if (target == null) {
                // all have been assigned, no more work, maybe it should help others to process
                LOG.info(
                    "all reduce tasks on tasktracker "
                        + taskTracker.getTrackerName()
                        + " have been processed");
                break;
              } else {
                t =
                    job.obtainNewReduceTask(
                        taskTrackerStatus,
                        numTaskTrackers,
                        taskTrackerManager.getNumberOfUniqueHosts(),
                        target);
              }
            } else if (jointype.equals("one2mul")) {
              Integer target =
                  this.rtask_assign_map
                      .get(job.getJobID())
                      .get(taskTracker.getTrackerName())
                      .pollFirst();

              if (target == null) {
                // all have been assigned, no more work, maybe it should help others to process
                LOG.info(
                    "all reduce tasks on tasktracker "
                        + taskTracker.getTrackerName()
                        + " have been processed");
                break;
              } else {
                t =
                    job.obtainNewReduceTask(
                        taskTrackerStatus,
                        numTaskTrackers,
                        taskTrackerManager.getNumberOfUniqueHosts(),
                        target);
              }
            } else {
              // one-to-all case, assign tasks in the first iteration job, and remember this mapping

              // this is the first job of the series of jobs
              if (this.first_job_map.get(iterativeAppID).equals(job.getJobID())) {
                LOG.info(job.getJobID() + " is the first iteration job for reduce");
                newIterationJob = true;
              }
              /*
              //this is one of the following jobs, and prepare a assignment list for the assignment
              else{
               LOG.info(job.getJobID() + " is not the first iteration job for reduce");
               if(this.rtask_assign_map.get(job.getJobID()) == null){
                //prepare the map task assignment list
                Map<String, LinkedList<Integer>> reduce_task_assign = new HashMap<String, LinkedList<Integer>>();
                for(Map.Entry<String, LinkedList<Integer>> entry : this.tracker_rtask_map.get(iterativeAppID).entrySet()){
              	  String tracker = entry.getKey();
              	  LinkedList<Integer> taskids = entry.getValue();
              	  LinkedList<Integer> copytaskids = new LinkedList<Integer>();
              	  for(int taskid : taskids){
              		  copytaskids.add(taskid);
              	  }
              	  reduce_task_assign.put(tracker, copytaskids);
                }
                this.rtask_assign_map.put(job.getJobID(), reduce_task_assign);
               }

              }
              */

              // the first iteration or following iteration
              // if the first iteration: assign taskid by default
              // else if the following iterations: assign taskid based on the first iteration
              // assignment
              if (newIterationJob) {
                t =
                    job.obtainNewReduceTask(
                        taskTrackerStatus,
                        numTaskTrackers,
                        taskTrackerManager.getNumberOfUniqueHosts());

                if (t != null) {
                  if (!this.tracker_rtask_map
                      .get(iterativeAppID)
                      .containsKey(taskTracker.getTrackerName())) {
                    LinkedList<Integer> tasklist = new LinkedList<Integer>();
                    this.tracker_rtask_map
                        .get(iterativeAppID)
                        .put(taskTracker.getTrackerName(), tasklist);
                  }

                  this.tracker_rtask_map
                      .get(iterativeAppID)
                      .get(taskTracker.getTrackerName())
                      .add(t.getTaskID().getTaskID().getId());
                  LOG.info(
                      "assigning reduce task "
                          + t.getTaskID()
                          + " on "
                          + taskTracker.getTrackerName());
                }
              } else {
                Integer target =
                    this.rtask_assign_map
                        .get(job.getJobID())
                        .get(taskTracker.getTrackerName())
                        .peekFirst();

                if (target == null) {
                  // all have been assigned, no more work, maybe it should help others to process
                  LOG.info(
                      "all map tasks on tasktracker "
                          + taskTracker.getTrackerName()
                          + " have been processed");
                  break;
                } else {
                  t =
                      job.obtainNewReduceTask(
                          taskTrackerStatus,
                          numTaskTrackers,
                          taskTrackerManager.getNumberOfUniqueHosts(),
                          target);
                }

                if (t != null) {
                  // poll, remove
                  this.rtask_assign_map
                      .get(job.getJobID())
                      .get(taskTracker.getTrackerName())
                      .pollFirst();
                  LOG.info(
                      "assigning reduce task "
                          + t.getTaskID()
                          + " on "
                          + taskTracker.getTrackerName());
                }
              }
            }
          } else {
            t =
                job.obtainNewReduceTask(
                    taskTrackerStatus,
                    numTaskTrackers,
                    taskTrackerManager.getNumberOfUniqueHosts());
          }

          LOG.info("try to assign new task " + t);
          if (t != null) {
            assignedTasks.add(t);
            break;
          }

          // Don't assign reduce tasks to the hilt!
          // Leave some free slots in the cluster for future task-failures,
          // speculative tasks etc. beyond the highest priority job
          if (exceededReducePadding) {
            break;
          }
        }
      }
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug(
          "Task assignments for "
              + taskTrackerStatus.getTrackerName()
              + " --> "
              + "["
              + mapLoadFactor
              + ", "
              + trackerMapCapacity
              + ", "
              + trackerCurrentMapCapacity
              + ", "
              + trackerRunningMaps
              + "] -> ["
              + (trackerCurrentMapCapacity - trackerRunningMaps)
              + ", "
              + assignedMaps
              + " ("
              + numLocalMaps
              + ", "
              + numNonLocalMaps
              + ")] ["
              + reduceLoadFactor
              + ", "
              + trackerReduceCapacity
              + ", "
              + trackerCurrentReduceCapacity
              + ","
              + trackerRunningReduces
              + "] -> ["
              + (trackerCurrentReduceCapacity - trackerRunningReduces)
              + ", "
              + (assignedTasks.size() - assignedMaps)
              + "]");
    }

    return assignedTasks;
  }
  /*
   * TODO:
   * For Elf: need to change the major scheduling logic; scheduling needs
   * to be *datacenter-aware*
   * */
  @Override
  public synchronized List<Task> assignTasks(TaskTracker taskTracker) throws IOException {
    TaskTrackerStatus taskTrackerStatus = taskTracker.getStatus();
    ClusterStatus clusterStatus = taskTrackerManager.getClusterStatus();
    final int numTaskTrackers = clusterStatus.getTaskTrackers();
    final int clusterMapCapacity = clusterStatus.getMaxMapTasks();
    final int clusterReduceCapacity = clusterStatus.getMaxReduceTasks();

    Collection<JobInProgress> jobQueue = jobQueueJobInProgressListener.getJobQueue();

    //
    // Get map + reduce counts for the current tracker.
    //
    final int trackerMapCapacity = taskTrackerStatus.getMaxMapSlots();
    final int trackerReduceCapacity = taskTrackerStatus.getMaxReduceSlots();
    final int trackerRunningMaps = taskTrackerStatus.countMapTasks();
    final int trackerRunningReduces = taskTrackerStatus.countReduceTasks();

    // Assigned tasks
    List<Task> assignedTasks = new ArrayList<Task>();

    //
    // Compute (running + pending) map and reduce task numbers across pool
    //
    int remainingReduceLoad = 0;
    int remainingMapLoad = 0;
    synchronized (jobQueue) {
      for (JobInProgress job : jobQueue) {
        if (job.getStatus().getRunState() == JobStatus.RUNNING) {
          remainingMapLoad += (job.desiredMaps() - job.finishedMaps());
          if (job.scheduleReduces()) {
            remainingReduceLoad += (job.desiredReduces() - job.finishedReduces());
          }
        }
      }
    }

    // Compute the 'load factor' for maps and reduces
    double mapLoadFactor = 0.0;
    if (clusterMapCapacity > 0) {
      mapLoadFactor = (double) remainingMapLoad / clusterMapCapacity;
    }
    double reduceLoadFactor = 0.0;
    if (clusterReduceCapacity > 0) {
      reduceLoadFactor = (double) remainingReduceLoad / clusterReduceCapacity;
    }

    //
    // In the below steps, we allocate first map tasks (if appropriate),
    // and then reduce tasks if appropriate.  We go through all jobs
    // in order of job arrival; jobs only get serviced if their
    // predecessors are serviced, too.
    //

    //
    // We assign tasks to the current taskTracker if the given machine
    // has a workload that's less than the maximum load of that kind of
    // task.
    // However, if the cluster is close to getting loaded i.e. we don't
    // have enough _padding_ for speculative executions etc., we only
    // schedule the "highest priority" task i.e. the task from the job
    // with the highest priority.
    //

    final int trackerCurrentMapCapacity =
        Math.min((int) Math.ceil(mapLoadFactor * trackerMapCapacity), trackerMapCapacity);
    int availableMapSlots = trackerCurrentMapCapacity - trackerRunningMaps;
    boolean exceededMapPadding = false;
    if (availableMapSlots > 0) {
      exceededMapPadding = exceededPadding(true, clusterStatus, trackerMapCapacity);
    }

    int numLocalMaps = 0;
    int numNonLocalMaps = 0;
    scheduleMaps:

    // TODO: for Elf
    // The main scheduling logic: the outer loop iterates over the available map slots,
    // the inner loop over the jobs in the queue
    for (int i = 0; i < availableMapSlots; ++i) {
      synchronized (jobQueue) {
        for (JobInProgress job : jobQueue) {
          if (job.getStatus().getRunState() != JobStatus.RUNNING) {
            continue;
          }

          Task t = null;

          // Try to schedule a node-local or rack-local Map task
          t =
              job.obtainNewLocalMapTask(
                  taskTrackerStatus, numTaskTrackers, taskTrackerManager.getNumberOfUniqueHosts());
          if (t != null) {
            assignedTasks.add(t);
            ++numLocalMaps;

            // Don't assign map tasks to the hilt!
            // Leave some free slots in the cluster for future task-failures,
            // speculative tasks etc. beyond the highest priority job
            if (exceededMapPadding) {
              break scheduleMaps;
            }

            // Try all jobs again for the next Map task
            // Note: it's FIFO here: next time in the inner for loop the head-of-queue
            //  will still be chosen
            break;
          }

          // If no locality for this job, try launching non-local
          // Try to schedule a node-local or rack-local Map task --> original comment
          // FIXME: is the above comment correct? It seems this should be a non-local task
          t =
              job.obtainNewNonLocalMapTask(
                  taskTrackerStatus, numTaskTrackers, taskTrackerManager.getNumberOfUniqueHosts());

          if (t != null) {
            assignedTasks.add(t);
            ++numNonLocalMaps;

            // We assign at most 1 off-switch or speculative task
            // This is to prevent TaskTrackers from stealing local-tasks
            // from other TaskTrackers.
            break scheduleMaps;
          }
        }
      }
    }
    int assignedMaps = assignedTasks.size();

    //
    // Same thing, but for reduce tasks
    // However we _never_ assign more than 1 reduce task per heartbeat
    //
    final int trackerCurrentReduceCapacity =
        Math.min((int) Math.ceil(reduceLoadFactor * trackerReduceCapacity), trackerReduceCapacity);
    final int availableReduceSlots =
        Math.min((trackerCurrentReduceCapacity - trackerRunningReduces), 1);
    boolean exceededReducePadding = false;
    if (availableReduceSlots > 0) {
      exceededReducePadding = exceededPadding(false, clusterStatus, trackerReduceCapacity);
      synchronized (jobQueue) {
        for (JobInProgress job : jobQueue) {
          if (job.getStatus().getRunState() != JobStatus.RUNNING || job.numReduceTasks == 0) {
            continue;
          }

          Task t =
              job.obtainNewReduceTask(
                  taskTrackerStatus, numTaskTrackers, taskTrackerManager.getNumberOfUniqueHosts());
          if (t != null) {
            assignedTasks.add(t);
            break;
          }

          // Don't assign reduce tasks to the hilt!
          // Leave some free slots in the cluster for future task-failures,
          // speculative tasks etc. beyond the highest priority job
          if (exceededReducePadding) {
            break;
          }
        }
      }
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug(
          "Task assignments for "
              + taskTrackerStatus.getTrackerName()
              + " --> "
              + "["
              + mapLoadFactor
              + ", "
              + trackerMapCapacity
              + ", "
              + trackerCurrentMapCapacity
              + ", "
              + trackerRunningMaps
              + "] -> ["
              + (trackerCurrentMapCapacity - trackerRunningMaps)
              + ", "
              + assignedMaps
              + " ("
              + numLocalMaps
              + ", "
              + numNonLocalMaps
              + ")] ["
              + reduceLoadFactor
              + ", "
              + trackerReduceCapacity
              + ", "
              + trackerCurrentReduceCapacity
              + ","
              + trackerRunningReduces
              + "] -> ["
              + (trackerCurrentReduceCapacity - trackerRunningReduces)
              + ", "
              + (assignedTasks.size() - assignedMaps)
              + "]");
    }

    return assignedTasks;
  }
Example No. 13
  /**
   * Performs a range query using MapReduce.
   *
   * @param fs the file system containing the input file
   * @param inputFile the file to query
   * @param userOutputPath the output path; if null, a temporary path is generated and deleted after the job
   * @param queryShape the query range
   * @param shape an instance of the shape type stored in the input file
   * @param overwrite whether an existing output path may be overwritten
   * @param background whether to submit the job without waiting for it to finish
   * @param query the query description (type and field)
   * @return the number of matching records, or -1 when the job is submitted in the background
   * @throws IOException
   */
  public static long rangeQueryMapReduce(
      FileSystem fs,
      Path inputFile,
      Path userOutputPath,
      Shape queryShape,
      Shape shape,
      boolean overwrite,
      boolean background,
      QueryInput query)
      throws IOException {
    JobConf job = new JobConf(FileMBR.class);

    FileSystem outFs = inputFile.getFileSystem(job);
    Path outputPath = userOutputPath;
    if (outputPath == null) {
      do {
        outputPath =
            new Path(
                inputFile.toUri().getPath() + ".rangequery_" + (int) (Math.random() * 1000000));
      } while (outFs.exists(outputPath));
    } else {
      if (outFs.exists(outputPath)) {
        if (overwrite) {
          outFs.delete(outputPath, true);
        } else {
          throw new RuntimeException("Output path already exists and -overwrite flag is not set");
        }
      }
    }

    job.setJobName("RangeQuery");
    job.setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class);
    RangeFilter.setQueryRange(job, queryShape); // Set query range for filter

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);
    job.setNumReduceTasks(3);

    // Decide which map function to use depending on how blocks are indexed
    // And also which input format to use
    if (SpatialSite.isRTree(fs, inputFile)) {
      // RTree indexed file
      LOG.info("Searching an RTree indexed file");
      job.setInputFormat(RTreeInputFormat.class);
    } else {
      // A file with no local index
      LOG.info("Searching a non local-indexed file");
      job.setInputFormat(ShapeInputFormat.class);
    }

    GlobalIndex<Partition> gIndex = SpatialSite.getGlobalIndex(fs, inputFile);
    // if (gIndex != null && gIndex.isReplicated()){
    // job.setMapperClass(RangeQueryMap.class);

    Class<?> OutputKey = NullWritable.class;
    try {
      Class<?> c = shape.getClass();
      Field f = c.getDeclaredField(query.field);
      f.setAccessible(true);
      if (f.getType().equals(Integer.TYPE)) {
        OutputKey = IntWritable.class;
      } else if (f.getType().equals(Double.TYPE)) {
        OutputKey = DoubleWritable.class;
      } else if (f.getType().equals(Long.TYPE)) {
        OutputKey = LongWritable.class;
      }
    } catch (SecurityException e) {
      e.printStackTrace();
    } catch (NoSuchFieldException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    }

    job.setMapOutputKeyClass(OutputKey);
    switch (query.type) {
      case Distinct:
        job.setMapperClass(DistinctQueryMap.class);
        job.setReducerClass(DistinctQueryReduce.class);
        job.setMapOutputValueClass(NullWritable.class);
        break;
      case Distribution:
        job.setMapperClass(DistributionQueryMap.class);
        job.setReducerClass(DistributionQueryReduce.class);
        job.setMapOutputValueClass(IntWritable.class);
        break;
      default:
        break;
    }
    // }
    // else
    // job.setMapperClass(RangeQueryMapNoDupAvoidance.class);

    // Set query range for the map function
    job.set(QUERY_SHAPE_CLASS, queryShape.getClass().getName());
    job.set(QUERY_SHAPE, queryShape.toText(new Text()).toString());
    job.set(QUERY_FIELD, query.field);

    // Set shape class for the SpatialInputFormat
    SpatialSite.setShapeClass(job, shape.getClass());

    job.setOutputFormat(TextOutputFormat.class);

    ShapeInputFormat.setInputPaths(job, inputFile);
    TextOutputFormat.setOutputPath(job, outputPath);

    // Submit the job
    if (!background) {
      RunningJob runningJob = JobClient.runJob(job);
      Counters counters = runningJob.getCounters();
      Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
      final long resultCount = outputRecordCounter.getValue();

      // If outputPath not set by user, automatically delete it
      if (userOutputPath == null) outFs.delete(outputPath, true);

      return resultCount;
    } else {
      JobClient jc = new JobClient(job);
      lastRunningJob = jc.submitJob(job);
      return -1;
    }
  }
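The reflection block above, which picks a Writable map output key class from the declared type of query.field, can be factored into a small standalone helper. The sketch below is not part of the original source; the class and method names are made up for illustration, and it simply repeats the same type-to-Writable mapping, defaulting to NullWritable when the field cannot be resolved.

import java.lang.reflect.Field;

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;

public class OutputKeyResolver {

  /**
   * Maps the declared primitive type of fieldName on shapeClass to a Writable key class.
   * Falls back to NullWritable when the field is missing or inaccessible.
   */
  public static Class<?> resolveKeyClass(Class<?> shapeClass, String fieldName) {
    try {
      Field f = shapeClass.getDeclaredField(fieldName);
      f.setAccessible(true);
      if (f.getType().equals(Integer.TYPE)) {
        return IntWritable.class;
      } else if (f.getType().equals(Double.TYPE)) {
        return DoubleWritable.class;
      } else if (f.getType().equals(Long.TYPE)) {
        return LongWritable.class;
      }
    } catch (NoSuchFieldException | SecurityException e) {
      // Ignore and fall through to the NullWritable default.
    }
    return NullWritable.class;
  }
}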
Exemplo n.º 14
0
  /**
   * The main driver for the join program. Invoke this method to submit the map/reduce job.
   *
   * @throws Exception When there are communication problems with the job tracker.
   */
  public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String join_reduces = conf.get(REDUCES_PER_HOST);
    if (join_reduces != null) {
      num_reduces = cluster.getTaskTrackers() * Integer.parseInt(join_reduces);
    }
    Job job = new Job(conf);
    job.setJobName("join");
    job.setJarByClass(Sort.class);

    job.setMapperClass(Mapper.class);
    job.setReducerClass(Reducer.class);

    Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
    Class<? extends Writable> outputValueClass = TupleWritable.class;
    String op = "inner";
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
      try {
        if ("-r".equals(args[i])) {
          num_reduces = Integer.parseInt(args[++i]);
        } else if ("-inFormat".equals(args[i])) {
          inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
        } else if ("-outFormat".equals(args[i])) {
          outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
        } else if ("-outKey".equals(args[i])) {
          outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
        } else if ("-outValue".equals(args[i])) {
          outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
        } else if ("-joinOp".equals(args[i])) {
          op = args[++i];
        } else {
          otherArgs.add(args[i]);
        }
      } catch (NumberFormatException except) {
        System.out.println("ERROR: Integer expected instead of " + args[i]);
        return printUsage();
      } catch (ArrayIndexOutOfBoundsException except) {
        System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
        return printUsage(); // exits
      }
    }

    // Set user-supplied (possibly default) job configs
    job.setNumReduceTasks(num_reduces);

    if (otherArgs.size() < 2) {
      System.out.println("ERROR: Wrong number of parameters: ");
      return printUsage();
    }

    FileOutputFormat.setOutputPath(job, new Path(otherArgs.remove(otherArgs.size() - 1)));
    List<Path> plist = new ArrayList<Path>(otherArgs.size());
    for (String s : otherArgs) {
      plist.add(new Path(s));
    }

    job.setInputFormatClass(CompositeInputFormat.class);
    job.getConfiguration()
        .set(
            CompositeInputFormat.JOIN_EXPR,
            CompositeInputFormat.compose(op, inputFormatClass, plist.toArray(new Path[0])));
    job.setOutputFormatClass(outputFormatClass);

    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println(
        "The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return ret;
  }
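Drivers like the one above are normally launched through ToolRunner, which strips the generic Hadoop options before run() sees the remaining arguments. A minimal sketch follows; it assumes the enclosing driver class is called Join and implements Tool (the real class name is not shown here, and the paths are made up for illustration).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

public class JoinLauncher {
  public static void main(String[] args) throws Exception {
    // "Join" is the assumed driver class containing the run(String[]) method above.
    // Roughly equivalent to: hadoop jar examples.jar join -r 8 /data/in1 /data/in2 /data/out
    int exitCode = ToolRunner.run(new Configuration(), new Join(),
        new String[] {"-r", "8", "/data/in1", "/data/in2", "/data/out"});
    System.exit(exitCode);
  }
}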
Exemplo n.º 15
0
  public void _jspService(HttpServletRequest request, HttpServletResponse response)
      throws java.io.IOException, ServletException {

    PageContext pageContext = null;
    HttpSession session = null;
    ServletContext application = null;
    ServletConfig config = null;
    JspWriter out = null;
    Object page = this;
    JspWriter _jspx_out = null;
    PageContext _jspx_page_context = null;

    try {
      response.setContentType("text/html; charset=UTF-8");
      pageContext = _jspxFactory.getPageContext(this, request, response, null, true, 8192, true);
      _jspx_page_context = pageContext;
      application = pageContext.getServletContext();
      config = pageContext.getServletConfig();
      session = pageContext.getSession();
      out = pageContext.getOut();
      _jspx_out = out;
      _jspx_resourceInjector =
          (org.apache.jasper.runtime.ResourceInjector)
              application.getAttribute("com.sun.appserv.jsp.resource.injector");

      out.write('\n');

      JobTracker tracker = (JobTracker) application.getAttribute("job.tracker");
      ClusterStatus status = tracker.getClusterStatus();
      String trackerName = StringUtils.simpleHostname(tracker.getJobTrackerMachine());

      out.write("\n<html>\n<head>\n<title>");
      out.print(trackerName);
      out.write(
          " Hadoop Locality Statistics</title>\n<link rel=\"stylesheet\" type=\"text/css\" href=\"/static/hadoop.css\">\n</head>\n<body>\n<h1>");
      out.print(trackerName);
      out.write(" Hadoop Locality Statistics</h1>\n\n<b>State:</b> ");
      out.print(status.getJobTrackerState());
      out.write("<br>\n<b>Started:</b> ");
      out.print(new Date(tracker.getStartTime()));
      out.write("<br>\n<b>Version:</b> ");
      out.print(VersionInfo.getVersion());
      out.write(",\n                r");
      out.print(VersionInfo.getRevision());
      out.write("<br>\n<b>Compiled:</b> ");
      out.print(VersionInfo.getDate());
      out.write(" by\n                 ");
      out.print(VersionInfo.getUser());
      out.write("<br>\n<b>Identifier:</b> ");
      out.print(tracker.getTrackerIdentifier());
      out.write("<br>\n\n<hr>\n\n");

      Collection<JobInProgress> jobs = new ArrayList<JobInProgress>();
      jobs.addAll(tracker.completedJobs());
      jobs.addAll(tracker.runningJobs());
      jobs.addAll(tracker.failedJobs());
      int dataLocalMaps = 0;
      int rackLocalMaps = 0;
      int totalMaps = 0;
      int totalReduces = 0;
      for (JobInProgress job : jobs) {
        Counters counters = job.getCounters();
        dataLocalMaps += counters.getCounter(JobInProgress.Counter.DATA_LOCAL_MAPS);
        rackLocalMaps += counters.getCounter(JobInProgress.Counter.RACK_LOCAL_MAPS);
        totalMaps += counters.getCounter(JobInProgress.Counter.TOTAL_LAUNCHED_MAPS);
        totalReduces += counters.getCounter(JobInProgress.Counter.TOTAL_LAUNCHED_REDUCES);
      }
      int dataLocalMapPct = totalMaps == 0 ? 0 : (100 * dataLocalMaps) / totalMaps;
      int rackLocalMapPct = totalMaps == 0 ? 0 : (100 * rackLocalMaps) / totalMaps;
      int dataRackLocalMapPct =
          totalMaps == 0 ? 0 : (100 * (dataLocalMaps + rackLocalMaps)) / totalMaps;

      out.write("\n<p>\n<b>Data Local Maps:</b> ");
      out.print(dataLocalMaps);
      out.write(' ');
      out.write('(');
      out.print(dataLocalMapPct);
      out.write("%) <br>\n<b>Rack Local Maps:</b> ");
      out.print(rackLocalMaps);
      out.write(' ');
      out.write('(');
      out.print(rackLocalMapPct);
      out.write("%) <br>\n<b>Data or Rack Local:</b> ");
      out.print(dataLocalMaps + rackLocalMaps);
      out.write(' ');
      out.write('(');
      out.print(dataRackLocalMapPct);
      out.write("%) <br>\n<b>Total Maps:</b> ");
      out.print(totalMaps);
      out.write(" <br>\n<b>Total Reduces:</b> ");
      out.print(totalReduces);
      out.write(" <br>\n</p>\n\n");

      out.println(ServletUtil.htmlFooter());

      out.write('\n');
    } catch (Throwable t) {
      if (!(t instanceof SkipPageException)) {
        out = _jspx_out;
        if (out != null && out.getBufferSize() != 0) out.clearBuffer();
        if (_jspx_page_context != null) _jspx_page_context.handlePageException(t);
      }
    } finally {
      _jspxFactory.releasePageContext(_jspx_page_context);
    }
  }
Exemplo n.º 16
0
    static void checkRecords(
        Configuration defaults, int noMaps, int noReduces, Path sortInput, Path sortOutput)
        throws IOException {
      JobConf jobConf = new JobConf(defaults, RecordChecker.class);
      jobConf.setJobName("sortvalidate-record-checker");

      jobConf.setInputFormat(SequenceFileInputFormat.class);
      jobConf.setOutputFormat(SequenceFileOutputFormat.class);

      jobConf.setOutputKeyClass(BytesWritable.class);
      jobConf.setOutputValueClass(IntWritable.class);

      jobConf.setMapperClass(Map.class);
      jobConf.setReducerClass(Reduce.class);

      JobClient client = new JobClient(jobConf);
      ClusterStatus cluster = client.getClusterStatus();
      if (noMaps == -1) {
        noMaps = cluster.getTaskTrackers() * jobConf.getInt("test.sortvalidate.maps_per_host", 10);
      }
      if (noReduces == -1) {
        noReduces = (int) (cluster.getMaxReduceTasks() * 0.9);
        String sortReduces = jobConf.get("test.sortvalidate.reduces_per_host");
        if (sortReduces != null) {
          noReduces = cluster.getTaskTrackers() * Integer.parseInt(sortReduces);
        }
      }
      jobConf.setNumMapTasks(noMaps);
      jobConf.setNumReduceTasks(noReduces);

      FileInputFormat.setInputPaths(jobConf, sortInput);
      FileInputFormat.addInputPath(jobConf, sortOutput);
      Path outputPath = new Path("/tmp/sortvalidate/recordchecker");
      FileSystem fs = FileSystem.get(defaults);
      if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
      }
      FileOutputFormat.setOutputPath(jobConf, outputPath);

      // Uncomment to run locally in a single process
      // jobConf.set("mapred.job.tracker", "local");
      Path[] inputPaths = FileInputFormat.getInputPaths(jobConf);
      System.out.println(
          "\nSortValidator.RecordChecker: Running on "
              + cluster.getTaskTrackers()
              + " nodes to validate sort from "
              + inputPaths[0]
              + ", "
              + inputPaths[1]
              + " into "
              + FileOutputFormat.getOutputPath(jobConf)
              + " with "
              + noReduces
              + " reduces.");
      Date startTime = new Date();
      System.out.println("Job started: " + startTime);
      JobClient.runJob(jobConf);
      Date end_time = new Date();
      System.out.println("Job ended: " + end_time);
      System.out.println(
          "The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    }
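A hypothetical caller of the record checker above: passing -1 for both the map and reduce counts lets the method size the job from the cluster status. The driver class name and paths are made up for illustration, and since checkRecords is package-private the caller is assumed to live in the same package as the RecordChecker class.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public class RecordCheckerDriver {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // -1, -1: derive the number of maps and reduces from the cluster status.
    RecordChecker.checkRecords(conf, -1, -1,
        new Path("/benchmarks/sort/input"), new Path("/benchmarks/sort/output"));
  }
}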