private String[] getActiveServersList(JobContext context) { String[] servers = null; try { JobClient jc = new JobClient((JobConf) context.getConfiguration()); ClusterStatus status = jc.getClusterStatus(true); Collection<String> atc = status.getActiveTrackerNames(); servers = new String[atc.size()]; int s = 0; for (String serverInfo : atc) { // System.out.println("serverInfo:" + serverInfo); StringTokenizer st = new StringTokenizer(serverInfo, ":"); String trackerName = st.nextToken(); // System.out.println("trackerName:" + trackerName); StringTokenizer st1 = new StringTokenizer(trackerName, "_"); st1.nextToken(); servers[s++] = st1.nextToken(); } } catch (IOException e) { e.printStackTrace(); } return servers; }
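A minimal sketch of the same host extraction, assuming the entries returned by ClusterStatus.getActiveTrackerNames() follow the usual "tracker_<hostname>:<report-address>" pattern (format assumed; the helper name below is hypothetical):

// Hypothetical helper: pull the host name out of an entry such as
// "tracker_host1.example.com:localhost/127.0.0.1:54311" (format assumed).
private static String trackerHost(String serverInfo) {
  String namePart = serverInfo.split(":", 2)[0];          // "tracker_host1.example.com"
  int underscore = namePart.indexOf('_');
  return underscore >= 0 ? namePart.substring(underscore + 1) : namePart;
}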
/** * Generate the requested number of file splits, with the filename set to the filename of the * output file. */ public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { /* Set the number of input splits. */ JobClient client = new JobClient(job); ClusterStatus cluster = client.getClusterStatus(); /* If the property is not set, the default value is returned. */ int numMapsPerHost = job.getInt("test.randomwriter.maps_per_host", 10); long numBytesToWritePerMap = job.getLong("test.randomwrite.bytes_per_map", 1 * 1024 * 1024 * 1024); if (numBytesToWritePerMap == 0) { System.err.println("Cannot have test.randomwrite.bytes_per_map set to 0"); } long totalBytesToWrite = job.getLong( "test.randomwrite.total_bytes", numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers()); int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap); if (numMaps == 0 && totalBytesToWrite > 0) { numMaps = 1; } System.out.println("numMaps-------" + numMaps); InputSplit[] result = new InputSplit[numMaps]; Path outDir = FileOutputFormat.getOutputPath(job); for (int i = 0; i < result.length; ++i) { result[i] = new FileSplit(new Path(outDir, "dummy-split-" + i), 0, 1, (String[]) null); } return result; }
/** * This is the main routine for launching a distributed random write job. It runs 10 maps/node and * each node writes 1 gig of data to a DFS file. The reduce doesn't do anything. * * @throws IOException */ public int run(String[] args) throws Exception { if (args.length == 0) { System.out.println("Usage: writer <out-dir>"); ToolRunner.printGenericCommandUsage(System.out); return -1; } Path outDir = new Path(args[0]); JobConf job = new JobConf(getConf()); job.setJarByClass(RandomWriter.class); job.setJobName("random-writer"); FileOutputFormat.setOutputPath(job, outDir); job.setOutputKeyClass(BytesWritable.class); job.setOutputValueClass(BytesWritable.class); job.setInputFormat(RandomInputFormat.class); job.setMapperClass(Map.class); job.setReducerClass(IdentityReducer.class); job.setOutputFormat(SequenceFileOutputFormat.class); JobClient client = new JobClient(job); ClusterStatus cluster = client.getClusterStatus(); /* If the property is not set, the default value is returned. */ int numMapsPerHost = job.getInt("test.randomwriter.maps_per_host", 10); long numBytesToWritePerMap = job.getLong("test.randomwrite.bytes_per_map", 1 * 1024 * 1024 * 1024); if (numBytesToWritePerMap == 0) { System.err.println("Cannot have test.randomwrite.bytes_per_map set to 0"); return -2; } long totalBytesToWrite = job.getLong( "test.randomwrite.total_bytes", numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers()); int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap); if (numMaps == 0 && totalBytesToWrite > 0) { numMaps = 1; job.setLong("test.randomwrite.bytes_per_map", totalBytesToWrite); } job.setNumMapTasks(numMaps); /* This is only a hint; the framework may adjust the actual number of maps. */ System.out.println("Running " + numMaps + " maps."); // reducer NONE job.setNumReduceTasks(0); Date startTime = new Date(); System.out.println("Job started: " + startTime); JobClient.runJob(job); Date endTime = new Date(); System.out.println("Job ended: " + endTime); System.out.println( "The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds."); return 0; }
@Override public JobTrackerState getJobTrackerState(ClusterStatus clusterStatus) throws Exception { switch (clusterStatus.getJobTrackerStatus()) { case INITIALIZING: return JobTrackerState.INITIALIZING; case RUNNING: return JobTrackerState.RUNNING; default: String errorMsg = "Unrecognized JobTracker state: " + clusterStatus.getJobTrackerStatus(); throw new Exception(errorMsg); } }
/** Return the status information about the Map-Reduce cluster */ public HiveClusterStatus getClusterStatus() throws HiveServerException, TException { HiveClusterStatus hcs; try { ClusterStatus cs = driver.getClusterStatus(); JobTracker.State jbs = cs.getJobTrackerState(); // Convert the ClusterStatus to its Thrift equivalent: HiveClusterStatus int state; switch (jbs) { case INITIALIZING: state = JobTrackerState.INITIALIZING; break; case RUNNING: state = JobTrackerState.RUNNING; break; default: String errorMsg = "Unrecognized JobTracker state: " + jbs.toString(); throw new Exception(errorMsg); } hcs = new HiveClusterStatus( cs.getTaskTrackers(), cs.getMapTasks(), cs.getReduceTasks(), cs.getMaxMapTasks(), cs.getMaxReduceTasks(), state); } catch (Exception e) { LOG.error(e.toString()); e.printStackTrace(); throw new HiveServerException("Unable to get cluster status: " + e.toString()); } return hcs; }
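A minimal sketch (old org.apache.hadoop.mapred API assumed) that fetches a ClusterStatus directly through a JobClient and prints the raw counters the method above maps into HiveClusterStatus:

public static void printClusterStatus(JobConf conf) throws IOException {
  JobClient client = new JobClient(conf);
  ClusterStatus status = client.getClusterStatus();
  System.out.println("task trackers:   " + status.getTaskTrackers());
  System.out.println("running maps:    " + status.getMapTasks());
  System.out.println("running reduces: " + status.getReduceTasks());
  System.out.println("map capacity:    " + status.getMaxMapTasks());
  System.out.println("reduce capacity: " + status.getMaxReduceTasks());
}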
void startJobTracker(boolean wait) { // Create the JobTracker jobTracker = new JobTrackerRunner(conf); jobTrackerThread = new Thread(jobTracker); jobTrackerThread.start(); if (!wait) { return; } while (jobTracker.isActive() && !jobTracker.isUp()) { try { // let daemons get started Thread.sleep(1000); } catch (InterruptedException e) { } } // if the jobtracker has started, wait for it to finish initializing ClusterStatus status = null; if (jobTracker.isUp()) { status = jobTracker.getJobTracker().getClusterStatus(false); while (jobTracker.isActive() && status.getJobTrackerState() == JobTracker.State.INITIALIZING) { try { LOG.info("JobTracker still initializing. Waiting."); Thread.sleep(1000); } catch (InterruptedException e) { } status = jobTracker.getJobTracker().getClusterStatus(false); } } if (!jobTracker.isActive()) { // return if jobtracker has crashed return; } // Set the configuration for the task-trackers this.jobTrackerPort = jobTracker.getJobTrackerPort(); this.jobTrackerInfoPort = jobTracker.getJobTrackerInfoPort(); }
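A hedged sketch of the same wait pattern seen from the client side, assuming only the old JobClient/ClusterStatus API (the helper name is hypothetical):

// Poll until the JobTracker leaves the INITIALIZING state, mirroring the loop above.
public static void waitForJobTrackerRunning(JobConf conf) throws IOException, InterruptedException {
  JobClient client = new JobClient(conf);
  while (client.getClusterStatus().getJobTrackerState() == JobTracker.State.INITIALIZING) {
    Thread.sleep(1000); // JobTracker still initializing
  }
}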
@Override public Task assignTask(TaskTrackerStatus tts, long currentTime, Collection<JobInProgress> visited) throws IOException { if (isRunnable()) { visited.add(job); TaskTrackerManager ttm = scheduler.taskTrackerManager; ClusterStatus clusterStatus = ttm.getClusterStatus(); int numTaskTrackers = clusterStatus.getTaskTrackers(); if (taskType == TaskType.MAP) { LocalityLevel localityLevel = scheduler.getAllowedLocalityLevel(job, currentTime); scheduler.getEventLog().log("ALLOWED_LOC_LEVEL", job.getJobID(), localityLevel); // obtainNewMapTask needs to be passed 1 + the desired locality level return job.obtainNewMapTask( tts, numTaskTrackers, ttm.getNumberOfUniqueHosts(), localityLevel.toCacheLevelCap()); } else { return job.obtainNewReduceTask(tts, numTaskTrackers, ttm.getNumberOfUniqueHosts()); } } else { return null; } }
/** Wait until the system is idle. */ public void waitUntilIdle() { waitTaskTrackers(); JobClient client; try { client = new JobClient(job); ClusterStatus status = client.getClusterStatus(); while (status.getTaskTrackers() + numTrackerToExclude < taskTrackerList.size()) { for (TaskTrackerRunner runner : taskTrackerList) { if (runner.isDead) { throw new RuntimeException("TaskTracker is dead"); } } Thread.sleep(1000); status = client.getClusterStatus(); } } catch (IOException ex) { throw new RuntimeException(ex); } catch (InterruptedException ex) { throw new RuntimeException(ex); } }
/** When no input dir is specified, generate random data. */ protected static void confRandom(JobConf job) throws IOException { // from RandomWriter job.setInputFormat(RandomInputFormat.class); job.setMapperClass(RandomMapOutput.class); final ClusterStatus cluster = new JobClient(job).getClusterStatus(); int numMapsPerHost = job.getInt(RandomTextWriter.MAPS_PER_HOST, 10); long numBytesToWritePerMap = job.getLong(RandomTextWriter.BYTES_PER_MAP, 1 * 1024 * 1024 * 1024); if (numBytesToWritePerMap == 0) { throw new IOException("Cannot have " + RandomTextWriter.BYTES_PER_MAP + " set to 0"); } long totalBytesToWrite = job.getLong( RandomTextWriter.TOTAL_BYTES, numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers()); int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap); if (numMaps == 0 && totalBytesToWrite > 0) { numMaps = 1; job.setLong(RandomTextWriter.BYTES_PER_MAP, totalBytesToWrite); } job.setNumMapTasks(numMaps); }
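For intuition, a back-of-the-envelope sketch of the map-count arithmetic used above, with a hypothetical cluster size and the same defaults (10 maps per host, 1 GB per map):

// e.g. defaultNumMaps(4) == 40: totalBytes = 10 * 1 GB * 4 trackers = 40 GB, and 40 GB / 1 GB = 40 maps.
public static int defaultNumMaps(int taskTrackers) {
  long bytesPerMap = 1L * 1024 * 1024 * 1024;           // 1 GB, the BYTES_PER_MAP default
  long totalBytes  = 10L * bytesPerMap * taskTrackers;  // 10 maps per host default
  return (int) (totalBytes / bytesPerMap);
}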
private boolean exceededPadding( boolean isMapTask, ClusterStatus clusterStatus, int maxTaskTrackerSlots) { int numTaskTrackers = clusterStatus.getTaskTrackers(); int totalTasks = (isMapTask) ? clusterStatus.getMapTasks() : clusterStatus.getReduceTasks(); int totalTaskCapacity = isMapTask ? clusterStatus.getMaxMapTasks() : clusterStatus.getMaxReduceTasks(); Collection<JobInProgress> jobQueue = jobQueueJobInProgressListener.getJobQueue(); boolean exceededPadding = false; synchronized (jobQueue) { int totalNeededTasks = 0; for (JobInProgress job : jobQueue) { if (job.getStatus().getRunState() != JobStatus.RUNNING || job.numReduceTasks == 0) { continue; } // // Beyond the highest-priority task, reserve a little // room for failures and speculative executions; don't // schedule tasks to the hilt. // totalNeededTasks += isMapTask ? job.desiredMaps() : job.desiredReduces(); int padding = 0; if (numTaskTrackers > MIN_CLUSTER_SIZE_FOR_PADDING) { padding = Math.min(maxTaskTrackerSlots, (int) (totalNeededTasks * padFraction)); } if (totalTasks + padding >= totalTaskCapacity) { exceededPadding = true; break; } } } return exceededPadding; }
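A worked example of the padding check above, using hypothetical numbers; the check only kicks in once the cluster has more than MIN_CLUSTER_SIZE_FOR_PADDING trackers, and padFraction here is an assumed value rather than the scheduler's configured one:

public static boolean paddingExample() {
  int    maxTaskTrackerSlots = 2;      // map slots per tracker (hypothetical)
  double padFraction         = 0.01;   // assumed pad fraction
  int    totalNeededTasks    = 300;    // desired maps of the running jobs
  int    totalTasks          = 35;     // maps currently running cluster-wide
  int    totalTaskCapacity   = 20 * maxTaskTrackerSlots;   // 20 trackers -> 40 slots
  int padding = Math.min(maxTaskTrackerSlots, (int) (totalNeededTasks * padFraction)); // min(2, 3) = 2
  return totalTasks + padding >= totalTaskCapacity;        // 35 + 2 >= 40 -> false, padding not exceeded
}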
@Override public synchronized List<Task> assignTasks(TaskTracker taskTracker) throws IOException { TaskTrackerStatus taskTrackerStatus = taskTracker.getStatus(); ClusterStatus clusterStatus = taskTrackerManager.getClusterStatus(); final int numTaskTrackers = clusterStatus.getTaskTrackers(); final int clusterMapCapacity = clusterStatus.getMaxMapTasks(); final int clusterReduceCapacity = clusterStatus.getMaxReduceTasks(); Collection<JobInProgress> jobQueue = jobQueueJobInProgressListener.getJobQueue(); // // Get map + reduce counts for the current tracker. // final int trackerMapCapacity = taskTrackerStatus.getMaxMapSlots(); final int trackerReduceCapacity = taskTrackerStatus.getMaxReduceSlots(); final int trackerRunningMaps = taskTrackerStatus.countMapTasks(); final int trackerRunningReduces = taskTrackerStatus.countReduceTasks(); // Assigned tasks List<Task> assignedTasks = new ArrayList<Task>(); // // Compute (running + pending) map and reduce task numbers across pool // int remainingReduceLoad = 0; int remainingMapLoad = 0; synchronized (jobQueue) { for (JobInProgress job : jobQueue) { if (job.getStatus().getRunState() == JobStatus.RUNNING) { remainingMapLoad += (job.desiredMaps() - job.finishedMaps()); if (job.scheduleReduces()) { remainingReduceLoad += (job.desiredReduces() - job.finishedReduces()); } } } } // Compute the 'load factor' for maps and reduces double mapLoadFactor = 0.0; if (clusterMapCapacity > 0) { mapLoadFactor = (double) remainingMapLoad / clusterMapCapacity; } double reduceLoadFactor = 0.0; if (clusterReduceCapacity > 0) { reduceLoadFactor = (double) remainingReduceLoad / clusterReduceCapacity; } // // In the below steps, we allocate first map tasks (if appropriate), // and then reduce tasks if appropriate. We go through all jobs // in order of job arrival; jobs only get serviced if their // predecessors are serviced, too. // // // We assign tasks to the current taskTracker if the given machine // has a workload that's less than the maximum load of that kind of // task. // However, if the cluster is close to getting loaded i.e. we don't // have enough _padding_ for speculative executions etc., we only // schedule the "highest priority" task i.e. the task from the job // with the highest priority. 
// final int trackerCurrentMapCapacity = Math.min((int) Math.ceil(mapLoadFactor * trackerMapCapacity), trackerMapCapacity); int availableMapSlots = trackerCurrentMapCapacity - trackerRunningMaps; boolean exceededMapPadding = false; if (availableMapSlots > 0) { exceededMapPadding = exceededPadding(true, clusterStatus, trackerMapCapacity); } int numLocalMaps = 0; int numNonLocalMaps = 0; boolean newIterationJob = false; scheduleMaps: for (int i = 0; i < availableMapSlots; ++i) { synchronized (jobQueue) { for (JobInProgress job : jobQueue) { if (job.getStatus().getRunState() != JobStatus.RUNNING) { continue; } if (job.getJobConf().isIterative()) { String iterativeAppID = job.getJobConf().getIterativeAlgorithmID(); if (iterativeAppID.equals("none")) { throw new IOException("please specify the iteration ID!"); } String jointype = job.getJobConf().get("mapred.iterative.jointype"); // prepare the iterationid map and jobtask map if (!this.tracker_mtask_map.containsKey(iterativeAppID)) { // a new iterative algorithm Map<String, LinkedList<Integer>> new_tracker_task_map = new HashMap<String, LinkedList<Integer>>(); this.tracker_mtask_map.put(iterativeAppID, new_tracker_task_map); Map<String, LinkedList<Integer>> new_tracker_rtask_map = new HashMap<String, LinkedList<Integer>>(); this.tracker_rtask_map.put(iterativeAppID, new_tracker_rtask_map); // record the first job of the series of jobs in the iterations this.first_job_map.put(iterativeAppID, job.getJobID()); // record the list of jobs for a iteration HashSet<JobID> jobs = new HashSet<JobID>(); jobs.add(job.getJobID()); this.iteration_jobs_map.put(iterativeAppID, jobs); } // this is the first job of the series of jobs if (this.first_job_map.get(iterativeAppID).equals(job.getJobID()) && job.getJobConf().isIterative()) { LOG.info(job.getJobID() + " is the first iteration job"); newIterationJob = true; } // this is one of the following jobs, and prepare a assignment list for the assignment if (!newIterationJob) { LOG.info(job.getJobID() + " is not the first iteration job"); this.iteration_jobs_map.get(iterativeAppID).add(job.getJobID()); if (this.mtask_assign_map.get(job.getJobID()) == null) { // prepare the map task assignment list LOG.info("for job " + job.getJobID() + "'s assignment:"); Map<String, LinkedList<Integer>> map_task_assign = new HashMap<String, LinkedList<Integer>>(); for (Map.Entry<String, LinkedList<Integer>> entry : this.tracker_mtask_map.get(iterativeAppID).entrySet()) { String tracker = entry.getKey(); LinkedList<Integer> taskids = entry.getValue(); LinkedList<Integer> copytaskids = new LinkedList<Integer>(); LOG.info("assign on tracker " + tracker); for (int taskid : taskids) { copytaskids.add(taskid); LOG.info("task id " + taskid); } map_task_assign.put(tracker, copytaskids); } this.mtask_assign_map.put(job.getJobID(), map_task_assign); // if one2one copy the map assign to reduce assign, the are with the same mapping if (jointype.equals("one2one")) { // prepare the reduce task assignment list Map<String, LinkedList<Integer>> reduce_task_assign = new HashMap<String, LinkedList<Integer>>(); for (Map.Entry<String, LinkedList<Integer>> entry : this.tracker_mtask_map.get(iterativeAppID).entrySet()) { String tracker = entry.getKey(); LinkedList<Integer> taskids = entry.getValue(); LinkedList<Integer> copytaskids = new LinkedList<Integer>(); for (int taskid : taskids) { copytaskids.add(taskid); } reduce_task_assign.put(tracker, copytaskids); } this.tracker_rtask_map.put(iterativeAppID, reduce_task_assign); } // prepare the reduce task 
assignment list for all cases Map<String, LinkedList<Integer>> reduce_task_assign = new HashMap<String, LinkedList<Integer>>(); for (Map.Entry<String, LinkedList<Integer>> entry : this.tracker_rtask_map.get(iterativeAppID).entrySet()) { String tracker = entry.getKey(); LinkedList<Integer> taskids = entry.getValue(); LinkedList<Integer> copytaskids = new LinkedList<Integer>(); for (int taskid : taskids) { copytaskids.add(taskid); } reduce_task_assign.put(tracker, copytaskids); } this.rtask_assign_map.put(job.getJobID(), reduce_task_assign); } } Task t = null; // the first iteration or following iteration // if the first iteration: assign taskid by default (exception for the one2mul case, // where we assign staring from 0,...,n) // else if the following iterations: assign taskid based on the first iteration // assignment if (newIterationJob) { /** * the one2mul case should be carefully taken care, we want to assgin map0,map1,map2 * and reduce0 to a tracker, and assign map3,map4,map5 and reduce1 to another tracker */ if (jointype.equals("one2mul") && !tracker_rtask_map .get(iterativeAppID) .containsKey(taskTracker.getTrackerName())) { // if contain the tracker, that means we have assigned tasks for this tracker int scala = job.getJobConf().getInt("mapred.iterative.data.scala", 1); // int mapsEachTracker = job.getJobConf().getNumMapTasks() / numTaskTrackers; int reducersEachTracker = job.getJobConf().getNumReduceTasks() / numTaskTrackers; if (job.getJobConf().getNumReduceTasks() % numTaskTrackers != 0) throw new IOException( "job.getJobConf().getNumReduceTasks() % numTaskTrackers != 0"); if (!this.tracker_mtask_map .get(iterativeAppID) .containsKey(taskTracker.getTrackerName())) { LinkedList<Integer> tasklist = new LinkedList<Integer>(); this.tracker_mtask_map .get(iterativeAppID) .put(taskTracker.getTrackerName(), tasklist); } if (!this.tracker_rtask_map .get(iterativeAppID) .containsKey(taskTracker.getTrackerName())) { LinkedList<Integer> tasklist = new LinkedList<Integer>(); this.tracker_rtask_map .get(iterativeAppID) .put(taskTracker.getTrackerName(), tasklist); } // for debugging String debugout1 = "maps: "; String debugout2 = "reduces: "; int reduceOffsetId = (tracker_rtask_map.get(iterativeAppID).size() - 1) * reducersEachTracker; // the start reduce id for (int count = 0; count < reducersEachTracker; count++) { int reducepartitionid = reduceOffsetId + count; debugout2 += reducepartitionid + " "; tracker_rtask_map .get(iterativeAppID) .get(taskTracker.getTrackerName()) .add(reducepartitionid); for (int count2 = 0; count2 < scala; count2++) { int mappartitionid = reducepartitionid * scala + count2; // int mapid = job.splitTaskMap.get(mappartitionid); debugout1 += mappartitionid + " "; this.tracker_mtask_map .get(iterativeAppID) .get(taskTracker.getTrackerName()) .add(mappartitionid); } } // print out for debug LOG.info( "tracker " + taskTracker.getTrackerName() + " assigned tasks " + debugout1 + " and " + debugout2); // make the assignment list String tracker = taskTracker.getTrackerName(); LinkedList<Integer> mtaskids = this.tracker_mtask_map.get(iterativeAppID).get(taskTracker.getTrackerName()); LinkedList<Integer> mcopytaskids = new LinkedList<Integer>(); for (int taskid : mtaskids) { mcopytaskids.add(taskid); } if (!mtask_assign_map.containsKey(job.getJobID())) { Map<String, LinkedList<Integer>> map_task_assign = new HashMap<String, LinkedList<Integer>>(); this.mtask_assign_map.put(job.getJobID(), map_task_assign); } this.mtask_assign_map.get(job.getJobID()).put(tracker, 
mcopytaskids); // prepare the reduce task assignment list LinkedList<Integer> rtaskids = this.tracker_rtask_map.get(iterativeAppID).get(taskTracker.getTrackerName()); LinkedList<Integer> rcopytaskids = new LinkedList<Integer>(); for (int taskid : rtaskids) { rcopytaskids.add(taskid); } if (!rtask_assign_map.containsKey(job.getJobID())) { Map<String, LinkedList<Integer>> reduce_task_assign = new HashMap<String, LinkedList<Integer>>(); this.rtask_assign_map.put(job.getJobID(), reduce_task_assign); } this.rtask_assign_map.get(job.getJobID()).put(tracker, rcopytaskids); // assign a map task for this tracker Integer target = null; try { target = this.mtask_assign_map .get(job.getJobID()) .get(taskTracker.getTrackerName()) .peekFirst(); } catch (Exception e) { e.printStackTrace(); } if (target == null) { // all have been assigned, no more work, maybe it should help others to process LOG.info( "all map tasks on tasktracker " + taskTracker.getTrackerName() + " have been processed"); break; } else { t = job.obtainNewNodeOrRackLocalMapTask( taskTrackerStatus, numTaskTrackers, taskTrackerManager.getNumberOfUniqueHosts(), target); } } else { t = job.obtainNewNodeOrRackLocalMapTask( taskTrackerStatus, numTaskTrackers, taskTrackerManager.getNumberOfUniqueHosts()); } } else { Integer target = null; try { target = this.mtask_assign_map .get(job.getJobID()) .get(taskTracker.getTrackerName()) .peekFirst(); } catch (Exception e) { e.printStackTrace(); } if (target == null) { // all have been assigned, no more work, maybe it should help others to process LOG.info( "all map tasks on tasktracker " + taskTracker.getTrackerName() + " have been processed"); break; } else { t = job.obtainNewNodeOrRackLocalMapTask( taskTrackerStatus, numTaskTrackers, taskTrackerManager.getNumberOfUniqueHosts(), target); } } if (t != null) { assignedTasks.add(t); ++numLocalMaps; // new iteration job and the first task for a tasktracker // for one2mul case, we don't need to record the assignment, since we already made the // assignment list beforehand if (!newIterationJob || jointype.equals("one2mul")) { // poll, remove this.mtask_assign_map .get(job.getJobID()) .get(taskTracker.getTrackerName()) .pollFirst(); LOG.info("assigning task " + t.getTaskID() + " on " + taskTracker.getTrackerName()); } else { // record the assignment list for map tasks if (!this.tracker_mtask_map .get(iterativeAppID) .containsKey(taskTracker.getTrackerName())) { LinkedList<Integer> tasklist = new LinkedList<Integer>(); this.tracker_mtask_map .get(iterativeAppID) .put(taskTracker.getTrackerName(), tasklist); } this.tracker_mtask_map .get(iterativeAppID) .get(taskTracker.getTrackerName()) .add(t.getTaskID().getTaskID().getId()); // prepare the reduce assignment, for mapping with reduce if (jointype.equals("one2one")) { // prepare the reduce assignment, for mapping with reduce if (!first_job_reduces_map.containsKey(iterativeAppID)) { Map<String, LinkedList<Integer>> tracker_reduce_map = new HashMap<String, LinkedList<Integer>>(); first_job_reduces_map.put(iterativeAppID, tracker_reduce_map); } if (!first_job_reduces_map .get(iterativeAppID) .containsKey(taskTracker.getTrackerName())) { LinkedList<Integer> reduces = new LinkedList<Integer>(); first_job_reduces_map .get(iterativeAppID) .put(taskTracker.getTrackerName(), reduces); } first_job_reduces_map .get(iterativeAppID) .get(taskTracker.getTrackerName()) .add(t.getTaskID().getTaskID().getId()); } LOG.info("assigning task " + t.getTaskID() + " on " + taskTracker.getTrackerName()); } // Don't assign map tasks 
to the hilt! // Leave some free slots in the cluster for future task-failures, // speculative tasks etc. beyond the highest priority job if (exceededMapPadding) { break scheduleMaps; } // Try all jobs again for the next Map task break; } LOG.error("New Node Or Rack Local Map Task failed!"); if (newIterationJob) { // Try to schedule a node-local or rack-local Map task t = job.obtainNewNonLocalMapTask( taskTrackerStatus, numTaskTrackers, taskTrackerManager.getNumberOfUniqueHosts()); } else { Integer target = this.mtask_assign_map .get(job.getJobID()) .get(taskTracker.getTrackerName()) .peekFirst(); if (target == null) { // all have been assigned, no more work, maybe it should help others to process LOG.info( "all map tasks on tasktracker " + taskTracker.getTrackerName() + " have been processed"); break; } else { t = job.obtainNewNonLocalMapTask( taskTrackerStatus, numTaskTrackers, taskTrackerManager.getNumberOfUniqueHosts(), target); } } if (t != null) { assignedTasks.add(t); ++numNonLocalMaps; // new iteration job and the first task for a tasktracker if (newIterationJob) { if (!this.tracker_mtask_map .get(iterativeAppID) .containsKey(taskTracker.getTrackerName())) { LinkedList<Integer> tasklist = new LinkedList<Integer>(); this.tracker_mtask_map .get(iterativeAppID) .put(taskTracker.getTrackerName(), tasklist); } this.tracker_mtask_map .get(iterativeAppID) .get(taskTracker.getTrackerName()) .add(t.getTaskID().getTaskID().getId()); } else { // poll, remove this.mtask_assign_map .get(job.getJobID()) .get(taskTracker.getTrackerName()) .pollFirst(); } // We assign at most 1 off-switch or speculative task // This is to prevent TaskTrackers from stealing local-tasks // from other TaskTrackers. break scheduleMaps; } } else { // not an iterative algorithm, normal schedule Task t = null; // Try to schedule a node-local or rack-local Map task t = job.obtainNewNodeOrRackLocalMapTask( taskTrackerStatus, numTaskTrackers, taskTrackerManager.getNumberOfUniqueHosts()); if (t != null) { assignedTasks.add(t); ++numLocalMaps; // Don't assign map tasks to the hilt! // Leave some free slots in the cluster for future task-failures, // speculative tasks etc. beyond the highest priority job if (exceededMapPadding) { break scheduleMaps; } // Try all jobs again for the next Map task break; } // Try to schedule a node-local or rack-local Map task t = job.obtainNewNonLocalMapTask( taskTrackerStatus, numTaskTrackers, taskTrackerManager.getNumberOfUniqueHosts()); if (t != null) { assignedTasks.add(t); ++numNonLocalMaps; // We assign at most 1 off-switch or speculative task // This is to prevent TaskTrackers from stealing local-tasks // from other TaskTrackers. 
break scheduleMaps; } } } } } int assignedMaps = assignedTasks.size(); // // Same thing, but for reduce tasks // However we _never_ assign more than 1 reduce task per heartbeat // /** should maintain the reduce task location for the termination check */ final int trackerCurrentReduceCapacity = Math.min((int) Math.ceil(reduceLoadFactor * trackerReduceCapacity), trackerReduceCapacity); final int availableReduceSlots = Math.min((trackerCurrentReduceCapacity - trackerRunningReduces), 1); boolean exceededReducePadding = false; // LOG.info("availableReduceSlots " + availableReduceSlots); if (availableReduceSlots > 0) { exceededReducePadding = exceededPadding(false, clusterStatus, trackerReduceCapacity); synchronized (jobQueue) { for (JobInProgress job : jobQueue) { LOG.info("job " + job.getJobID()); if (job.getStatus().getRunState() != JobStatus.RUNNING || job.numReduceTasks == 0) { LOG.info("have to continue " + job.getStatus().getRunState()); continue; } Task t = null; if (job.getJobConf().isIterative()) { String iterativeAppID = job.getJobConf().getIterativeAlgorithmID(); if (iterativeAppID.equals("none")) { throw new IOException("please specify the iteration ID!"); } String jointype = job.getJobConf().get("mapred.iterative.jointype"); if (jointype.equals("one2one")) { // one-to-one or one-to-mul jobs if (this.first_job_map.get(iterativeAppID).equals(job.getJobID()) && job.getJobConf().isIterative()) { LOG.info(job.getJobID() + " is the first iteration job for reduce"); newIterationJob = true; } Integer target = null; if (newIterationJob) { if (first_job_reduces_map.get(iterativeAppID) == null) { throw new IOException( "I think something is wrong since the tasktracker never receive " + "a map task with iterativeapp id " + iterativeAppID); } if (first_job_reduces_map.get(iterativeAppID).get(taskTracker.getTrackerName()) == null) { throw new IOException( "I think something is wrong since the tasktracker never receive " + "a map task with iterativeapp id " + iterativeAppID + " from " + taskTracker.getTrackerName()); } target = this.first_job_reduces_map .get(iterativeAppID) .get(taskTracker.getTrackerName()) .pollFirst(); } else { // the task assignment has already been processed during the map task assignment, so // never use tracker_rtask_map target = this.rtask_assign_map .get(job.getJobID()) .get(taskTracker.getTrackerName()) .pollFirst(); } if (target == null) { // all have been assigned, no more work, maybe it should help others to process LOG.info( "all reduce tasks on tasktracker " + taskTracker.getTrackerName() + " have been processed"); break; } else { t = job.obtainNewReduceTask( taskTrackerStatus, numTaskTrackers, taskTrackerManager.getNumberOfUniqueHosts(), target); } } else if (jointype.equals("one2mul")) { Integer target = this.rtask_assign_map .get(job.getJobID()) .get(taskTracker.getTrackerName()) .pollFirst(); if (target == null) { // all have been assigned, no more work, maybe it should help others to process LOG.info( "all reduce tasks on tasktracker " + taskTracker.getTrackerName() + " have been processed"); break; } else { t = job.obtainNewReduceTask( taskTrackerStatus, numTaskTrackers, taskTrackerManager.getNumberOfUniqueHosts(), target); } } else { // one-to-all case, assign tasks in the first iteration job, and remember this mapping // this is the first job of the series of jobs if (this.first_job_map.get(iterativeAppID).equals(job.getJobID())) { LOG.info(job.getJobID() + " is the first iteration job for reduce"); newIterationJob = true; } /* //this is one of the 
following jobs, and prepare a assignment list for the assignment else{ LOG.info(job.getJobID() + " is not the first iteration job for reduce"); if(this.rtask_assign_map.get(job.getJobID()) == null){ //prepare the map task assignment list Map<String, LinkedList<Integer>> reduce_task_assign = new HashMap<String, LinkedList<Integer>>(); for(Map.Entry<String, LinkedList<Integer>> entry : this.tracker_rtask_map.get(iterativeAppID).entrySet()){ String tracker = entry.getKey(); LinkedList<Integer> taskids = entry.getValue(); LinkedList<Integer> copytaskids = new LinkedList<Integer>(); for(int taskid : taskids){ copytaskids.add(taskid); } reduce_task_assign.put(tracker, copytaskids); } this.rtask_assign_map.put(job.getJobID(), reduce_task_assign); } } */ // the first iteration or following iteration // if the first iteration: assign taskid by default // else if the following iterations: assign taskid based on the first iteration // assignment if (newIterationJob) { t = job.obtainNewReduceTask( taskTrackerStatus, numTaskTrackers, taskTrackerManager.getNumberOfUniqueHosts()); if (t != null) { if (!this.tracker_rtask_map .get(iterativeAppID) .containsKey(taskTracker.getTrackerName())) { LinkedList<Integer> tasklist = new LinkedList<Integer>(); this.tracker_rtask_map .get(iterativeAppID) .put(taskTracker.getTrackerName(), tasklist); } this.tracker_rtask_map .get(iterativeAppID) .get(taskTracker.getTrackerName()) .add(t.getTaskID().getTaskID().getId()); LOG.info( "assigning reduce task " + t.getTaskID() + " on " + taskTracker.getTrackerName()); } } else { Integer target = this.rtask_assign_map .get(job.getJobID()) .get(taskTracker.getTrackerName()) .peekFirst(); if (target == null) { // all have been assigned, no more work, maybe it should help others to process LOG.info( "all map tasks on tasktracker " + taskTracker.getTrackerName() + " have been processed"); break; } else { t = job.obtainNewReduceTask( taskTrackerStatus, numTaskTrackers, taskTrackerManager.getNumberOfUniqueHosts(), target); } if (t != null) { // poll, remove this.rtask_assign_map .get(job.getJobID()) .get(taskTracker.getTrackerName()) .pollFirst(); LOG.info( "assigning reduce task " + t.getTaskID() + " on " + taskTracker.getTrackerName()); } } } } else { t = job.obtainNewReduceTask( taskTrackerStatus, numTaskTrackers, taskTrackerManager.getNumberOfUniqueHosts()); } LOG.info("try to assign new task " + t); if (t != null) { assignedTasks.add(t); break; } // Don't assign reduce tasks to the hilt! // Leave some free slots in the cluster for future task-failures, // speculative tasks etc. beyond the highest priority job if (exceededReducePadding) { break; } } } } if (LOG.isDebugEnabled()) { LOG.debug( "Task assignments for " + taskTrackerStatus.getTrackerName() + " --> " + "[" + mapLoadFactor + ", " + trackerMapCapacity + ", " + trackerCurrentMapCapacity + ", " + trackerRunningMaps + "] -> [" + (trackerCurrentMapCapacity - trackerRunningMaps) + ", " + assignedMaps + " (" + numLocalMaps + ", " + numNonLocalMaps + ")] [" + reduceLoadFactor + ", " + trackerReduceCapacity + ", " + trackerCurrentReduceCapacity + "," + trackerRunningReduces + "] -> [" + (trackerCurrentReduceCapacity - trackerRunningReduces) + ", " + (assignedTasks.size() - assignedMaps) + "]"); } return assignedTasks; }
/* * TODO: * For Elf: need to change the major schedule logic, scheduling need * to be *datacenter-aware* * */ @Override public synchronized List<Task> assignTasks(TaskTracker taskTracker) throws IOException { TaskTrackerStatus taskTrackerStatus = taskTracker.getStatus(); ClusterStatus clusterStatus = taskTrackerManager.getClusterStatus(); final int numTaskTrackers = clusterStatus.getTaskTrackers(); final int clusterMapCapacity = clusterStatus.getMaxMapTasks(); final int clusterReduceCapacity = clusterStatus.getMaxReduceTasks(); Collection<JobInProgress> jobQueue = jobQueueJobInProgressListener.getJobQueue(); // // Get map + reduce counts for the current tracker. // final int trackerMapCapacity = taskTrackerStatus.getMaxMapSlots(); final int trackerReduceCapacity = taskTrackerStatus.getMaxReduceSlots(); final int trackerRunningMaps = taskTrackerStatus.countMapTasks(); final int trackerRunningReduces = taskTrackerStatus.countReduceTasks(); // Assigned tasks List<Task> assignedTasks = new ArrayList<Task>(); // // Compute (running + pending) map and reduce task numbers across pool // int remainingReduceLoad = 0; int remainingMapLoad = 0; synchronized (jobQueue) { for (JobInProgress job : jobQueue) { if (job.getStatus().getRunState() == JobStatus.RUNNING) { remainingMapLoad += (job.desiredMaps() - job.finishedMaps()); if (job.scheduleReduces()) { remainingReduceLoad += (job.desiredReduces() - job.finishedReduces()); } } } } // Compute the 'load factor' for maps and reduces double mapLoadFactor = 0.0; if (clusterMapCapacity > 0) { mapLoadFactor = (double) remainingMapLoad / clusterMapCapacity; } double reduceLoadFactor = 0.0; if (clusterReduceCapacity > 0) { reduceLoadFactor = (double) remainingReduceLoad / clusterReduceCapacity; } // // In the below steps, we allocate first map tasks (if appropriate), // and then reduce tasks if appropriate. We go through all jobs // in order of job arrival; jobs only get serviced if their // predecessors are serviced, too. // // // We assign tasks to the current taskTracker if the given machine // has a workload that's less than the maximum load of that kind of // task. // However, if the cluster is close to getting loaded i.e. we don't // have enough _padding_ for speculative executions etc., we only // schedule the "highest priority" task i.e. the task from the job // with the highest priority. // final int trackerCurrentMapCapacity = Math.min((int) Math.ceil(mapLoadFactor * trackerMapCapacity), trackerMapCapacity); int availableMapSlots = trackerCurrentMapCapacity - trackerRunningMaps; boolean exceededMapPadding = false; if (availableMapSlots > 0) { exceededMapPadding = exceededPadding(true, clusterStatus, trackerMapCapacity); } int numLocalMaps = 0; int numNonLocalMaps = 0; scheduleMaps: // TODO: for Elf // The main schedule logic here, outer for loop is for every slot, inner loop is for each job for (int i = 0; i < availableMapSlots; ++i) { synchronized (jobQueue) { for (JobInProgress job : jobQueue) { if (job.getStatus().getRunState() != JobStatus.RUNNING) { continue; } Task t = null; // Try to schedule a node-local or rack-local Map task t = job.obtainNewLocalMapTask( taskTrackerStatus, numTaskTrackers, taskTrackerManager.getNumberOfUniqueHosts()); if (t != null) { assignedTasks.add(t); ++numLocalMaps; // Don't assign map tasks to the hilt! // Leave some free slots in the cluster for future task-failures, // speculative tasks etc. 
beyond the highest priority job if (exceededMapPadding) { break scheduleMaps; } // Try all jobs again for the next Map task // Note: it's FIFO here: next time in the inner for loop the head-of-queue // will still be chosen break; } // If no locality for this job, try launching non-local // Try to schedule a node-local or rack-local Map task --> original comments // FIXME: is the above comment correct? seems should be non-local task t = job.obtainNewNonLocalMapTask( taskTrackerStatus, numTaskTrackers, taskTrackerManager.getNumberOfUniqueHosts()); if (t != null) { assignedTasks.add(t); ++numNonLocalMaps; // We assign at most 1 off-switch or speculative task // This is to prevent TaskTrackers from stealing local-tasks // from other TaskTrackers. break scheduleMaps; } } } } int assignedMaps = assignedTasks.size(); // // Same thing, but for reduce tasks // However we _never_ assign more than 1 reduce task per heartbeat // final int trackerCurrentReduceCapacity = Math.min((int) Math.ceil(reduceLoadFactor * trackerReduceCapacity), trackerReduceCapacity); final int availableReduceSlots = Math.min((trackerCurrentReduceCapacity - trackerRunningReduces), 1); boolean exceededReducePadding = false; if (availableReduceSlots > 0) { exceededReducePadding = exceededPadding(false, clusterStatus, trackerReduceCapacity); synchronized (jobQueue) { for (JobInProgress job : jobQueue) { if (job.getStatus().getRunState() != JobStatus.RUNNING || job.numReduceTasks == 0) { continue; } Task t = job.obtainNewReduceTask( taskTrackerStatus, numTaskTrackers, taskTrackerManager.getNumberOfUniqueHosts()); if (t != null) { assignedTasks.add(t); break; } // Don't assign reduce tasks to the hilt! // Leave some free slots in the cluster for future task-failures, // speculative tasks etc. beyond the highest priority job if (exceededReducePadding) { break; } } } } if (LOG.isDebugEnabled()) { LOG.debug( "Task assignments for " + taskTrackerStatus.getTrackerName() + " --> " + "[" + mapLoadFactor + ", " + trackerMapCapacity + ", " + trackerCurrentMapCapacity + ", " + trackerRunningMaps + "] -> [" + (trackerCurrentMapCapacity - trackerRunningMaps) + ", " + assignedMaps + " (" + numLocalMaps + ", " + numNonLocalMaps + ")] [" + reduceLoadFactor + ", " + trackerReduceCapacity + ", " + trackerCurrentReduceCapacity + "," + trackerRunningReduces + "] -> [" + (trackerCurrentReduceCapacity - trackerRunningReduces) + ", " + (assignedTasks.size() - assignedMaps) + "]"); } return assignedTasks; }
/** * Performs a range query using MapReduce * * @param fs * @param inputFile * @param queryRange * @param shape * @param output * @return * @throws IOException */ public static long rangeQueryMapReduce( FileSystem fs, Path inputFile, Path userOutputPath, Shape queryShape, Shape shape, boolean overwrite, boolean background, QueryInput query) throws IOException { JobConf job = new JobConf(FileMBR.class); FileSystem outFs = inputFile.getFileSystem(job); Path outputPath = userOutputPath; if (outputPath == null) { do { outputPath = new Path( inputFile.toUri().getPath() + ".rangequery_" + (int) (Math.random() * 1000000)); } while (outFs.exists(outputPath)); } else { if (outFs.exists(outputPath)) { if (overwrite) { outFs.delete(outputPath, true); } else { throw new RuntimeException("Output path already exists and -overwrite flag is not set"); } } } job.setJobName("RangeQuery"); job.setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class); RangeFilter.setQueryRange(job, queryShape); // Set query range for // filter ClusterStatus clusterStatus = new JobClient(job).getClusterStatus(); job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5); job.setNumReduceTasks(3); // Decide which map function to use depending on how blocks are indexed // And also which input format to use if (SpatialSite.isRTree(fs, inputFile)) { // RTree indexed file LOG.info("Searching an RTree indexed file"); job.setInputFormat(RTreeInputFormat.class); } else { // A file with no local index LOG.info("Searching a non local-indexed file"); job.setInputFormat(ShapeInputFormat.class); } GlobalIndex<Partition> gIndex = SpatialSite.getGlobalIndex(fs, inputFile); // if (gIndex != null && gIndex.isReplicated()){ // job.setMapperClass(RangeQueryMap.class); Class<?> OutputKey = NullWritable.class; try { Class<?> c = shape.getClass(); Field f = c.getDeclaredField(query.field); f.setAccessible(true); if (f.getType().equals(Integer.TYPE)) { OutputKey = IntWritable.class; } else if (f.getType().equals(Double.TYPE)) { OutputKey = DoubleWritable.class; } else if (f.getType().equals(Long.TYPE)) { OutputKey = LongWritable.class; } } catch (SecurityException e) { e.printStackTrace(); } catch (NoSuchFieldException e) { // TODO Auto-generated catch block e.printStackTrace(); } job.setMapOutputKeyClass(OutputKey); switch (query.type) { case Distinct: job.setMapperClass(DistinctQueryMap.class); job.setReducerClass(DistinctQueryReduce.class); job.setMapOutputValueClass(NullWritable.class); break; case Distribution: job.setMapperClass(DistributionQueryMap.class); job.setReducerClass(DistributionQueryReduce.class); job.setMapOutputValueClass(IntWritable.class); break; default: break; } // } // else // job.setMapperClass(RangeQueryMapNoDupAvoidance.class); // Set query range for the map function job.set(QUERY_SHAPE_CLASS, queryShape.getClass().getName()); job.set(QUERY_SHAPE, queryShape.toText(new Text()).toString()); job.set(QUERY_FIELD, query.field); // Set shape class for the SpatialInputFormat SpatialSite.setShapeClass(job, shape.getClass()); job.setOutputFormat(TextOutputFormat.class); ShapeInputFormat.setInputPaths(job, inputFile); TextOutputFormat.setOutputPath(job, outputPath); // Submit the job if (!background) { RunningJob runningJob = JobClient.runJob(job); Counters counters = runningJob.getCounters(); Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS); final long resultCount = outputRecordCounter.getValue(); // If outputPath not set by user, automatically delete it if (userOutputPath == null) 
outFs.delete(outputPath, true); return resultCount; } else { JobClient jc = new JobClient(job); lastRunningJob = jc.submitJob(job); return -1; } }
/** * The main driver for sort program. Invoke this method to submit the map/reduce job. * * @throws Exception When there is communication problems with the job tracker. */ public int run(String[] args) throws Exception { Configuration conf = getConf(); JobClient client = new JobClient(conf); ClusterStatus cluster = client.getClusterStatus(); int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9); String join_reduces = conf.get(REDUCES_PER_HOST); if (join_reduces != null) { num_reduces = cluster.getTaskTrackers() * Integer.parseInt(join_reduces); } Job job = new Job(conf); job.setJobName("join"); job.setJarByClass(Sort.class); job.setMapperClass(Mapper.class); job.setReducerClass(Reducer.class); Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class; Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class; Class<? extends WritableComparable> outputKeyClass = BytesWritable.class; Class<? extends Writable> outputValueClass = TupleWritable.class; String op = "inner"; List<String> otherArgs = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { try { if ("-r".equals(args[i])) { num_reduces = Integer.parseInt(args[++i]); } else if ("-inFormat".equals(args[i])) { inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class); } else if ("-outFormat".equals(args[i])) { outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class); } else if ("-outKey".equals(args[i])) { outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class); } else if ("-outValue".equals(args[i])) { outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class); } else if ("-joinOp".equals(args[i])) { op = args[++i]; } else { otherArgs.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); return printUsage(); // exits } } // Set user-supplied (possibly default) job configs job.setNumReduceTasks(num_reduces); if (otherArgs.size() < 2) { System.out.println("ERROR: Wrong number of parameters: "); return printUsage(); } FileOutputFormat.setOutputPath(job, new Path(otherArgs.remove(otherArgs.size() - 1))); List<Path> plist = new ArrayList<Path>(otherArgs.size()); for (String s : otherArgs) { plist.add(new Path(s)); } job.setInputFormatClass(CompositeInputFormat.class); job.getConfiguration() .set( CompositeInputFormat.JOIN_EXPR, CompositeInputFormat.compose(op, inputFormatClass, plist.toArray(new Path[0]))); job.setOutputFormatClass(outputFormatClass); job.setOutputKeyClass(outputKeyClass); job.setOutputValueClass(outputValueClass); Date startTime = new Date(); System.out.println("Job started: " + startTime); int ret = job.waitForCompletion(true) ? 0 : 1; Date end_time = new Date(); System.out.println("Job ended: " + end_time); System.out.println( "The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds."); return ret; }
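A hedged usage sketch for the driver above, assuming it lives in a class that implements Tool (the class name Join is hypothetical here):

public static void main(String[] args) throws Exception {
  int res = ToolRunner.run(new Configuration(), new Join(), args);
  System.exit(res);
}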
public void _jspService(HttpServletRequest request, HttpServletResponse response) throws java.io.IOException, ServletException { PageContext pageContext = null; HttpSession session = null; ServletContext application = null; ServletConfig config = null; JspWriter out = null; Object page = this; JspWriter _jspx_out = null; PageContext _jspx_page_context = null; try { response.setContentType("text/html; charset=UTF-8"); pageContext = _jspxFactory.getPageContext(this, request, response, null, true, 8192, true); _jspx_page_context = pageContext; application = pageContext.getServletContext(); config = pageContext.getServletConfig(); session = pageContext.getSession(); out = pageContext.getOut(); _jspx_out = out; _jspx_resourceInjector = (org.apache.jasper.runtime.ResourceInjector) application.getAttribute("com.sun.appserv.jsp.resource.injector"); out.write('\n'); JobTracker tracker = (JobTracker) application.getAttribute("job.tracker"); ClusterStatus status = tracker.getClusterStatus(); String trackerName = StringUtils.simpleHostname(tracker.getJobTrackerMachine()); out.write("\n<html>\n<head>\n<title>"); out.print(trackerName); out.write( " Hadoop Locality Statistics</title>\n<link rel=\"stylesheet\" type=\"text/css\" href=\"/static/hadoop.css\">\n</head>\n<body>\n<h1>"); out.print(trackerName); out.write(" Hadoop Locality Statistics</h1>\n\n<b>State:</b> "); out.print(status.getJobTrackerState()); out.write("<br>\n<b>Started:</b> "); out.print(new Date(tracker.getStartTime())); out.write("<br>\n<b>Version:</b> "); out.print(VersionInfo.getVersion()); out.write(",\n r"); out.print(VersionInfo.getRevision()); out.write("<br>\n<b>Compiled:</b> "); out.print(VersionInfo.getDate()); out.write(" by\n "); out.print(VersionInfo.getUser()); out.write("<br>\n<b>Identifier:</b> "); out.print(tracker.getTrackerIdentifier()); out.write("<br>\n\n<hr>\n\n"); Collection<JobInProgress> jobs = new ArrayList<JobInProgress>(); jobs.addAll(tracker.completedJobs()); jobs.addAll(tracker.runningJobs()); jobs.addAll(tracker.failedJobs()); int dataLocalMaps = 0; int rackLocalMaps = 0; int totalMaps = 0; int totalReduces = 0; for (JobInProgress job : jobs) { Counters counters = job.getCounters(); dataLocalMaps += counters.getCounter(JobInProgress.Counter.DATA_LOCAL_MAPS); rackLocalMaps += counters.getCounter(JobInProgress.Counter.RACK_LOCAL_MAPS); totalMaps += counters.getCounter(JobInProgress.Counter.TOTAL_LAUNCHED_MAPS); totalReduces += counters.getCounter(JobInProgress.Counter.TOTAL_LAUNCHED_REDUCES); } int dataLocalMapPct = totalMaps == 0 ? 0 : (100 * dataLocalMaps) / totalMaps; int rackLocalMapPct = totalMaps == 0 ? 0 : (100 * rackLocalMaps) / totalMaps; int dataRackLocalMapPct = totalMaps == 0 ? 
0 : (100 * (dataLocalMaps + rackLocalMaps)) / totalMaps; out.write("\n<p>\n<b>Data Local Maps:</b> "); out.print(dataLocalMaps); out.write(' '); out.write('('); out.print(dataLocalMapPct); out.write("%) <br>\n<b>Rack Local Maps:</b> "); out.print(rackLocalMaps); out.write(' '); out.write('('); out.print(rackLocalMapPct); out.write("%) <br>\n<b>Data or Rack Local:</b> "); out.print(dataLocalMaps + rackLocalMaps); out.write(' '); out.write('('); out.print(dataRackLocalMapPct); out.write("%) <br>\n<b>Total Maps:</b> "); out.print(totalMaps); out.write(" <br>\n<b>Total Reduces:</b> "); out.print(totalReduces); out.write(" <br>\n</p>\n\n"); out.println(ServletUtil.htmlFooter()); out.write('\n'); } catch (Throwable t) { if (!(t instanceof SkipPageException)) { out = _jspx_out; if (out != null && out.getBufferSize() != 0) out.clearBuffer(); if (_jspx_page_context != null) _jspx_page_context.handlePageException(t); } } finally { _jspxFactory.releasePageContext(_jspx_page_context); } }
static void checkRecords( Configuration defaults, int noMaps, int noReduces, Path sortInput, Path sortOutput) throws IOException { JobConf jobConf = new JobConf(defaults, RecordChecker.class); jobConf.setJobName("sortvalidate-record-checker"); jobConf.setInputFormat(SequenceFileInputFormat.class); jobConf.setOutputFormat(SequenceFileOutputFormat.class); jobConf.setOutputKeyClass(BytesWritable.class); jobConf.setOutputValueClass(IntWritable.class); jobConf.setMapperClass(Map.class); jobConf.setReducerClass(Reduce.class); JobClient client = new JobClient(jobConf); ClusterStatus cluster = client.getClusterStatus(); if (noMaps == -1) { noMaps = cluster.getTaskTrackers() * jobConf.getInt("test.sortvalidate.maps_per_host", 10); } if (noReduces == -1) { noReduces = (int) (cluster.getMaxReduceTasks() * 0.9); String sortReduces = jobConf.get("test.sortvalidate.reduces_per_host"); if (sortReduces != null) { noReduces = cluster.getTaskTrackers() * Integer.parseInt(sortReduces); } } jobConf.setNumMapTasks(noMaps); jobConf.setNumReduceTasks(noReduces); FileInputFormat.setInputPaths(jobConf, sortInput); FileInputFormat.addInputPath(jobConf, sortOutput); Path outputPath = new Path("/tmp/sortvalidate/recordchecker"); FileSystem fs = FileSystem.get(defaults); if (fs.exists(outputPath)) { fs.delete(outputPath, true); } FileOutputFormat.setOutputPath(jobConf, outputPath); // Uncomment to run locally in a single process // job_conf.set("mapred.job.tracker", "local"); Path[] inputPaths = FileInputFormat.getInputPaths(jobConf); System.out.println( "\nSortValidator.RecordChecker: Running on " + cluster.getTaskTrackers() + " nodes to validate sort from " + inputPaths[0] + ", " + inputPaths[1] + " into " + FileOutputFormat.getOutputPath(jobConf) + " with " + noReduces + " reduces."); Date startTime = new Date(); System.out.println("Job started: " + startTime); JobClient.runJob(jobConf); Date end_time = new Date(); System.out.println("Job ended: " + end_time); System.out.println( "The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds."); }