/** * Produce splits such that each is no greater than the quotient of the total size and the * number of splits requested. * * @param job The handle to the JobConf object * @param numSplits Number of splits requested */ public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { int cnfiles = job.getInt(SRC_COUNT_LABEL, -1); long cbsize = job.getLong(TOTAL_SIZE_LABEL, -1); String srcfilelist = job.get(SRC_LIST_LABEL, ""); if (cnfiles < 0 || cbsize < 0 || "".equals(srcfilelist)) { throw new RuntimeException( "Invalid metadata: #files(" + cnfiles + ") total_size(" + cbsize + ") listuri(" + srcfilelist + ")"); } Path src = new Path(srcfilelist); FileSystem fs = src.getFileSystem(job); FileStatus srcst = fs.getFileStatus(src); ArrayList<FileSplit> splits = new ArrayList<FileSplit>(numSplits); LongWritable key = new LongWritable(); FilePair value = new FilePair(); final long targetsize = cbsize / numSplits; long pos = 0L; long last = 0L; long acc = 0L; long cbrem = srcst.getLen(); SequenceFile.Reader sl = null; try { sl = new SequenceFile.Reader(fs, src, job); for (; sl.next(key, value); last = sl.getPosition()) { // if adding this split would put this split past the target size, // cut the last split and put this next file in the next split. if (acc + key.get() > targetsize && acc != 0) { long splitsize = last - pos; splits.add(new FileSplit(src, pos, splitsize, (String[]) null)); cbrem -= splitsize; pos = last; acc = 0L; } acc += key.get(); } } finally { checkAndClose(sl); } if (cbrem != 0) { splits.add(new FileSplit(src, pos, cbrem, (String[]) null)); } return splits.toArray(new FileSplit[splits.size()]); }
/** * A TaskTracker wants to know the physical locations of completed, but not yet closed, tasks. * This exists so the reduce task thread can locate map task outputs. */ public synchronized MapOutputLocation[] locateMapOutputs( String taskId, String[][] mapTasksNeeded) { ArrayList v = new ArrayList(); for (int i = 0; i < mapTasksNeeded.length; i++) { for (int j = 0; j < mapTasksNeeded[i].length; j++) { TaskInProgress tip = (TaskInProgress) taskidToTIPMap.get(mapTasksNeeded[i][j]); if (tip != null && tip.isComplete(mapTasksNeeded[i][j])) { String trackerId = (String) taskidToTrackerMap.get(mapTasksNeeded[i][j]); TaskTrackerStatus tracker; synchronized (taskTrackers) { tracker = (TaskTrackerStatus) taskTrackers.get(trackerId); } v.add(new MapOutputLocation(mapTasksNeeded[i][j], tracker.getHost(), tracker.getPort())); break; } } } // randomly shuffle results to load-balance map output requests Collections.shuffle(v); return (MapOutputLocation[]) v.toArray(new MapOutputLocation[v.size()]); }