// method to write splits for old api mapper. private int writeOldSplits(JobConf job, Path jobSubmitDir) throws IOException { org.apache.hadoop.mapred.InputSplit[] splits = job.getInputFormat().getSplits(job, job.getNumMapTasks()); // sort the splits into order based on size, so that the biggest // go first Arrays.sort( splits, new Comparator<org.apache.hadoop.mapred.InputSplit>() { public int compare( org.apache.hadoop.mapred.InputSplit a, org.apache.hadoop.mapred.InputSplit b) { try { long left = a.getLength(); long right = b.getLength(); if (left == right) { return 0; } else if (left < right) { return 1; } else { return -1; } } catch (IOException ie) { throw new RuntimeException("Problem getting input split size", ie); } } }); JobSplitWriter.createSplitFiles(jobSubmitDir, job, jobSubmitDir.getFileSystem(job), splits); return splits.length; }
@SuppressWarnings("unchecked") private <T extends InputSplit> int writeNewSplits(JobContext job, Path jobSubmitDir) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = job.getConfiguration(); InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), conf); List<InputSplit> splits = input.getSplits(job); T[] array = (T[]) splits.toArray(new InputSplit[splits.size()]); // sort the splits into order based on size, so that the biggest // go first Arrays.sort(array, new SplitComparator()); JobSplitWriter.createSplitFiles(jobSubmitDir, conf, jobSubmitDir.getFileSystem(conf), array); //// num of split. the same as num of maps return array.length; }
/** * Validates map phase progress after each record is processed by map task using custom task * reporter. */ public void testMapProgress() throws Exception { JobConf job = new JobConf(); fs = FileSystem.getLocal(job); Path rootDir = new Path(TEST_ROOT_DIR); createInputFile(rootDir); job.setNumReduceTasks(0); TaskAttemptID taskId = TaskAttemptID.forName("attempt_200907082313_0424_m_000000_0"); job.setClass("mapreduce.job.outputformat.class", NullOutputFormat.class, OutputFormat.class); job.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR, TEST_ROOT_DIR); jobId = taskId.getJobID(); JobContext jContext = new JobContextImpl(job, jobId); InputFormat<?, ?> input = ReflectionUtils.newInstance(jContext.getInputFormatClass(), job); List<InputSplit> splits = input.getSplits(jContext); JobSplitWriter.createSplitFiles( new Path(TEST_ROOT_DIR), job, new Path(TEST_ROOT_DIR).getFileSystem(job), splits); TaskSplitMetaInfo[] splitMetaInfo = SplitMetaInfoReader.readSplitMetaInfo(jobId, fs, job, new Path(TEST_ROOT_DIR)); job.setUseNewMapper(true); // use new api for (int i = 0; i < splitMetaInfo.length; i++) { // rawSplits.length is 1 map = new TestMapTask( job.get(JTConfig.JT_SYSTEM_DIR, "/tmp/hadoop/mapred/system") + jobId + "job.xml", taskId, i, splitMetaInfo[i].getSplitIndex(), 1); JobConf localConf = new JobConf(job); map.localizeConfiguration(localConf); map.setConf(localConf); map.run(localConf, fakeUmbilical); } // clean up fs.delete(rootDir, true); }