예제 #1
0
 // method to write splits for old api mapper.
 private int writeOldSplits(JobConf job, Path jobSubmitDir) throws IOException {
   org.apache.hadoop.mapred.InputSplit[] splits =
       job.getInputFormat().getSplits(job, job.getNumMapTasks());
   // sort the splits into order based on size, so that the biggest
   // go first
   Arrays.sort(
       splits,
       new Comparator<org.apache.hadoop.mapred.InputSplit>() {
         public int compare(
             org.apache.hadoop.mapred.InputSplit a, org.apache.hadoop.mapred.InputSplit b) {
           try {
             long left = a.getLength();
             long right = b.getLength();
             if (left == right) {
               return 0;
             } else if (left < right) {
               return 1;
             } else {
               return -1;
             }
           } catch (IOException ie) {
             throw new RuntimeException("Problem getting input split size", ie);
           }
         }
       });
   JobSplitWriter.createSplitFiles(jobSubmitDir, job, jobSubmitDir.getFileSystem(job), splits);
   return splits.length;
 }
예제 #2
0
  @SuppressWarnings("unchecked")
  private <T extends InputSplit> int writeNewSplits(JobContext job, Path jobSubmitDir)
      throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = job.getConfiguration();
    InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), conf);

    List<InputSplit> splits = input.getSplits(job);
    T[] array = (T[]) splits.toArray(new InputSplit[splits.size()]);

    // sort the splits into order based on size, so that the biggest
    // go first
    Arrays.sort(array, new SplitComparator());
    JobSplitWriter.createSplitFiles(jobSubmitDir, conf, jobSubmitDir.getFileSystem(conf), array);
    //// num of split. the same as num of maps
    return array.length;
  }
예제 #3
0
  /**
   * Validates map phase progress after each record is processed by map task using custom task
   * reporter.
   */
  public void testMapProgress() throws Exception {
    JobConf job = new JobConf();
    fs = FileSystem.getLocal(job);
    Path rootDir = new Path(TEST_ROOT_DIR);
    createInputFile(rootDir);

    job.setNumReduceTasks(0);
    TaskAttemptID taskId = TaskAttemptID.forName("attempt_200907082313_0424_m_000000_0");
    job.setClass("mapreduce.job.outputformat.class", NullOutputFormat.class, OutputFormat.class);
    job.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR, TEST_ROOT_DIR);
    jobId = taskId.getJobID();

    JobContext jContext = new JobContextImpl(job, jobId);
    InputFormat<?, ?> input = ReflectionUtils.newInstance(jContext.getInputFormatClass(), job);

    List<InputSplit> splits = input.getSplits(jContext);
    JobSplitWriter.createSplitFiles(
        new Path(TEST_ROOT_DIR), job, new Path(TEST_ROOT_DIR).getFileSystem(job), splits);
    TaskSplitMetaInfo[] splitMetaInfo =
        SplitMetaInfoReader.readSplitMetaInfo(jobId, fs, job, new Path(TEST_ROOT_DIR));
    job.setUseNewMapper(true); // use new api
    for (int i = 0; i < splitMetaInfo.length; i++) { // rawSplits.length is 1
      map =
          new TestMapTask(
              job.get(JTConfig.JT_SYSTEM_DIR, "/tmp/hadoop/mapred/system") + jobId + "job.xml",
              taskId,
              i,
              splitMetaInfo[i].getSplitIndex(),
              1);

      JobConf localConf = new JobConf(job);
      map.localizeConfiguration(localConf);
      map.setConf(localConf);
      map.run(localConf, fakeUmbilical);
    }
    // clean up
    fs.delete(rootDir, true);
  }