Example #1
 // Writes input splits for jobs that use the old (org.apache.hadoop.mapred) API and returns the split count.
 private int writeOldSplits(JobConf job, Path jobSubmitDir) throws IOException {
   org.apache.hadoop.mapred.InputSplit[] splits =
       job.getInputFormat().getSplits(job, job.getNumMapTasks());
   // sort the splits into order based on size, so that the biggest
   // go first
   Arrays.sort(
       splits,
       new Comparator<org.apache.hadoop.mapred.InputSplit>() {
         public int compare(
             org.apache.hadoop.mapred.InputSplit a, org.apache.hadoop.mapred.InputSplit b) {
           try {
             long left = a.getLength();
             long right = b.getLength();
             if (left == right) {
               return 0;
             } else if (left < right) {
               return 1;
             } else {
               return -1;
             }
           } catch (IOException ie) {
             throw new RuntimeException("Problem getting input split size", ie);
           }
         }
       });
   JobSplitWriter.createSplitFiles(jobSubmitDir, job, jobSubmitDir.getFileSystem(job), splits);
   return splits.length;
 }
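A minimal equivalent sketch, assuming Java 8 or later: the descending-by-size ordering above can also be written with a lambda in place of the anonymous Comparator.

  Arrays.sort(
      splits,
      (a, b) -> {
        try {
          // Biggest splits first, so the longest-running maps are scheduled early.
          return Long.compare(b.getLength(), a.getLength());
        } catch (IOException ie) {
          throw new RuntimeException("Problem getting input split size", ie);
        }
      });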
Example #2
  @SuppressWarnings("unchecked")
  private <T extends InputSplit> int writeNewSplits(JobContext job, Path jobSubmitDir)
      throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = job.getConfiguration();
    InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), conf);

    List<InputSplit> splits = input.getSplits(job);
    T[] array = (T[]) splits.toArray(new InputSplit[splits.size()]);

    // sort the splits into order based on size, so that the biggest
    // go first
    Arrays.sort(array, new SplitComparator());
    JobSplitWriter.createSplitFiles(jobSubmitDir, conf, jobSubmitDir.getFileSystem(conf), array);
    // The number of splits equals the number of map tasks.
    return array.length;
  }
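Example #2 sorts with a SplitComparator that is not shown above. A minimal sketch of a size-descending comparator for the new-API org.apache.hadoop.mapreduce.InputSplit, whose getLength() may throw IOException and InterruptedException, could look like this:

  private static class SplitComparator implements Comparator<InputSplit> {
    @Override
    public int compare(InputSplit a, InputSplit b) {
      try {
        // Largest splits first, matching the ordering used for the old API.
        return Long.compare(b.getLength(), a.getLength());
      } catch (IOException | InterruptedException e) {
        throw new RuntimeException("Problem getting input split size", e);
      }
    }
  }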
Example #3
  /**
   * Validates map phase progress after each record is processed by map task using custom task
   * reporter.
   */
  public void testMapProgress() throws Exception {
    JobConf job = new JobConf();
    fs = FileSystem.getLocal(job);
    Path rootDir = new Path(TEST_ROOT_DIR);
    createInputFile(rootDir);

    job.setNumReduceTasks(0);
    TaskAttemptID taskId = TaskAttemptID.forName("attempt_200907082313_0424_m_000000_0");
    job.setClass("mapreduce.job.outputformat.class", NullOutputFormat.class, OutputFormat.class);
    job.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR, TEST_ROOT_DIR);
    jobId = taskId.getJobID();

    JobContext jContext = new JobContextImpl(job, jobId);
    InputFormat<?, ?> input = ReflectionUtils.newInstance(jContext.getInputFormatClass(), job);

    List<InputSplit> splits = input.getSplits(jContext);
    JobSplitWriter.createSplitFiles(
        new Path(TEST_ROOT_DIR), job, new Path(TEST_ROOT_DIR).getFileSystem(job), splits);
    TaskSplitMetaInfo[] splitMetaInfo =
        SplitMetaInfoReader.readSplitMetaInfo(jobId, fs, job, new Path(TEST_ROOT_DIR));
    job.setUseNewMapper(true); // use the new (mapreduce) API
    for (int i = 0; i < splitMetaInfo.length; i++) { // splitMetaInfo.length is 1 for this input
      map =
          new TestMapTask(
              job.get(JTConfig.JT_SYSTEM_DIR, "/tmp/hadoop/mapred/system") + jobId + "job.xml",
              taskId,
              i,
              splitMetaInfo[i].getSplitIndex(),
              1);

      JobConf localConf = new JobConf(job);
      map.localizeConfiguration(localConf);
      map.setConf(localConf);
      map.run(localConf, fakeUmbilical);
    }
    // clean up
    fs.delete(rootDir, true);
  }
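Example #3 relies on a createInputFile helper that is not shown. A hypothetical minimal version, assuming the file name and contents, writes a small text file under rootDir so the input format produces at least one split:

  // Hypothetical helper assumed by the test above (file name and contents are illustrative).
  private void createInputFile(Path rootDir) throws IOException {
    Path inputFile = new Path(rootDir, "input.txt"); // assumed file name
    try (FSDataOutputStream out = fs.create(inputFile)) {
      out.writeBytes("key1\tvalue1\nkey2\tvalue2\nkey3\tvalue3\n");
    }
  }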