Java JobConf.getNumMapTasks Examples

Programming Language: Java

Namespace/Package Name: org.apache.hadoop.mapred

Class/Type: JobConf

Method/Function: getNumMapTasks

Examples at hotexamples.com: 5

Java JobConf.getNumMapTasks - 5 examples found. These are the top rated real world Java examples of org.apache.hadoop.mapred.JobConf.getNumMapTasks extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

setMapOutputValueClass(30)

setOutputValueClass(30)

setJobName(30)

setMapperClass(30)

setInputFormat(30)

set(30)

setNumMapTasks(30)

setNumReduceTasks(30)

setOutputFormat(30)

setMapOutputKeyClass(30)

setOutputKeyClass(30)

getInt(30)

setReducerClass(30)

get(30)

setCombinerClass(27)

setInt(25)

setBoolean(23)

getBoolean(18)

setJarByClass(16)

getLong(14)

setLong(12)

setPartitionerClass(12)

setMapSpeculativeExecution(10)

getFloat(8)

setClass(7)

setJar(6)

setOutputKeyComparatorClass(6)

setReduceSpeculativeExecution(5)

getCredentials(5)

setOutputValueGroupingComparator(5)

getNumMapTasks(5)

setNumTasksToExecutePerJvm(4)

getJobName(4)

setMapRunnerClass(4)

addResource(4)

getNumReduceTasks(4)

setMaxMapAttempts(4)

setCompressMapOutput(4)

getInputFormat(4)

setSpeculativeExecution(4)

setStrings(3)

setClassLoader(3)

setOutputPath(3)

getMapOutputValueClass(3)

getMapOutputKeyClass(3)

setJobPriority(3)

setFloat(3)

setQueueName(2)

setMaxReduceAttempts(2)

addInputPath(2)

Example #1

Show file

File: MapReduceInputFormatWrapper.java Project: HimanshuBhardwaj/elephant-bird

  @Override
  public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {

    JobConf jobConf = (JobConf) HadoopCompat.getConfiguration(context);

    initInputFormat(jobConf);

    org.apache.hadoop.mapred.InputSplit[] splits =
        realInputFormat.getSplits(jobConf, jobConf.getNumMapTasks());

    if (splits == null) {
      return null;
    }

    List<InputSplit> resultSplits = new ArrayList<InputSplit>(splits.length);

    for (org.apache.hadoop.mapred.InputSplit split : splits) {
      if (split.getClass() == org.apache.hadoop.mapred.FileSplit.class) {
        org.apache.hadoop.mapred.FileSplit mapredFileSplit =
            ((org.apache.hadoop.mapred.FileSplit) split);
        resultSplits.add(
            new FileSplit(
                mapredFileSplit.getPath(),
                mapredFileSplit.getStart(),
                mapredFileSplit.getLength(),
                mapredFileSplit.getLocations()));
      } else {
        resultSplits.add(new InputSplitWrapper(split));
      }
    }

    return resultSplits;
  }

Example #2

Show file

File: DistRaid.java Project: fire9/hadoop-20

  /**
   * set up input file which has the list of input files.
   *
   * @return boolean
   * @throws IOException
   */
  private boolean setup() throws IOException {
    estimateSavings();

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobconf);
    Path jobdir = new Path(jClient.getSystemDir(), NAME + "_" + randomId);

    LOG.info(JOB_DIR_LABEL + "=" + jobdir);
    jobconf.set(JOB_DIR_LABEL, jobdir.toString());
    Path log = new Path(jobdir, "_logs");

    // The control file should have small size blocks. This helps
    // in spreading out the load from mappers that will be spawned.
    jobconf.setInt("dfs.blocks.size", OP_LIST_BLOCK_SIZE);

    FileOutputFormat.setOutputPath(jobconf, log);
    LOG.info("log=" + log);

    // create operation list
    FileSystem fs = jobdir.getFileSystem(jobconf);
    Path opList = new Path(jobdir, "_" + OP_LIST_LABEL);
    jobconf.set(OP_LIST_LABEL, opList.toString());
    int opCount = 0, synCount = 0;
    SequenceFile.Writer opWriter = null;

    try {
      opWriter =
          SequenceFile.createWriter(
              fs, jobconf, opList, Text.class, PolicyInfo.class, SequenceFile.CompressionType.NONE);
      for (RaidPolicyPathPair p : raidPolicyPathPairList) {
        // If a large set of files are Raided for the first time, files
        // in the same directory that tend to have the same size will end up
        // with the same map. This shuffle mixes things up, allowing a better
        // mix of files.
        java.util.Collections.shuffle(p.srcPaths);
        for (FileStatus st : p.srcPaths) {
          opWriter.append(new Text(st.getPath().toString()), p.policy);
          opCount++;
          if (++synCount > SYNC_FILE_MAX) {
            opWriter.sync();
            synCount = 0;
          }
        }
      }

    } finally {
      if (opWriter != null) {
        opWriter.close();
      }
      fs.setReplication(opList, OP_LIST_REPLICATION); // increase replication for control file
    }
    raidPolicyPathPairList.clear();

    jobconf.setInt(OP_COUNT_LABEL, opCount);
    LOG.info("Number of files=" + opCount);
    jobconf.setNumMapTasks(
        getMapCount(opCount, new JobClient(jobconf).getClusterStatus().getTaskTrackers()));
    LOG.info("jobName= " + jobName + " numMapTasks=" + jobconf.getNumMapTasks());
    return opCount != 0;
  }

Example #3

Show file

File: JobSubmitter.java Project: nourlcn/yarn-comment

 // method to write splits for old api mapper.
 private int writeOldSplits(JobConf job, Path jobSubmitDir) throws IOException {
   org.apache.hadoop.mapred.InputSplit[] splits =
       job.getInputFormat().getSplits(job, job.getNumMapTasks());
   // sort the splits into order based on size, so that the biggest
   // go first
   Arrays.sort(
       splits,
       new Comparator<org.apache.hadoop.mapred.InputSplit>() {
         public int compare(
             org.apache.hadoop.mapred.InputSplit a, org.apache.hadoop.mapred.InputSplit b) {
           try {
             long left = a.getLength();
             long right = b.getLength();
             if (left == right) {
               return 0;
             } else if (left < right) {
               return 1;
             } else {
               return -1;
             }
           } catch (IOException ie) {
             throw new RuntimeException("Problem getting input split size", ie);
           }
         }
       });
   JobSplitWriter.createSplitFiles(jobSubmitDir, job, jobSubmitDir.getFileSystem(job), splits);
   return splits.length;
 }

Example #4

Show file

File: TeraInputFormat.java Project: ifloating/hadoop-source-reading

 /**
  * Use the input splits to take samples of the input and generate sample keys. By default reads
  * 100,000 keys from 10 locations in the input, sorts them and picks N-1 keys to generate N
  * equally sized partitions.
  *
  * @param conf the job to sample
  * @param partFile where to write the output file to
  * @throws IOException if something goes wrong
  */
 public static void writePartitionFile(JobConf conf, Path partFile) throws IOException {
   TeraInputFormat inFormat = new TeraInputFormat();
   TextSampler sampler = new TextSampler();
   Text key = new Text();
   Text value = new Text();
   int partitions = conf.getNumReduceTasks();
   long sampleSize = conf.getLong(SAMPLE_SIZE, 100000);
   InputSplit[] splits = inFormat.getSplits(conf, conf.getNumMapTasks());
   int samples = Math.min(10, splits.length);
   long recordsPerSample = sampleSize / samples;
   int sampleStep = splits.length / samples;
   long records = 0;
   // take N samples from different parts of the input
   for (int i = 0; i < samples; ++i) {
     RecordReader<Text, Text> reader =
         inFormat.getRecordReader(splits[sampleStep * i], conf, null);
     while (reader.next(key, value)) {
       sampler.addKey(key);
       records += 1;
       if ((i + 1) * recordsPerSample <= records) {
         break;
       }
     }
   }
   FileSystem outFs = partFile.getFileSystem(conf);
   if (outFs.exists(partFile)) {
     outFs.delete(partFile, false);
   }
   SequenceFile.Writer writer =
       SequenceFile.createWriter(outFs, conf, partFile, Text.class, NullWritable.class);
   NullWritable nullValue = NullWritable.get();
   for (Text split : sampler.createPartitions(partitions)) {
     writer.append(split, nullValue);
   }
   writer.close();
 }

Example #5

Show file

File: ESInputFormat.java Project: purplecow/elasticsearch-hadoop

 //
 // new API - just delegates to the Old API
 //
 @Override
 public List<InputSplit> getSplits(JobContext context) {
   JobConf conf = (JobConf) context.getConfiguration();
   return Arrays.asList((InputSplit[]) getSplits(conf, conf.getNumMapTasks()));
 }