public int run(String[] args) throws Exception {
    if (args.length != 3) {
      System.err.println("Usage: " + getClass().getName() + " <input> <output> <nPopulation>");
      // ToolRunner.printGenericCommandUsage(System.err);
      return -1;
    }

    // Create a JobConf using the processed conf
    final JobConf jobConf = new JobConf(getConf(), getClass());

    // Specify various job-specific parameters
    jobConf.setJobName(MapreduceStringFinder.class.getSimpleName());

    // setting the input format
    jobConf.setInputFormat(Individuals.class);

    // setting the output key and value classes
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(BooleanWritable.class);

    // setting the mapper class
    jobConf.setMapperClass(CIMapper.class);
    jobConf.setNumMapTasks(3); // set the number of map tasks

    // setting the reducer class
    jobConf.setReducerClass(CIReducer.class);

    // setup input/output directories
    final String dataset = args[0];

    FileInputFormat.setInputPaths(jobConf, new Path(dataset));
    FileOutputFormat.setOutputPath(jobConf, new Path(args[1]));
    final int pop = Integer.parseInt(args[2]);

    // based on the configuration, make this job threadable
    if (jobConf.getInt("mapred.tasktracker.map.tasks.maximum", 2) == 1) {
      jobConf.setMapRunnerClass(MultithreadedMapRunner.class);
      // this thread-count property is only consulted by MultithreadedMapRunner
      jobConf.setInt("mapred.map.multithreadedrunner.threads", 100);
    }
    // for computation-intensive tasks, raise the task timeout so the job does not fail
    // when a task does not report progress (heartbeat) before the default timeout elapses
    final int timeout = 9000000;
    jobConf.setInt("mapred.task.timeout", timeout);

    // make these job-specific parameters available to the mapper via the configuration
    jobConf.setInt("popsize", pop);
    jobConf.setStrings("dataset", dataset);

    // int map = jobConf.getNumMapTasks();
    // System.out.println("Number of Maps"+ map);

    // start the map/reduce job
    System.out.println("Starting Job");

    // get the start time for this job
    final long startTime = System.currentTimeMillis();

    // Submit the job, then poll for progress until the job is complete
    JobClient.runJob(jobConf);

    // get the end time for this job
    final long endTime = System.currentTimeMillis();

    // get the duration of this job
    final double duration = (endTime - startTime) / 1000.0;
    System.out.println("Job Finished in " + duration + " seconds");

    return 0;
  }
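  // Driver sketch (not part of the original source): assuming this class extends Configured
  // and implements Tool (its run(String[]) signature matches Tool), and that
  // org.apache.hadoop.conf.Configuration and org.apache.hadoop.util.ToolRunner are imported,
  // a minimal entry point could launch the job like this. The constructor name
  // MapreduceStringFinder is taken from the setJobName() call above.
  public static void main(String[] args) throws Exception {
    // ToolRunner applies generic Hadoop options to the Configuration before calling run()
    int exitCode = ToolRunner.run(new Configuration(), new MapreduceStringFinder(), args);
    System.exit(exitCode);
  }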
 public JobBuilder mapRunner(Class<? extends MapRunner> mapRunner) throws IOException {
   _jobConf.setMapRunnerClass(mapRunner);
   _jobConf.setJarByClass(mapRunner);
   return this;
 }
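 // Companion sketch (hypothetical, not part of the original builder): a reducer setter in
 // the same fluent style, assuming the same _jobConf field and the old mapred Reducer API.
 public JobBuilder reducer(Class<? extends Reducer> reducer) throws IOException {
   _jobConf.setReducerClass(reducer);
   _jobConf.setJarByClass(reducer);
   return this;
 }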
  public static void main(String[] args) {

    // guard against missing credentials before indexing into args
    if (args.length < 2) {
      System.err.println("Usage: " + S3GetMetdataJob.class.getName() + " <awsAccessKey> <awsSecretKey>");
      System.exit(1);
    }

    String accessKey = args[0];
    String secretKey = args[1];

    String[] paths = {
      // "2008/06",
      // "2008/07",
      // "2008/08",
      // "2008/09",
      // "2008/10",
      // "2008/11",
      "2009"
    };

    for (int pathIndex = 0; pathIndex < paths.length; ++pathIndex) {

      LOG.info("Processing Path:" + paths[pathIndex]);

      JobConf job = new JobConf(S3GetMetdataJob.class);

      Path tempDir =
          new Path(
              job.get("mapred.temp.dir", ".") + "/generate-temp-" + System.currentTimeMillis());

      LOG.info("Output for Path:" + paths[pathIndex] + " is:" + tempDir);
      System.out.println("Output Path is:" + tempDir);

      job.setJobName("S3 To CrawlURLMetadata Job for Path:" + paths[pathIndex]);

      // setup s3 properties
      JetS3tARCSource.setMaxRetries(job, 1);
      // set up S3 credentials ...
      JetS3tARCSource.setAWSAccessKeyID(job, accessKey);
      JetS3tARCSource.setAWSSecretAccessKey(job, secretKey);
      ARCSplitCalculator.setFilesPerSplit(job, 25);
      // set up arc reader properties
      ArcFileReader.setIOTimeoutValue(30000);
      // set input prefixes ...
      JetS3tARCSource.setInputPrefixes(job, paths[pathIndex]);
      // and S3 bucket name ...
      JetS3tARCSource.setBucketName(job, "commoncrawl");
      // and setup arc source for ArcInputFormat
      ARCInputFormat.setARCSourceClass(job, JetS3tARCSource.class);
      // and set up input format ...
      job.setInputFormat(ARCInputFormat.class);
      // set mapper ...
      job.setMapRunnerClass(S3GetMetdataJob.class);
      // setup reducer (identity in this case ... )
      job.setReducerClass(IdentityReducer.class);
      // standard output format ...
      job.setOutputFormat(SequenceFileOutputFormat.class);
      // set output path
      FileOutputFormat.setOutputPath(job, tempDir);
      // map output types
      job.setMapOutputKeyClass(Text.class);
      job.setMapOutputValueClass(CrawlURLMetadata.class);
      // reduce output types
      job.setOutputKeyClass(Text.class);
      job.setOutputValueClass(CrawlURLMetadata.class);
      // double the number of reducers ...
      // job.setNumReduceTasks(job.getNumReduceTasks() * 2);

      // run the job ...
      try {
        LOG.info("Starting Job:" + job.getJobName());
        JobClient.runJob(job);
        LOG.info("Finished Job:" + job.getJobName());

        Path finalPath = new Path("jobout/" + paths[pathIndex] + "/result");
        LOG.info("Copying Job Output to:" + finalPath);
        FileSystem fs = FileSystem.get(job);

        try {
          fs.mkdirs(finalPath.getParent());
          fs.rename(tempDir, finalPath);
          LOG.info("Copied Job Output to:" + finalPath);
        } finally {
          // fs.close();
        }

      } catch (IOException e) {
        LOG.error(StringUtils.stringifyException(e));
        e.printStackTrace();
      }
    }
  }
  @SuppressWarnings("static-access")
  @Override
  public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(
        OptionBuilder.withArgName("path").hasArg().withDescription("input").create(INPUT_OPTION));
    options.addOption(
        OptionBuilder.withArgName("path")
            .hasArg()
            .withDescription("tmp output directory")
            .create(OUTPUT_OPTION));
    options.addOption(
        OptionBuilder.withArgName("path")
            .hasArg()
            .withDescription("index file")
            .create(INDEX_FILE_OPTION));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
      cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
      System.err.println("Error parsing command line: " + exp.getMessage());
      return -1;
    }

    if (!cmdline.hasOption(INPUT_OPTION)
        || !cmdline.hasOption(OUTPUT_OPTION)
        || !cmdline.hasOption(INDEX_FILE_OPTION)) {
      HelpFormatter formatter = new HelpFormatter();
      formatter.printHelp(this.getClass().getName(), options);
      ToolRunner.printGenericCommandUsage(System.out);
      return -1;
    }

    Path inputPath = new Path(cmdline.getOptionValue(INPUT_OPTION));
    String outputPath = cmdline.getOptionValue(OUTPUT_OPTION);
    String indexFile = cmdline.getOptionValue(INDEX_FILE_OPTION);

    if (!inputPath.isAbsolute()) {
      System.err.println("Error: " + INPUT_OPTION + " must be an absolute path!");
      return -1;
    }

    JobConf conf = new JobConf(getConf(), BuildWikipediaForwardIndex.class);
    FileSystem fs = FileSystem.get(conf);

    LOG.info("Tool name: " + this.getClass().getName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - index file: " + indexFile);
    LOG.info("Note: This tool only works on block-compressed SequenceFiles!");

    conf.setJobName(
        String.format(
            "BuildWikipediaForwardIndex[%s: %s, %s: %s]",
            INPUT_OPTION, inputPath, INDEX_FILE_OPTION, indexFile));

    conf.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    FileOutputFormat.setCompressOutput(conf, false);

    conf.setInputFormat(NoSplitSequenceFileInputFormat.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapRunnerClass(MyMapRunner.class);
    conf.setReducerClass(IdentityReducer.class);

    // delete the output directory if it exists already
    fs.delete(new Path(outputPath), true);

    RunningJob job = JobClient.runJob(conf);

    Counters counters = job.getCounters();
    int blocks = (int) counters.findCounter(Blocks.Total).getCounter();

    LOG.info("number of blocks: " + blocks);

    LOG.info("Writing index file...");
    LineReader reader = new LineReader(fs.open(new Path(outputPath + "/part-00000")));
    FSDataOutputStream out = fs.create(new Path(indexFile), true);

    out.writeUTF("edu.umd.cloud9.collection.wikipedia.WikipediaForwardIndex");
    out.writeUTF(inputPath.toString());
    out.writeInt(blocks);

    int cnt = 0;
    Text line = new Text();
    while (reader.readLine(line) > 0) {
      String[] arr = line.toString().split("\\s+");

      int docno = Integer.parseInt(arr[0]);
      int offset = Integer.parseInt(arr[1]);
      short fileno = Short.parseShort(arr[2]);

      out.writeInt(docno);
      out.writeInt(offset);
      out.writeShort(fileno);

      cnt++;

      if (cnt % 100000 == 0) {
        LOG.info(cnt + " blocks written");
      }
    }

    reader.close();
    out.close();

    if (cnt != blocks) {
      throw new RuntimeException("Error: mismatch in block count!");
    }

    return 0;
  }
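  // Reader-side sketch (not part of the original tool): mirrors the writes above, namely
  // two UTF strings (the forward-index class name and the input path), an int block count,
  // then one (int docno, int offset, short fileno) triple per block. The method name
  // dumpIndex is hypothetical, and an FSDataInputStream import is assumed.
  private static void dumpIndex(FileSystem fs, Path indexFile) throws IOException {
    FSDataInputStream in = fs.open(indexFile);
    try {
      String indexClass = in.readUTF();  // written via out.writeUTF(...) above
      String inputPath = in.readUTF();   // written via out.writeUTF(inputPath.toString())
      int blocks = in.readInt();         // written via out.writeInt(blocks)
      System.out.println(indexClass + " over " + inputPath + ": " + blocks + " blocks");
      for (int i = 0; i < blocks; i++) {
        int docno = in.readInt();
        int offset = in.readInt();
        short fileno = in.readShort();
        if (i < 3) { // print a few entries as a sanity check
          System.out.println(docno + "\t" + offset + "\t" + fileno);
        }
      }
    } finally {
      in.close();
    }
  }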