@Override
  public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf(), "PopulationJob");
    Configuration conf = job.getConfiguration();
    job.setJarByClass(Population.class);

    Path out = new Path("totalorder");
    FileInputFormat.setInputPaths(job, "populations");
    FileOutputFormat.setOutputPath(job, out);
    out.getFileSystem(conf).delete(out, true);

    job.setMapperClass(PopulationMapper.class);
    job.setReducerClass(PopulationReducer.class);
    job.setInputFormatClass(KeyValueTextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setNumReduceTasks(5);

    // Configure the TotalOrderPartitioner: sample the input to pick partition
    // boundaries, write the partition file, then ship it to every task via the
    // distributed cache under the partitioner's default name.
    job.setPartitionerClass(TotalOrderPartitioner.class);
    // RandomSampler(freq, numSamples, maxSplitsSampled): sample keys with
    // probability 0.1, keeping at most 200 samples from at most 3 splits.
    InputSampler.Sampler<Text, Text> sampler =
        new InputSampler.RandomSampler<Text, Text>(0.1, 200, 3);
    InputSampler.writePartitionFile(job, sampler);
    String partitionFile = TotalOrderPartitioner.getPartitionFile(conf);
    URI partitionURI = new URI(partitionFile + "#" + TotalOrderPartitioner.DEFAULT_PATH);
    job.addCacheFile(partitionURI);

    return job.waitForCompletion(true) ? 0 : 1;
  }
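The PopulationMapper and PopulationReducer referenced above are not part of this snippet. In a total-order sort the global ordering comes entirely from the partition boundaries, so these classes are typically pass-through; a minimal sketch, assuming identity map and reduce steps:

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

// Hypothetical pass-through implementations; the real classes may do more.
class PopulationMapper extends Mapper<Text, Text, Text, Text> {
  @Override
  protected void map(Text key, Text value, Context context)
      throws IOException, InterruptedException {
    // Keys flow unchanged to the TotalOrderPartitioner.
    context.write(key, value);
  }
}

class PopulationReducer extends Reducer<Text, Text, Text, Text> {
  @Override
  protected void reduce(Text key, Iterable<Text> values, Context context)
      throws IOException, InterruptedException {
    // Each reducer writes a sorted, non-overlapping range of keys.
    for (Text value : values) {
      context.write(key, value);
    }
  }
}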
  @Override
  public int run(String[] args) throws Exception {
    Job job = JobBuilder.parseInputAndOutput(this, getConf(), args);
    if (job == null) {
      return -1;
    }

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    job.setPartitionerClass(TotalOrderPartitioner.class);

    // Sample keys with probability 0.1, keeping at most 10,000 samples drawn
    // from at most 10 splits, to estimate the reducer partition boundaries.
    InputSampler.Sampler<IntWritable, Text> sampler =
        new InputSampler.RandomSampler<IntWritable, Text>(0.1, 10000, 10);

    InputSampler.writePartitionFile(job, sampler);

    // Add the partition file to the distributed cache so every task can read it
    Configuration conf = job.getConfiguration();
    String partitionFile = TotalOrderPartitioner.getPartitionFile(conf);
    URI partitionUri = new URI(partitionFile);
    job.addCacheFile(partitionUri);

    return job.waitForCompletion(true) ? 0 : 1;
  }
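Both run() methods above rely on getConf(), which implies the enclosing class extends Configured and implements Tool. For completeness, a sketch of such a driver, with a hypothetical class name:

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

// Hypothetical skeleton; TotalOrderSortDriver is an assumed name.
public class TotalOrderSortDriver extends Configured implements Tool {

  @Override
  public int run(String[] args) throws Exception {
    // ... job setup as in the snippets above ...
    return 0;
  }

  public static void main(String[] args) throws Exception {
    // ToolRunner parses generic Hadoop options (-D, -files, ...) before calling run().
    System.exit(ToolRunner.run(new TotalOrderSortDriver(), args));
  }
}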
  /** Runs this tool. */
  public int run(String[] argv) throws Exception {
    Args args = new Args();
    CmdLineParser parser = new CmdLineParser(args, ParserProperties.defaults().withUsageWidth(100));

    try {
      parser.parseArgument(argv);
    } catch (CmdLineException e) {
      System.err.println(e.getMessage());
      parser.printUsage(System.err);
      return -1;
    }

    LOG.info(" - input path: " + args.input);
    LOG.info(" - output path: " + args.output);
    LOG.info(" - number of reducers: " + args.numReducers);

    Job job = Job.getInstance(getConf());
    job.setJobName(CooccurrenceStripes.class.getSimpleName());
    job.setJarByClass(CooccurrenceStripes.class);

    job.addCacheFile(new URI("/tmp/mergedLineCounts2#mergedLineCounts2"));

    // Delete the output directory if it exists already.
    Path outputDir = new Path(args.output);
    FileSystem.get(getConf()).delete(outputDir, true);

    job.setNumReduceTasks(args.numReducers);

    FileInputFormat.setInputPaths(job, new Path(args.input));
    FileOutputFormat.setOutputPath(job, new Path(args.output));

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(HMapStIW.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(HMapStFW.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);

    // Cap input split size at 64 MB ("mapred.max.split.size" is the deprecated key).
    job.getConfiguration()
        .setLong("mapreduce.input.fileinputformat.split.maxsize", 1024 * 1024 * 64);
    job.getConfiguration().set("mapreduce.map.memory.mb", "3072");
    job.getConfiguration().set("mapreduce.map.java.opts", "-Xmx3072m");
    job.getConfiguration().set("mapreduce.reduce.memory.mb", "3072");
    job.getConfiguration().set("mapreduce.reduce.java.opts", "-Xmx3072m");

    long startTime = System.currentTimeMillis();
    boolean succeeded = job.waitForCompletion(true);
    System.out.println(
        "Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return succeeded ? 0 : 1;
  }
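The Args bean parsed by CmdLineParser is not shown. Given the fields run() reads (input, output, numReducers), it is presumably an args4j-annotated class along these lines (the option names here are assumptions):

import org.kohsuke.args4j.Option;

// Assumed shape of the args4j bean; option names are illustrative.
private static final class Args {
  @Option(name = "-input", metaVar = "[path]", required = true, usage = "input path")
  String input;

  @Option(name = "-output", metaVar = "[path]", required = true, usage = "output path")
  String output;

  @Option(name = "-reducers", metaVar = "[num]", usage = "number of reducers")
  int numReducers = 1;
}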
  @Override
  public int run(String[] args) throws Exception {
    String jobId = FLATTEN_OUTPUT_DIR.getName();

    // Flatten MR job first
    Job flattenJob = Job.getInstance(getConf(), "Flatten input job [" + jobId + "]");
    flattenJob.setJarByClass(IntraHD_2.class);
    flattenJob.setMapperClass(FlattenMapper.class);
    flattenJob.setReducerClass(FlattenReducer.class);

    flattenJob.setInputFormatClass(KeyValueTextInputFormat.class);
    flattenJob.setMapOutputKeyClass(Text.class);
    flattenJob.setMapOutputValueClass(BytesWritable.class);
    flattenJob.setOutputKeyClass(Text.class);
    flattenJob.setOutputValueClass(BytesArrayWritable.class);
    flattenJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    flattenJob.setNumReduceTasks(1);
    FileInputFormat.addInputPath(flattenJob, FLATTEN_INPUT_FILE);
    FileOutputFormat.setOutputPath(flattenJob, FLATTEN_OUTPUT_DIR);

    if (!flattenJob.waitForCompletion(true)) return 1;

    // Calculate the correct ID for each record
    Job correctIDJob = Job.getInstance(getConf(), "CorrectIDs Job [" + jobId + "]");
    correctIDJob.setJarByClass(IntraHD_2.class);
    correctIDJob.setMapperClass(CorrectIDMapper.class);
    correctIDJob.setReducerClass(CorrectIDReducer.class);
    correctIDJob.setCombinerClass(CorrectIDCombiner.class);

    correctIDJob.setInputFormatClass(SequenceFileInputFormat.class);
    correctIDJob.setMapOutputKeyClass(Text.class);
    correctIDJob.setMapOutputValueClass(ArrayIntPair.class);

    correctIDJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    correctIDJob.setOutputKeyClass(Text.class);
    correctIDJob.setOutputValueClass(BytesWritable.class);
    correctIDJob.setNumReduceTasks(1);

    FileInputFormat.addInputPath(correctIDJob, CORRECTID_INPUT_FILE);
    FileOutputFormat.setOutputPath(correctIDJob, CORRECTID_OUTPUT_DIR);

    if (!correctIDJob.waitForCompletion(true)) return 1;

    // Calculate Hamming distances using the correct IDs derived in the previous step
    Job HDJob = Job.getInstance(getConf(), "Hamming distance Job with CorrectIDs [" + jobId + "]");
    HDJob.setJarByClass(IntraHD_2.class);
    HDJob.setMapperClass(HDwithCorrectIDMapper.class);
    HDJob.setReducerClass(AverageHDReducer.class);
    HDJob.setCombinerClass(HDCombiner.class);

    HDJob.setInputFormatClass(SequenceFileInputFormat.class);
    HDJob.setMapOutputKeyClass(IntWritable.class);
    HDJob.setMapOutputValueClass(IntPair.class);

    HDJob.setOutputKeyClass(IntWritable.class);
    HDJob.setOutputValueClass(DoubleWritable.class);
    HDJob.setNumReduceTasks(1);
    HDJob.addCacheFile(CACHE);

    FileInputFormat.addInputPath(HDJob, HD_INPUT_FILE);
    FileOutputFormat.setOutputPath(HDJob, HD_OUTPUT_DIR);

    return HDJob.waitForCompletion(true) ? 0 : 1;
  }
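One detail worth noting: HDJob.addCacheFile(CACHE) distributes a file to every task, and when the URI carries a #name fragment, the framework creates a symlink with that name in each task's working directory. CACHE itself is defined outside this snippet; a sketch of what it might look like, with a hypothetical path and link name:

import java.net.URI;

// Hypothetical definition; the '#correctIDs' fragment names the local symlink.
private static final URI CACHE =
    URI.create("/user/intrahd/correctIDs/part-r-00000#correctIDs");

// Inside HDwithCorrectIDMapper.setup(), the cached file can then be opened
// through the symlink with ordinary file I/O:
//   File cached = new File("correctIDs");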