@Override
public int run(String[] args) throws Exception {
  Job job = Job.getInstance(getConf(), "PopulationJob");
  Configuration conf = job.getConfiguration();
  job.setJarByClass(Population.class);

  Path out = new Path("totalorder");
  FileInputFormat.setInputPaths(job, "populations");
  FileOutputFormat.setOutputPath(job, out);
  out.getFileSystem(conf).delete(out, true);

  job.setMapperClass(PopulationMapper.class);
  job.setReducerClass(PopulationReducer.class);
  job.setInputFormatClass(KeyValueTextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(5);

  // Configure the TotalOrderPartitioner: sample the input (10% selection
  // probability, at most 200 samples, from at most 3 splits) and write the
  // reducer key-range boundaries to the partition file.
  job.setPartitionerClass(TotalOrderPartitioner.class);
  InputSampler.Sampler<Text, Text> sampler =
      new InputSampler.RandomSampler<Text, Text>(0.1, 200, 3);
  InputSampler.writePartitionFile(job, sampler);

  // Ship the partition file to every task via the distributed cache; the "#"
  // fragment symlinks it under the default name the partitioner looks for.
  String partitionFile = TotalOrderPartitioner.getPartitionFile(conf);
  URI partitionURI = new URI(partitionFile + "#" + TotalOrderPartitioner.DEFAULT_PATH);
  job.addCacheFile(partitionURI);

  return job.waitForCompletion(true) ? 0 : 1;
}
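// A minimal sketch (not from the original source) of what PopulationMapper and
// PopulationReducer could look like as static nested classes of the Population
// driver above. With KeyValueTextInputFormat both the map input key and value
// are Text, so identity pass-throughs suffice: the TotalOrderPartitioner does
// the real work of range-partitioning keys so the concatenated reducer outputs
// are globally sorted. The class bodies below are assumptions.
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

public static class PopulationMapper extends Mapper<Text, Text, Text, Text> {
  @Override
  protected void map(Text key, Text value, Context context)
      throws IOException, InterruptedException {
    context.write(key, value); // identity: partitioning happens after the map
  }
}

public static class PopulationReducer extends Reducer<Text, Text, Text, Text> {
  @Override
  protected void reduce(Text key, Iterable<Text> values, Context context)
      throws IOException, InterruptedException {
    for (Text value : values) {
      context.write(key, value); // identity: keys arrive range-partitioned and sorted
    }
  }
}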
@Override
public int run(String[] args) throws Exception {
  Job job = JobBuilder.parseInputAndOutput(this, getConf(), args);
  if (job == null) {
    return -1;
  }

  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  SequenceFileOutputFormat.setCompressOutput(job, true);
  SequenceFileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

  // Sample the input (10% selection probability, at most 10,000 samples, from
  // at most 10 splits) to compute the reducer key-range boundaries.
  job.setPartitionerClass(TotalOrderPartitioner.class);
  InputSampler.Sampler<IntWritable, Text> sampler =
      new InputSampler.RandomSampler<IntWritable, Text>(0.1, 10000, 10);
  InputSampler.writePartitionFile(job, sampler);

  // Add the partition file to the distributed cache so every task can read it
  Configuration conf = job.getConfiguration();
  String partitionFile = TotalOrderPartitioner.getPartitionFile(conf);
  URI partitionUri = new URI(partitionFile);
  job.addCacheFile(partitionUri);

  return job.waitForCompletion(true) ? 0 : 1;
}
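// Hedged usage sketch: a Tool whose run() looks like the one above is normally
// launched through ToolRunner, which parses generic options (-D, -files, etc.)
// into the Configuration before calling run(). The driver class name here is
// an assumption, since the enclosing class is not shown.
import org.apache.hadoop.util.ToolRunner;

public static void main(String[] args) throws Exception {
  int exitCode = ToolRunner.run(new SortUsingTotalOrderPartitioner(), args);
  System.exit(exitCode);
}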
/** Runs this tool. */
@Override
public int run(String[] argv) throws Exception {
  Args args = new Args();
  CmdLineParser parser = new CmdLineParser(args, ParserProperties.defaults().withUsageWidth(100));

  try {
    parser.parseArgument(argv);
  } catch (CmdLineException e) {
    System.err.println(e.getMessage());
    parser.printUsage(System.err);
    return -1;
  }

  LOG.info(" - input path: " + args.input);
  LOG.info(" - output path: " + args.output);
  LOG.info(" - number of reducers: " + args.numReducers);

  Job job = Job.getInstance(getConf());
  job.setJobName(CooccurrenceStripes.class.getSimpleName());
  job.setJarByClass(CooccurrenceStripes.class);
  job.addCacheFile(new URI("/tmp/mergedLineCounts2#mergedLineCounts2"));

  // Delete the output directory if it exists already.
  Path outputDir = new Path(args.output);
  FileSystem.get(getConf()).delete(outputDir, true);

  job.setNumReduceTasks(args.numReducers);
  FileInputFormat.setInputPaths(job, new Path(args.input));
  FileOutputFormat.setOutputPath(job, new Path(args.output));

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(HMapStIW.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(HMapStFW.class);

  job.setMapperClass(MyMapper.class);
  job.setCombinerClass(MyCombiner.class);
  job.setReducerClass(MyReducer.class);

  // Cap input splits at 64 MB ("mapred.max.split.size" is the deprecated name
  // for "mapreduce.input.fileinputformat.split.maxsize") and give each map and
  // reduce task a 3 GB container with a matching JVM heap.
  job.getConfiguration().setInt("mapred.max.split.size", 1024 * 1024 * 64);
  job.getConfiguration().set("mapreduce.map.memory.mb", "3072");
  job.getConfiguration().set("mapreduce.map.java.opts", "-Xmx3072m");
  job.getConfiguration().set("mapreduce.reduce.memory.mb", "3072");
  job.getConfiguration().set("mapreduce.reduce.java.opts", "-Xmx3072m");

  long startTime = System.currentTimeMillis();
  job.waitForCompletion(true);
  System.out.println(
      "Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

  return 0;
}
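// A minimal sketch (assumed, not from the original source) of the "stripes"
// mapper the job above wires in: for each term, emit one associative array
// (HMapStIW, a string-to-int map; package path assumed from the
// lintools-datatypes library) counting co-occurring neighbors within a fixed
// window. The window size and tokenization are assumptions.
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import tl.lin.data.map.HMapStIW;

public static class MyMapper extends Mapper<LongWritable, Text, Text, HMapStIW> {
  private static final int WINDOW = 2; // assumed co-occurrence window
  private static final Text KEY = new Text();

  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    String[] tokens = value.toString().trim().split("\\s+");
    for (int i = 0; i < tokens.length; i++) {
      if (tokens[i].isEmpty()) continue;
      // Build one stripe per term: counts of its neighbors within the window.
      HMapStIW stripe = new HMapStIW();
      for (int j = Math.max(0, i - WINDOW); j < Math.min(tokens.length, i + WINDOW + 1); j++) {
        if (i != j) {
          stripe.increment(tokens[j]);
        }
      }
      KEY.set(tokens[i]);
      context.write(KEY, stripe);
    }
  }
}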
@Override
public int run(String[] args) throws Exception {
  String jobId = FLATTEN_OUTPUT_DIR.getName();

  // Step 1: flatten the input.
  Job flattenJob = Job.getInstance(getConf(), "Flatten input job [" + jobId + "]");
  flattenJob.setJarByClass(IntraHD_2.class);
  flattenJob.setMapperClass(FlattenMapper.class);
  flattenJob.setReducerClass(FlattenReducer.class);
  flattenJob.setInputFormatClass(KeyValueTextInputFormat.class);
  flattenJob.setMapOutputKeyClass(Text.class);
  flattenJob.setMapOutputValueClass(BytesWritable.class);
  flattenJob.setOutputKeyClass(Text.class);
  flattenJob.setOutputValueClass(BytesArrayWritable.class);
  flattenJob.setOutputFormatClass(SequenceFileOutputFormat.class);
  flattenJob.setNumReduceTasks(1);
  FileInputFormat.addInputPath(flattenJob, FLATTEN_INPUT_FILE);
  FileOutputFormat.setOutputPath(flattenJob, FLATTEN_OUTPUT_DIR);
  if (!flattenJob.waitForCompletion(true)) {
    return 1;
  }

  // Step 2: calculate each correct ID.
  Job correctIDJob = Job.getInstance(getConf(), "CorrectIDs Job [" + jobId + "]");
  correctIDJob.setJarByClass(IntraHD_2.class);
  correctIDJob.setMapperClass(CorrectIDMapper.class);
  correctIDJob.setReducerClass(CorrectIDReducer.class);
  correctIDJob.setCombinerClass(CorrectIDCombiner.class);
  correctIDJob.setInputFormatClass(SequenceFileInputFormat.class);
  correctIDJob.setMapOutputKeyClass(Text.class);
  correctIDJob.setMapOutputValueClass(ArrayIntPair.class);
  correctIDJob.setOutputFormatClass(SequenceFileOutputFormat.class);
  correctIDJob.setOutputKeyClass(Text.class);
  correctIDJob.setOutputValueClass(BytesWritable.class);
  correctIDJob.setNumReduceTasks(1);
  FileInputFormat.addInputPath(correctIDJob, CORRECTID_INPUT_FILE);
  FileOutputFormat.setOutputPath(correctIDJob, CORRECTID_OUTPUT_DIR);
  if (!correctIDJob.waitForCompletion(true)) {
    return 1;
  }

  // Step 3: calculate the Hamming distance using the correct IDs derived in the previous step.
  Job HDJob = Job.getInstance(getConf(), "Hamming distance Job with CorrectIDs [" + jobId + "]");
  HDJob.setJarByClass(IntraHD_2.class);
  HDJob.setMapperClass(HDwithCorrectIDMapper.class);
  HDJob.setReducerClass(AverageHDReducer.class);
  HDJob.setCombinerClass(HDCombiner.class);
  HDJob.setInputFormatClass(SequenceFileInputFormat.class);
  HDJob.setMapOutputKeyClass(IntWritable.class);
  HDJob.setMapOutputValueClass(IntPair.class);
  HDJob.setOutputKeyClass(IntWritable.class);
  HDJob.setOutputValueClass(DoubleWritable.class);
  HDJob.setNumReduceTasks(1);
  HDJob.addCacheFile(CACHE);
  FileInputFormat.addInputPath(HDJob, HD_INPUT_FILE);
  FileOutputFormat.setOutputPath(HDJob, HD_OUTPUT_DIR);

  return HDJob.waitForCompletion(true) ? 0 : 1;
}
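// Hedged sketch of how a task such as HDwithCorrectIDMapper above can read the
// file registered with HDJob.addCacheFile(CACHE). When the cache URI carries a
// "#linkName" fragment, the file is symlinked into the task's working directory
// under that name, so ordinary local-file I/O works in setup(). The link name
// "correctIDs", the record format, and the key/value types are assumptions; the
// CACHE constant is not shown in the original.
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public static class CacheReadingMapperSketch extends Mapper<Text, Text, IntWritable, IntWritable> {
  private final List<String> correctIds = new ArrayList<>();

  @Override
  protected void setup(Context context) throws IOException {
    // Read the symlinked cache file line by line (one record per line assumed).
    try (BufferedReader reader = new BufferedReader(new FileReader("correctIDs"))) {
      String line;
      while ((line = reader.readLine()) != null) {
        correctIds.add(line.trim());
      }
    }
  }
}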