public static void main(String[] args) throws Exception { final String NAME_NODE = "hdfs://sandbox.hortonworks.com:8020"; Configuration conf = new Configuration(); Job job = Job.getInstance(conf); job.setJarByClass(WordCount.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(NullWritable.class); if (args.length > 2) { job.setNumReduceTasks(Integer.parseInt(args[2])); } job.setMapperClass(CountMapper.class); job.setReducerClass(CountReducer.class); job.setJarByClass(WordCount.class); job.setNumReduceTasks(1); FileInputFormat.addInputPath(job, new Path(args[0] + "data/plot_summaries.txt")); FileSystem fs = FileSystem.get(conf); // handle (e.g. delete) existing output path Path outputDestination = new Path(args[0] + args[1]); if (fs.exists(outputDestination)) { fs.delete(outputDestination, true); } // set output path & start job1 FileOutputFormat.setOutputPath(job, outputDestination); int jobCompletionStatus = job.waitForCompletion(true) ? 0 : 1; }
/**
 * Configures and runs the "word count" job, reading from {@code args[0]} and
 * writing sequence-file output to the fixed temporary directory
 * {@code /user/akhfa/temp}.
 *
 * <p>The job status is returned to the caller rather than passed to
 * {@code System.exit}; terminating the JVM from inside this method made the
 * trailing {@code return} unreachable and prevented callers from reacting to
 * the result. Callers that want the status as the process exit code should
 * pass the returned value to {@code System.exit} themselves.
 *
 * @param args command-line arguments; {@code args[0]} is the input path
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws IllegalArgumentException if no input path is supplied
 * @throws Exception if job setup or execution fails
 */
public int run(String[] args) throws Exception {
    if (args.length < 1) {
        throw new IllegalArgumentException("Usage: AuthorCounter <input path>");
    }

    Path tempDir = new Path("/user/akhfa/temp");

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(AuthorCounter.class);

    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, tempDir);

    return job.waitForCompletion(true) ? 0 : 1;
}