/**
 * Configures and runs the map-only "Avro Writer" job.
 *
 * <p>The job reads {@code weblog_entries.txt} as text, runs it through
 * {@code WeblogMapper_Ex_5}, and writes to the {@code output} directory using an
 * Avro schema obtained by reflection from {@code WeblogRecord}. No reducers are
 * scheduled.
 *
 * @param args command-line arguments; not consulted by this method
 * @return {@code 0} if the job completed successfully, {@code 1} otherwise
 * @throws Exception if job setup or execution fails
 */
public int run(String[] args) throws Exception {
    // Output schema is derived from the record class via reflection.
    Schema recordSchema = ReflectData.get().getSchema(WeblogRecord.class);

    Job job = Job.getInstance(getConf());
    job.setJobName("Avro Writer");
    job.setJarByClass(getClass());

    // Map-only: zero reduce tasks.
    job.setNumReduceTasks(0);
    job.setMapperClass(WeblogMapper_Ex_5.class);
    job.setMapOutputKeyClass(AvroWrapper.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setInputFormatClass(TextInputFormat.class);
    AvroJob.setOutputKeySchema(job, recordSchema);

    FileInputFormat.setInputPaths(job, new Path("weblog_entries.txt"));
    FileOutputFormat.setOutputPath(job, new Path("output"));

    boolean succeeded = job.waitForCompletion(true);
    return succeeded ? 0 : 1;
}
public int run(String[] args) throws Exception { Configuration conf = new Configuration(); if (args.length != 2) { System.err.printf( "Usage: %s <comma separated paths> <output path>\n", this.getClass().getName()); return -1; } Job job = Job.getInstance(); job.setJobName("PasmJoin"); job.setJarByClass(PsamXY.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(AvroValue.class); job.setOutputKeyClass(AvroKey.class); job.setOutputValueClass(NullWritable.class); job.setMapperClass(PsamXYMapper.class); job.setReducerClass(PsamXYReducer.class); job.setInputFormatClass(AvroKeyInputFormat.class); job.setOutputFormatClass(AvroKeyOutputFormat.class); FileInputFormat.setInputPaths(job, args[0]); Path output = new Path(args[1]); FileOutputFormat.setOutputPath(job, output); FileSystem fs = FileSystem.get(conf); fs.delete(output, true); AvroJob.setOutputKeySchema(job, outputSchema); AvroJob.setMapOutputValueSchema(job, outputSchema); // DistributedCache.addCacheFile(new Path("BM_TERM_TYPE_DMT.avro").toUri(), // job.getConfiguration()); job.setNumReduceTasks(1); job.submit(); job.waitForCompletion(true); return 0; }