@Override public int run(String[] arg0) throws Exception { // config a job and start it Configuration conf = getConf(); Job job = new Job(conf, "Index construction.."); job.setJarByClass(IndexConstructorDriver.class); job.setMapperClass(IndexConstructorMapper.class); job.setReducerClass(IndexConstructorReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(InvertedListWritable.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); // can add the dir by the config FileSystem fs = FileSystem.get(conf); String workdir = conf.get("org.joy.crawler.dir", "crawler/"); fs.delete(new Path(workdir + "indexOutput/"), true); FileInputFormat.addInputPath(job, new Path(workdir + "content/")); FileOutputFormat.setOutputPath(job, new Path(workdir + "indexOutput/")); System.out.println( "indexer starts to work, it begins to construct the index, please wait ...\n"); return job.waitForCompletion(true) ? 0 : 1; }
public static void main(String args[]) throws Exception { Configuration c = new Configuration(); if (args.length != 2) { System.out.println("provide sufficient arguments"); System.exit(-1); } Job job = Job.getInstance(c, "Wordcount"); job.setJarByClass(Wordcount.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); // job.submit(); job.waitForCompletion(true); }
@Override public int run(String[] args) throws Exception { String locatorHost = args[0]; int locatorPort = Integer.parseInt(args[1]); String hdfsHomeDir = args[2]; System.out.println( "KnownKeysMRv2 invoked with args (locatorHost = " + locatorHost + " locatorPort = " + locatorPort + " hdfsHomeDir = " + hdfsHomeDir); Configuration conf = getConf(); conf.set(GFInputFormat.INPUT_REGION, "partitionedRegion"); conf.set(GFInputFormat.HOME_DIR, hdfsHomeDir); conf.setBoolean(GFInputFormat.CHECKPOINT, false); conf.set(GFOutputFormat.REGION, "validationRegion"); conf.set(GFOutputFormat.LOCATOR_HOST, locatorHost); conf.setInt(GFOutputFormat.LOCATOR_PORT, locatorPort); Job job = Job.getInstance(conf, "knownKeysMRv2"); job.setInputFormatClass(GFInputFormat.class); job.setOutputFormatClass(GFOutputFormat.class); job.setMapperClass(KnownKeysMRv2Mapper.class); job.setMapOutputKeyClass(GFKey.class); job.setMapOutputValueClass(PEIWritable.class); job.setReducerClass(KnownKeysMRv2Reducer.class); // job.setOutputKeyClass(String.class); // job.setOutputValueClass(ValueHolder.class); return job.waitForCompletion(false) ? 0 : 1; }