/**
 * Runs the two-stage PVidConvert pipeline.
 *
 * <p>Stage one reads {@code args[0]} and writes to a fixed temporary directory; stage two
 * reads that directory and writes to {@code args[1]}. The JVM exits with status 1 as soon as
 * a stage reports failure, so stage two never consumes the output of a failed stage one.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the final output path
 * @throws IOException if a job cannot be configured or submitted
 * @throws ClassNotFoundException propagated from {@code waitForCompletion}
 * @throws InterruptedException propagated from {@code waitForCompletion}
 */
public static void main(String[] args)
    throws IOException, ClassNotFoundException, InterruptedException {
  // Intermediate directory handed from stage one to stage two.
  Path intermediate = new Path("/tmp/temporary_execution/");

  Configuration conf = new Configuration();
  Job job = new Job(conf, "job");
  job.setJarByClass(PVidConvert.class);
  job.setMapperClass(Map1.class);
  job.setReducerClass(Reduce1.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, intermediate);
  if (!job.waitForCompletion(true)) {
    // Do not feed partial stage-one output into stage two.
    System.exit(1);
  }

  Configuration conf1 = new Configuration();
  Job job1 = new Job(conf1, "job1");
  job1.setJarByClass(PVidConvert.class);
  job1.setMapperClass(Map2.class);
  job1.setReducerClass(Reduce2.class);
  job1.setOutputKeyClass(Text.class);
  job1.setOutputValueClass(Text.class);
  FileInputFormat.addInputPath(job1, intermediate);
  FileOutputFormat.setOutputPath(job1, new Path(args[1]));
  if (!job1.waitForCompletion(true)) {
    System.exit(1);
  }
}
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); // conf.addResource(new Path("/home/hadoop/hadoop/hadoop-1.2.1/conf/core-site.xml")); // conf.addResource(new Path("/home/hadoop/hadoop/hadoop-1.2.1/conf/hdfs-site.xml")); // ===== Stage 1 ===== Job job1 = new Job(conf, "Stage 1: Frequency Count"); job1.setJarByClass(HashCount1.class); job1.setMapperClass(Mapper1.class); // job1.setCombinerClass(Combine1.class); job1.setReducerClass(Reducer1.class); job1.setOutputKeyClass(Text.class); job1.setOutputValueClass(IntWritable.class); job1.setNumReduceTasks(1); FileInputFormat.addInputPath(job1, new Path("/TwitterInFiles/twitter_10k.tar.gz")); FileOutputFormat.setOutputPath(job1, new Path("/twitterOuts/output1")); job1.waitForCompletion(true); // ===== Stage 2 ===== Job job2 = new Job(conf, "Stage 2: Sort"); job2.setJarByClass(HashCount1.class); job2.setMapperClass(Mapper2.class); // job1.setCombinerClass(IntSumReducer.class); job2.setReducerClass(Reducer2.class); job2.setOutputKeyClass(IntWritable.class); job2.setOutputValueClass(Text.class); job2.setNumReduceTasks(1); FileInputFormat.addInputPath(job2, new Path("/twitterOuts/output1")); FileOutputFormat.setOutputPath(job2, new Path("/twitterOuts/output2")); job2.waitForCompletion(true); System.exit(job2.waitForCompletion(true) ? 0 : 1); }
@Override public int run(String[] args) throws Exception { // TODO Auto-generated method stub Configuration conf = getConf(); // conf.set("stat_date", dateString); Job job = new Job(conf, "DayhslogUserDateNewMac"); job.setJarByClass(DayhslogUserDate.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileInputFormat.addInputPath(job, new Path(args[1])); FileOutputFormat.setOutputPath(job, new Path(args[2] + "tmp")); FileSystem.get(conf).delete(new Path(args[2] + "tmp"), true); job.setMapperClass(DayhslogUserDateNewMacMapper.class); job.setReducerClass(DayhslogUserDateNewMacReducer.class); // job.setInputFormatClass(LzoTextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setNumReduceTasks(10); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); int code = job.waitForCompletion(true) ? 0 : 1; if (code == 0) { Job resultJob = new Job(conf, "DayhslogUserDate"); resultJob.setJarByClass(DayhslogUserDate.class); FileInputFormat.addInputPath(resultJob, new Path(args[0])); FileInputFormat.addInputPath(resultJob, new Path(args[2] + "tmp")); FileOutputFormat.setOutputPath(resultJob, new Path(args[2])); FileSystem.get(conf).delete(new Path(args[2]), true); resultJob.setMapperClass(DayhslogUserDateMapper.class); resultJob.setReducerClass(DayhslogUserDateReducer.class); resultJob.setNumReduceTasks(10); resultJob.setMapOutputKeyClass(Text.class); resultJob.setMapOutputValueClass(Text.class); resultJob.setOutputKeyClass(Text.class); resultJob.setOutputValueClass(Text.class); code = resultJob.waitForCompletion(true) ? 0 : 1; } Path tmpPath = new Path(args[2] + "tmp"); FileSystem.get(conf).delete(tmpPath, true); System.exit(code); return code; }
/**
 * Runs the "combine votes" job followed by the "votes count" job.
 *
 * <p>The first job reads {@code args[0]} and writes to {@code args[1] + "-tmp"}; the second
 * reads that directory and writes to {@code args[1]}. The process exits with status 0 when
 * both jobs succeed and 1 otherwise; the second job is not started if the first one fails.
 *
 * @param args {@code args[0]} input path, {@code args[1]} output path
 * @throws Exception if a job cannot be configured, submitted or monitored
 */
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Path mergedVotes = new Path(args[1] + "-tmp");

  Job job1 = new Job(conf, "combine votes");
  job1.setJarByClass(VoteCount.class);
  job1.setMapperClass(MergeFilesMapper.class);
  job1.setCombinerClass(MergedFilesReducer.class);
  job1.setReducerClass(MergedFilesReducer.class);
  job1.setOutputKeyClass(Text.class);
  job1.setOutputValueClass(Text.class);
  FileInputFormat.addInputPath(job1, new Path(args[0]));
  FileOutputFormat.setOutputPath(job1, mergedVotes);
  if (!job1.waitForCompletion(true)) {
    // Counting votes over an incomplete merge would give wrong totals.
    System.exit(1);
  }

  Job job2 = new Job(conf, "votes count");
  job2.setJarByClass(VoteCount.class);
  job2.setMapperClass(CalculateVotesMapper.class);
  job2.setCombinerClass(CalculateVotesReducer.class);
  job2.setReducerClass(CalculateVotesReducer.class);
  job2.setOutputKeyClass(Text.class);
  job2.setOutputValueClass(IntWritable.class);
  FileInputFormat.addInputPath(job2, mergedVotes);
  FileOutputFormat.setOutputPath(job2, new Path(args[1]));
  System.exit(job2.waitForCompletion(true) ? 0 : 1);
}
public int run(String[] args) throws Exception { Configuration conf = getConf(); GenericOptionsParser optionparser = new GenericOptionsParser(conf, args); conf = optionparser.getConfiguration(); Job job = new Job(conf, conf.get("job_name")); job.setJarByClass(DeliverFormatForUVMR.class); FileInputFormat.addInputPaths(job, conf.get("input_dir")); String outputDir = conf.get("output_dir"); String tmpDir = outputDir + "_tmp"; Path tmpOut = new Path(tmpDir); FileOutputFormat.setOutputPath(job, tmpOut); tmpOut.getFileSystem(conf).delete(tmpOut, true); job.setMapperClass(DeliverFormatForUVMapper.class); job.setCombinerClass(DeliverFormatForUVCombiner.class); job.setReducerClass(DeliverFormatForUVReducer.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setNumReduceTasks(conf.getInt("reduce_num", 20)); int code = job.waitForCompletion(true) ? 0 : 1; if (code == 0) { // this job is for combining small files into one Job combineJob = new Job(conf, "CombineTmpData"); combineJob.setJarByClass(DeliverFormatForUVMR.class); FileInputFormat.addInputPath(combineJob, new Path(tmpDir)); FileOutputFormat.setOutputPath(combineJob, new Path(outputDir)); combineJob.setMapperClass(IdentityMapper.class); combineJob.setReducerClass(IdentityReducer.class); combineJob.setInputFormatClass(KeyValueTextInputFormat.class); combineJob.setOutputFormatClass(TextOutputFormat.class); combineJob.setOutputKeyClass(Text.class); combineJob.setOutputValueClass(Text.class); TextOutputFormat.setCompressOutput(combineJob, true); TextOutputFormat.setOutputCompressorClass(combineJob, LzopCodec.class); combineJob.setNumReduceTasks(1); code = combineJob.waitForCompletion(true) ? 0 : 1; } FileSystem.get(conf).delete(tmpOut, true); LzoIndexer lzoIndexer = new LzoIndexer(conf); lzoIndexer.index(new Path(outputDir)); System.exit(code); return code; }
/**
 * Runs the "ClientUserInstallMR" job and, if it succeeds, the "ClientUserInstallResult" job.
 *
 * <p>{@code input_dir} and {@code output_dir} are read from the configuration after generic
 * options are parsed. The first job writes to {@code output_dir + "_tmp"}; the second reads
 * that directory and writes to {@code output_dir}. Each output directory is deleted before
 * its job runs and the temporary directory is deleted again at the end.
 *
 * <p>The second job is created from the same configuration object, so every key already set
 * on it (including {@code stat_date}) is passed along without being re-set.
 *
 * <p>Note: this method calls {@code System.exit} with the result code before returning.
 *
 * @param args command-line arguments, passed to {@code GenericOptionsParser}
 * @return 0 on success, 1 on failure (only reachable if {@code System.exit} returns)
 * @throws Exception if a job or a filesystem operation fails
 */
public int run(String[] args) throws Exception {
  Configuration conf = getConf();
  GenericOptionsParser gop = new GenericOptionsParser(conf, args);
  conf = gop.getConfiguration();

  Job job = new Job(conf, "ClientUserInstallMR");
  job.setJarByClass(ClientUserInstallMR.class);
  FileInputFormat.addInputPaths(job, conf.get("input_dir"));

  String outputDir = conf.get("output_dir");
  String tmpDir = outputDir + "_tmp";
  Path tmpOutput = new Path(tmpDir);
  FileOutputFormat.setOutputPath(job, tmpOutput);
  tmpOutput.getFileSystem(conf).delete(tmpOutput, true);

  job.setMapperClass(ClientUserInstallFirstMapper.class);
  job.setReducerClass(ClientUserInstallFirstReduce.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(30);

  int code = job.waitForCompletion(true) ? 0 : 1;
  if (code == 0) {
    // The former conf.set("stat_date", conf.get("stat_date")) was a self-assignment that
    // changed nothing and threw when the key was unset, so it has been removed.
    Job secondJob = new Job(conf, "ClientUserInstallResult");
    secondJob.setJarByClass(ClientUserInstallMR.class);
    FileInputFormat.addInputPath(secondJob, new Path(tmpDir));

    Path output = new Path(outputDir);
    FileOutputFormat.setOutputPath(secondJob, output);
    output.getFileSystem(conf).delete(output, true);

    secondJob.setMapperClass(ClientUserInstallSecondMapper.class);
    secondJob.setReducerClass(ClientUserInstallSecondReduce.class);
    secondJob.setInputFormatClass(KeyValueTextInputFormat.class);
    secondJob.setOutputFormatClass(TextOutputFormat.class);
    secondJob.setOutputKeyClass(Text.class);
    secondJob.setOutputValueClass(Text.class);
    secondJob.setNumReduceTasks(1);
    code = secondJob.waitForCompletion(true) ? 0 : 1;
  }

  // Resolve the filesystem from the path itself, as the initial delete does.
  tmpOutput.getFileSystem(conf).delete(tmpOutput, true);
  System.exit(code);
  return code;
}
/**
 * Configures and runs the running-aggregates job.
 *
 * <p>Reads from {@code args[0]} and writes to {@code args[1]}. The reducer count comes from
 * the {@code num.reducer} setting (default 1), read after
 * {@code Utility.setConfiguration} has been applied to the job configuration.
 *
 * @param args {@code args[0]} input path, {@code args[1]} output path
 * @return 0 if the job succeeded, 1 otherwise
 * @throws Exception if the job cannot be configured, submitted or monitored
 */
@Override
public int run(String[] args) throws Exception {
  Job aggrJob = new Job(getConf());
  aggrJob.setJobName("Running aggregates for numerical attributes");
  aggrJob.setJarByClass(RunningAggregator.class);

  Path inputPath = new Path(args[0]);
  Path outputPath = new Path(args[1]);
  FileInputFormat.addInputPath(aggrJob, inputPath);
  FileOutputFormat.setOutputPath(aggrJob, outputPath);

  Utility.setConfiguration(aggrJob.getConfiguration(), "chombo");

  aggrJob.setMapperClass(RunningAggregator.AggrMapper.class);
  aggrJob.setReducerClass(RunningAggregator.AggrReducer.class);

  aggrJob.setMapOutputKeyClass(Tuple.class);
  aggrJob.setMapOutputValueClass(Tuple.class);
  aggrJob.setOutputKeyClass(NullWritable.class);
  aggrJob.setOutputValueClass(Text.class);

  int reducerCount = aggrJob.getConfiguration().getInt("num.reducer", 1);
  aggrJob.setNumReduceTasks(reducerCount);

  if (aggrJob.waitForCompletion(true)) {
    return 0;
  }
  return 1;
}
/**
 * Configures and runs the "int key replace phase1" job with a single reducer.
 *
 * <p>The process exits with status 1 if the job reports failure, so callers and scripts can
 * detect it; on success the method returns normally.
 *
 * @param args exactly two arguments: the bit-vector input path and the output path
 * @throws IllegalArgumentException if the number of arguments is not 2
 * @throws IOException if the job cannot be configured or submitted
 * @throws InterruptedException propagated from {@code waitForCompletion}
 * @throws ClassNotFoundException propagated from {@code waitForCompletion}
 */
public static void main(String[] args)
    throws IOException, InterruptedException, ClassNotFoundException {
  if (args.length != 2) {
    throw new IllegalArgumentException(args.length + " usage: ... ");
  }
  String bitvectorpath = args[0];
  String outputPath = args[1];

  Configuration conf = new Configuration();
  Job job = Job.getInstance(new Cluster(conf), conf);
  job.setJobName("int key replace phase1");
  job.setJarByClass(OutlinkGrowthAnalysis.class);

  job.setMapperClass(BVIdentitiyMapper.class);
  job.setReducerClass(AnaylseOLGrowthReducer.class);

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  job.setInputFormatClass(TabSeperatedTextInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(outputPath));
  FileInputFormat.setInputPaths(job, new Path(bitvectorpath));

  job.setNumReduceTasks(1);

  // Surface job failure through the exit status instead of silently ignoring it.
  if (!job.waitForCompletion(true)) {
    System.exit(1);
  }
}
private boolean runJob(Configuration conf) throws IOException, InterruptedException, ClassNotFoundException { Job job = Job.getInstance(conf, "WordCount"); job.setJarByClass(WordCount.class); // Configure input format and files job.setInputFormatClass(TextInputFormat.class); FileInputFormat.addInputPath(job, new Path(inputDir)); // Configure output format and files job.setOutputFormatClass(TextOutputFormat.class); FileOutputFormat.setOutputPath(job, new Path(outputDir)); // set up mapper, combiner and reducer job.setMapperClass(WordCountMapper.class); job.setReducerClass(WordCountReducer.class); job.setCombinerClass(WordCountReducer.class); // set sorting, grouping and partitioning // set key and value types job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); return job.waitForCompletion(true); }
/**
 * Runs the "reasoner" job and prints the value of its {@code REDUCE_OUTPUT_RECORDS} counter.
 *
 * <p>After generic options are stripped, the first remaining argument is used as the input
 * path and the second as the output path. If fewer than two such arguments are present the
 * usage line is printed and the method returns; previously only {@code args.length < 1} was
 * checked, so a missing path ended in an {@code ArrayIndexOutOfBoundsException}.
 *
 * @param args generic Hadoop options plus the input and output paths
 * @throws IOException if the job cannot be configured or submitted
 * @throws InterruptedException propagated from {@code waitForCompletion}
 * @throws ClassNotFoundException propagated from {@code waitForCompletion}
 */
public static void main(String[] args)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

  // Validate the array that is actually indexed below.
  if (otherArgs.length < 2) {
    System.out.println("USAGE: RFDSReasoner [pool path] [options]");
    return;
  }

  Job job = new Job(conf, "reasoner");
  job.setJarByClass(TCMReasoner.class);
  System.out.println(args[0]);

  job.setMapperClass(TCMMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Triple.class);

  job.setReducerClass(TCMReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Triple.class);

  FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

  job.waitForCompletion(true);

  Counter derivedTriples = job.getCounters()
      .findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS");
  System.out.println(derivedTriples.getValue());
}
/**
 * Configures and runs the log-formatting job with a single reducer.
 *
 * <p>{@code job_name}, {@code input_dir} and {@code output_dir} are read from the
 * configuration after generic options are parsed. The output directory is deleted
 * recursively before the job starts.
 *
 * @param args command-line arguments, passed to {@code GenericOptionsParser}
 * @return 0 if the job succeeded, 1 otherwise
 * @throws Exception if the job or the filesystem operation fails
 */
public int run(String[] args) throws Exception {
  Configuration settings = new GenericOptionsParser(getConf(), args).getConfiguration();

  Job formatJob = new Job(settings, settings.get("job_name"));
  FileInputFormat.addInputPaths(formatJob, settings.get("input_dir"));

  // Point the job at the output directory, then clear any previous contents.
  Path outputDir = new Path(settings.get("output_dir"));
  FileOutputFormat.setOutputPath(formatJob, outputDir);
  outputDir.getFileSystem(settings).delete(outputDir, true);

  formatJob.setJarByClass(BrowerLogFormatMR.class);
  formatJob.setMapperClass(BrowerLogFormatMapper.class);
  formatJob.setReducerClass(BrowerLogFormatReducer.class);

  formatJob.setInputFormatClass(TextInputFormat.class);
  formatJob.setOutputFormatClass(TextOutputFormat.class);
  formatJob.setOutputKeyClass(NullWritable.class);
  formatJob.setOutputValueClass(Text.class);
  formatJob.setNumReduceTasks(1);

  if (formatJob.waitForCompletion(true)) {
    return 0;
  }
  return 1;
}
public int run(String[] args) throws Exception { // Check input arguments if (args.length != 2) { System.out.println("Usage: firstprog <input HIB> <output directory>"); System.exit(0); } // Initialize and configure MapReduce job Job job = Job.getInstance(); // Set input format class which parses the input HIB and spawns map tasks // job.setInputFormatClass(ImageBundleInputFormat.class); job.setInputFormatClass(HibInputFormat.class); // Set the driver, mapper, and reducer classes which express the computation job.setJarByClass(SampleProgram.class); job.setMapperClass(SampleProgramMapper.class); job.setReducerClass(SampleProgramReducer.class); // Set the types for the key/value pairs passed to/from map and reduce layers job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(FloatImage.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(Text.class); // Set the input and output paths on the HDFS FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); // Execute the MapReduce job and block until it complets boolean success = job.waitForCompletion(true); // Return success or failure return success ? 0 : 1; }
/**
 * Configures and runs the "OSM-Gridding" job.
 *
 * <p>Requires at least seven arguments, otherwise the JVM exits with status -1. Arguments 2-5
 * are stored under the {@code OSMMapper} bounding-box keys and argument 6 under
 * {@code OSMReducer.GRID}; the number of reduce tasks is argument 6 squared.
 *
 * @param args input path, output path, min lat, min lon, max lat, max lon, grid size
 * @return 0 if the job succeeded, 1 otherwise
 * @throws Exception if the job cannot be configured, submitted or monitored
 */
@Override
public int run(String[] args) throws Exception {
  if (args.length < 7) {
    System.exit(-1);
  }

  Job gridJob = Job.getInstance(new Configuration(), "OSM-Gridding");
  gridJob.setJarByClass(OSMGrid.class);

  // Processing classes.
  gridJob.setMapperClass(OSMMapper.class);
  gridJob.setPartitionerClass(GridPartitioner.class);
  gridJob.setReducerClass(OSMReducer.class);

  // Output types and text formats on both sides.
  gridJob.setOutputKeyClass(WritablePoint.class);
  gridJob.setOutputValueClass(LongWritable.class);
  gridJob.setInputFormatClass(TextInputFormat.class);
  gridJob.setOutputFormatClass(TextOutputFormat.class);

  FileInputFormat.addInputPath(gridJob, new Path(args[0]));
  FileOutputFormat.setOutputPath(gridJob, new Path(args[1]));

  // Pass the bounding box and grid size on through the job configuration.
  Configuration jobSettings = gridJob.getConfiguration();
  jobSettings.set(OSMMapper.MINLAT, args[2]);
  jobSettings.set(OSMMapper.MINLON, args[3]);
  jobSettings.set(OSMMapper.MAXLAT, args[4]);
  jobSettings.set(OSMMapper.MAXLON, args[5]);
  jobSettings.set(OSMReducer.GRID, args[6]);

  int gridSize = Integer.parseInt(args[6]);
  gridJob.setNumReduceTasks(gridSize * gridSize);

  if (gridJob.waitForCompletion(true)) {
    return 0;
  }
  return 1;
}
@Override public int run(String[] args) throws Exception { String instance = args[0]; String zookeepers = args[1]; String user = args[2]; String tokenFile = args[3]; String input = args[4]; String tableName = args[5]; Job job = Job.getInstance(getConf()); job.setJobName(TokenFileWordCount.class.getName()); job.setJarByClass(this.getClass()); job.setInputFormatClass(TextInputFormat.class); TextInputFormat.setInputPaths(job, input); job.setMapperClass(MapClass.class); job.setNumReduceTasks(0); job.setOutputFormatClass(AccumuloOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Mutation.class); // AccumuloInputFormat not used here, but it uses the same functions. AccumuloOutputFormat.setZooKeeperInstance( job, ClientConfiguration.loadDefault().withInstance(instance).withZkHosts(zookeepers)); AccumuloOutputFormat.setConnectorInfo(job, user, tokenFile); AccumuloOutputFormat.setCreateTables(job, true); AccumuloOutputFormat.setDefaultTableName(job, tableName); job.waitForCompletion(true); return 0; }
/**
 * Configures and runs the "Levelized Nested Dissection Starting" job.
 *
 * <p>Re-initialises the shared {@code conf}, {@code fs} and {@code job} fields, reads from
 * {@code out/<outPath_count>} and writes to {@code out/<outPath_start>}, deleting the latter
 * first if it exists. Afterwards {@code depth} is bumped from 0 to 1 (left unchanged
 * otherwise) and {@code wasStart} is set. The job's success flag is not inspected.
 *
 * @throws IOException if the job or a filesystem operation fails
 * @throws InterruptedException propagated from {@code waitForCompletion}
 * @throws ClassNotFoundException propagated from {@code waitForCompletion}
 */
private static void StartingJob()
    throws IOException, InterruptedException, ClassNotFoundException {
  conf = new Configuration();
  fs = FileSystem.get(conf);
  conf.setLong("my.vertex.num", num);

  job = Job.getInstance(conf, "Levelized Nested Dissection Starting");
  job.setJarByClass(LevNestDissectJob.class);
  job.setMapperClass(StartVertexMapper.class);
  job.setReducerClass(StartVertexReducer.class);

  // Sequence files on both sides; the map output value type differs from the final one.
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(VertexWritable.class);
  job.setMapOutputValueClass(Text.class);

  in = out.suffix("/" + outPath_count);
  FileInputFormat.addInputPath(job, in);

  // Clear a previous run's output before pointing the job at it.
  out_start = out.suffix("/" + outPath_start);
  if (fs.exists(out_start)) {
    fs.delete(out_start, true);
  }
  FileOutputFormat.setOutputPath(job, out_start);

  job.waitForCompletion(true);

  if (depth == 0) {
    depth = depth + 1;
  }
  wasStart = true;
}
public void testSequenceOutputClassDefaultsToMapRedOutputClass() throws IOException { Job job = new Job(); // Setting Random class to test getSequenceFileOutput{Key,Value}Class job.setOutputKeyClass(FloatWritable.class); job.setOutputValueClass(BooleanWritable.class); assertEquals( "SequenceFileOutputKeyClass should default to ouputKeyClass", FloatWritable.class, SequenceFileAsBinaryOutputFormat.getSequenceFileOutputKeyClass(job)); assertEquals( "SequenceFileOutputValueClass should default to " + "ouputValueClass", BooleanWritable.class, SequenceFileAsBinaryOutputFormat.getSequenceFileOutputValueClass(job)); SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(job, IntWritable.class); SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(job, DoubleWritable.class); assertEquals( "SequenceFileOutputKeyClass not updated", IntWritable.class, SequenceFileAsBinaryOutputFormat.getSequenceFileOutputKeyClass(job)); assertEquals( "SequenceFileOutputValueClass not updated", DoubleWritable.class, SequenceFileAsBinaryOutputFormat.getSequenceFileOutputValueClass(job)); }
/**
 * Runs the "wordcount" job with four reducers.
 *
 * <p>After generic options are stripped, exactly two arguments must remain; otherwise a usage
 * line is printed and the process exits with status 1. The output path is deleted
 * recursively before the job runs. The process exits with 0 on success and 1 on failure.
 *
 * @param args generic Hadoop options followed by the input and output paths
 * @throws Exception if the job or the filesystem operation fails
 */
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] paths = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (paths.length != 2) {
    System.err.println("Usage: wordcount <input> <output>");
    System.exit(1);
  }

  Job wordCount = new Job(conf, "wordcount");
  wordCount.setJarByClass(WordCount.class);

  // The reducer doubles as the combiner.
  wordCount.setMapperClass(Map.class);
  wordCount.setCombinerClass(Reduce.class);
  wordCount.setReducerClass(Reduce.class);
  wordCount.setNumReduceTasks(4);

  wordCount.setOutputKeyClass(Text.class);
  wordCount.setOutputValueClass(IntWritable.class);
  wordCount.setInputFormatClass(TextInputFormat.class);
  wordCount.setOutputFormatClass(TextOutputFormat.class);

  // Remove output left over from an earlier run.
  FileSystem.get(conf).delete(new Path(paths[1]), true);

  FileInputFormat.setInputPaths(wordCount, new Path(paths[0]));
  FileOutputFormat.setOutputPath(wordCount, new Path(paths[1]));

  boolean succeeded = wordCount.waitForCompletion(true);
  System.exit(succeeded ? 0 : 1);
}
/** * Create a map-only Hadoop Job out of the passed in parameters. Does not set the * Job name. * * @see #getCustomJobName(String, org.apache.hadoop.mapreduce.JobContext, Class, Class) */ @SuppressWarnings("rawtypes") public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat, Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey, Class<? extends Writable> mapperValue, Class<? extends OutputFormat> outputFormat, Configuration conf) throws IOException { //Job job = new Job(new Configuration(conf)); Job job = Job.getInstance(conf); Configuration jobConf = job.getConfiguration(); if (mapper.equals(Mapper.class)) { throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer"); } job.setJarByClass(mapper); job.setInputFormatClass(inputFormat); jobConf.set("mapred.input.dir", inputPath.toString()); job.setMapperClass(mapper); job.setMapOutputKeyClass(mapperKey); job.setMapOutputValueClass(mapperValue); job.setOutputKeyClass(mapperKey); job.setOutputValueClass(mapperValue); jobConf.setBoolean("mapred.compress.map.output", true); job.setNumReduceTasks(0); job.setOutputFormatClass(outputFormat); jobConf.set("mapred.output.dir", outputPath.toString()); return job; }
/**
 * Prints every configuration entry to standard output, then builds and runs the
 * max-temperature job with counters.
 *
 * <p>The job is obtained from {@code JobBuilder.parseInputAndOutput}; when that returns
 * {@code null} this method returns -1 without running anything.
 *
 * @param args command-line arguments handed to {@code JobBuilder.parseInputAndOutput}
 * @return -1 if no job could be built, 0 if the job succeeded, 1 if it failed
 * @throws Exception if the job cannot be configured, submitted or monitored
 */
@Override
public int run(String[] args) throws Exception {
  System.out.println("-------------Printing configuration-------------------");
  Configuration currentConf = getConf();
  for (Entry<String, String> setting : currentConf) {
    String key = setting.getKey();
    String value = setting.getValue();
    System.out.printf("%s=%s\n", key, value);
  }
  System.out.println("-------------Printing configuration done--------------");

  Job temperatureJob = JobBuilder.parseInputAndOutput(this, getConf(), args);
  if (temperatureJob == null) {
    return -1;
  }

  // The reducer doubles as the combiner.
  temperatureJob.setMapperClass(MaxTemperatureMapperWithCounters.class);
  temperatureJob.setCombinerClass(MaxTemperatureReducer.class);
  temperatureJob.setReducerClass(MaxTemperatureReducer.class);

  temperatureJob.setOutputKeyClass(Text.class);
  temperatureJob.setOutputValueClass(IntWritable.class);

  boolean succeeded = temperatureJob.waitForCompletion(true);
  return succeeded ? 0 : 1;
}
/**
 * Builds the map-only job that scans one column family of an HBase table and writes text
 * output.
 *
 * <p>The row key type and table name are stored in {@code conf} under {@code row.key.type}
 * and {@code table.name}, and speculative execution is disabled for map and reduce tasks,
 * before the job is created from that configuration.
 *
 * @param conf configuration to update and create the job from
 * @param args table name, column family, output path, row key type
 * @return the configured, not yet submitted job
 * @throws IOException if the job cannot be created or initialised
 */
public static Job configureJob(Configuration conf, String[] args) throws IOException {
  final String table = args[0];
  final String family = args[1];
  final String outputDir = args[2];
  final String keyType = args[3];

  conf.set("row.key.type", keyType);
  conf.set("table.name", table);

  // Restrict the scan to the requested column family and batch its results.
  Scan familyScan = new Scan();
  familyScan.addFamily(Bytes.toBytes(family));
  familyScan.setBatch(ConstantsTruthy.TRUTHY_TABLE_SCAN_BATCH);

  conf.set("mapred.map.tasks.speculative.execution", "false");
  conf.set("mapred.reduce.tasks.speculative.execution", "false");

  String jobName = "Count the column count and indexRecordSize for each row in " + table;
  Job counterJob = Job.getInstance(conf, jobName);
  counterJob.setJarByClass(TruthyIndexFeatureCounter.class);
  TableMapReduceUtil.initTableMapperJob(
      table, familyScan, TfcMapper.class, Text.class, Text.class, counterJob, true);

  // Map-only job writing Text/Text pairs to the output path.
  counterJob.setNumReduceTasks(0);
  counterJob.setOutputKeyClass(Text.class);
  counterJob.setOutputValueClass(Text.class);
  FileOutputFormat.setOutputPath(counterJob, new Path(outputDir));

  TableMapReduceUtil.addDependencyJars(counterJob);
  return counterJob;
}
@Override public int run(String[] args) throws Exception { final int ret = parseArgs(args); if (ret < 0) { return ret; } Job job = Job.getInstance(getConf()); job.setJarByClass(GreeDiFirst.class); job.setJobName(String.format("Coverage-GreeDiFirst[%s %s]", partitionCount, selectCount)); job.getConfiguration().setInt(Fields.PARTITION_COUNT.get(), partitionCount); job.getConfiguration().setInt(Fields.SELECT_COUNT.get(), selectCount); job.setNumReduceTasks(partitionCount); SetupHelper.getInstance().setSequenceInput(job, inputPath).setTextOutput(job, outputPath); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(DocumentWithVectorWritable.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(IntWritable.class); job.setMapperClass(Map.class); job.setReducerClass(GreeDiReducer.class); // Delete the output directory if it exists already. FileSystem.get(getConf()).delete(new Path(outputPath), true); job.waitForCompletion(true); return 0; }
public static void main(String[] args) throws Exception { /// Create JOB 1 to convert all the flight data in to NODE : Graph Structure. Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 3) { System.err.println("Usage: hubsandspokesload <in> <out> <finalout>"); System.exit(2); } Job job = new Job(conf, "hubsandspokesload"); job.setJarByClass(HubsAndSpokes.class); job.setMapperClass(HubSpokeLoadMapper.class); job.setReducerClass(HubSpokeLoadReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(NodeWritable.class); job.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); boolean b = job.waitForCompletion(true); if (!b) { System.exit(2); } /// Call Job 2 where we perform HITS Algorithm to calculate Hub and Spoke /// Value at each Node in the graph iteratively. dijkstra(otherArgs[1], otherArgs[2]); // dijkstra("output1", "finaloutput"); }
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length < 2) { System.err.println("Usage: RemoveDup <in> [<in>...] <out>"); System.exit(2); } // 删除输出目录(可选,省得多次运行时,总是报OUTPUT目录已存在) // HDFSUtil.deleteFile(conf, otherArgs[otherArgs.length - 1]); Job job = Job.getInstance(conf, "RemoveDup"); job.setJarByClass(RemoveDup.class); job.setMapperClass(RemoveDupMapper.class); job.setCombinerClass(RemoveDupReducer.class); job.setReducerClass(RemoveDupReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(NullWritable.class); for (int i = 0; i < otherArgs.length - 1; ++i) { FileInputFormat.addInputPath(job, new Path(otherArgs[i])); } FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
public static void dijkstra(String input, String output) throws Exception { String temp = output; /// Run HITS Algorithm JOB:2 For 32 Times /// Setting the Value of k-> 32 for (int i = 0; i < 32; i++) { Configuration conf = new Configuration(); Job job = new Job(conf, "hubsandspokes"); job.setJarByClass(HubsAndSpokes.class); job.setMapperClass(HubSpokeMapper.class); job.setReducerClass(HubSpokeReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(NodeWritable.class); job.setOutputKeyClass(NodeWritable.class); job.setOutputValueClass(Text.class); job.setNumReduceTasks(1); FileInputFormat.addInputPath(job, new Path(input)); FileOutputFormat.setOutputPath(job, new Path(output)); // Toggle the value of Input and Output variable // For Next iteration input = output; output = temp + Integer.toString(i); // Wait for completing the JOB boolean b = job.waitForCompletion(true); if (!b) System.exit(2); // System.exit(job.waitForCompletion(true) ? 0 : 1); } }
public static void main(String[] args) throws Exception { String paths = "/user/cloudera/00"; String path1 = paths; String path2 = ""; for (int i = 1; i <= 3; i++) { System.out.println("Now exectuing the " + i + "-th job!"); Job job = new Job(); path2 = paths + i; job.setJarByClass(PageRank.class); job.setJobName("PageRank"); path2 = paths + i; FileInputFormat.addInputPath(job, new Path(path1)); FileOutputFormat.setOutputPath(job, new Path(path2)); job.setMapperClass(PageRankMapper.class); job.setReducerClass(PageRankReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); path1 = path2; job.waitForCompletion(true); } // System.exit(job.waitForCompletion(true) ? 0 : 1); }
/**
 * Builds the "pivoting" job.
 *
 * <p>Input is read as sequence files from {@code <thrax.work-dir>collected} and compressed
 * sequence-file output is written to {@code <thrax.work-dir>pivoted}. A non-zero
 * {@code thrax.max-split-size} caps the input split size, and {@code thrax.reducers}
 * (default 4) sets the number of reduce tasks.
 *
 * @param conf configuration the job is created from and the settings are read from
 * @return the configured, not yet submitted job
 * @throws IOException if the job cannot be created
 */
public Job getJob(Configuration conf) throws IOException {
  Job pivotJob = new Job(conf, "pivoting");
  pivotJob.setJarByClass(PivotingReducer.class);

  // Base Mapper class on the map side; the pivoting happens in the reducer.
  pivotJob.setMapperClass(Mapper.class);
  pivotJob.setReducerClass(PivotingReducer.class);
  pivotJob.setPartitionerClass(RuleWritable.SourcePartitioner.class);

  pivotJob.setInputFormatClass(SequenceFileInputFormat.class);
  pivotJob.setOutputFormatClass(SequenceFileOutputFormat.class);

  pivotJob.setMapOutputKeyClass(RuleWritable.class);
  pivotJob.setMapOutputValueClass(MapWritable.class);
  pivotJob.setOutputKeyClass(RuleWritable.class);
  pivotJob.setOutputValueClass(MapWritable.class);

  Path collected = new Path(conf.get("thrax.work-dir") + "collected");
  FileInputFormat.setInputPaths(pivotJob, collected);

  // A value of 0 means no explicit split size limit is applied.
  int splitLimit = conf.getInt("thrax.max-split-size", 0);
  if (splitLimit != 0) {
    FileInputFormat.setMaxInputSplitSize(pivotJob, splitLimit);
  }

  pivotJob.setNumReduceTasks(conf.getInt("thrax.reducers", 4));

  Path pivoted = new Path(conf.get("thrax.work-dir") + "pivoted");
  FileOutputFormat.setOutputPath(pivotJob, pivoted);
  FileOutputFormat.setCompressOutput(pivotJob, true);

  return pivotJob;
}
public static void main(String[] args) throws Exception { sourcePhoto = "/home/hduser/workspace/images/source.jpg"; sourceFingerprint = SimilarImageSearch.produceFingerPrint(sourcePhoto); final Configuration conf = new Configuration(); MongoConfigUtil.setInputURI(conf, "mongodb://localhost/photo.fingerprint"); MongoConfigUtil.setOutputURI(conf, "mongodb://localhost/photo.handsomeOut"); System.out.println("Conf: " + conf); final Job job = new Job(conf, "similar photo"); job.setJarByClass(MdbSimilarPhoto.class); // Mapper,Reduce and Combiner type definition job.setMapperClass(PhotoMapper.class); job.setCombinerClass(SimilarityReducer.class); job.setReducerClass(SimilarityReducer.class); // output key/value type definition job.setOutputKeyClass(Text.class); job.setOutputValueClass(DoubleWritable.class); // InputFormat and OutputFormat type definition job.setInputFormatClass(MongoInputFormat.class); job.setOutputFormatClass(MongoOutputFormat.class); System.exit(job.waitForCompletion(true) ? 0 : 1); }
/**
 * Configures and runs the rating predictor job.
 *
 * <p>Reads from the path list in {@code args[0]} and writes to {@code args[1]}. The reducer
 * count is taken from {@code utp.num.reducer}; when that is absent (or -1) it falls back to
 * {@code num.reducer}, defaulting to 1. Both are read after {@code Utility.setConfiguration}
 * has been applied to the job configuration.
 *
 * @param args {@code args[0]} input paths, {@code args[1]} output path
 * @return 0 if the job succeeded, 1 otherwise
 * @throws Exception if the job cannot be configured, submitted or monitored
 */
@Override
public int run(String[] args) throws Exception {
  Job predictorJob = new Job(getConf());
  predictorJob.setJobName("Rating predictor MR");
  predictorJob.setJarByClass(UtilityPredictor.class);

  FileInputFormat.addInputPaths(predictorJob, args[0]);
  FileOutputFormat.setOutputPath(predictorJob, new Path(args[1]));

  predictorJob.setMapperClass(UtilityPredictor.PredictionMapper.class);
  predictorJob.setReducerClass(UtilityPredictor.PredictorReducer.class);

  predictorJob.setMapOutputKeyClass(TextInt.class);
  predictorJob.setMapOutputValueClass(Tuple.class);
  predictorJob.setOutputKeyClass(NullWritable.class);
  predictorJob.setOutputValueClass(Text.class);

  // Custom grouping and partitioning for the composite map output key.
  predictorJob.setGroupingComparatorClass(ItemIdGroupComprator.class);
  predictorJob.setPartitionerClass(ItemIdPartitioner.class);

  Utility.setConfiguration(predictorJob.getConfiguration());

  // Job-specific reducer count first, generic setting as the fallback.
  int reducerCount = predictorJob.getConfiguration().getInt("utp.num.reducer", -1);
  if (reducerCount == -1) {
    reducerCount = predictorJob.getConfiguration().getInt("num.reducer", 1);
  }
  predictorJob.setNumReduceTasks(reducerCount);

  if (predictorJob.waitForCompletion(true)) {
    return 0;
  }
  return 1;
}
@Override protected void configureJob(Job job) throws IOException { Configuration conf = job.getConfiguration(); job.setJarByClass(PartialBuilder.class); FileInputFormat.setInputPaths(job, getDataPath()); FileOutputFormat.setOutputPath(job, getOutputPath(conf)); job.setOutputKeyClass(TreeID.class); job.setOutputValueClass(MapredOutput.class); job.setMapperClass(Step1Mapper.class); job.setNumReduceTasks(0); // no reducers job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); // For this implementation to work, mapred.map.tasks needs to be set to the actual // number of mappers Hadoop will use: TextInputFormat inputFormat = new TextInputFormat(); List<?> splits = inputFormat.getSplits(job); if (splits == null || splits.isEmpty()) { log.warn("Unable to compute number of splits?"); } else { int numSplits = splits.size(); log.info("Setting mapred.map.tasks = {}", numSplits); conf.setInt("mapred.map.tasks", numSplits); } }
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); Job job = new Job(conf, "ESIndexCreator"); job.setJarByClass(ESIndexCreator.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setMapperClass(ESIndexCreator.MyMapper.class); job.setNumReduceTasks(0); // Skip Reduce Task job.setInputFormatClass(KeyValueTextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); // 프로그램 인자 // 0: 입력 파일 경로 // 1: 출력 파일 경로 // 2: elastic search server's host name FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.getConfiguration().set("host", args[2]); job.waitForCompletion(true); }