public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); // conf.addResource(new Path("/home/hadoop/hadoop/hadoop-1.2.1/conf/core-site.xml")); // conf.addResource(new Path("/home/hadoop/hadoop/hadoop-1.2.1/conf/hdfs-site.xml")); // ===== Stage 1 ===== Job job1 = new Job(conf, "Stage 1: Frequency Count"); job1.setJarByClass(HashCount1.class); job1.setMapperClass(Mapper1.class); // job1.setCombinerClass(Combine1.class); job1.setReducerClass(Reducer1.class); job1.setOutputKeyClass(Text.class); job1.setOutputValueClass(IntWritable.class); job1.setNumReduceTasks(1); FileInputFormat.addInputPath(job1, new Path("/TwitterInFiles/twitter_10k.tar.gz")); FileOutputFormat.setOutputPath(job1, new Path("/twitterOuts/output1")); job1.waitForCompletion(true); // ===== Stage 2 ===== Job job2 = new Job(conf, "Stage 2: Sort"); job2.setJarByClass(HashCount1.class); job2.setMapperClass(Mapper2.class); // job1.setCombinerClass(IntSumReducer.class); job2.setReducerClass(Reducer2.class); job2.setOutputKeyClass(IntWritable.class); job2.setOutputValueClass(Text.class); job2.setNumReduceTasks(1); FileInputFormat.addInputPath(job2, new Path("/twitterOuts/output1")); FileOutputFormat.setOutputPath(job2, new Path("/twitterOuts/output2")); job2.waitForCompletion(true); System.exit(job2.waitForCompletion(true) ? 0 : 1); }
/**
 * Runs two chained MapReduce jobs ("job" then "job1").
 *
 * <p>The first job reads {@code args[0]} and writes to the fixed intermediate directory
 * {@code /tmp/temporary_execution/}; the second job reads that directory and writes to
 * {@code args[1]}. If either job fails the JVM exits with status 1, and the second job is not
 * started when the first one fails.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the final output path
 * @throws IOException if job setup or submission fails
 * @throws ClassNotFoundException if a job class cannot be resolved
 * @throws InterruptedException if waiting for a job is interrupted
 */
public static void main(String[] args)
    throws IOException, ClassNotFoundException, InterruptedException {
  Configuration conf = new Configuration();
  Job job = new Job(conf, "job");
  job.setJarByClass(PVidConvert.class);
  job.setMapperClass(Map1.class);
  job.setReducerClass(Reduce1.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path("/tmp/temporary_execution/"));
  if (!job.waitForCompletion(true)) {
    // The second job consumes this job's output; running it after a failure is pointless.
    System.exit(1);
  }

  Configuration conf1 = new Configuration();
  Job job1 = new Job(conf1, "job1");
  job1.setJarByClass(PVidConvert.class);
  job1.setMapperClass(Map2.class);
  job1.setReducerClass(Reduce2.class);
  job1.setOutputKeyClass(Text.class);
  job1.setOutputValueClass(Text.class);
  FileInputFormat.addInputPath(job1, new Path("/tmp/temporary_execution/"));
  FileOutputFormat.setOutputPath(job1, new Path(args[1]));
  if (!job1.waitForCompletion(true)) {
    System.exit(1);
  }
}
@Override public int run(String[] args) throws Exception { // TODO Auto-generated method stub Configuration conf = getConf(); // conf.set("stat_date", dateString); Job job = new Job(conf, "DayhslogUserDateNewMac"); job.setJarByClass(DayhslogUserDate.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileInputFormat.addInputPath(job, new Path(args[1])); FileOutputFormat.setOutputPath(job, new Path(args[2] + "tmp")); FileSystem.get(conf).delete(new Path(args[2] + "tmp"), true); job.setMapperClass(DayhslogUserDateNewMacMapper.class); job.setReducerClass(DayhslogUserDateNewMacReducer.class); // job.setInputFormatClass(LzoTextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setNumReduceTasks(10); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); int code = job.waitForCompletion(true) ? 0 : 1; if (code == 0) { Job resultJob = new Job(conf, "DayhslogUserDate"); resultJob.setJarByClass(DayhslogUserDate.class); FileInputFormat.addInputPath(resultJob, new Path(args[0])); FileInputFormat.addInputPath(resultJob, new Path(args[2] + "tmp")); FileOutputFormat.setOutputPath(resultJob, new Path(args[2])); FileSystem.get(conf).delete(new Path(args[2]), true); resultJob.setMapperClass(DayhslogUserDateMapper.class); resultJob.setReducerClass(DayhslogUserDateReducer.class); resultJob.setNumReduceTasks(10); resultJob.setMapOutputKeyClass(Text.class); resultJob.setMapOutputValueClass(Text.class); resultJob.setOutputKeyClass(Text.class); resultJob.setOutputValueClass(Text.class); code = resultJob.waitForCompletion(true) ? 0 : 1; } Path tmpPath = new Path(args[2] + "tmp"); FileSystem.get(conf).delete(tmpPath, true); System.exit(code); return code; }
/**
 * Runs the "combine votes" job followed by the "votes count" job.
 *
 * <p>The first job reads {@code args[0]} and writes to {@code args[1] + "-tmp"}; the second
 * reads that intermediate directory and writes to {@code args[1]}. The JVM exits with status 0
 * when both jobs succeed and with status 1 as soon as either one fails.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the final output path
 * @throws Exception if job setup, submission or monitoring fails
 */
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();

  Job job1 = new Job(conf, "combine votes");
  job1.setJarByClass(VoteCount.class);
  job1.setMapperClass(MergeFilesMapper.class);
  job1.setCombinerClass(MergedFilesReducer.class);
  job1.setReducerClass(MergedFilesReducer.class);
  job1.setOutputKeyClass(Text.class);
  job1.setOutputValueClass(Text.class);
  FileInputFormat.addInputPath(job1, new Path(args[0]));
  FileOutputFormat.setOutputPath(job1, new Path(args[1] + "-tmp"));
  if (!job1.waitForCompletion(true)) {
    // The counting job depends on the merged output; do not run it on a failed merge.
    System.exit(1);
  }

  Job job2 = new Job(conf, "votes count");
  job2.setJarByClass(VoteCount.class);
  job2.setMapperClass(CalculateVotesMapper.class);
  job2.setCombinerClass(CalculateVotesReducer.class);
  job2.setReducerClass(CalculateVotesReducer.class);
  job2.setOutputKeyClass(Text.class);
  job2.setOutputValueClass(IntWritable.class);
  FileInputFormat.addInputPath(job2, new Path(args[1] + "-tmp"));
  FileOutputFormat.setOutputPath(job2, new Path(args[1]));
  System.exit(job2.waitForCompletion(true) ? 0 : 1);
}
public int run(String[] args) throws Exception { Configuration conf = getConf(); GenericOptionsParser optionparser = new GenericOptionsParser(conf, args); conf = optionparser.getConfiguration(); Job job = new Job(conf, conf.get("job_name")); job.setJarByClass(DeliverFormatForUVMR.class); FileInputFormat.addInputPaths(job, conf.get("input_dir")); String outputDir = conf.get("output_dir"); String tmpDir = outputDir + "_tmp"; Path tmpOut = new Path(tmpDir); FileOutputFormat.setOutputPath(job, tmpOut); tmpOut.getFileSystem(conf).delete(tmpOut, true); job.setMapperClass(DeliverFormatForUVMapper.class); job.setCombinerClass(DeliverFormatForUVCombiner.class); job.setReducerClass(DeliverFormatForUVReducer.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setNumReduceTasks(conf.getInt("reduce_num", 20)); int code = job.waitForCompletion(true) ? 0 : 1; if (code == 0) { // this job is for combining small files into one Job combineJob = new Job(conf, "CombineTmpData"); combineJob.setJarByClass(DeliverFormatForUVMR.class); FileInputFormat.addInputPath(combineJob, new Path(tmpDir)); FileOutputFormat.setOutputPath(combineJob, new Path(outputDir)); combineJob.setMapperClass(IdentityMapper.class); combineJob.setReducerClass(IdentityReducer.class); combineJob.setInputFormatClass(KeyValueTextInputFormat.class); combineJob.setOutputFormatClass(TextOutputFormat.class); combineJob.setOutputKeyClass(Text.class); combineJob.setOutputValueClass(Text.class); TextOutputFormat.setCompressOutput(combineJob, true); TextOutputFormat.setOutputCompressorClass(combineJob, LzopCodec.class); combineJob.setNumReduceTasks(1); code = combineJob.waitForCompletion(true) ? 0 : 1; } FileSystem.get(conf).delete(tmpOut, true); LzoIndexer lzoIndexer = new LzoIndexer(conf); lzoIndexer.index(new Path(outputDir)); System.exit(code); return code; }
static void configureIncrementalLoad( Job job, HTableDescriptor tableDescriptor, RegionLocator regionLocator, Class<? extends OutputFormat<?, ?>> cls) throws IOException, UnsupportedEncodingException { Configuration conf = job.getConfiguration(); job.setOutputKeyClass(ImmutableBytesWritable.class); job.setOutputValueClass(KeyValue.class); job.setOutputFormatClass(cls); // Based on the configured map output class, set the correct reducer to properly // sort the incoming values. // TODO it would be nice to pick one or the other of these formats. if (KeyValue.class.equals(job.getMapOutputValueClass())) { job.setReducerClass(KeyValueSortReducer.class); } else if (Put.class.equals(job.getMapOutputValueClass())) { job.setReducerClass(PutSortReducer.class); } else if (Text.class.equals(job.getMapOutputValueClass())) { job.setReducerClass(TextSortReducer.class); } else { LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass()); } conf.setStrings( "io.serializations", conf.get("io.serializations"), MutationSerialization.class.getName(), ResultSerialization.class.getName(), KeyValueSerialization.class.getName()); if (conf.getBoolean(LOCALITY_SENSITIVE_CONF_KEY, DEFAULT_LOCALITY_SENSITIVE)) { // record this table name for creating writer by favored nodes LOG.info("bulkload locality sensitive enabled"); conf.set(OUTPUT_TABLE_NAME_CONF_KEY, regionLocator.getName().getNameAsString()); } // Use table's region boundaries for TOP split points. 
LOG.info("Looking up current regions for table " + regionLocator.getName()); List<ImmutableBytesWritable> startKeys = getRegionStartKeys(regionLocator); LOG.info( "Configuring " + startKeys.size() + " reduce partitions " + "to match current region count"); job.setNumReduceTasks(startKeys.size()); configurePartitioner(job, startKeys); // Set compression algorithms based on column families configureCompression(conf, tableDescriptor); configureBloomType(tableDescriptor, conf); configureBlockSize(tableDescriptor, conf); configureDataBlockEncoding(tableDescriptor, conf); TableMapReduceUtil.addDependencyJars(job); TableMapReduceUtil.initCredentials(job); LOG.info("Incremental table " + regionLocator.getName() + " output configured."); }
/**
 * Runs the "ClientUserInstallMR" job and, on success, the "ClientUserInstallResult" job.
 *
 * <p>Configuration properties read: {@code input_dir} and {@code output_dir}. The first job
 * writes to {@code output_dir + "_tmp"} with 30 reducers; the second reads that directory and
 * writes to {@code output_dir} with a single reducer. Each output directory is deleted before
 * its job is submitted and the intermediate directory is deleted again at the end.
 *
 * <p>This method terminates the JVM through {@link System#exit(int)}; the trailing
 * {@code return} only satisfies the compiler.
 *
 * @param args command-line arguments, parsed by {@code GenericOptionsParser}
 * @return 0 if every job that ran succeeded, 1 otherwise
 * @throws Exception if job setup, submission or file-system access fails
 */
public int run(String[] args) throws Exception {
  Configuration conf = getConf();
  GenericOptionsParser gop = new GenericOptionsParser(conf, args);
  conf = gop.getConfiguration();

  Job job = new Job(conf, "ClientUserInstallMR");
  job.setJarByClass(ClientUserInstallMR.class);
  FileInputFormat.addInputPaths(job, conf.get("input_dir"));
  String outputDir = conf.get("output_dir");
  String tmpDir = outputDir + "_tmp";
  Path tmpOutput = new Path(tmpDir);
  FileOutputFormat.setOutputPath(job, tmpOutput);
  // Resolve the file system from the path itself so the initial and final deletes always
  // target the same file system, even for paths qualified with a non-default scheme.
  FileSystem tmpFs = tmpOutput.getFileSystem(conf);
  tmpFs.delete(tmpOutput, true);
  job.setMapperClass(ClientUserInstallFirstMapper.class);
  job.setReducerClass(ClientUserInstallFirstReduce.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(30);

  int code = job.waitForCompletion(true) ? 0 : 1;
  if (code == 0) {
    // The former conf.set("stat_date", conf.get("stat_date")) was a self-assignment made
    // after the job had been created, so it has been dropped.
    Job secondJob = new Job(conf, "ClientUserInstallResult");
    secondJob.setJarByClass(ClientUserInstallMR.class);
    FileInputFormat.addInputPath(secondJob, new Path(tmpDir));
    Path output = new Path(outputDir);
    FileOutputFormat.setOutputPath(secondJob, output);
    output.getFileSystem(conf).delete(output, true);
    secondJob.setMapperClass(ClientUserInstallSecondMapper.class);
    secondJob.setReducerClass(ClientUserInstallSecondReduce.class);
    secondJob.setInputFormatClass(KeyValueTextInputFormat.class);
    secondJob.setOutputFormatClass(TextOutputFormat.class);
    secondJob.setOutputKeyClass(Text.class);
    secondJob.setOutputValueClass(Text.class);
    secondJob.setNumReduceTasks(1);
    code = secondJob.waitForCompletion(true) ? 0 : 1;
  }

  tmpFs.delete(tmpOutput, true);
  System.exit(code);
  return code;
}
/**
 * Configures and runs the "Hypertable MapReduce connector LoadTest" job.
 *
 * <p>The job writes to table {@code LoadTest} in namespace {@code /} and uses
 * {@code this.totalRows} and {@code this.clients} for the {@code LoadSplit.TOTAL_ROWS} and
 * {@code LoadSplit.CLIENTS} settings; {@code this.clients} is also the reducer count.
 *
 * <p>Errors are reported on standard error rather than propagated, because the method has no
 * way to signal failure to its caller. An unsuccessful job is reported as well, and an
 * interruption restores the thread's interrupt flag.
 */
private void doMapReduce() {
  try {
    Job job = Job.getInstance();
    job.getConfiguration().set(OutputFormat.NAMESPACE, "/");
    job.getConfiguration().set(OutputFormat.TABLE, "LoadTest");
    job.getConfiguration()
        .setInt(OutputFormat.MUTATOR_FLAGS, MutatorFlag.NO_LOG_SYNC.getValue());
    job.getConfiguration().setInt(OutputFormat.MUTATOR_FLUSH_INTERVAL, 0);
    job.getConfiguration().setInt("LoadSplit.TOTAL_ROWS", this.totalRows);
    job.getConfiguration().setInt("LoadSplit.CLIENTS", this.clients);
    job.setJarByClass(LoadTest.class);
    job.setJobName("Hypertable MapReduce connector LoadTest");
    job.setInputFormatClass(LoadInputFormat.class);
    job.setOutputFormatClass(OutputFormat.class);
    job.setMapOutputKeyClass(KeyWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);
    job.setMapperClass(LoadMapper.class);
    job.setReducerClass(LoadReducer.class);
    job.setNumReduceTasks(this.clients);
    if (!job.waitForCompletion(true)) {
      // Previously a failed job was indistinguishable from a successful one.
      System.err.println("Hypertable MapReduce connector LoadTest job failed");
    }
  } catch (InterruptedException e) {
    // Preserve the interrupt for callers further up the stack.
    Thread.currentThread().interrupt();
    e.printStackTrace();
  } catch (Exception e) {
    e.printStackTrace();
  }
}
@Override public int run(String[] args) throws Exception { final int ret = parseArgs(args); if (ret < 0) { return ret; } Job job = Job.getInstance(getConf()); job.setJarByClass(GreeDiFirst.class); job.setJobName(String.format("Coverage-GreeDiFirst[%s %s]", partitionCount, selectCount)); job.getConfiguration().setInt(Fields.PARTITION_COUNT.get(), partitionCount); job.getConfiguration().setInt(Fields.SELECT_COUNT.get(), selectCount); job.setNumReduceTasks(partitionCount); SetupHelper.getInstance().setSequenceInput(job, inputPath).setTextOutput(job, outputPath); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(DocumentWithVectorWritable.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(IntWritable.class); job.setMapperClass(Map.class); job.setReducerClass(GreeDiReducer.class); // Delete the output directory if it exists already. FileSystem.get(getConf()).delete(new Path(outputPath), true); job.waitForCompletion(true); return 0; }
/**
 * Configures and runs a single-reducer text-to-text job whose name, inputs and output come
 * from the configuration properties {@code job_name}, {@code input_dir} and
 * {@code output_dir}.
 *
 * <p>The output directory is deleted before the job is submitted.
 *
 * @param args command-line arguments, parsed by {@code GenericOptionsParser}
 * @return 0 if the job succeeds, 1 otherwise
 * @throws Exception if job setup, submission or file-system access fails
 */
public int run(String[] args) throws Exception {
  final Configuration conf = new GenericOptionsParser(getConf(), args).getConfiguration();

  final Job job = new Job(conf, conf.get("job_name"));

  // Input and output locations; any stale output is removed up front.
  FileInputFormat.addInputPaths(job, conf.get("input_dir"));
  final Path outputPath = new Path(conf.get("output_dir"));
  FileOutputFormat.setOutputPath(job, outputPath);
  outputPath.getFileSystem(conf).delete(outputPath, true);

  // Processing classes.
  job.setJarByClass(BrowerLogFormatMR.class);
  job.setMapperClass(BrowerLogFormatMapper.class);
  job.setReducerClass(BrowerLogFormatReducer.class);

  // Formats and key/value types.
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(1);

  if (job.waitForCompletion(true)) {
    return 0;
  }
  return 1;
}
/**
 * Runs the "Fixed Iteration Experiment" job up to {@code numOfIterations} times (or exactly
 * once when {@code onlyLocalIteration} is set) and prints the elapsed time together with the
 * {@code Apriori$TotalSum / STARTS_WITH_DIGIT} counter of the last job.
 *
 * <p>Iteration {@code i} writes to {@code <out> + i}. The process exits with status 2 on a
 * usage error and with status 1 if an iteration fails or if no iteration was run at all.
 *
 * @param args generic Hadoop options followed by {@code <in> <out>}
 * @throws Exception if job setup, submission or monitoring fails
 */
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length != 2) {
    System.err.println("Usage: wordcount <in> <out>");
    System.exit(2);
  }

  double startTime = System.currentTimeMillis();
  Job job = null;
  for (int iteration = 0; iteration < numOfIterations; iteration++) {
    job = new Job(conf, "Fixed Iteration Experiment");
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1] + iteration));
    job.setJarByClass(Apriori.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    if (!job.waitForCompletion(true)) {
      // A failed iteration makes the timing and the counter meaningless.
      System.err.println("Iteration " + iteration + " failed");
      System.exit(1);
    }
    if (onlyLocalIteration) {
      break;
    }
  }

  if (job == null) {
    // numOfIterations <= 0: there is no job to read counters from.
    System.err.println("No iteration was run (numOfIterations=" + numOfIterations + ")");
    System.exit(1);
  }

  org.apache.hadoop.mapreduce.Counter c =
      job.getCounters().findCounter("Apriori$TotalSum", "STARTS_WITH_DIGIT");
  if (onlyLocalIteration) {
    System.out.print("Hybrid Iteration: ");
  }
  System.out.println(
      "Total time: "
          + (System.currentTimeMillis() - startTime)
          + "ms"
          + " my count: "
          + c.getValue());
}
/**
 * Configures and runs the "Running aggregates for numerical attributes" job.
 *
 * <p>Input is read from {@code args[0]} and output written to {@code args[1]}. Additional
 * settings are loaded through {@code Utility.setConfiguration} with the {@code "chombo"}
 * argument, and the reducer count comes from the {@code num.reducer} property (default 1).
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @return 0 if the job succeeds, 1 otherwise
 * @throws Exception if job setup or submission fails
 */
@Override
public int run(String[] args) throws Exception {
  final Job aggregateJob = new Job(getConf());
  aggregateJob.setJobName("Running aggregates for numerical attributes");
  aggregateJob.setJarByClass(RunningAggregator.class);

  FileInputFormat.addInputPath(aggregateJob, new Path(args[0]));
  FileOutputFormat.setOutputPath(aggregateJob, new Path(args[1]));

  // Must happen before "num.reducer" is read below.
  Utility.setConfiguration(aggregateJob.getConfiguration(), "chombo");

  aggregateJob.setMapperClass(RunningAggregator.AggrMapper.class);
  aggregateJob.setReducerClass(RunningAggregator.AggrReducer.class);

  aggregateJob.setMapOutputKeyClass(Tuple.class);
  aggregateJob.setMapOutputValueClass(Tuple.class);
  aggregateJob.setOutputKeyClass(NullWritable.class);
  aggregateJob.setOutputValueClass(Text.class);

  final int reducerCount = aggregateJob.getConfiguration().getInt("num.reducer", 1);
  aggregateJob.setNumReduceTasks(reducerCount);

  if (aggregateJob.waitForCompletion(true)) {
    return 0;
  }
  return 1;
}
/**
 * Runs the "int key replace phase1" job with a single reducer.
 *
 * <p>Input is read from {@code args[0]} and sequence-file output is written to
 * {@code args[1]}. The JVM exits with status 1 when the job fails.
 *
 * @param args exactly two elements: the input path and the output path
 * @throws IllegalArgumentException if {@code args} does not contain exactly two elements
 * @throws IOException if job setup or submission fails
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be resolved
 */
public static void main(String[] args)
    throws IOException, InterruptedException, ClassNotFoundException {
  if (args.length != 2) {
    throw new IllegalArgumentException(args.length + " usage: ... ");
  }
  String bitvectorpath = args[0];
  String outputPath = args[1];

  Configuration conf = new Configuration();
  Job job = Job.getInstance(new Cluster(conf), conf);
  job.setJobName("int key replace phase1");
  job.setJarByClass(OutlinkGrowthAnalysis.class);
  job.setMapperClass(BVIdentitiyMapper.class);
  job.setReducerClass(AnaylseOLGrowthReducer.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormatClass(TabSeperatedTextInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(outputPath));
  FileInputFormat.setInputPaths(job, new Path(bitvectorpath));
  job.setNumReduceTasks(1);

  if (!job.waitForCompletion(true)) {
    // Surface the failure to the calling shell instead of exiting with status 0.
    System.exit(1);
  }
}
private boolean runJob(Configuration conf) throws IOException, InterruptedException, ClassNotFoundException { Job job = Job.getInstance(conf, "WordCount"); job.setJarByClass(WordCount.class); // Configure input format and files job.setInputFormatClass(TextInputFormat.class); FileInputFormat.addInputPath(job, new Path(inputDir)); // Configure output format and files job.setOutputFormatClass(TextOutputFormat.class); FileOutputFormat.setOutputPath(job, new Path(outputDir)); // set up mapper, combiner and reducer job.setMapperClass(WordCountMapper.class); job.setReducerClass(WordCountReducer.class); job.setCombinerClass(WordCountReducer.class); // set sorting, grouping and partitioning // set key and value types job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); return job.waitForCompletion(true); }
/**
 * Configures and runs the "Rating predictor MR" job.
 *
 * <p>Input paths come from {@code args[0]} and the output path from {@code args[1]}. The
 * reducer count is taken from {@code utp.num.reducer}; when that property yields -1 the value
 * of {@code num.reducer} (default 1) is used instead.
 *
 * @param args {@code args[0]} holds the input paths, {@code args[1]} the output path
 * @return 0 if the job succeeds, 1 otherwise
 * @throws Exception if job setup or submission fails
 */
@Override
public int run(String[] args) throws Exception {
  final Job predictorJob = new Job(getConf());
  predictorJob.setJobName("Rating predictor MR");
  predictorJob.setJarByClass(UtilityPredictor.class);

  FileInputFormat.addInputPaths(predictorJob, args[0]);
  FileOutputFormat.setOutputPath(predictorJob, new Path(args[1]));

  predictorJob.setMapperClass(UtilityPredictor.PredictionMapper.class);
  predictorJob.setReducerClass(UtilityPredictor.PredictorReducer.class);

  predictorJob.setMapOutputKeyClass(TextInt.class);
  predictorJob.setMapOutputValueClass(Tuple.class);
  predictorJob.setOutputKeyClass(NullWritable.class);
  predictorJob.setOutputValueClass(Text.class);

  predictorJob.setGroupingComparatorClass(ItemIdGroupComprator.class);
  predictorJob.setPartitionerClass(ItemIdPartitioner.class);

  // Must happen before the reducer-count properties are read below.
  Utility.setConfiguration(predictorJob.getConfiguration());

  // Job-specific setting first, generic setting as the fallback.
  int reducerCount = predictorJob.getConfiguration().getInt("utp.num.reducer", -1);
  if (reducerCount == -1) {
    reducerCount = predictorJob.getConfiguration().getInt("num.reducer", 1);
  }
  predictorJob.setNumReduceTasks(reducerCount);

  if (predictorJob.waitForCompletion(true)) {
    return 0;
  }
  return 1;
}
/**
 * Builds (without submitting) the "pivoting" job.
 *
 * <p>The job reads sequence files from {@code thrax.work-dir + "collected"} and writes
 * compressed sequence files to {@code thrax.work-dir + "pivoted"}. The reducer count comes
 * from {@code thrax.reducers} (default 4). A maximum input split size is applied only when
 * {@code thrax.max-split-size} is set to a non-zero value.
 *
 * @param conf configuration the job is created from and the properties are read from
 * @return the configured, not yet submitted job
 * @throws IOException if the job cannot be created or its paths cannot be set
 */
public Job getJob(Configuration conf) throws IOException {
  final Job pivotJob = new Job(conf, "pivoting");
  pivotJob.setJarByClass(PivotingReducer.class);

  pivotJob.setMapperClass(Mapper.class);
  pivotJob.setReducerClass(PivotingReducer.class);

  pivotJob.setInputFormatClass(SequenceFileInputFormat.class);
  pivotJob.setMapOutputKeyClass(RuleWritable.class);
  pivotJob.setMapOutputValueClass(MapWritable.class);
  pivotJob.setOutputKeyClass(RuleWritable.class);
  pivotJob.setOutputValueClass(MapWritable.class);
  pivotJob.setOutputFormatClass(SequenceFileOutputFormat.class);
  pivotJob.setPartitionerClass(RuleWritable.SourcePartitioner.class);

  final Path collectedDir = new Path(conf.get("thrax.work-dir") + "collected");
  FileInputFormat.setInputPaths(pivotJob, collectedDir);

  // Zero means "not configured": leave the input format's default split size alone.
  final int maxSplitSize = conf.getInt("thrax.max-split-size", 0);
  if (maxSplitSize != 0) {
    FileInputFormat.setMaxInputSplitSize(pivotJob, maxSplitSize);
  }

  pivotJob.setNumReduceTasks(conf.getInt("thrax.reducers", 4));

  final Path pivotedDir = new Path(conf.get("thrax.work-dir") + "pivoted");
  FileOutputFormat.setOutputPath(pivotJob, pivotedDir);
  FileOutputFormat.setCompressOutput(pivotJob, true);

  return pivotJob;
}
public static void dijkstra(String input, String output) throws Exception { String temp = output; /// Run HITS Algorithm JOB:2 For 32 Times /// Setting the Value of k-> 32 for (int i = 0; i < 32; i++) { Configuration conf = new Configuration(); Job job = new Job(conf, "hubsandspokes"); job.setJarByClass(HubsAndSpokes.class); job.setMapperClass(HubSpokeMapper.class); job.setReducerClass(HubSpokeReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(NodeWritable.class); job.setOutputKeyClass(NodeWritable.class); job.setOutputValueClass(Text.class); job.setNumReduceTasks(1); FileInputFormat.addInputPath(job, new Path(input)); FileOutputFormat.setOutputPath(job, new Path(output)); // Toggle the value of Input and Output variable // For Next iteration input = output; output = temp + Integer.toString(i); // Wait for completing the JOB boolean b = job.waitForCompletion(true); if (!b) System.exit(2); // System.exit(job.waitForCompletion(true) ? 0 : 1); } }
public static void main(String[] args) throws Exception { /// Create JOB 1 to convert all the flight data in to NODE : Graph Structure. Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 3) { System.err.println("Usage: hubsandspokesload <in> <out> <finalout>"); System.exit(2); } Job job = new Job(conf, "hubsandspokesload"); job.setJarByClass(HubsAndSpokes.class); job.setMapperClass(HubSpokeLoadMapper.class); job.setReducerClass(HubSpokeLoadReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(NodeWritable.class); job.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); boolean b = job.waitForCompletion(true); if (!b) { System.exit(2); } /// Call Job 2 where we perform HITS Algorithm to calculate Hub and Spoke /// Value at each Node in the graph iteratively. dijkstra(otherArgs[1], otherArgs[2]); // dijkstra("output1", "finaloutput"); }
/**
 * Configures and runs the "OSM-Gridding" job.
 *
 * <p>Expected arguments: input path, output path, minimum latitude, minimum longitude,
 * maximum latitude, maximum longitude and grid size. The bounds and grid size are stored in
 * the job configuration, and the reducer count is the grid size squared. With fewer than
 * seven arguments the JVM exits with status -1.
 *
 * @param args the seven arguments described above
 * @return 0 if the job succeeds, 1 otherwise
 * @throws Exception if job setup or submission fails, or the grid size is not an integer
 */
@Override
public int run(String[] args) throws Exception {
  if (args.length < 7) {
    System.exit(-1);
  }

  final Configuration conf = new Configuration();
  final Job gridJob = Job.getInstance(conf, "OSM-Gridding");
  gridJob.setJarByClass(OSMGrid.class);

  gridJob.setOutputKeyClass(WritablePoint.class);
  gridJob.setOutputValueClass(LongWritable.class);

  gridJob.setMapperClass(OSMMapper.class);
  gridJob.setPartitionerClass(GridPartitioner.class);
  gridJob.setReducerClass(OSMReducer.class);

  gridJob.setInputFormatClass(TextInputFormat.class);
  gridJob.setOutputFormatClass(TextOutputFormat.class);

  FileInputFormat.addInputPath(gridJob, new Path(args[0]));
  FileOutputFormat.setOutputPath(gridJob, new Path(args[1]));

  // Bounding box and grid size are handed to the tasks through the job configuration.
  gridJob.getConfiguration().set(OSMMapper.MINLAT, args[2]);
  gridJob.getConfiguration().set(OSMMapper.MINLON, args[3]);
  gridJob.getConfiguration().set(OSMMapper.MAXLAT, args[4]);
  gridJob.getConfiguration().set(OSMMapper.MAXLON, args[5]);
  gridJob.getConfiguration().set(OSMReducer.GRID, args[6]);

  // The reducer count is the grid size squared.
  final int gridSize = Integer.parseInt(args[6]);
  gridJob.setNumReduceTasks(gridSize * gridSize);

  if (gridJob.waitForCompletion(true)) {
    return 0;
  }
  return 1;
}
/**
 * Configures and runs the "Levelized Nested Dissection Starting" job.
 *
 * <p>The method re-initialises the static {@code conf}, {@code fs} and {@code job} fields,
 * stores {@code num} under {@code my.vertex.num}, reads sequence files from
 * {@code out/<outPath_count>} and writes sequence files to {@code out/<outPath_start>},
 * deleting that output directory first if it exists. After the job has finished,
 * {@code depth} is raised to 1 if it was 0 and {@code wasStart} is set to {@code true}.
 * The job's success flag is not inspected.
 *
 * @throws IOException if job setup, submission or file-system access fails
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be resolved
 */
private static void StartingJob()
    throws IOException, InterruptedException, ClassNotFoundException {
  conf = new Configuration();
  fs = FileSystem.get(conf);
  conf.setLong("my.vertex.num", num);

  job = Job.getInstance(conf, "Levelized Nested Dissection Starting");
  job.setJarByClass(LevNestDissectJob.class);
  job.setMapperClass(StartVertexMapper.class);
  job.setReducerClass(StartVertexReducer.class);

  // Input: the directory written under the "count" sub-path.
  in = out.suffix("/" + outPath_count);
  FileInputFormat.addInputPath(job, in);

  // Output: the "start" sub-path, cleared if a previous run left it behind.
  out_start = out.suffix("/" + outPath_start);
  final boolean staleOutput = fs.exists(out_start);
  if (staleOutput) {
    fs.delete(out_start, true);
  }
  FileOutputFormat.setOutputPath(job, out_start);

  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(VertexWritable.class);
  job.setMapOutputValueClass(Text.class);

  job.waitForCompletion(true);

  if (depth == 0) {
    depth = depth + 1;
  }
  wasStart = true;
}
/**
 * Configures and runs the "matrix multiply" job.
 *
 * <p>A random dense vector of size 100000 is added to the cache under the name
 * {@code inputVector}. The input path comes from the {@code sparse.vector.output.path} job
 * property; the output path is {@code dense.vector.output.path} followed by a timestamp
 * derived from the current date.
 *
 * @param args unused
 * @return 0 if the job succeeds, -1 otherwise. The original returned 1 on success, which a
 *     caller following the usual {@code Tool} exit-code convention would read as a failure.
 * @throws Exception if job setup or submission fails
 */
@Override
public int run(String[] args) throws Exception {
  Configuration conf = getConf();
  conf.set("job.name", System.currentTimeMillis() + "/");
  Properties configProps = loadJobProperties();
  CacheUtils.addSerializableToCache(conf, randomDenseMapVector(100000), "inputVector");

  Job job = new Job(conf, "matrix multiply");
  job.setJarByClass(MatrixMultiplyJob.class);
  job.setMapperClass(MatrixMultiplyMapper.class);
  job.setReducerClass(MatrixMultiplyReducer.class);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(MapVectorWritableComparable.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);

  // Spaces and colons are replaced so the timestamp is usable as a path component.
  String timestamp = new Date().toString().replace(' ', '_').replace(':', '_');
  FileInputFormat.addInputPath(
      job, new Path(configProps.getProperty("sparse.vector.output.path")));
  FileOutputFormat.setOutputPath(
      job, new Path(configProps.getProperty("dense.vector.output.path") + timestamp));

  return job.waitForCompletion(true) ? 0 : -1;
}
/**
 * Runs the "reasoner" job and prints its {@code REDUCE_OUTPUT_RECORDS} counter.
 *
 * <p>After generic Hadoop options are removed, the first remaining argument is the input path
 * and the second is the output path. When fewer than two remain, a usage message is printed
 * and the method returns without running anything. The JVM exits with status 1 when the job
 * fails.
 *
 * @param args generic Hadoop options followed by the input and output paths
 * @throws IOException if job setup or submission fails
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be resolved
 */
public static void main(String[] args)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  // Validate the array that is actually indexed below; the original checked args.length < 1
  // and then read otherArgs[0] and otherArgs[1].
  if (otherArgs.length < 2) {
    System.out.println("USAGE: RFDSReasoner [pool path] [options]");
    return;
  }

  Job job = new Job(conf, "reasoner");
  job.setJarByClass(TCMReasoner.class);
  System.out.println(args[0]);
  job.setMapperClass(TCMMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Triple.class);
  job.setReducerClass(TCMReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Triple.class);
  FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

  if (!job.waitForCompletion(true)) {
    // A failed job has no meaningful derived-record count to report.
    System.err.println("reasoner job failed");
    System.exit(1);
  }

  Counter derivedTriples =
      job.getCounters()
          .findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS");
  System.out.println(derivedTriples.getValue());
}
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length < 2) { System.err.println("Usage: RemoveDup <in> [<in>...] <out>"); System.exit(2); } // 删除输出目录(可选,省得多次运行时,总是报OUTPUT目录已存在) // HDFSUtil.deleteFile(conf, otherArgs[otherArgs.length - 1]); Job job = Job.getInstance(conf, "RemoveDup"); job.setJarByClass(RemoveDup.class); job.setMapperClass(RemoveDupMapper.class); job.setCombinerClass(RemoveDupReducer.class); job.setReducerClass(RemoveDupReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(NullWritable.class); for (int i = 0; i < otherArgs.length - 1; ++i) { FileInputFormat.addInputPath(job, new Path(otherArgs[i])); } FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
public int run(String[] args) throws Exception { // Check input arguments if (args.length != 2) { System.out.println("Usage: firstprog <input HIB> <output directory>"); System.exit(0); } // Initialize and configure MapReduce job Job job = Job.getInstance(); // Set input format class which parses the input HIB and spawns map tasks // job.setInputFormatClass(ImageBundleInputFormat.class); job.setInputFormatClass(HibInputFormat.class); // Set the driver, mapper, and reducer classes which express the computation job.setJarByClass(SampleProgram.class); job.setMapperClass(SampleProgramMapper.class); job.setReducerClass(SampleProgramReducer.class); // Set the types for the key/value pairs passed to/from map and reduce layers job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(FloatImage.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(Text.class); // Set the input and output paths on the HDFS FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); // Execute the MapReduce job and block until it complets boolean success = job.waitForCompletion(true); // Return success or failure return success ? 0 : 1; }
public static void main(String[] args) throws Exception { String paths = "/user/cloudera/00"; String path1 = paths; String path2 = ""; for (int i = 1; i <= 3; i++) { System.out.println("Now exectuing the " + i + "-th job!"); Job job = new Job(); path2 = paths + i; job.setJarByClass(PageRank.class); job.setJobName("PageRank"); path2 = paths + i; FileInputFormat.addInputPath(job, new Path(path1)); FileOutputFormat.setOutputPath(job, new Path(path2)); job.setMapperClass(PageRankMapper.class); job.setReducerClass(PageRankReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); path1 = path2; job.waitForCompletion(true); } // System.exit(job.waitForCompletion(true) ? 0 : 1); }
public int run(String[] args) throws Exception { // TODO Auto-generated method stub Job job = Job.getInstance( getConf(), "Import vessel locations from files in " + args[0] + " into table cdb_vessel:vessel_location"); // co FileInputFormat.addInputPath(job, new Path(args[0])); job.setJarByClass(ImportVTLocationFromFileWithReducer.class); job.setJobName("Vessel_location_injection"); job.setInputFormatClass(VTVesselLocationFileInputFormat.class); job.setMapOutputKeyClass(Key_IMOAndRecordTime.class); job.setMapOutputValueClass(TextArrayWritable.class); job.setPartitionerClass(Partitioner_IMO.class); job.setGroupingComparatorClass(GroupComparator_IMO.class); job.setReducerClass(ImportReducer.class); job.setNumReduceTasks(Integer.parseInt(args[1])); job.setOutputFormatClass(NullOutputFormat.class); return job.waitForCompletion(true) ? 0 : 1; }
public static void main(String[] args) throws Exception { sourcePhoto = "/home/hduser/workspace/images/source.jpg"; sourceFingerprint = SimilarImageSearch.produceFingerPrint(sourcePhoto); final Configuration conf = new Configuration(); MongoConfigUtil.setInputURI(conf, "mongodb://localhost/photo.fingerprint"); MongoConfigUtil.setOutputURI(conf, "mongodb://localhost/photo.handsomeOut"); System.out.println("Conf: " + conf); final Job job = new Job(conf, "similar photo"); job.setJarByClass(MdbSimilarPhoto.class); // Mapper,Reduce and Combiner type definition job.setMapperClass(PhotoMapper.class); job.setCombinerClass(SimilarityReducer.class); job.setReducerClass(SimilarityReducer.class); // output key/value type definition job.setOutputKeyClass(Text.class); job.setOutputValueClass(DoubleWritable.class); // InputFormat and OutputFormat type definition job.setInputFormatClass(MongoInputFormat.class); job.setOutputFormatClass(MongoOutputFormat.class); System.exit(job.waitForCompletion(true) ? 0 : 1); }
/**
 * Prints every configuration entry to standard output, then builds and runs the job.
 *
 * <p>The job is obtained from {@code JobBuilder.parseInputAndOutput}; when that returns
 * {@code null} the method returns -1 without running anything.
 *
 * @param args command-line arguments handed to {@code JobBuilder.parseInputAndOutput}
 * @return -1 if no job could be built, 0 if the job succeeds, 1 if it fails
 * @throws Exception if job setup or submission fails
 */
@Override
public int run(String[] args) throws Exception {
  System.out.println("-------------Printing configuration-------------------");
  final Configuration conf = getConf();
  for (Entry<String, String> property : conf) {
    System.out.printf("%s=%s\n", property.getKey(), property.getValue());
  }
  System.out.println("-------------Printing configuration done--------------");

  final Job temperatureJob = JobBuilder.parseInputAndOutput(this, getConf(), args);
  if (temperatureJob == null) {
    return -1;
  }

  temperatureJob.setOutputKeyClass(Text.class);
  temperatureJob.setOutputValueClass(IntWritable.class);

  // The reducer class doubles as the combiner.
  temperatureJob.setMapperClass(MaxTemperatureMapperWithCounters.class);
  temperatureJob.setCombinerClass(MaxTemperatureReducer.class);
  temperatureJob.setReducerClass(MaxTemperatureReducer.class);

  if (temperatureJob.waitForCompletion(true)) {
    return 0;
  }
  return 1;
}
/**
 * Runs the "wordcount" job with four reducers.
 *
 * <p>After generic Hadoop options are removed, exactly two arguments must remain: the input
 * path and the output path. The output path is deleted before the job is submitted. The JVM
 * exits with status 1 on a usage error or job failure and 0 on success.
 *
 * @param args generic Hadoop options followed by {@code <input> <output>}
 * @throws Exception if job setup, submission or file-system access fails
 */
public static void main(String[] args) throws Exception {
  final Configuration conf = new Configuration();
  final String[] paths = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (paths.length != 2) {
    System.err.println("Usage: wordcount <input> <output>");
    System.exit(1);
  }

  final Job countJob = new Job(conf, "wordcount");
  countJob.setJarByClass(WordCount.class);

  countJob.setOutputKeyClass(Text.class);
  countJob.setOutputValueClass(IntWritable.class);

  // The reducer class doubles as the combiner.
  countJob.setMapperClass(Map.class);
  countJob.setCombinerClass(Reduce.class);
  countJob.setReducerClass(Reduce.class);
  countJob.setNumReduceTasks(4);

  countJob.setInputFormatClass(TextInputFormat.class);
  countJob.setOutputFormatClass(TextOutputFormat.class);

  // Remove any stale output so the job does not refuse to start.
  FileSystem.get(conf).delete(new Path(paths[1]), true);

  FileInputFormat.setInputPaths(countJob, new Path(paths[0]));
  FileOutputFormat.setOutputPath(countJob, new Path(paths[1]));

  final boolean succeeded = countJob.waitForCompletion(true);
  System.exit(succeeded ? 0 : 1);
}
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { // TODO Auto-generated method stub JobConf conf = new JobConf(); conf.setNumMapTasks(1); conf.setNumReduceTasks(5); FileSystem fs = FileSystem.get(conf); Path dir = new Path(args[0]); FileStatus[] stats = fs.listStatus(dir); numFiles = stats.length; Job job = new Job(conf); job.setJarByClass(FileCombiner.class); job.setJobName("File Combiner"); job.setMapperClass(FileCombinerMapper.class); job.setReducerClass(FileCombinerReducer.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); // LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(Text.class); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.waitForCompletion(true); }