public Job getJob(Configuration conf) throws IOException {
    Job job = new Job(conf, "pivoting");
    job.setJarByClass(PivotingReducer.class);
    job.setMapperClass(Mapper.class);
    job.setReducerClass(PivotingReducer.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapOutputKeyClass(RuleWritable.class);
    job.setMapOutputValueClass(MapWritable.class);
    job.setOutputKeyClass(RuleWritable.class);
    job.setOutputValueClass(MapWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setPartitionerClass(RuleWritable.SourcePartitioner.class);
    FileInputFormat.setInputPaths(job, new Path(conf.get("thrax.work-dir") + "collected"));
    int maxSplitSize = conf.getInt("thrax.max-split-size", 0);
    if (maxSplitSize != 0)
        FileInputFormat.setMaxInputSplitSize(job, maxSplitSize);
    int numReducers = conf.getInt("thrax.reducers", 4);
    job.setNumReduceTasks(numReducers);
    FileOutputFormat.setOutputPath(job, new Path(conf.get("thrax.work-dir") + "pivoted"));
    FileOutputFormat.setCompressOutput(job, true);
    return job;
}
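// A hedged sketch, not the actual Thrax source, of what the
// RuleWritable.SourcePartitioner referenced above typically looks like.
// Partitioning on the rule's source side sends every rule that shares a
// source to the same reducer, which the pivoting reducer relies on.
// The public 'source' Text field is an assumption about RuleWritable.
public static class SourcePartitioner extends Partitioner<RuleWritable, MapWritable> {
    @Override
    public int getPartition(RuleWritable key, MapWritable value, int numPartitions) {
        return (key.source.hashCode() & Integer.MAX_VALUE) % numPartitions;
    }
}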
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job1 = new Job(conf, "combine votes");
    job1.setJarByClass(VoteCount.class);
    job1.setMapperClass(MergeFilesMapper.class);
    job1.setCombinerClass(MergedFilesReducer.class);
    job1.setReducerClass(MergedFilesReducer.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job1, new Path(args[0]));
    FileOutputFormat.setOutputPath(job1, new Path(args[1] + "-tmp"));
    // Abort early if the first stage fails; its output feeds the second stage.
    if (!job1.waitForCompletion(true)) {
        System.exit(1);
    }
    Job job2 = new Job(conf, "votes count");
    job2.setJarByClass(VoteCount.class);
    job2.setMapperClass(CalculateVotesMapper.class);
    job2.setCombinerClass(CalculateVotesReducer.class);
    job2.setReducerClass(CalculateVotesReducer.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job2, new Path(args[1] + "-tmp"));
    FileOutputFormat.setOutputPath(job2, new Path(args[1]));
    System.exit(job2.waitForCompletion(true) ? 0 : 1);
}
/** RUN */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: wordcount <input_dir> <output_dir> <reducers>");
        return -1;
    }
    Job job = new Job(getConf(), "PigMix L13");
    job.setJarByClass(L13.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(ReadInput.class);
    job.setReducerClass(Join.class);
    Properties props = System.getProperties();
    Configuration conf = job.getConfiguration();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        conf.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(job, new Path(args[0] + "/pigmix_page_views"));
    FileInputFormat.addInputPath(job, new Path(args[0] + "/pigmix_power_users_samples"));
    FileOutputFormat.setOutputPath(job, new Path(args[1] + "/L13out"));
    job.setNumReduceTasks(Integer.parseInt(args[2]));
    return job.waitForCompletion(true) ? 0 : -1;
}
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // if (otherArgs.length != 3) {
    //     System.err.println("Usage: <tradeTableDir> <payTableDir> <output>");
    //     System.exit(2);
    // }
    // String tradeTableDir = args[0];
    // String payTableDir = args[1];
    // String joinTableDir = args[2];
    Job job = new Job(conf, "Join");
    job.setJarByClass(JoinMain.class);
    job.setMapperClass(PreMapper.class);
    job.setMapOutputKeyClass(TextPair.class);
    job.setPartitionerClass(KeyPartition.class);
    job.setGroupingComparatorClass(FirstComparator.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setReducerClass(CommonReduce.class);
    FileInputFormat.addInputPath(job, new Path("/user/hadoop/input/load3/action.txt"));
    FileInputFormat.addInputPath(job, new Path("/user/hadoop/input/load3/alipay.txt"));
    FileOutputFormat.setOutputPath(job, new Path("/user/hadoop/output3/"));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
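// A minimal sketch, assuming a TextPair with a getFirst() accessor, of the
// secondary-sort helpers the join driver above references: KeyPartition
// partitions on the first half of the composite key and FirstComparator
// groups reducer input the same way, so rows from both tables that share a
// join key reach a single reduce() call together. Not the original classes.
public static class KeyPartition extends Partitioner<TextPair, Text> {
    @Override
    public int getPartition(TextPair key, Text value, int numPartitions) {
        return (key.getFirst().hashCode() & Integer.MAX_VALUE) % numPartitions;
    }
}

public static class FirstComparator extends WritableComparator {
    protected FirstComparator() {
        super(TextPair.class, true);
    }

    @Override
    @SuppressWarnings("rawtypes")
    public int compare(WritableComparable a, WritableComparable b) {
        return ((TextPair) a).getFirst().compareTo(((TextPair) b).getFirst());
    }
}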
public static void runDailyTrendEstimationJob(String inputPath) throws Exception {
    Configuration conf = new Configuration();
    Job job;
    try {
        conf.set("mongo.output.uri", "mongodb://52.33.93.221:27017/mongo_hadoop.page_trends");
        job = Job.getInstance(conf, "dataclean");
        job.setJarByClass(DataCleanJob.class);
        job.setMapperClass(DataCleanMapper.class);
        job.setReducerClass(DataCleanReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(PageDataValue.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(MongoUpdateWritable.class);
        FileInputFormat.setInputDirRecursive(job, true);
        FileInputFormat.addInputPath(job, new Path(inputPath));
        MongoConfig mongoConfig = new MongoConfig(conf);
        mongoConfig.setOutputFormat(MongoOutputFormat.class);
        job.setOutputFormatClass(MongoOutputFormat.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // conf.addResource(new Path("/home/hadoop/hadoop/hadoop-1.2.1/conf/core-site.xml"));
    // conf.addResource(new Path("/home/hadoop/hadoop/hadoop-1.2.1/conf/hdfs-site.xml"));
    // ===== Stage 1 =====
    Job job1 = new Job(conf, "Stage 1: Frequency Count");
    job1.setJarByClass(HashCount1.class);
    job1.setMapperClass(Mapper1.class);
    // job1.setCombinerClass(Combine1.class);
    job1.setReducerClass(Reducer1.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(IntWritable.class);
    job1.setNumReduceTasks(1);
    FileInputFormat.addInputPath(job1, new Path("/TwitterInFiles/twitter_10k.tar.gz"));
    FileOutputFormat.setOutputPath(job1, new Path("/twitterOuts/output1"));
    job1.waitForCompletion(true);
    // ===== Stage 2 =====
    Job job2 = new Job(conf, "Stage 2: Sort");
    job2.setJarByClass(HashCount1.class);
    job2.setMapperClass(Mapper2.class);
    // job1.setCombinerClass(IntSumReducer.class);
    job2.setReducerClass(Reducer2.class);
    job2.setOutputKeyClass(IntWritable.class);
    job2.setOutputValueClass(Text.class);
    job2.setNumReduceTasks(1);
    FileInputFormat.addInputPath(job2, new Path("/twitterOuts/output1"));
    FileOutputFormat.setOutputPath(job2, new Path("/twitterOuts/output2"));
    // A job can only be submitted once, so wait once and exit on its status.
    System.exit(job2.waitForCompletion(true) ? 0 : 1);
}
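// Hedged sketch (assumed, not the original) of the Stage 2 mapper wired in
// above: it inverts each "word<TAB>count" line produced by Stage 1 into a
// (count, word) pair, so the shuffle sorts records by frequency (ascending
// by default) before Reducer2 writes them back out.
public static class Mapper2 extends Mapper<LongWritable, Text, IntWritable, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] fields = value.toString().split("\t");
        if (fields.length == 2) {
            context.write(new IntWritable(Integer.parseInt(fields[1])), new Text(fields[0]));
        }
    }
}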
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    // conf.set("stat_date", dateString);
    Job job = new Job(conf, "DayhslogUserDateNewMac");
    job.setJarByClass(DayhslogUserDate.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileInputFormat.addInputPath(job, new Path(args[1]));
    FileOutputFormat.setOutputPath(job, new Path(args[2] + "tmp"));
    FileSystem.get(conf).delete(new Path(args[2] + "tmp"), true);
    job.setMapperClass(DayhslogUserDateNewMacMapper.class);
    job.setReducerClass(DayhslogUserDateNewMacReducer.class);
    // job.setInputFormatClass(LzoTextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setNumReduceTasks(10);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    int code = job.waitForCompletion(true) ? 0 : 1;
    if (code == 0) {
        // Second pass: join the original input with the intermediate output.
        Job resultJob = new Job(conf, "DayhslogUserDate");
        resultJob.setJarByClass(DayhslogUserDate.class);
        FileInputFormat.addInputPath(resultJob, new Path(args[0]));
        FileInputFormat.addInputPath(resultJob, new Path(args[2] + "tmp"));
        FileOutputFormat.setOutputPath(resultJob, new Path(args[2]));
        FileSystem.get(conf).delete(new Path(args[2]), true);
        resultJob.setMapperClass(DayhslogUserDateMapper.class);
        resultJob.setReducerClass(DayhslogUserDateReducer.class);
        resultJob.setNumReduceTasks(10);
        resultJob.setMapOutputKeyClass(Text.class);
        resultJob.setMapOutputValueClass(Text.class);
        resultJob.setOutputKeyClass(Text.class);
        resultJob.setOutputValueClass(Text.class);
        code = resultJob.waitForCompletion(true) ? 0 : 1;
    }
    // Clean up the intermediate directory before exiting.
    Path tmpPath = new Path(args[2] + "tmp");
    FileSystem.get(conf).delete(tmpPath, true);
    System.exit(code);
    return code;
}
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    // 10-hour task timeout, in milliseconds.
    conf.setLong("mapreduce.task.timeout", 10000 * 60 * 60);
    Path train_file = new Path(args[0]);
    Path test_file = new Path(args[1]);
    conf.set("train_file", train_file.getParent().getName());
    Job job = new Job(conf, "MapTestID");
    job.setJarByClass(MapTestID.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(MapTestID.MapTestIDMap.class);
    job.setReducerClass(MapTestID.MapTestIDReduce.class);
    job.setNumReduceTasks(300);
    FileInputFormat.addInputPath(job, train_file);
    FileInputFormat.addInputPath(job, test_file);
    FileOutputFormat.setOutputPath(job, new Path(args[2]));
    return job.waitForCompletion(true) ? 0 : 1;
}
public static void main(String[] args)
        throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "job");
    job.setJarByClass(PVidConvert.class);
    job.setMapperClass(Map1.class);
    job.setReducerClass(Reduce1.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path("/tmp/temporary_execution/"));
    // Stop if the first stage fails; the second stage reads its output.
    if (!job.waitForCompletion(true)) {
        System.exit(1);
    }
    Configuration conf1 = new Configuration();
    Job job1 = new Job(conf1, "job1");
    job1.setJarByClass(PVidConvert.class);
    job1.setMapperClass(Map2.class);
    job1.setReducerClass(Reduce2.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job1, new Path("/tmp/temporary_execution/"));
    FileOutputFormat.setOutputPath(job1, new Path(args[1]));
    System.exit(job1.waitForCompletion(true) ? 0 : 1);
}
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    GenericOptionsParser optionparser = new GenericOptionsParser(conf, args);
    conf = optionparser.getConfiguration();
    Job job = new Job(conf, conf.get("job_name"));
    job.setJarByClass(DeliverFormatForUVMR.class);
    FileInputFormat.addInputPaths(job, conf.get("input_dir"));
    String outputDir = conf.get("output_dir");
    String tmpDir = outputDir + "_tmp";
    Path tmpOut = new Path(tmpDir);
    FileOutputFormat.setOutputPath(job, tmpOut);
    tmpOut.getFileSystem(conf).delete(tmpOut, true);
    job.setMapperClass(DeliverFormatForUVMapper.class);
    job.setCombinerClass(DeliverFormatForUVCombiner.class);
    job.setReducerClass(DeliverFormatForUVReducer.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(conf.getInt("reduce_num", 20));
    int code = job.waitForCompletion(true) ? 0 : 1;
    if (code == 0) {
        // This job combines the small intermediate files into one output file.
        Job combineJob = new Job(conf, "CombineTmpData");
        combineJob.setJarByClass(DeliverFormatForUVMR.class);
        FileInputFormat.addInputPath(combineJob, new Path(tmpDir));
        FileOutputFormat.setOutputPath(combineJob, new Path(outputDir));
        combineJob.setMapperClass(IdentityMapper.class);
        combineJob.setReducerClass(IdentityReducer.class);
        combineJob.setInputFormatClass(KeyValueTextInputFormat.class);
        combineJob.setOutputFormatClass(TextOutputFormat.class);
        combineJob.setOutputKeyClass(Text.class);
        combineJob.setOutputValueClass(Text.class);
        TextOutputFormat.setCompressOutput(combineJob, true);
        TextOutputFormat.setOutputCompressorClass(combineJob, LzopCodec.class);
        combineJob.setNumReduceTasks(1);
        code = combineJob.waitForCompletion(true) ? 0 : 1;
    }
    FileSystem.get(conf).delete(tmpOut, true);
    // Build LZO indexes so the compressed output stays splittable.
    LzoIndexer lzoIndexer = new LzoIndexer(conf);
    lzoIndexer.index(new Path(outputDir));
    System.exit(code);
    return code;
}
/** Adds the given path and all of its subdirectories as input paths on the job. */
public static boolean addInputPaths(String pathString, Job job) throws IOException {
    HashSet<Path> paths = new HashSet<Path>();
    if (getAllSubDirs(URI.create(pathString), job, paths)) {
        // Keep any input paths that were already configured on the job.
        paths.addAll(Arrays.asList(FileInputFormat.getInputPaths(job)));
        FileInputFormat.setInputPaths(job, paths.toArray(new Path[0]));
        return true;
    }
    return false;
}
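// Hypothetical sketch of the getAllSubDirs helper the method above depends
// on (its real implementation is not shown here): recursively collect the
// directory and all of its subdirectories into 'paths', returning false when
// the root does not exist.
private static boolean getAllSubDirs(URI uri, Job job, Set<Path> paths) throws IOException {
    FileSystem fs = FileSystem.get(uri, job.getConfiguration());
    Path root = new Path(uri);
    if (!fs.exists(root)) {
        return false;
    }
    paths.add(root);
    for (FileStatus status : fs.listStatus(root)) {
        if (status.isDirectory()) {
            getAllSubDirs(status.getPath().toUri(), job, paths);
        }
    }
    return true;
}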
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    GenericOptionsParser gop = new GenericOptionsParser(conf, args);
    conf = gop.getConfiguration();
    Job job = new Job(conf, "ClientUserInstallMR");
    job.setJarByClass(ClientUserInstallMR.class);
    FileInputFormat.addInputPaths(job, conf.get("input_dir"));
    String outputDir = conf.get("output_dir");
    String tmpDir = outputDir + "_tmp";
    Path tmpOutput = new Path(tmpDir);
    FileOutputFormat.setOutputPath(job, tmpOutput);
    tmpOutput.getFileSystem(conf).delete(tmpOutput, true);
    job.setMapperClass(ClientUserInstallFirstMapper.class);
    job.setReducerClass(ClientUserInstallFirstReduce.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(30);
    int code = job.waitForCompletion(true) ? 0 : 1;
    if (code == 0) {
        Job secondJob = new Job(conf, "ClientUserInstallResult");
        secondJob.setJarByClass(ClientUserInstallMR.class);
        FileInputFormat.addInputPath(secondJob, new Path(tmpDir));
        Path output = new Path(outputDir);
        FileOutputFormat.setOutputPath(secondJob, output);
        output.getFileSystem(conf).delete(output, true);
        secondJob.setMapperClass(ClientUserInstallSecondMapper.class);
        secondJob.setReducerClass(ClientUserInstallSecondReduce.class);
        secondJob.setInputFormatClass(KeyValueTextInputFormat.class);
        secondJob.setOutputFormatClass(TextOutputFormat.class);
        secondJob.setOutputKeyClass(Text.class);
        secondJob.setOutputValueClass(Text.class);
        secondJob.setNumReduceTasks(1);
        code = secondJob.waitForCompletion(true) ? 0 : 1;
    }
    FileSystem.get(conf).delete(tmpOutput, true);
    System.exit(code);
    return code;
}
public static void main(String[] args) throws Exception {
    String paths = "/user/cloudera/00";
    String path1 = paths;
    String path2 = "";
    for (int i = 1; i <= 3; i++) {
        System.out.println("Now executing the " + i + "-th job!");
        Job job = new Job();
        job.setJarByClass(PageRank.class);
        job.setJobName("PageRank");
        path2 = paths + i;
        FileInputFormat.addInputPath(job, new Path(path1));
        FileOutputFormat.setOutputPath(job, new Path(path2));
        job.setMapperClass(PageRankMapper.class);
        job.setReducerClass(PageRankReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        // Feed this iteration's output into the next iteration.
        path1 = path2;
        job.waitForCompletion(true);
    }
    // System.exit(job.waitForCompletion(true) ? 0 : 1);
}
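// A hedged sketch of the PageRankMapper/PageRankReducer pair the loop above
// chains. Assumptions (not from the source): input lines look like
// "page<TAB>rank<TAB>outlink1,outlink2,...", and the damping factor is 0.85.
// The mapper spreads each page's rank over its outlinks and re-emits the
// link list; the reducer sums the contributions and applies damping, writing
// lines in the same format so the next iteration can consume them.
public static class PageRankMapper extends Mapper<LongWritable, Text, Text, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] parts = value.toString().split("\t");
        String page = parts[0];
        double rank = Double.parseDouble(parts[1]);
        String[] outlinks = parts.length > 2 ? parts[2].split(",") : new String[0];
        for (String target : outlinks) {
            context.write(new Text(target), new Text(Double.toString(rank / outlinks.length)));
        }
        // Re-emit the link structure so the reducer can reproduce it.
        context.write(new Text(page), new Text("links\t" + (parts.length > 2 ? parts[2] : "")));
    }
}

public static class PageRankReducer extends Reducer<Text, Text, Text, Text> {
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        double sum = 0.0;
        String links = "";
        for (Text v : values) {
            String s = v.toString();
            if (s.startsWith("links\t")) {
                links = s.substring("links\t".length());
            } else {
                sum += Double.parseDouble(s);
            }
        }
        context.write(key, new Text((0.15 + 0.85 * sum) + "\t" + links));
    }
}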
private boolean runJob(Configuration conf)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(conf, "WordCount");
    job.setJarByClass(WordCount.class);
    // Configure input format and files
    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.addInputPath(job, new Path(inputDir));
    // Configure output format and files
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    // Set up mapper, combiner and reducer
    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(WordCountReducer.class);
    job.setCombinerClass(WordCountReducer.class);
    // Set key and value types
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    return job.waitForCompletion(true);
}
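// A minimal sketch (assumed, not from the source) of the mapper and reducer
// the WordCount driver above wires in. The reducer doubles as the combiner,
// which is safe because summing partial counts is associative and
// commutative.
public static class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        for (String token : value.toString().split("\\s+")) {
            if (!token.isEmpty()) {
                word.set(token);
                context.write(word, ONE);
            }
        }
    }
}

public static class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable v : values) {
            sum += v.get();
        }
        context.write(key, new IntWritable(sum));
    }
}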
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: RemoveDup <in> [<in>...] <out>");
        System.exit(2);
    }
    // Optionally delete the output directory first, so repeated runs don't
    // fail with "output directory already exists".
    // HDFSUtil.deleteFile(conf, otherArgs[otherArgs.length - 1]);
    Job job = Job.getInstance(conf, "RemoveDup");
    job.setJarByClass(RemoveDup.class);
    job.setMapperClass(RemoveDupMapper.class);
    job.setCombinerClass(RemoveDupReducer.class);
    job.setReducerClass(RemoveDupReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
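// Hedged sketch of the RemoveDup mapper and reducer named above (assumed
// implementations): emitting each input line as a key with a NullWritable
// value lets the shuffle collapse duplicates, so the reducer, which also
// serves as the combiner, writes every distinct line exactly once.
public static class RemoveDupMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        context.write(value, NullWritable.get());
    }
}

public static class RemoveDupReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
    @Override
    protected void reduce(Text key, Iterable<NullWritable> values, Context context)
            throws IOException, InterruptedException {
        context.write(key, NullWritable.get());
    }
}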
public static void main(String[] args)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.out.println("USAGE: RFDSReasoner [pool path] [options]");
        return;
    }
    Job job = new Job(conf, "reasoner");
    job.setJarByClass(TCMReasoner.class);
    System.out.println(otherArgs[0]);
    job.setMapperClass(TCMMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Triple.class);
    job.setReducerClass(TCMReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Triple.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    job.waitForCompletion(true);
    // Report how many triples the reasoner derived.
    Counter derivedTriples = job.getCounters()
            .findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS");
    System.out.println(derivedTriples.getValue());
}
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    GenericOptionsParser gop = new GenericOptionsParser(conf, args);
    conf = gop.getConfiguration();
    Job job = new Job(conf, conf.get("job_name"));
    FileInputFormat.addInputPaths(job, conf.get("input_dir"));
    Path output = new Path(conf.get("output_dir"));
    FileOutputFormat.setOutputPath(job, output);
    output.getFileSystem(conf).delete(output, true);
    job.setJarByClass(BrowerLogFormatMR.class);
    job.setMapperClass(BrowerLogFormatMapper.class);
    job.setReducerClass(BrowerLogFormatReducer.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1);
    int code = job.waitForCompletion(true) ? 0 : 1;
    return code;
}
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(
            getConf(),
            "Import vessel locations from files in " + args[0]
                    + " into table cdb_vessel:vessel_location");
    FileInputFormat.addInputPath(job, new Path(args[0]));
    job.setJarByClass(ImportVTLocationFromFileWithReducer.class);
    job.setJobName("Vessel_location_injection");
    job.setInputFormatClass(VTVesselLocationFileInputFormat.class);
    job.setMapOutputKeyClass(Key_IMOAndRecordTime.class);
    job.setMapOutputValueClass(TextArrayWritable.class);
    // Secondary sort: partition and group by IMO number so each vessel's
    // records arrive at one reducer together, ordered by record time.
    job.setPartitionerClass(Partitioner_IMO.class);
    job.setGroupingComparatorClass(GroupComparator_IMO.class);
    job.setReducerClass(ImportReducer.class);
    job.setNumReduceTasks(Integer.parseInt(args[1]));
    job.setOutputFormatClass(NullOutputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}
public static void main(String[] args) throws Exception {
    // Job 1: convert all the flight data into a NodeWritable graph structure.
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: hubsandspokesload <in> <out> <finalout>");
        System.exit(2);
    }
    Job job = new Job(conf, "hubsandspokesload");
    job.setJarByClass(HubsAndSpokes.class);
    job.setMapperClass(HubSpokeLoadMapper.class);
    job.setReducerClass(HubSpokeLoadReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(NodeWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    boolean b = job.waitForCompletion(true);
    if (!b) {
        System.exit(2);
    }
    // Job 2: run the HITS algorithm to calculate the hub and spoke value
    // at each node of the graph iteratively.
    dijkstra(otherArgs[1], otherArgs[2]);
    // dijkstra("output1", "finaloutput");
}
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] remainArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (remainArgs.length != 2) {
        System.err.println("Usage: wordcount <input> <output>");
        System.exit(1);
    }
    Job job = new Job(conf, "wordcount");
    job.setJarByClass(WordCount.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);
    job.setNumReduceTasks(4);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    // Remove any previous output so the job can rerun cleanly.
    FileSystem.get(conf).delete(new Path(remainArgs[1]), true);
    FileInputFormat.setInputPaths(job, new Path(remainArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(remainArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "ESIndexCreator");
    job.setJarByClass(ESIndexCreator.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(ESIndexCreator.MyMapper.class);
    job.setNumReduceTasks(0); // map-only job; skip the reduce phase
    job.setInputFormatClass(KeyValueTextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    // Program arguments:
    // 0: input file path
    // 1: output file path
    // 2: Elasticsearch server's host name
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.getConfiguration().set("host", args[2]);
    job.waitForCompletion(true);
}
public Map<String, Object> run(Map<String, Object> args) throws Exception {
    getConf().setLong("injector.current.time", System.currentTimeMillis());
    Path input;
    Object path = args.get(Nutch.ARG_SEEDDIR);
    if (path instanceof Path) {
        input = (Path) path;
    } else {
        input = new Path(path.toString());
    }
    numJobs = 1;
    currentJobNum = 0;
    currentJob = new NutchJob(getConf(), "inject " + input);
    FileInputFormat.addInputPath(currentJob, input);
    currentJob.setMapperClass(UrlMapper.class);
    currentJob.setMapOutputKeyClass(String.class);
    currentJob.setMapOutputValueClass(WebPage.class);
    currentJob.setOutputFormatClass(GoraOutputFormat.class);
    DataStore<String, WebPage> store = StorageUtils.createWebStore(
            currentJob.getConfiguration(), String.class, WebPage.class);
    GoraOutputFormat.setOutput(currentJob, store, true);
    // Injection is map-only; the identity reducer is never run.
    currentJob.setReducerClass(Reducer.class);
    currentJob.setNumReduceTasks(0);
    currentJob.waitForCompletion(true);
    ToolUtil.recordJobStatus(null, currentJob, results);
    return results;
}
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        LOG.error("Usage: " + getClass().getName() + " inputFileHDFS outputFileHDFS");
        return 1;
    }
    String inputFile = args[0];
    // Suffix the output path with a timestamp so reruns don't collide.
    String outputFile = args[1] + System.nanoTime();
    Configuration configuration = getConf();
    Job job = new Job(configuration);
    job.setJarByClass(getClass());
    job.setJobName(getClass().getName());
    job.setMapperClass(ReadRequestMap.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.setInputPaths(job, new Path(inputFile));
    FileOutputFormat.setOutputPath(job, new Path(outputFile));
    return job.waitForCompletion(true) ? 0 : 1;
}
public static void dijkstra(String input, String output) throws Exception {
    String temp = output;
    // Run the HITS algorithm (Job 2) for k = 32 iterations.
    for (int i = 0; i < 32; i++) {
        Configuration conf = new Configuration();
        Job job = new Job(conf, "hubsandspokes");
        job.setJarByClass(HubsAndSpokes.class);
        job.setMapperClass(HubSpokeMapper.class);
        job.setReducerClass(HubSpokeReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NodeWritable.class);
        job.setOutputKeyClass(NodeWritable.class);
        job.setOutputValueClass(Text.class);
        job.setNumReduceTasks(1);
        FileInputFormat.addInputPath(job, new Path(input));
        FileOutputFormat.setOutputPath(job, new Path(output));
        // Swap the input and output paths for the next iteration.
        input = output;
        output = temp + Integer.toString(i);
        // Block until the job completes.
        boolean b = job.waitForCompletion(true);
        if (!b) {
            System.exit(2);
        }
        // System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
public static void main(String[] args)
        throws IOException, InterruptedException, ClassNotFoundException {
    if (args.length != 2)
        throw new IllegalArgumentException(args.length + " usage: ... ");
    String bitvectorpath = args[0], outputPath = args[1];
    Configuration conf = new Configuration();
    Job job = Job.getInstance(new Cluster(conf), conf);
    job.setJobName("int key replace phase1");
    job.setJarByClass(OutlinkGrowthAnalysis.class);
    job.setMapperClass(BVIdentitiyMapper.class);
    job.setReducerClass(AnaylseOLGrowthReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(TabSeperatedTextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    FileInputFormat.setInputPaths(job, new Path(bitvectorpath));
    job.setNumReduceTasks(1);
    job.waitForCompletion(true);
}
public int run(String[] args) throws Exception {
    // Check input arguments
    if (args.length != 2) {
        System.out.println("Usage: firstprog <input HIB> <output directory>");
        System.exit(0);
    }
    // Initialize and configure MapReduce job
    Job job = Job.getInstance();
    // Set input format class which parses the input HIB and spawns map tasks
    // job.setInputFormatClass(ImageBundleInputFormat.class);
    job.setInputFormatClass(HibInputFormat.class);
    // Set the driver, mapper, and reducer classes which express the computation
    job.setJarByClass(SampleProgram.class);
    job.setMapperClass(SampleProgramMapper.class);
    job.setReducerClass(SampleProgramReducer.class);
    // Set the types for the key/value pairs passed to/from map and reduce layers
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(FloatImage.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    // Set the input and output paths on the HDFS
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    // Execute the MapReduce job and block until it completes
    boolean success = job.waitForCompletion(true);
    // Return success or failure
    return success ? 0 : 1;
}
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String jobName = "Running aggregates for numerical attributes";
    job.setJobName(jobName);
    job.setJarByClass(RunningAggregator.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    Utility.setConfiguration(job.getConfiguration(), "chombo");
    job.setMapperClass(RunningAggregator.AggrMapper.class);
    job.setReducerClass(RunningAggregator.AggrReducer.class);
    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(Tuple.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(job.getConfiguration().getInt("num.reducer", 1));
    int status = job.waitForCompletion(true) ? 0 : 1;
    return status;
}
@Override
public int run(String[] args) throws Exception {
    if (args.length < 7) {
        System.err.println("Usage: OSMGrid <input> <output> <minLat> <minLon> <maxLat> <maxLon> <grid>");
        System.exit(-1);
    }
    Configuration conf = getConf();
    Job job = Job.getInstance(conf, "OSM-Gridding");
    job.setJarByClass(OSMGrid.class);
    job.setOutputKeyClass(WritablePoint.class);
    job.setOutputValueClass(LongWritable.class);
    job.setMapperClass(OSMMapper.class);
    job.setPartitionerClass(GridPartitioner.class);
    job.setReducerClass(OSMReducer.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    // Bounding box of the area to grid, passed through the job configuration.
    job.getConfiguration().set(OSMMapper.MINLAT, args[2]);
    job.getConfiguration().set(OSMMapper.MINLON, args[3]);
    job.getConfiguration().set(OSMMapper.MAXLAT, args[4]);
    job.getConfiguration().set(OSMMapper.MAXLON, args[5]);
    job.getConfiguration().set(OSMReducer.GRID, args[6]);
    // One reducer per grid cell.
    job.setNumReduceTasks(Integer.parseInt(args[6]) * Integer.parseInt(args[6]));
    boolean succ = job.waitForCompletion(true);
    return succ ? 0 : 1;
}
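// Hypothetical sketch of the GridPartitioner configured above. WritablePoint's
// getX()/getY() accessors and coordinates normalized to [0,1) are assumptions;
// the real class may differ. The idea matches the driver's grid*grid reduce
// tasks: route each point to the reducer that owns its grid cell.
public static class GridPartitioner extends Partitioner<WritablePoint, LongWritable> {
    @Override
    public int getPartition(WritablePoint key, LongWritable value, int numPartitions) {
        int grid = (int) Math.sqrt(numPartitions);
        int col = Math.min((int) (key.getX() * grid), grid - 1);
        int row = Math.min((int) (key.getY() * grid), grid - 1);
        return row * grid + col;
    }
}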
@Override
protected void configureJob(Job job) throws IOException {
    Configuration conf = job.getConfiguration();
    job.setJarByClass(PartialBuilder.class);
    FileInputFormat.setInputPaths(job, getDataPath());
    FileOutputFormat.setOutputPath(job, getOutputPath(conf));
    job.setOutputKeyClass(TreeID.class);
    job.setOutputValueClass(MapredOutput.class);
    job.setMapperClass(Step1Mapper.class);
    job.setNumReduceTasks(0); // no reducers
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    // For this implementation to work, mapred.map.tasks needs to be set to
    // the actual number of mappers Hadoop will use:
    TextInputFormat inputFormat = new TextInputFormat();
    List<?> splits = inputFormat.getSplits(job);
    if (splits == null || splits.isEmpty()) {
        log.warn("Unable to compute number of splits?");
    } else {
        int numSplits = splits.size();
        log.info("Setting mapred.map.tasks = {}", numSplits);
        conf.setInt("mapred.map.tasks", numSplits);
    }
}
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String jobName = "Rating predictor MR";
    job.setJobName(jobName);
    job.setJarByClass(UtilityPredictor.class);
    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(UtilityPredictor.PredictionMapper.class);
    job.setReducerClass(UtilityPredictor.PredictorReducer.class);
    job.setMapOutputKeyClass(TextInt.class);
    job.setMapOutputValueClass(Tuple.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setGroupingComparatorClass(ItemIdGroupComprator.class);
    job.setPartitionerClass(ItemIdPartitioner.class);
    Utility.setConfiguration(job.getConfiguration());
    // Prefer the job-specific reducer count; fall back to the global setting.
    int numReducer = job.getConfiguration().getInt("utp.num.reducer", -1);
    numReducer = -1 == numReducer ? job.getConfiguration().getInt("num.reducer", 1) : numReducer;
    job.setNumReduceTasks(numReducer);
    int status = job.waitForCompletion(true) ? 0 : 1;
    return status;
}