public Job getJob(Configuration conf) throws IOException {
  Job job = new Job(conf, "pivoting");
  job.setJarByClass(PivotingReducer.class);
  job.setMapperClass(Mapper.class);
  job.setReducerClass(PivotingReducer.class);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setMapOutputKeyClass(RuleWritable.class);
  job.setMapOutputValueClass(MapWritable.class);
  job.setOutputKeyClass(RuleWritable.class);
  job.setOutputValueClass(MapWritable.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setPartitionerClass(RuleWritable.SourcePartitioner.class);

  FileInputFormat.setInputPaths(job, new Path(conf.get("thrax.work-dir") + "collected"));
  int maxSplitSize = conf.getInt("thrax.max-split-size", 0);
  if (maxSplitSize != 0) {
    FileInputFormat.setMaxInputSplitSize(job, maxSplitSize);
  }

  int numReducers = conf.getInt("thrax.reducers", 4);
  job.setNumReduceTasks(numReducers);

  FileOutputFormat.setOutputPath(job, new Path(conf.get("thrax.work-dir") + "pivoted"));
  FileOutputFormat.setCompressOutput(job, true);
  return job;
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();

  Job job1 = new Job(conf, "combine votes");
  job1.setJarByClass(VoteCount.class);
  job1.setMapperClass(MergeFilesMapper.class);
  job1.setCombinerClass(MergedFilesReducer.class);
  job1.setReducerClass(MergedFilesReducer.class);
  job1.setOutputKeyClass(Text.class);
  job1.setOutputValueClass(Text.class);
  FileInputFormat.addInputPath(job1, new Path(args[0]));
  FileOutputFormat.setOutputPath(job1, new Path(args[1] + "-tmp"));
  // Abort if the first job fails; the second job depends on its output.
  if (!job1.waitForCompletion(true)) {
    System.exit(1);
  }

  Job job2 = new Job(conf, "votes count");
  job2.setJarByClass(VoteCount.class);
  job2.setMapperClass(CalculateVotesMapper.class);
  job2.setCombinerClass(CalculateVotesReducer.class);
  job2.setReducerClass(CalculateVotesReducer.class);
  job2.setOutputKeyClass(Text.class);
  job2.setOutputValueClass(IntWritable.class);
  FileInputFormat.addInputPath(job2, new Path(args[1] + "-tmp"));
  FileOutputFormat.setOutputPath(job2, new Path(args[1]));
  System.exit(job2.waitForCompletion(true) ? 0 : 1);
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  // conf.addResource(new Path("/home/hadoop/hadoop/hadoop-1.2.1/conf/core-site.xml"));
  // conf.addResource(new Path("/home/hadoop/hadoop/hadoop-1.2.1/conf/hdfs-site.xml"));

  // ===== Stage 1 =====
  Job job1 = new Job(conf, "Stage 1: Frequency Count");
  job1.setJarByClass(HashCount1.class);
  job1.setMapperClass(Mapper1.class);
  // job1.setCombinerClass(Combine1.class);
  job1.setReducerClass(Reducer1.class);
  job1.setOutputKeyClass(Text.class);
  job1.setOutputValueClass(IntWritable.class);
  job1.setNumReduceTasks(1);
  FileInputFormat.addInputPath(job1, new Path("/TwitterInFiles/twitter_10k.tar.gz"));
  FileOutputFormat.setOutputPath(job1, new Path("/twitterOuts/output1"));
  job1.waitForCompletion(true);

  // ===== Stage 2 =====
  Job job2 = new Job(conf, "Stage 2: Sort");
  job2.setJarByClass(HashCount1.class);
  job2.setMapperClass(Mapper2.class);
  // job2.setCombinerClass(IntSumReducer.class);
  job2.setReducerClass(Reducer2.class);
  job2.setOutputKeyClass(IntWritable.class);
  job2.setOutputValueClass(Text.class);
  job2.setNumReduceTasks(1);
  FileInputFormat.addInputPath(job2, new Path("/twitterOuts/output1"));
  FileOutputFormat.setOutputPath(job2, new Path("/twitterOuts/output2"));
  // Wait for job 2 exactly once (the original waited twice, running the
  // monitoring loop redundantly).
  System.exit(job2.waitForCompletion(true) ? 0 : 1);
}
@Override
public int run(String[] args) throws Exception {
  Configuration conf = getConf();
  // conf.set("stat_date", dateString);

  Job job = new Job(conf, "DayhslogUserDateNewMac");
  job.setJarByClass(DayhslogUserDate.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileInputFormat.addInputPath(job, new Path(args[1]));
  FileOutputFormat.setOutputPath(job, new Path(args[2] + "tmp"));
  FileSystem.get(conf).delete(new Path(args[2] + "tmp"), true);

  job.setMapperClass(DayhslogUserDateNewMacMapper.class);
  job.setReducerClass(DayhslogUserDateNewMacReducer.class);
  // job.setInputFormatClass(LzoTextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setNumReduceTasks(10);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  int code = job.waitForCompletion(true) ? 0 : 1;

  if (code == 0) {
    Job resultJob = new Job(conf, "DayhslogUserDate");
    resultJob.setJarByClass(DayhslogUserDate.class);
    FileInputFormat.addInputPath(resultJob, new Path(args[0]));
    FileInputFormat.addInputPath(resultJob, new Path(args[2] + "tmp"));
    FileOutputFormat.setOutputPath(resultJob, new Path(args[2]));
    FileSystem.get(conf).delete(new Path(args[2]), true);

    resultJob.setMapperClass(DayhslogUserDateMapper.class);
    resultJob.setReducerClass(DayhslogUserDateReducer.class);
    resultJob.setNumReduceTasks(10);
    resultJob.setMapOutputKeyClass(Text.class);
    resultJob.setMapOutputValueClass(Text.class);
    resultJob.setOutputKeyClass(Text.class);
    resultJob.setOutputValueClass(Text.class);

    code = resultJob.waitForCompletion(true) ? 0 : 1;
  }

  // Clean up the intermediate directory.
  Path tmpPath = new Path(args[2] + "tmp");
  FileSystem.get(conf).delete(tmpPath, true);
  // Return the status; ToolRunner turns it into the process exit code.
  return code;
}
public static void main(String[] args)
    throws IOException, ClassNotFoundException, InterruptedException {
  Configuration conf = new Configuration();
  Job job = new Job(conf, "job");
  job.setJarByClass(PVidConvert.class);
  job.setMapperClass(Map1.class);
  job.setReducerClass(Reduce1.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path("/tmp/temporary_execution/"));
  // The second job consumes the first job's output, so bail out on failure.
  if (!job.waitForCompletion(true)) {
    System.exit(1);
  }

  Configuration conf1 = new Configuration();
  Job job1 = new Job(conf1, "job1");
  job1.setJarByClass(PVidConvert.class);
  job1.setMapperClass(Map2.class);
  job1.setReducerClass(Reduce2.class);
  job1.setOutputKeyClass(Text.class);
  job1.setOutputValueClass(Text.class);
  FileInputFormat.addInputPath(job1, new Path("/tmp/temporary_execution/"));
  FileOutputFormat.setOutputPath(job1, new Path(args[1]));
  System.exit(job1.waitForCompletion(true) ? 0 : 1);
}
public int run(String[] args) throws Exception {
  Configuration conf = getConf();
  GenericOptionsParser optionparser = new GenericOptionsParser(conf, args);
  conf = optionparser.getConfiguration();

  Job job = new Job(conf, conf.get("job_name"));
  job.setJarByClass(DeliverFormatForUVMR.class);
  FileInputFormat.addInputPaths(job, conf.get("input_dir"));
  String outputDir = conf.get("output_dir");
  String tmpDir = outputDir + "_tmp";
  Path tmpOut = new Path(tmpDir);
  FileOutputFormat.setOutputPath(job, tmpOut);
  tmpOut.getFileSystem(conf).delete(tmpOut, true);

  job.setMapperClass(DeliverFormatForUVMapper.class);
  job.setCombinerClass(DeliverFormatForUVCombiner.class);
  job.setReducerClass(DeliverFormatForUVReducer.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(conf.getInt("reduce_num", 20));

  int code = job.waitForCompletion(true) ? 0 : 1;
  if (code == 0) {
    // This job combines the small temporary files into one output file.
    Job combineJob = new Job(conf, "CombineTmpData");
    combineJob.setJarByClass(DeliverFormatForUVMR.class);
    FileInputFormat.addInputPath(combineJob, new Path(tmpDir));
    FileOutputFormat.setOutputPath(combineJob, new Path(outputDir));
    combineJob.setMapperClass(IdentityMapper.class);
    combineJob.setReducerClass(IdentityReducer.class);
    combineJob.setInputFormatClass(KeyValueTextInputFormat.class);
    combineJob.setOutputFormatClass(TextOutputFormat.class);
    combineJob.setOutputKeyClass(Text.class);
    combineJob.setOutputValueClass(Text.class);
    TextOutputFormat.setCompressOutput(combineJob, true);
    TextOutputFormat.setOutputCompressorClass(combineJob, LzopCodec.class);
    combineJob.setNumReduceTasks(1);
    code = combineJob.waitForCompletion(true) ? 0 : 1;
  }
  FileSystem.get(conf).delete(tmpOut, true);

  // Index the LZO output so it becomes splittable for downstream jobs.
  LzoIndexer lzoIndexer = new LzoIndexer(conf);
  lzoIndexer.index(new Path(outputDir));
  // Return the status; ToolRunner turns it into the process exit code.
  return code;
}
/**
 * When injected into a class, this method modifies the job's output path by
 * appending an underscore separator and the current time in milliseconds.
 * Intended only for jobs whose output goes to HDFS.
 *
 * @param job job whose output path needs to be changed
 */
public static void modifyOutputPath(Job job) {
  Path path = FileOutputFormat.getOutputPath(job);
  if (path == null) {
    throw new IllegalArgumentException("Job output path is null, expecting non-null path value");
  }
  StringBuilder out = new StringBuilder(path.toString());
  out.append(SEPARATOR_UNDERSCORE).append(System.currentTimeMillis());
  FileOutputFormat.setOutputPath(job, new Path(out.toString()));
}
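/*
 * A minimal usage sketch for modifyOutputPath above; the driver method, the
 * "/data/out" path, and the job name are hypothetical, not part of the
 * original code.
 */
public static void exampleModifyOutputPath(Configuration conf) throws IOException {
  Job job = Job.getInstance(conf, "example");
  FileOutputFormat.setOutputPath(job, new Path("/data/out"));
  // After this call, the output path reads /data/out_<currentTimeMillis>.
  modifyOutputPath(job);
}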
public int run(String[] args) throws Exception {
  Configuration conf = getConf();
  GenericOptionsParser gop = new GenericOptionsParser(conf, args);
  conf = gop.getConfiguration();

  Job job = new Job(conf, "ClientUserInstallMR");
  job.setJarByClass(ClientUserInstallMR.class);
  FileInputFormat.addInputPaths(job, conf.get("input_dir"));
  String outputDir = conf.get("output_dir");
  String tmpDir = outputDir + "_tmp";
  Path tmpOutput = new Path(tmpDir);
  FileOutputFormat.setOutputPath(job, tmpOutput);
  tmpOutput.getFileSystem(conf).delete(tmpOutput, true);

  job.setMapperClass(ClientUserInstallFirstMapper.class);
  job.setReducerClass(ClientUserInstallFirstReduce.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(30);

  int code = job.waitForCompletion(true) ? 0 : 1;
  if (code == 0) {
    Job secondJob = new Job(conf, "ClientUserInstallResult");
    secondJob.setJarByClass(ClientUserInstallMR.class);
    FileInputFormat.addInputPath(secondJob, new Path(tmpDir));
    Path output = new Path(outputDir);
    FileOutputFormat.setOutputPath(secondJob, output);
    output.getFileSystem(conf).delete(output, true);

    secondJob.setMapperClass(ClientUserInstallSecondMapper.class);
    secondJob.setReducerClass(ClientUserInstallSecondReduce.class);
    secondJob.setInputFormatClass(KeyValueTextInputFormat.class);
    secondJob.setOutputFormatClass(TextOutputFormat.class);
    secondJob.setOutputKeyClass(Text.class);
    secondJob.setOutputValueClass(Text.class);
    secondJob.setNumReduceTasks(1);

    code = secondJob.waitForCompletion(true) ? 0 : 1;
  }
  FileSystem.get(conf).delete(tmpOutput, true);
  // Return the status; ToolRunner turns it into the process exit code.
  return code;
}
/** Runs this tool. */
public int run(String[] args) throws IOException {
  DocnoMapping.DefaultBuilderOptions options =
      DocnoMapping.BuilderUtils.parseDefaultOptions(args);
  if (options == null) {
    return -1;
  }

  // Temp directory.
  String tmpDir =
      "tmp-" + TrecDocnoMappingBuilder.class.getSimpleName() + "-" + random.nextInt(10000);

  LOG.info("Tool name: " + TrecDocnoMappingBuilder.class.getCanonicalName());
  LOG.info(" - input path: " + options.collection);
  LOG.info(" - output file: " + options.docnoMapping);

  Job job =
      new Job(getConf(), TrecDocnoMappingBuilder.class.getSimpleName() + ":" + options.collection);
  FileSystem fs = FileSystem.get(job.getConfiguration());

  job.setJarByClass(TrecDocnoMappingBuilder.class);
  job.setNumReduceTasks(1);

  FileInputFormat.setInputPaths(job, new Path(options.collection));
  FileOutputFormat.setOutputPath(job, new Path(tmpDir));
  FileOutputFormat.setCompressOutput(job, false);

  job.setInputFormatClass(TrecDocumentInputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  job.setMapperClass(MyMapper.class);
  job.setReducerClass(MyReducer.class);

  // Delete the output directory if it exists already.
  fs.delete(new Path(tmpDir), true);

  try {
    job.waitForCompletion(true);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }

  String input = tmpDir + (tmpDir.endsWith("/") ? "" : "/") + "part-r-00000";
  TrecDocnoMapping.writeMappingData(
      new Path(input), new Path(options.docnoMapping), FileSystem.get(getConf()));

  fs.delete(new Path(tmpDir), true);
  return 0;
}
@SuppressWarnings("unchecked") @Override public void setStoreLocation(String location, Job job) throws IOException { log.debug("setStoreLocation({}, {})", location, job); job.getConfiguration().set("mapred.textoutputformat.separator", ""); FileOutputFormat.setOutputPath(job, new Path(location)); if ("true".equals(job.getConfiguration().get("output.compression.enabled"))) { FileOutputFormat.setCompressOutput(job, true); String codec = job.getConfiguration().get("output.compression.codec"); try { FileOutputFormat.setOutputCompressorClass( job, (Class<? extends CompressionCodec>) Class.forName(codec)); } catch (ClassNotFoundException e) { throw new RuntimeException("Class not found: " + codec); } } else { if (location.endsWith(".bz2") || location.endsWith(".bz")) { FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class); } else if (location.endsWith(".gz")) { FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class); } else { FileOutputFormat.setCompressOutput(job, false); } } }
public static void main(String[] args) throws Exception {
  String paths = "/user/cloudera/00";
  String path1 = paths;
  String path2 = "";

  for (int i = 1; i <= 3; i++) {
    System.out.println("Now executing the " + i + "-th job!");
    Job job = new Job();
    job.setJarByClass(PageRank.class);
    job.setJobName("PageRank");

    path2 = paths + i;
    FileInputFormat.addInputPath(job, new Path(path1));
    FileOutputFormat.setOutputPath(job, new Path(path2));

    job.setMapperClass(PageRankMapper.class);
    job.setReducerClass(PageRankReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // This iteration's output becomes the next iteration's input.
    path1 = path2;
    job.waitForCompletion(true);
  }
  // System.exit(job.waitForCompletion(true) ? 0 : 1);
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length < 2) {
    System.err.println("Usage: RemoveDup <in> [<in>...] <out>");
    System.exit(2);
  }

  // Delete the output directory (optional; avoids the "output directory
  // already exists" error on repeated runs).
  // HDFSUtil.deleteFile(conf, otherArgs[otherArgs.length - 1]);

  Job job = Job.getInstance(conf, "RemoveDup");
  job.setJarByClass(RemoveDup.class);
  job.setMapperClass(RemoveDupMapper.class);
  job.setCombinerClass(RemoveDupReducer.class);
  job.setReducerClass(RemoveDupReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(NullWritable.class);

  for (int i = 0; i < otherArgs.length - 1; ++i) {
    FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
  }
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
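/*
 * The HDFSUtil.deleteFile helper referenced above is project-specific; this
 * is a sketch of the equivalent using the stock FileSystem API (method name
 * assumed), matching what several other drivers in this collection do inline.
 */
public static void deleteOutputDir(Configuration conf, String dir) throws IOException {
  Path out = new Path(dir);
  FileSystem fs = FileSystem.get(conf);
  if (fs.exists(out)) {
    fs.delete(out, true); // recursive delete
  }
}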
private boolean runJob(Configuration conf)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job job = Job.getInstance(conf, "WordCount");
  job.setJarByClass(WordCount.class);

  // Configure input format and files
  job.setInputFormatClass(TextInputFormat.class);
  FileInputFormat.addInputPath(job, new Path(inputDir));

  // Configure output format and files
  job.setOutputFormatClass(TextOutputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(outputDir));

  // Set up mapper, combiner and reducer
  job.setMapperClass(WordCountMapper.class);
  job.setReducerClass(WordCountReducer.class);
  job.setCombinerClass(WordCountReducer.class);

  // Set key and value types (sorting, grouping and partitioning use defaults)
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntWritable.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);

  return job.waitForCompletion(true);
}
public int run(String[] args) throws Exception {
  Configuration conf = getConf();
  GenericOptionsParser gop = new GenericOptionsParser(conf, args);
  conf = gop.getConfiguration();

  Job job = new Job(conf, conf.get("job_name"));
  FileInputFormat.addInputPaths(job, conf.get("input_dir"));
  Path output = new Path(conf.get("output_dir"));
  FileOutputFormat.setOutputPath(job, output);
  output.getFileSystem(conf).delete(output, true);

  job.setJarByClass(BrowerLogFormatMR.class);
  job.setMapperClass(BrowerLogFormatMapper.class);
  job.setReducerClass(BrowerLogFormatReducer.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(1);

  int code = job.waitForCompletion(true) ? 0 : 1;
  return code;
}
public static void main(String[] args)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  // The job reads otherArgs[0] and otherArgs[1], so require both (the
  // original only checked for one argument).
  if (otherArgs.length < 2) {
    System.out.println("USAGE: RFDSReasoner [pool path] [options]");
    return;
  }

  Job job = new Job(conf, "reasoner");
  job.setJarByClass(TCMReasoner.class);
  System.out.println(args[0]);

  job.setMapperClass(TCMMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Triple.class);
  job.setReducerClass(TCMReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Triple.class);

  FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
  job.waitForCompletion(true);

  Counter derivedTriples =
      job.getCounters()
          .findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS");
  System.out.println(derivedTriples.getValue());
}
@Override
public int run(String[] args) throws Exception {
  if (args.length < 7) {
    // Argument roles follow from the assignments below.
    System.err.println("Usage: OSMGrid <input> <output> <minLat> <minLon> <maxLat> <maxLon> <grid>");
    System.exit(-1);
  }
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, "OSM-Gridding");
  job.setJarByClass(OSMGrid.class);
  job.setOutputKeyClass(WritablePoint.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(OSMMapper.class);
  job.setPartitionerClass(GridPartitioner.class);
  job.setReducerClass(OSMReducer.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  job.getConfiguration().set(OSMMapper.MINLAT, args[2]);
  job.getConfiguration().set(OSMMapper.MINLON, args[3]);
  job.getConfiguration().set(OSMMapper.MAXLAT, args[4]);
  job.getConfiguration().set(OSMMapper.MAXLON, args[5]);
  job.getConfiguration().set(OSMReducer.GRID, args[6]);
  // One reducer per grid cell.
  job.setNumReduceTasks(Integer.parseInt(args[6]) * Integer.parseInt(args[6]));

  boolean succ = job.waitForCompletion(true);
  return succ ? 0 : 1;
}
public int run(String[] args) throws Exception {
  // Check input arguments
  if (args.length != 2) {
    System.out.println("Usage: firstprog <input HIB> <output directory>");
    System.exit(1);
  }

  // Initialize and configure MapReduce job
  Job job = Job.getInstance();

  // Set input format class which parses the input HIB and spawns map tasks
  // job.setInputFormatClass(ImageBundleInputFormat.class);
  job.setInputFormatClass(HibInputFormat.class);

  // Set the driver, mapper, and reducer classes which express the computation
  job.setJarByClass(SampleProgram.class);
  job.setMapperClass(SampleProgramMapper.class);
  job.setReducerClass(SampleProgramReducer.class);

  // Set the types for the key/value pairs passed to/from map and reduce layers
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(FloatImage.class);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(Text.class);

  // Set the input and output paths on HDFS
  FileInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  // Execute the MapReduce job and block until it completes
  boolean success = job.waitForCompletion(true);

  // Return success or failure
  return success ? 0 : 1;
}
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); conf.set("job.name", System.currentTimeMillis() + "/"); Properties configProps = loadJobProperties(); CacheUtils.addSerializableToCache(conf, randomDenseMapVector(100000), "inputVector"); Job job = new Job(conf, "matrix multiply"); job.setJarByClass(MatrixMultiplyJob.class); job.setMapperClass(MatrixMultiplyMapper.class); job.setReducerClass(MatrixMultiplyReducer.class); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(MapVectorWritableComparable.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); String timestamp = new Date().toString().replace(' ', '_').replace(':', '_'); FileInputFormat.addInputPath( job, new Path(configProps.getProperty("sparse.vector.output.path"))); FileOutputFormat.setOutputPath( job, new Path(configProps.getProperty("dense.vector.output.path") + timestamp)); return job.waitForCompletion(true) ? 1 : -1; }
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); Job job = new Job(conf, "ESIndexCreator"); job.setJarByClass(ESIndexCreator.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setMapperClass(ESIndexCreator.MyMapper.class); job.setNumReduceTasks(0); // Skip Reduce Task job.setInputFormatClass(KeyValueTextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); // 프로그램 인자 // 0: 입력 파일 경로 // 1: 출력 파일 경로 // 2: elastic search server's host name FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.getConfiguration().set("host", args[2]); job.waitForCompletion(true); }
public int run(String[] args) throws Exception {
  if (args.length < 2) {
    LOG.error("Usage: " + getClass().getName() + " inputFileHDFS outputFileHDFS");
    return 1;
  }
  String inputFile = args[0];
  // Suffix the output path with nanoTime so repeated runs don't collide.
  String outputFile = args[1] + System.nanoTime();

  Configuration configuration = getConf();
  Job job = new Job(configuration);
  job.setJarByClass(getClass());
  job.setJobName(getClass().getName());
  job.setMapperClass(ReadRequestMap.class);
  job.setNumReduceTasks(0);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  FileInputFormat.setInputPaths(job, new Path(inputFile));
  FileOutputFormat.setOutputPath(job, new Path(outputFile));

  // Propagate the job's outcome instead of always returning success.
  return job.waitForCompletion(true) ? 0 : 1;
}
private static void StartingJob()
    throws IOException, InterruptedException, ClassNotFoundException {
  conf = new Configuration();
  fs = FileSystem.get(conf);
  conf.setLong("my.vertex.num", num);

  job = Job.getInstance(conf, "Levelized Nested Dissection Starting");
  job.setJarByClass(LevNestDissectJob.class);
  job.setMapperClass(StartVertexMapper.class);
  job.setReducerClass(StartVertexReducer.class);

  in = out.suffix("/" + outPath_count);
  FileInputFormat.addInputPath(job, in);
  out_start = out.suffix("/" + outPath_start);
  if (fs.exists(out_start)) {
    fs.delete(out_start, true);
  }
  FileOutputFormat.setOutputPath(job, out_start);

  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(VertexWritable.class);
  job.setMapOutputValueClass(Text.class);

  job.waitForCompletion(true);
  depth = depth == 0 ? depth + 1 : depth;
  wasStart = true;
}
public static void dijkstra(String input, String output) throws Exception {
  String temp = output;
  // Run the HITS algorithm (Job 2) for k = 32 iterations.
  for (int i = 0; i < 32; i++) {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "hubsandspokes");
    job.setJarByClass(HubsAndSpokes.class);
    job.setMapperClass(HubSpokeMapper.class);
    job.setReducerClass(HubSpokeReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NodeWritable.class);
    job.setOutputKeyClass(NodeWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1);
    FileInputFormat.addInputPath(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));

    // Swap the input and output variables for the next iteration.
    input = output;
    output = temp + Integer.toString(i);

    // Wait for the job to complete.
    boolean b = job.waitForCompletion(true);
    if (!b) {
      System.exit(2);
    }
    // System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}
public static void main(String[] args) throws Exception {
  // Job 1: convert all the flight data into NODE (graph) structure.
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length != 3) {
    System.err.println("Usage: hubsandspokesload <in> <out> <finalout>");
    System.exit(2);
  }
  Job job = new Job(conf, "hubsandspokesload");
  job.setJarByClass(HubsAndSpokes.class);
  job.setMapperClass(HubSpokeLoadMapper.class);
  job.setReducerClass(HubSpokeLoadReducer.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputKeyClass(NodeWritable.class);
  job.setOutputValueClass(Text.class);
  FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
  boolean b = job.waitForCompletion(true);
  if (!b) {
    System.exit(2);
  }

  // Job 2: run the HITS algorithm to compute the hub and spoke values
  // at each node of the graph iteratively.
  dijkstra(otherArgs[1], otherArgs[2]);
  // dijkstra("output1", "finaloutput");
}
/** Job configuration. */
public static Job configureJob(Configuration conf, String[] args) throws IOException {
  String tableName = args[0];
  String columnFamily = args[1];
  String outputPath = args[2];
  String rowKeyType = args[3];
  conf.set("row.key.type", rowKeyType);
  conf.set("table.name", tableName);

  Scan scan = new Scan();
  scan.addFamily(Bytes.toBytes(columnFamily));
  scan.setBatch(ConstantsTruthy.TRUTHY_TABLE_SCAN_BATCH);

  conf.set("mapred.map.tasks.speculative.execution", "false");
  conf.set("mapred.reduce.tasks.speculative.execution", "false");

  Job job =
      Job.getInstance(
          conf, "Count the column count and indexRecordSize for each row in " + tableName);
  job.setJarByClass(TruthyIndexFeatureCounter.class);
  TableMapReduceUtil.initTableMapperJob(
      tableName, scan, TfcMapper.class, Text.class, Text.class, job, true);
  job.setNumReduceTasks(0);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  FileOutputFormat.setOutputPath(job, new Path(outputPath));
  TableMapReduceUtil.addDependencyJars(job);
  return job;
}
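/*
 * configureJob above only builds the Job; a hypothetical driver (main method
 * and exit handling assumed, not part of the original code) would submit it
 * like this:
 */
public static void main(String[] args) throws Exception {
  Configuration conf = HBaseConfiguration.create();
  Job job = configureJob(conf, args);
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}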
public static void main(String[] args)
    throws IOException, InterruptedException, ClassNotFoundException {
  if (args.length != 2) {
    throw new IllegalArgumentException(args.length + " usage: ... ");
  }
  String bitvectorpath = args[0], outputPath = args[1];

  Configuration conf = new Configuration();
  Job job = Job.getInstance(new Cluster(conf), conf);
  job.setJobName("int key replace phase1");
  job.setJarByClass(OutlinkGrowthAnalysis.class);

  job.setMapperClass(BVIdentitiyMapper.class);
  job.setReducerClass(AnaylseOLGrowthReducer.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  job.setInputFormatClass(TabSeperatedTextInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);

  FileOutputFormat.setOutputPath(job, new Path(outputPath));
  FileInputFormat.setInputPaths(job, new Path(bitvectorpath));
  job.setNumReduceTasks(1);
  job.waitForCompletion(true);
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] remainArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (remainArgs.length != 2) {
    System.err.println("Usage: wordcount <input> <output>");
    System.exit(1);
  }

  Job job = new Job(conf, "wordcount");
  job.setJarByClass(WordCount.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setMapperClass(Map.class);
  job.setCombinerClass(Reduce.class);
  job.setReducerClass(Reduce.class);
  job.setNumReduceTasks(4);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  // Delete any existing output directory so the job can be re-run.
  FileSystem.get(conf).delete(new Path(remainArgs[1]), true);

  FileInputFormat.setInputPaths(job, new Path(remainArgs[0]));
  FileOutputFormat.setOutputPath(job, new Path(remainArgs[1]));

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
@Override public int run(String[] args) throws Exception { Job job = new Job(getConf()); String jobName = "Running aggregates for numerical attributes"; job.setJobName(jobName); job.setJarByClass(RunningAggregator.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); Utility.setConfiguration(job.getConfiguration(), "chombo"); job.setMapperClass(RunningAggregator.AggrMapper.class); job.setReducerClass(RunningAggregator.AggrReducer.class); job.setMapOutputKeyClass(Tuple.class); job.setMapOutputValueClass(Tuple.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); job.setNumReduceTasks(job.getConfiguration().getInt("num.reducer", 1)); int status = job.waitForCompletion(true) ? 0 : 1; return status; }
private void runIncrementalPELoad(
    Configuration conf, HTableDescriptor tableDescriptor, RegionLocator regionLocator, Path outDir)
    throws IOException, UnsupportedEncodingException, InterruptedException,
        ClassNotFoundException {
  Job job = new Job(conf, "testLocalMRIncrementalLoad");
  job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
  job.getConfiguration()
      .setStrings(
          "io.serializations",
          conf.get("io.serializations"),
          MutationSerialization.class.getName(),
          ResultSerialization.class.getName(),
          KeyValueSerialization.class.getName());
  setupRandomGeneratorMapper(job);
  HFileOutputFormat2.configureIncrementalLoad(job, tableDescriptor, regionLocator);
  FileOutputFormat.setOutputPath(job, outDir);

  assertFalse(util.getTestFileSystem().exists(outDir));
  // configureIncrementalLoad sets one reducer per region.
  assertEquals(regionLocator.getAllRegionLocations().size(), job.getNumReduceTasks());
  assertTrue(job.waitForCompletion(true));
}
@Override
protected void configureJob(Job job) throws IOException {
  Configuration conf = job.getConfiguration();

  job.setJarByClass(PartialBuilder.class);
  FileInputFormat.setInputPaths(job, getDataPath());
  FileOutputFormat.setOutputPath(job, getOutputPath(conf));

  job.setOutputKeyClass(TreeID.class);
  job.setOutputValueClass(MapredOutput.class);
  job.setMapperClass(Step1Mapper.class);
  job.setNumReduceTasks(0); // no reducers

  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);

  // For this implementation to work, mapred.map.tasks needs to be set to the
  // actual number of mappers Hadoop will use:
  TextInputFormat inputFormat = new TextInputFormat();
  List<?> splits = inputFormat.getSplits(job);
  if (splits == null || splits.isEmpty()) {
    log.warn("Unable to compute number of splits?");
  } else {
    int numSplits = splits.size();
    log.info("Setting mapred.map.tasks = {}", numSplits);
    conf.setInt("mapred.map.tasks", numSplits);
  }
}
@Override public int run(String[] args) throws Exception { Job job = new Job(getConf()); String jobName = "Rating predictor MR"; job.setJobName(jobName); job.setJarByClass(UtilityPredictor.class); FileInputFormat.addInputPaths(job, args[0]); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setMapperClass(UtilityPredictor.PredictionMapper.class); job.setReducerClass(UtilityPredictor.PredictorReducer.class); job.setMapOutputKeyClass(TextInt.class); job.setMapOutputValueClass(Tuple.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); job.setGroupingComparatorClass(ItemIdGroupComprator.class); job.setPartitionerClass(ItemIdPartitioner.class); Utility.setConfiguration(job.getConfiguration()); int numReducer = job.getConfiguration().getInt("utp.num.reducer", -1); numReducer = -1 == numReducer ? job.getConfiguration().getInt("num.reducer", 1) : numReducer; job.setNumReduceTasks(numReducer); int status = job.waitForCompletion(true) ? 0 : 1; return status; }