@Override
public int run(String[] args) throws Exception {
  JobConf conf = JobBuilder.parseInputAndOutput(this, getConf(), args);
  if (conf == null) {
    return -1;
  }

  conf.setInputFormat(SequenceFileInputFormat.class);
  conf.setOutputKeyClass(IntWritable.class);
  conf.setOutputFormat(SequenceFileOutputFormat.class);
  SequenceFileOutputFormat.setCompressOutput(conf, true);
  SequenceFileOutputFormat.setOutputCompressorClass(conf, GzipCodec.class);
  SequenceFileOutputFormat.setOutputCompressionType(conf, CompressionType.BLOCK);

  conf.setPartitionerClass(TotalOrderPartitioner.class);

  InputSampler.Sampler<IntWritable, Text> sampler =
      new InputSampler.RandomSampler<IntWritable, Text>(0.1, 10000, 10);

  Path input = FileInputFormat.getInputPaths(conf)[0];
  input = input.makeQualified(input.getFileSystem(conf));

  Path partitionFile = new Path(input, "_partitions");
  TotalOrderPartitioner.setPartitionFile(conf, partitionFile);
  InputSampler.writePartitionFile(conf, sampler);

  // Add to DistributedCache
  URI partitionUri = new URI(partitionFile.toString() + "#_partitions");
  DistributedCache.addCacheFile(partitionUri, conf);
  DistributedCache.createSymlink(conf);

  JobClient.runJob(conf);
  return 0;
}
public static void main(String[] args) throws Exception {
  String dir1 = "/user/miyuru/wcout";
  String dir2 = "/user/miyuru/notinverts";

  // We first delete the output directory if it already exists on HDFS.
  FileSystem fs1 = FileSystem.get(new JobConf());
  if (fs1.exists(new Path(dir2))) {
    fs1.delete(new Path(dir2), true);
  }

  JobConf conf = new JobConf();
  conf.setNumMapTasks(96);
  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(LongWritable.class);
  conf.setMapperClass(TokenizerMapper.class);
  conf.setReducerClass(IntSumReducer.class);
  conf.setCombinerClass(IntSumReducer.class);
  conf.setInputFormat(NLinesInputFormat.class);
  conf.setOutputFormat(TextOutputFormat.class);
  FileInputFormat.setInputPaths(conf, new Path(dir1));
  FileOutputFormat.setOutputPath(conf, new Path(dir2));

  // The JobConf settings above are copied into the Job when it is constructed.
  Job job = new Job(conf, "NotInFinder");
  job.setJarByClass(WordCount.class);
  // job.setMapperClass(TokenizerMapper.class);
  // job.setCombinerClass(IntSumReducer.class);
  // job.setReducerClass(IntSumReducer.class);
  // job.setOutputKeyClass(LongWritable.class);
  // job.setOutputValueClass(LongWritable.class);
  job.setSortComparatorClass(SortComparator.class);

  job.waitForCompletion(true);
}
public static void getData(CloudataConf conf, Path keyPath) throws IOException {
  JobConf jobConf = new JobConf(TeraReadJob.class);
  jobConf.set("user.name", conf.getUserId());
  String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

  Path tempOutputPath = new Path("ManyTableJob_Get_" + System.currentTimeMillis());
  jobConf.setJobName("ManyTableJob_Get_" + "(" + new Date() + ")");
  TextOutputFormat.setOutputPath(jobConf, tempOutputPath);

  // <MAP>
  jobConf.setMapperClass(ManyTableGetMap.class);
  jobConf.setInputFormat(TextInputFormat.class);
  TextInputFormat.addInputPath(jobConf, keyPath);
  jobConf.setMapSpeculativeExecution(false);
  jobConf.setMaxMapAttempts(0);
  // </MAP>

  // <REDUCE>
  jobConf.setNumReduceTasks(0);
  // </REDUCE>

  try {
    // Run Job
    JobClient.runJob(jobConf);
  } finally {
    // delete temp output path
    FileSystem fs = FileSystem.get(jobConf);
    FileUtil.delete(fs, tempOutputPath, true);
    CloudataMapReduceUtil.clearMapReduce(libDir);
  }
}
@Override
public int run(String[] args) throws Exception {
  JobConf conf = new JobConf(getConf(), getClass());
  conf.setJobName("UFO count");

  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length != 2) {
    System.err.println("Usage: avro UFO counter <in> <out>");
    System.exit(2);
  }

  FileInputFormat.addInputPath(conf, new Path(otherArgs[0]));
  Path outputPath = new Path(otherArgs[1]);
  FileOutputFormat.setOutputPath(conf, outputPath);
  outputPath.getFileSystem(conf).delete(outputPath, true);

  Schema input_schema = Schema.parse(getClass().getResourceAsStream("ufo.avsc"));
  AvroJob.setInputSchema(conf, input_schema);
  AvroJob.setMapOutputSchema(
      conf,
      Pair.getPairSchema(Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.LONG)));
  AvroJob.setOutputSchema(conf, OUTPUT_SCHEMA);
  AvroJob.setMapperClass(conf, AvroRecordMapper.class);
  AvroJob.setReducerClass(conf, AvroRecordReducer.class);
  conf.setInputFormat(AvroInputFormat.class);

  JobClient.runJob(conf);
  return 0;
}
public static void main(String[] args) throws Exception {
  String input = "hdfs://centos:9000/access.log.10";
  String output = "hdfs://centos:9000/out_kpitime";

  JobConf conf = new JobConf(KPITime.class);
  conf.setJobName("KPITime");
  // conf.addResource("classpath:/hadoop/core-site.xml");
  // conf.addResource("classpath:/hadoop/hdfs-site.xml");
  // conf.addResource("classpath:/hadoop/mapred-site.xml");

  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(IntWritable.class);

  conf.setMapperClass(KPITimeMapper.class);
  conf.setCombinerClass(KPITimeReducer.class);
  conf.setReducerClass(KPITimeReducer.class);

  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(TextOutputFormat.class);

  FileInputFormat.setInputPaths(conf, new Path(input));
  FileOutputFormat.setOutputPath(conf, new Path(output));

  JobClient.runJob(conf);
  System.exit(0);
}
public RunningJob run(String inputPath, String outputPath) throws Exception {
  JobConf conf = new JobConf(BuildIndex.class);
  conf.setJobName("BuildIndex");

  FileInputFormat.addInputPath(conf, new Path(inputPath)); // multiple path
  FileOutputFormat.setOutputPath(conf, new Path(outputPath));

  conf.setOutputFormat(TextOutputFormat.class);
  conf.setMapOutputKeyClass(LongWritable.class);
  conf.setMapOutputValueClass(LongWritable.class);
  conf.set("delim", delim);
  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(LongWritable.class);
  conf.setInt("keyFieldIndexTwo", keyFieldIndexTwo);

  conf.setMapperClass(BuildIndexMapper.class);
  conf.setNumReduceTasks(1);
  conf.setReducerClass(BuildIndexReducer.class);

  conf.setInputFormat(TextInputFormat.class);
  // conf.setInputFormat(CustomInputFormat.class);
  // FileOutputFormat.setCompressOutput(conf, true);

  // delete the output directory if it exists already
  FileSystem.get(conf).delete(new Path(outputPath), true);

  return JobClient.runJob(conf);
}
public RunningJob run(String inputPath, String outputPath) throws Exception {
  sLogger.info("Tool name: Compressible");
  sLogger.info(" - input: " + inputPath);
  sLogger.info(" - output: " + outputPath);

  // JobConf conf = new JobConf(Stats.class);
  JobConf conf = new JobConf(Compressible.class);
  conf.setJobName("Compressible " + inputPath);

  BrushConfig.initializeConfiguration(conf);

  FileInputFormat.addInputPath(conf, new Path(inputPath));
  FileOutputFormat.setOutputPath(conf, new Path(outputPath));

  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(TextOutputFormat.class);

  conf.setMapOutputKeyClass(Text.class);
  conf.setMapOutputValueClass(Text.class);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);

  conf.setMapperClass(CompressibleMapper.class);
  conf.setReducerClass(CompressibleReducer.class);

  // delete the output directory if it exists already
  FileSystem.get(conf).delete(new Path(outputPath), true);

  return JobClient.runJob(conf);
}
/**
 * Run the job.
 *
 * @param params the job parameters containing the gramSize, input and output folders,
 *     defaultCat, and encoding
 */
public static void runJob(Parameters params) throws IOException {
  Configurable client = new JobClient();
  JobConf conf = new JobConf(BayesClassifierDriver.class);
  conf.setJobName("Bayes Classifier Driver running over input: " + params.get("testDirPath"));

  conf.setOutputKeyClass(StringTuple.class);
  conf.setOutputValueClass(DoubleWritable.class);

  FileInputFormat.setInputPaths(conf, new Path(params.get("testDirPath")));
  Path outPath = new Path(params.get("testDirPath") + "-output");
  FileOutputFormat.setOutputPath(conf, outPath);

  conf.setInputFormat(KeyValueTextInputFormat.class);
  conf.setMapperClass(BayesClassifierMapper.class);
  conf.setCombinerClass(BayesClassifierReducer.class);
  conf.setReducerClass(BayesClassifierReducer.class);
  conf.setOutputFormat(SequenceFileOutputFormat.class);

  conf.set(
      "io.serializations",
      "org.apache.hadoop.io.serializer.JavaSerialization,"
          + "org.apache.hadoop.io.serializer.WritableSerialization");

  HadoopUtil.overwriteOutput(outPath);
  conf.set("bayes.parameters", params.toString());

  client.setConf(conf);
  JobClient.runJob(conf);

  Path outputFiles = new Path(outPath, "part*");
  FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
  ConfusionMatrix matrix = readResult(dfs, outputFiles, conf, params);
  log.info("{}", matrix.summarize());
}
/**
 * Configure the job.
 *
 * @param conf Job to configure
 * @param rules classification rules to evaluate
 * @param target label value to evaluate the rules for
 * @param inpath input path (the dataset)
 * @param outpath output <code>Path</code>
 * @param split DatasetSplit used to separate training and testing input
 */
private static void configureJob(
    JobConf conf,
    List<? extends Rule> rules,
    int target,
    Path inpath,
    Path outpath,
    DatasetSplit split) {
  split.storeJobParameters(conf);

  FileInputFormat.setInputPaths(conf, inpath);
  FileOutputFormat.setOutputPath(conf, outpath);

  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(CDFitness.class);

  conf.setMapperClass(CDMapper.class);
  conf.setCombinerClass(CDReducer.class);
  conf.setReducerClass(CDReducer.class);

  conf.setInputFormat(DatasetTextInputFormat.class);
  conf.setOutputFormat(SequenceFileOutputFormat.class);

  // store the parameters
  conf.set(CDMapper.CLASSDISCOVERY_RULES, StringUtils.toString(rules));
  conf.set(CDMapper.CLASSDISCOVERY_DATASET, StringUtils.toString(DataSet.getDataSet()));
  conf.setInt(CDMapper.CLASSDISCOVERY_TARGET_LABEL, target);
}
public static void runSortJob(String... args) throws Exception {
  Path input = new Path(args[0]);
  Path output = new Path(args[1]);

  JobConf job = new JobConf();
  job.setNumReduceTasks(2);

  job.setInputFormat(KeyValueTextInputFormat.class);
  job.setOutputFormat(TextOutputFormat.class);

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);

  FileInputFormat.setInputPaths(job, input);
  FileOutputFormat.setOutputPath(job, output);

  job.setJarByClass(SampleJob.class);

  output.getFileSystem(job).delete(output, true);

  JobClient jc = new JobClient(job);
  JobClient.setTaskOutputFilter(job, JobClient.TaskStatusFilter.ALL);
  RunningJob rj = jc.submitJob(job);
  try {
    if (!jc.monitorAndPrintJob(job, rj)) {
      System.out.println("Job Failed: " + rj.getFailureInfo());
      throw new IOException("Job failed!");
    }
  } catch (InterruptedException ie) {
    Thread.currentThread().interrupt();
  }
}
public static void runJob(String[] args) {
  JobConf conf = new JobConf(CassandraBulkLoader.class);

  if (args.length >= 4) {
    conf.setNumReduceTasks(Integer.parseInt(args[3]));
  }

  try {
    // We store the cassandra storage-conf.xml on the HDFS cluster
    DistributedCache.addCacheFile(new URI("/cassandra/storage-conf.xml#storage-conf.xml"), conf);
  } catch (URISyntaxException e) {
    throw new RuntimeException(e);
  }

  conf.setInputFormat(KeyValueTextInputFormat.class);
  conf.setJobName("CassandraBulkLoader_v2");
  conf.setMapperClass(Map.class);
  conf.setReducerClass(Reduce.class);

  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);

  FileInputFormat.setInputPaths(conf, new Path(args[1]));
  FileOutputFormat.setOutputPath(conf, new Path(args[2]));

  try {
    JobClient.runJob(conf);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
/**
 * {@inheritDoc}
 *
 * @see org.apache.hadoop.util.Tool#run(java.lang.String[])
 */
@Override
public int run(String[] args) throws Exception {
  JobConf configuration = new JobConf(getConf(), WordCountExtended.class);
  configuration.setJobName(JOB_NAME);

  configuration.setOutputKeyClass(Text.class);
  configuration.setOutputValueClass(IntWritable.class);

  configuration.setMapperClass(Map.class);
  configuration.setCombinerClass(Reduce.class);
  configuration.setReducerClass(Reduce.class);

  configuration.setInputFormat(TextInputFormat.class);
  configuration.setOutputFormat(TextOutputFormat.class);

  List<String> otherArgs = new ArrayList<String>();
  for (int i = 0; i < args.length; ++i) {
    if (JOB_SKIP_ARGUMENT.equals(args[i])) {
      DistributedCache.addCacheFile(new Path(args[++i]).toUri(), configuration);
      configuration.setBoolean(JOB_PARAMETER_SKIP_PATTERNS, true);
    } else {
      otherArgs.add(args[i]);
    }
  }

  FileInputFormat.setInputPaths(configuration, new Path(otherArgs.get(0)));
  FileOutputFormat.setOutputPath(configuration, new Path(otherArgs.get(1)));

  JobClient.runJob(configuration);
  return 0;
}
public static void run(Map<String, String> path) throws IOException {
  JobConf conf = Recommend.config();

  String input = path.get("Step2Input");
  String output = path.get("Step2Output");

  HdfsDAO hdfs = new HdfsDAO(Recommend.HDFS, conf);
  hdfs.rmr(output);

  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(IntWritable.class);

  conf.setMapperClass(Step2_UserVectorToCooccurrenceMapper.class);
  // conf.setCombinerClass(Step2_UserVectorToConoccurrenceReducer.class);
  // conf.setReducerClass(Step2_UserVectorToConoccurrenceReducer.class);

  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(TextOutputFormat.class);

  FileInputFormat.setInputPaths(conf, new Path(input));
  FileOutputFormat.setOutputPath(conf, new Path(output));

  // JobClient.runJob() blocks until the job finishes, so a single wait is enough.
  RunningJob job = JobClient.runJob(conf);
  job.waitForCompletion();
}
public static void main(String[] args) throws Exception {
  JobConf conf = new JobConf(AccessProcessJob.class);
  conf.set(nameNode, hdfsURL);
  conf.setJobName("AccessProcessJob");

  // Delete the output directory if it already exists.
  new Path(outputPath).getFileSystem(conf).delete(new Path(outputPath), true);

  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);

  conf.setMapperClass(AccessProcessMap.class);
  conf.setReducerClass(AccessProcessReduce.class);

  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(TextOutputFormat.class);

  // Set the paths on the JobConf that is actually submitted below.
  FileInputFormat.setInputPaths(conf, new Path(inputPath));
  FileOutputFormat.setOutputPath(conf, new Path(outputPath));

  conf.setNumMapTasks(1);
  conf.setNumReduceTasks(1);

  JobClient.runJob(conf);
}
public void runMR(String myMultiLocs, String sortKey)
    throws ParseException, IOException, Exception, org.apache.hadoop.zebra.parser.ParseException {
  JobConf jobConf = new JobConf(conf);
  jobConf.setJobName("TestMultipleOutputs4");
  jobConf.setJarByClass(TestMultipleOutputs4.class);
  jobConf.set("table.output.tfile.compression", "gz");
  jobConf.set("sortKey", sortKey);

  // input settings
  jobConf.setInputFormat(TextInputFormat.class);
  jobConf.setMapperClass(TestMultipleOutputs4.MapClass.class);
  jobConf.setMapOutputKeyClass(BytesWritable.class);
  jobConf.setMapOutputValueClass(ZebraTuple.class);
  FileInputFormat.setInputPaths(jobConf, inputPath);
  jobConf.setNumMapTasks(1);

  // output settings
  jobConf.setOutputFormat(BasicTableOutputFormat.class);
  BasicTableOutputFormat.setMultipleOutputs(
      jobConf, myMultiLocs, TestMultipleOutputs4.OutputPartitionerClass.class);

  // set the logical schema with 2 columns
  BasicTableOutputFormat.setSchema(jobConf, "word:string, count:int");
  // for demo purposes, create 2 physical column groups
  BasicTableOutputFormat.setStorageHint(jobConf, "[word];[count]");
  BasicTableOutputFormat.setSortInfo(jobConf, sortKey);
  System.out.println("in runMR, sortkey: " + sortKey);

  // run with a single reduce task
  jobConf.setNumReduceTasks(1);

  JobClient.runJob(jobConf);
  BasicTableOutputFormat.close(jobConf);
}
public RunningJob run(String inputPath, String outputPath) throws Exception {
  sLogger.info("Tool name: BuildGraph");
  sLogger.info(" - input: " + inputPath);
  sLogger.info(" - output: " + outputPath);

  JobConf conf = new JobConf(BuildGraph.class);
  conf.setJobName("BuildGraph " + inputPath + " " + ContrailConfig.K);

  ContrailConfig.initializeConfiguration(conf);

  FileInputFormat.addInputPath(conf, new Path(inputPath));
  FileOutputFormat.setOutputPath(conf, new Path(outputPath));

  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(TextOutputFormat.class);

  conf.setMapOutputKeyClass(Text.class);
  conf.setMapOutputValueClass(Text.class);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);

  conf.setMapperClass(BuildGraphMapper.class);
  conf.setReducerClass(BuildGraphReducer.class);

  // delete the output directory if it exists already
  FileSystem.get(conf).delete(new Path(outputPath), true);

  return JobClient.runJob(conf);
}
public static void main(String[] args) throws Exception {
  String input = "hdfs://192.168.0.110:9000/input/access.log";
  String output = "hdfs://192.168.0.110:9000/user/hdfs/pv";

  JobConf conf = new JobConf(KPIPV.class);
  conf.setJobName("KPIPV");

  conf.setMapOutputKeyClass(Text.class);
  conf.setMapOutputValueClass(IntWritable.class);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(IntWritable.class);

  conf.setMapperClass(KPIPVMapper.class);
  conf.setCombinerClass(KPIPVReducer.class);
  conf.setReducerClass(KPIPVReducer.class);

  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(TextOutputFormat.class);

  FileInputFormat.setInputPaths(conf, new Path(input));
  FileOutputFormat.setOutputPath(conf, new Path(output));

  JobClient.runJob(conf);
  System.exit(0);
}
public int run(String[] args) throws Exception {
  if (args.length < 2) {
    printUsage();
    return 1;
  }

  JobConf job = new JobConf(getConf(), MultiFileWordCount.class);
  job.setJobName("MultiFileWordCount");

  // set the InputFormat of the job to our InputFormat
  job.setInputFormat(MyInputFormat.class);

  // the keys are words (strings)
  job.setOutputKeyClass(Text.class);
  // the values are counts (ints)
  job.setOutputValueClass(IntWritable.class);

  // use the defined mapper
  job.setMapperClass(MapClass.class);
  // use LongSumReducer as both combiner and reducer
  job.setCombinerClass(LongSumReducer.class);
  job.setReducerClass(LongSumReducer.class);

  FileInputFormat.addInputPaths(job, args[0]);
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  JobClient.runJob(job);
  return 0;
}
/**
 * This is the main routine for launching a distributed random write job. It runs 10 maps/node
 * and each node writes 1 gig of data to a DFS file. The reduce doesn't do anything.
 *
 * @throws IOException
 */
public int run(String[] args) throws Exception {
  if (args.length == 0) {
    System.out.println("Usage: writer <out-dir>");
    ToolRunner.printGenericCommandUsage(System.out);
    return -1;
  }

  Path outDir = new Path(args[0]);
  JobConf job = new JobConf(getConf());

  job.setJarByClass(RandomWriter.class);
  job.setJobName("random-writer");
  FileOutputFormat.setOutputPath(job, outDir);

  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(BytesWritable.class);

  job.setInputFormat(RandomInputFormat.class);
  job.setMapperClass(Map.class);
  job.setReducerClass(IdentityReducer.class);
  job.setOutputFormat(SequenceFileOutputFormat.class);

  JobClient client = new JobClient(job);
  ClusterStatus cluster = client.getClusterStatus();

  // getInt/getLong return the default value if the property is not set
  int numMapsPerHost = job.getInt("test.randomwriter.maps_per_host", 10);
  long numBytesToWritePerMap =
      job.getLong("test.randomwrite.bytes_per_map", 1 * 1024 * 1024 * 1024);
  if (numBytesToWritePerMap == 0) {
    System.err.println("Cannot have test.randomwrite.bytes_per_map set to 0");
    return -2;
  }

  long totalBytesToWrite =
      job.getLong(
          "test.randomwrite.total_bytes",
          numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    numMaps = 1;
    job.setLong("test.randomwrite.bytes_per_map", totalBytesToWrite);
  }

  // setNumMapTasks is only a hint to the framework
  job.setNumMapTasks(numMaps);
  System.out.println("Running " + numMaps + " maps.");

  // reducer NONE
  job.setNumReduceTasks(0);

  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  JobClient.runJob(job);
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println(
      "The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");

  return 0;
}
/*
 * (non-Javadoc)
 * @see org.apache.hadoop.chukwa.analysis.HiTune.AnalysisProcessor#run()
 */
@Override
public void run() {
  long timestamp = System.currentTimeMillis();

  JobConf conf = new JobConf(this.conf, InstrumentDataflow.class);
  try {
    conf.setJobName(this.getClass().getSimpleName() + timestamp);
    conf.setInputFormat(MultiSequenceFileInputFormat.class);
    conf.setMapperClass(InstrumentDataflow.MapClass.class);
    conf.setReducerClass(InstrumentDataflow.ReduceClass.class);
    conf.setOutputKeyClass(Text.class);

    Class<? extends WritableComparable> outputKeyClass =
        Class.forName(conf.get(AnalysisProcessorConfiguration.mapoutputKeyClass))
            .asSubclass(WritableComparable.class);
    Class<? extends Writable> outputValueClass =
        Class.forName(conf.get(AnalysisProcessorConfiguration.mapoutputValueClass))
            .asSubclass(Writable.class);
    conf.setMapOutputKeyClass(outputKeyClass);
    conf.setMapOutputValueClass(outputValueClass);

    conf.setOutputValueClass(TextArrayWritable.class);
    conf.setOutputFormat(CSVFileOutputFormat.class);

    String outputPaths =
        conf.get(AnalysisProcessorConfiguration.reportfolder)
            + "/"
            + conf.get(AnalysisProcessorConfiguration.reportfile);
    String temp_outputPaths = getTempOutputDir(outputPaths);

    if (this.inputfiles != null) {
      log.debug("inputPaths:" + inputfiles);
      FileInputFormat.setInputPaths(conf, inputfiles);
      FileOutputFormat.setOutputPath(conf, new Path(temp_outputPaths));
      // FileInputFormat.setInputPathFilter(conf, evtFileFilter.class);
      // conf.setNumReduceTasks(1);

      try {
        JobClient.runJob(conf);
        moveResults(conf, outputPaths, temp_outputPaths);
      } catch (IOException e) {
        log.warn("For " + getOutputFileName() + " :JOB fails!");
        log.warn(e);
        e.printStackTrace();
        this.MOVE_DONE = false;
      }
    } else {
      log.warn("For " + getOutputFileName() + " :No input path!");
    }
  } catch (Exception e) {
    log.warn("Job preparation failure!");
    log.warn(e);
    e.printStackTrace();
  }
}
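The job above emits TextArrayWritable values through CSVFileOutputFormat, but that helper class is not part of the snippet. A minimal sketch, assuming it follows the conventional ArrayWritable-of-Text idiom (the class name comes from the call above; the body is an assumption):

import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.Text;

// Hypothetical sketch of TextArrayWritable: an ArrayWritable fixed to Text elements.
public class TextArrayWritable extends ArrayWritable {
  public TextArrayWritable() {
    super(Text.class); // no-arg constructor required for Hadoop deserialization
  }

  public TextArrayWritable(Text[] values) {
    super(Text.class, values);
  }
}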
public int run(String[] args) throws Exception {
  if (args.length < 4) {
    System.out.println("ERROR: Please enter args: input output type(text|seq) splitChar(9=\t)");
    return JobClient.SUCCESS;
  }

  String input = args[0];
  String output = args[1];
  String type = args[2];
  String splitChar = args[3];

  JobConf config = new JobConf(getConf(), getClass());
  config.set("user.split", splitChar);
  config.setJobName("File Filter -" + System.currentTimeMillis());
  config.setNumReduceTasks(10);
  config.setReducerClass(IdentityReducer.class);
  config.setMapperClass(FileTestMapper.class);

  if ("text".equals(type)) {
    config.setInputFormat(TextInputFormat.class);
    TextInputFormat.addInputPath(config, new Path(input));
  } else {
    config.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(config, new Path(input));
  }

  config.setMapOutputKeyClass(Text.class);
  config.setMapOutputValueClass(Text.class);
  config.setOutputKeyClass(Text.class);
  config.setOutputValueClass(Text.class);

  // if the output path already exists, skip the run
  FileSystem fs = FileSystem.get(config);
  Path outputPath = new Path(output);
  FileOutputFormat.setOutputPath(config, outputPath);
  if (!fs.exists(outputPath)) {
    JobClient.runJob(config);
  } else {
    System.out.println("You have already finished this job today! " + outputPath);
  }
  return JobClient.SUCCESS;
}
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.printf(
        "Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName());
    ToolRunner.printGenericCommandUsage(System.err);
    return -1;
  }

  // get configuration
  Configuration conf = getConf();
  CommonUtility.printConfiguration(conf);

  // create a JobConf
  JobConf jobconf = new JobConf(conf);

  // set name
  jobconf.setJobName("CommonTestJob");

  // set input/output path
  Path in = new Path(args[0]);
  Path out = new Path(args[1]);
  FileInputFormat.setInputPaths(jobconf, in);
  FileOutputFormat.setOutputPath(jobconf, out);

  // set input/output format
  jobconf.setInputFormat(SequenceFileInputFormat.class);
  jobconf.setOutputFormat(SequenceFileOutputFormat.class);

  // set output key/value
  jobconf.setOutputKeyClass(Text.class);
  jobconf.setOutputValueClass(RRIntervalWritable.class);

  // set mapper/reducer class
  jobconf.setMapperClass(CommonTestMapper.class);
  jobconf.setReducerClass(IdentityReducer.class);

  // MultipleInputs.addInputPath(jobconf,
  //     new Path("hdfs://localhost/work/lab/ecg/rrSeqMulti"),
  //     SequenceFileInputFormat.class, CommonTestMapper.class);
  //
  // MultipleInputs.addInputPath(jobconf,
  //     new Path("hdfs://localhost/work/lab/ecg/rrSeqSingle"),
  //     SequenceFileInputFormat.class, IdentityMapper.class);

  jobconf.setNumReduceTasks(0);

  JobClient.runJob(jobconf);

  // --- end ---
  CommonUtility.printConfiguration(jobconf);
  return 0;
}
public static void main(String[] args) throws IOException {
  if (args.length < 2) {
    System.out.println("args not right!");
    return;
  }

  JobConf conf = new JobConf(IpCount1.class);

  // set output key/value class
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);

  // set mapper & reducer class
  conf.setMapperClass(IpCounterMapper.class);
  conf.setCombinerClass(IpCounterReducer.class);
  conf.setReducerClass(IpCounterReducer.class);

  // set format
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(TextOutputFormat.class);

  String inputDir = args[0];
  String outputDir = args[1];

  // FileInputFormat.setInputPaths(conf, "/user/hadoop/rongxin/locationinput/");
  FileInputFormat.setInputPaths(conf, inputDir);
  FileOutputFormat.setOutputPath(conf, new Path(outputDir));

  boolean flag = JobClient.runJob(conf).isSuccessful();
}
@Override
public int run(String[] args) throws Exception {
  System.out.println("\n\nConvolutionJob\n");

  JobConf conf = new JobConf(getConf(), ConvolutionJob.class);
  conf.setJobName("ConvolutionJob");

  this.cacheKernel(conf);
  this.CreateRats(conf);

  conf.setMapperClass(ConvolutionMapper.class);

  List<String> other_args = new ArrayList<String>();
  for (int i = 0; i < args.length; ++i) {
    try {
      if ("-m".equals(args[i])) {
        conf.setNumMapTasks(Integer.parseInt(args[++i]));
      } else if ("-r".equals(args[i])) {
        conf.setNumReduceTasks(Integer.parseInt(args[++i]));
      } else {
        other_args.add(args[i]);
      }
    } catch (NumberFormatException except) {
      System.out.println("ERROR: Integer expected instead of " + args[i]);
      return printUsage();
    } catch (ArrayIndexOutOfBoundsException except) {
      System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
      return printUsage();
    }
  }

  // Make sure there are exactly 2 parameters left.
  if (other_args.size() != 2) {
    System.out.println(
        "ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
    return printUsage();
  }

  conf.setNumReduceTasks(0);
  conf.setInputFormat(NonSplittableTextInputFormat.class);
  conf.setOutputFormat(MultiFileOutput.class);

  conf.setOutputKeyClass(NullWritable.class);
  conf.setOutputValueClass(Text.class);

  conf.setCompressMapOutput(true);
  conf.set("mapred.output.compression.codec", "org.apache.hadoop.io.compress.SnappyCodec");
  conf.set("mapred.output.compression.type", "BLOCK");

  FileInputFormat.setInputPaths(conf, other_args.get(0));
  FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));
  // FileOutputFormat.setCompressOutput(conf, true);

  JobClient.runJob(conf);
  return 0;
}
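ConvolutionJob relies on a custom NonSplittableTextInputFormat that is not shown here. A minimal sketch, assuming it follows the usual old-API idiom of simply disabling input splits so each file is read by exactly one mapper:

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.TextInputFormat;

// Hypothetical sketch of the NonSplittableTextInputFormat referenced above.
public class NonSplittableTextInputFormat extends TextInputFormat {
  @Override
  protected boolean isSplitable(FileSystem fs, Path file) {
    return false; // never split: one split (and one map task) per input file
  }
}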
@SuppressWarnings("rawtypes")
@Override
public void sourceConfInit(
    FlowProcess<JobConf> fp, Tap<JobConf, RecordReader, OutputCollector> tap, JobConf jobConf) {
  if (filterPredicate != null) {
    ParquetInputFormat.setFilterPredicate(jobConf, filterPredicate);
  }
  jobConf.setInputFormat(DeprecatedParquetInputFormat.class);
  ParquetInputFormat.setReadSupportClass(jobConf, TupleReadSupport.class);
  TupleReadSupport.setRequestedFields(jobConf, getSourceFields());
}
/**
 * @param process the current flow process
 * @param tap the tap being initialized
 * @param conf the job configuration to populate
 */
@Override
public void sourceConfInit(
    FlowProcess<JobConf> process, Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) {
  MongoConfigUtil.setReadSplitsFromShards(conf, true);
  MongoConfigUtil.setInputURI(conf, this.mongoUri);
  FileInputFormat.setInputPaths(conf, this.getIdentifier());
  conf.setInputFormat(MongoInputFormat.class);
  // TODO: MongoConfigUtil.setFields(conf, fieldsBson);
  // if (!this.query.isEmpty()) MongoConfigUtil.setQuery(conf, this.query);
  // TODO: MongoConfigUtil.setFields(conf, fields);
}
public static void main(String[] args) throws Exception {
  JobConf conf = new JobConf(ExtractorTask.class);
  conf.setJobName("Wikipedia Extractor");

  conf.setMapperClass(MyMapper.class);
  conf.setMapOutputKeyClass(Text.class);
  conf.setMapOutputValueClass(Text.class);

  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(CsvOutputFormat.class);
  conf.setNumReduceTasks(0);

  FileInputFormat.setInputPaths(conf, args[1]);
  CsvOutputFormat.setOutputPath(conf, new Path(args[2]));

  JobClient.runJob(conf);
}
private void runRankOrdering(String inputPath, String outputPath) throws IOException {
  JobConf conf = new JobConf(WikiPageRanking.class);

  conf.setOutputKeyClass(FloatWritable.class);
  conf.setOutputValueClass(Text.class);

  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(TextOutputFormat.class);

  FileInputFormat.setInputPaths(conf, new Path(inputPath));
  FileOutputFormat.setOutputPath(conf, new Path(outputPath));

  conf.setMapperClass(RankingMapper.class);

  JobClient.runJob(conf);
}
public int run(String[] args) throws Exception {
  JobConf conf = new JobConf(this.getClass());
  conf.setJobName("Domain-MR2");

  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);

  conf.setMapperClass(Map.class);
  conf.setReducerClass(Reduce.class);
  // conf.setReducerClass(IdentityReducer.class);

  conf.setInputFormat(TextInputFormat.class);
  // conf.setOutputFormat(TextOutputFormat.class);
  conf.setOutputFormat(MultiFileOutput.class);

  FileSystem fs = FileSystem.get(conf);
  fs.delete(new Path(args[1]), true); // delete output dir

  FileInputFormat.setInputPaths(conf, new Path(args[0]));
  FileOutputFormat.setOutputPath(conf, new Path(args[1]));

  int reducers = 272;
  int mappers = 272;
  conf.setNumMapTasks(mappers);
  conf.setNumReduceTasks(reducers);

  // set parameters
  conf.set("k", "" + k);
  conf.set("r", "" + r);
  conf.set("parts", "" + parts); // number of partitions per dimension

  System.out.println(
      "running DOMAIN with k=" + k + " r=" + r + " parts=" + parts + " "
          + "useCellBasedAlgo=" + useCellBasedAlgo
          + " reducers=" + reducers + " mappers=" + mappers);

  JobClient.runJob(conf);
  return 0;
}
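Both this job and the ConvolutionJob above write through a custom MultiFileOutput format that is not included here. One common way to build such a format with the old API is to extend MultipleTextOutputFormat and route each record to a file derived from its key; the sketch below assumes that behavior and the Text/Text output types of this job, which may differ from the real class:

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.lib.MultipleTextOutputFormat;

// Hypothetical sketch of a MultiFileOutput-style format (routing logic is assumed).
public class MultiFileOutput extends MultipleTextOutputFormat<Text, Text> {
  @Override
  protected String generateFileNameForKeyValue(Text key, Text value, String name) {
    // Write each key's records to a file named after the key instead of part-NNNNN.
    return key.toString();
  }
}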
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    String message =
        "Incorrect arguments -- requires 2 arguments.\n\t "
            + "1) directory containing N-Triples \n\t "
            + "2) output directory \n\t";
    throw new Exception(message);
  }

  String triples = args[0];
  String outputDir = args[1];

  Path outputPath = new Path(outputDir);
  Configuration fconf = new Configuration();
  FileSystem fs = FileSystem.get(fconf);
  if (fs.exists(outputPath)) {
    fs.delete(outputPath, true);
  }

  JobConf jobConf = new JobConf(this.getClass());
  jobConf.setJobName("MongoHadoopMR");

  FileOutputFormat.setOutputPath(jobConf, outputPath);

  jobConf.setInputFormat(KeyValueTextInputFormat.class);
  // jobConf.setOutputFormat(MongoOutputFormat.class);

  jobConf.setOutputKeyClass(Text.class);
  jobConf.setOutputValueClass(Text.class);

  FileInputFormat.setInputPaths(jobConf, new Path(triples));

  jobConf.setMapperClass(Map.class);
  jobConf.setNumReduceTasks(0);
  // jobConf.setReducerClass(Reduce.class);
  jobConf.setMapOutputKeyClass(Text.class);
  jobConf.setMapOutputValueClass(Text.class);

  RunningJob job = JobClient.runJob(jobConf);
  if (!job.isSuccessful()) {
    System.out.println("Hadoop Job Failed");
    return 1;
  }
  return 0;
}