private static void setJobName(JobConf jobConf, List<DataSegment> segments) {
  if (segments.size() == 1) {
    final DataSegment segment = segments.get(0);
    jobConf.setJobName(
        String.format(
            "druid-convert-%s-%s-%s",
            segment.getDataSource(), segment.getInterval(), segment.getVersion()));
  } else {
    final Set<String> dataSources =
        Sets.newHashSet(
            Iterables.transform(
                segments,
                new Function<DataSegment, String>() {
                  @Override
                  public String apply(DataSegment input) {
                    return input.getDataSource();
                  }
                }));
    final Set<String> versions =
        Sets.newHashSet(
            Iterables.transform(
                segments,
                new Function<DataSegment, String>() {
                  @Override
                  public String apply(DataSegment input) {
                    return input.getVersion();
                  }
                }));
    jobConf.setJobName(
        String.format(
            "druid-convert-%s-%s",
            Arrays.toString(dataSources.toArray()),
            Arrays.toString(versions.toArray())));
  }
}
public static void getData(CloudataConf conf, Path keyPath) throws IOException {
  JobConf jobConf = new JobConf(TeraReadJob.class);
  jobConf.set("user.name", conf.getUserId());
  String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

  Path tempOutputPath = new Path("ManyTableJob_Get_" + System.currentTimeMillis());

  jobConf.setJobName("ManyTableJob_Get_" + "(" + new Date() + ")");
  TextOutputFormat.setOutputPath(jobConf, tempOutputPath);

  // <MAP>
  jobConf.setMapperClass(ManyTableGetMap.class);
  jobConf.setInputFormat(TextInputFormat.class);
  TextInputFormat.addInputPath(jobConf, keyPath);
  jobConf.setMapSpeculativeExecution(false);
  jobConf.setMaxMapAttempts(0);
  // </MAP>

  // <REDUCE>
  jobConf.setNumReduceTasks(0);
  // </REDUCE>

  try {
    // Run Job
    JobClient.runJob(jobConf);
  } finally {
    // delete temp output path
    FileSystem fs = FileSystem.get(jobConf);
    FileUtil.delete(fs, tempOutputPath, true);
    CloudataMapReduceUtil.clearMapReduce(libDir);
  }
}
public static void main(String[] args) throws Exception {
  JobConf conf = new JobConf(AccessProcessJob.class);
  conf.set(nameNode, hdfsURL);
  conf.setJobName("AccessProcessJob");

  // Delete the output directory if it exists already.
  new Path(outputPath).getFileSystem(conf).delete(new Path(outputPath), true);

  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);

  conf.setMapperClass(AccessProcessMap.class);
  conf.setReducerClass(AccessProcessReduce.class);

  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(TextOutputFormat.class);

  // Set the paths on the JobConf that is actually submitted below; mixing in a
  // new-API Job instance here would leave the submitted JobConf without paths.
  FileInputFormat.setInputPaths(conf, new Path(inputPath));
  FileOutputFormat.setOutputPath(conf, new Path(outputPath));

  conf.setNumMapTasks(1);
  conf.setNumReduceTasks(1);

  JobClient.runJob(conf);
}
public RunningJob run(String inputPath, String outputPath) throws Exception { sLogger.info("Tool name: Compressible"); sLogger.info(" - input: " + inputPath); sLogger.info(" - output: " + outputPath); // JobConf conf = new JobConf(Stats.class); JobConf conf = new JobConf(Compressible.class); conf.setJobName("Compressible " + inputPath); BrushConfig.initializeConfiguration(conf); FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(CompressibleMapper.class); conf.setReducerClass(CompressibleReducer.class); // delete the output directory if it exists already FileSystem.get(conf).delete(new Path(outputPath), true); return JobClient.runJob(conf); }
@Override public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf(), getClass()); conf.setJobName("UFO count"); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: avro UFO counter <in> <out>"); System.exit(2); } FileInputFormat.addInputPath(conf, new Path(otherArgs[0])); Path outputPath = new Path(otherArgs[1]); FileOutputFormat.setOutputPath(conf, outputPath); outputPath.getFileSystem(conf).delete(outputPath); Schema input_schema = Schema.parse(getClass().getResourceAsStream("ufo.avsc")); AvroJob.setInputSchema(conf, input_schema); AvroJob.setMapOutputSchema( conf, Pair.getPairSchema(Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.LONG))); AvroJob.setOutputSchema(conf, OUTPUT_SCHEMA); AvroJob.setMapperClass(conf, AvroRecordMapper.class); AvroJob.setReducerClass(conf, AvroRecordReducer.class); conf.setInputFormat(AvroInputFormat.class); JobClient.runJob(conf); return 0; }
public int run(String[] args) throws Exception {
  if (args.length < 2) {
    printUsage();
    return 1;
  }

  JobConf job = new JobConf(getConf(), MultiFileWordCount.class);
  job.setJobName("MultiFileWordCount");

  // set the InputFormat of the job to our InputFormat
  job.setInputFormat(MyInputFormat.class);

  // the keys are words (strings)
  job.setOutputKeyClass(Text.class);
  // the values are counts (ints)
  job.setOutputValueClass(IntWritable.class);

  // use the defined mapper
  job.setMapperClass(MapClass.class);
  // use the WordCount Reducer
  job.setCombinerClass(LongSumReducer.class);
  job.setReducerClass(LongSumReducer.class);

  FileInputFormat.addInputPaths(job, args[0]);
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  JobClient.runJob(job);
  return 0;
}
public void run() throws Exception {
  long startTime = System.currentTimeMillis();

  JobConf conf = new JobConf(ItemCFJob.class);
  conf.setJobName("ItemCF" + System.currentTimeMillis());
  conf.setNumMapTasks(10);
  conf.set(
      "io.serializations",
      "org.apache.hadoop.io.serializer.JavaSerialization,"
          + "org.apache.hadoop.io.serializer.WritableSerialization");

  StringBuilder sb = new StringBuilder();
  sb.append("--input ").append(input);
  sb.append(" --output ").append(output);
  if (flag) {
    sb.append(" --booleanData true");
  } else {
    sb.append(" --booleanData false");
  }
  sb.append(" --similarityClassname " + Constants.mahout_similarityclassname);
  sb.append(" --tempDir ").append(tmp);
  String[] args = sb.toString().split(" ");

  RecommenderJob job = new RecommenderJob();
  job.setConf(conf);
  job.run(args);

  long endTime = System.currentTimeMillis();
  logger.info(
      "recommendation job ["
          + conf.getJobName()
          + "] finished; it took "
          + (endTime - startTime) / 1000
          + "s.");
}
public int run(final String[] args) throws Exception {
  log.info("run starting");

  final Configuration conf = getConf();

  JobConf job = new JobConf(conf, ExternalJoin.class);
  job.setJobName("AerospikeExternalJoin");

  job.setMapperClass(Map.class);
  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(LongWritable.class);
  // job.setCombinerClass(Reduce.class); // Reduce changes format.
  job.setReducerClass(Reduce.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Session.class);

  job.setOutputFormat(SessionOutputFormat.class);

  for (int ii = 0; ii < args.length; ++ii) {
    FileInputFormat.addInputPath(job, new Path(args[ii]));
  }

  JobClient.runJob(job);

  log.info("finished");
  return 0;
}
public static void runJob(String[] args) {
  JobConf conf = new JobConf(CassandraBulkLoader.class);

  if (args.length >= 4) {
    conf.setNumReduceTasks(new Integer(args[3]));
  }

  try {
    // We store the cassandra storage-conf.xml on the HDFS cluster
    DistributedCache.addCacheFile(new URI("/cassandra/storage-conf.xml#storage-conf.xml"), conf);
  } catch (URISyntaxException e) {
    throw new RuntimeException(e);
  }

  conf.setInputFormat(KeyValueTextInputFormat.class);
  conf.setJobName("CassandraBulkLoader_v2");
  conf.setMapperClass(Map.class);
  conf.setReducerClass(Reduce.class);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);

  FileInputFormat.setInputPaths(conf, new Path(args[1]));
  FileOutputFormat.setOutputPath(conf, new Path(args[2]));

  try {
    JobClient.runJob(conf);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
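A possible task-side counterpart (a hedged sketch, not part of the original job): once the driver above ships storage-conf.xml through DistributedCache.addCacheFile, a Map or Reduce task could resolve the local copy in its configure() hook, roughly as follows. The storageConfPath field is an assumption made for illustration.

// Hedged, illustrative fragment: locating the cached storage-conf.xml on the task node.
private String storageConfPath;

public void configure(JobConf job) {
  try {
    Path[] cached = DistributedCache.getLocalCacheFiles(job);
    if (cached != null) {
      for (Path p : cached) {
        if (p.getName().endsWith("storage-conf.xml")) {
          storageConfPath = p.toString(); // local path of the distributed copy
        }
      }
    }
  } catch (IOException e) {
    throw new RuntimeException("Unable to locate cached storage-conf.xml", e);
  }
}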
@Override public int run(String[] args) throws Exception { if (args.length != 2) { System.err.printf( "Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName()); ToolRunner.printGenericCommandUsage(System.err); return -1; } JobConf conf = new JobConf(getConf(), getClass()); conf.setJobName("Max temperature"); FileInputFormat.addInputPath(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setMapperClass(MaxTemperatureMapper.class); conf.setCombinerClass(MaxTemperatureReducer.class); conf.setReducerClass(MaxTemperatureReducer.class); // vv MaxTemperatureDriverV6 conf.setProfileEnabled(true); conf.setProfileParams( "-agentlib:hprof=cpu=samples,heap=sites,depth=6," + "force=n,thread=y,verbose=n,file=%s"); conf.setProfileTaskRange(true, "0-2"); // ^^ MaxTemperatureDriverV6 JobClient.runJob(conf); return 0; }
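The run method above is a Tool implementation, so it is normally launched through ToolRunner. A minimal launcher sketch is shown below; the class name MaxTemperatureDriverV6 is taken from the markers in the snippet and is an assumption about the enclosing class.

// Hedged launcher sketch (assumed, not shown in the original snippet).
public static void main(String[] args) throws Exception {
  int exitCode = ToolRunner.run(new MaxTemperatureDriverV6(), args);
  System.exit(exitCode);
}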
public RunningJob run(String inputPath, String outputPath) throws Exception {
  JobConf conf = new JobConf(BuildIndex.class);
  conf.setJobName("BuildIndex");

  FileInputFormat.addInputPath(conf, new Path(inputPath)); // multiple path
  FileOutputFormat.setOutputPath(conf, new Path(outputPath));
  conf.setOutputFormat(TextOutputFormat.class);

  conf.setMapOutputKeyClass(LongWritable.class);
  conf.setMapOutputValueClass(LongWritable.class);
  conf.set("delim", delim);
  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(LongWritable.class);
  conf.setInt("keyFieldIndexTwo", keyFieldIndexTwo);

  conf.setMapperClass(BuildIndexMapper.class);
  conf.setNumReduceTasks(1);
  conf.setReducerClass(BuildIndexReducer.class);

  conf.setInputFormat(TextInputFormat.class);
  // conf.setInputFormat(CustomInputFormat.class);
  // FileOutputFormat.setCompressOutput(conf,true);

  // delete the output directory if it exists already
  FileSystem.get(conf).delete(new Path(outputPath), true);

  return JobClient.runJob(conf);
}
public static void main(String[] args) throws Exception { String input = "hdfs://centos:9000/access.log.10"; String output = "hdfs://centos:9000/out_kpitime"; JobConf conf = new JobConf(KPITime.class); conf.setJobName("KPITime"); // conf.addResource("classpath:/hadoop/core-site.xml"); // conf.addResource("classpath:/hadoop/hdfs-site.xml"); // conf.addResource("classpath:/hadoop/mapred-site.xml"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setMapperClass(KPITimeMapper.class); conf.setCombinerClass(KPITimeReducer.class); conf.setReducerClass(KPITimeReducer.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(input)); FileOutputFormat.setOutputPath(conf, new Path(output)); JobClient.runJob(conf); System.exit(0); }
public RunningJob run(String inputPath, String outputPath) throws Exception { sLogger.info("Tool name: BuildGraph"); sLogger.info(" - input: " + inputPath); sLogger.info(" - output: " + outputPath); JobConf conf = new JobConf(BuildGraph.class); conf.setJobName("BuildGraph " + inputPath + " " + ContrailConfig.K); ContrailConfig.initializeConfiguration(conf); FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(BuildGraphMapper.class); conf.setReducerClass(BuildGraphReducer.class); // delete the output directory if it exists already FileSystem.get(conf).delete(new Path(outputPath), true); return JobClient.runJob(conf); }
public void runMR(String myMultiLocs, String sortKey)
    throws ParseException, IOException, Exception, org.apache.hadoop.zebra.parser.ParseException {
  JobConf jobConf = new JobConf(conf);
  jobConf.setJobName("TestMultipleOutputs4");
  jobConf.setJarByClass(TestMultipleOutputs4.class);
  jobConf.set("table.output.tfile.compression", "gz");
  jobConf.set("sortKey", sortKey);

  // input settings
  jobConf.setInputFormat(TextInputFormat.class);
  jobConf.setMapperClass(TestMultipleOutputs4.MapClass.class);
  jobConf.setMapOutputKeyClass(BytesWritable.class);
  jobConf.setMapOutputValueClass(ZebraTuple.class);
  FileInputFormat.setInputPaths(jobConf, inputPath);
  jobConf.setNumMapTasks(1);

  // output settings
  jobConf.setOutputFormat(BasicTableOutputFormat.class);
  BasicTableOutputFormat.setMultipleOutputs(
      jobConf, myMultiLocs, TestMultipleOutputs4.OutputPartitionerClass.class);

  // set the logical schema with 2 columns
  BasicTableOutputFormat.setSchema(jobConf, "word:string, count:int");
  // for demo purposes, create 2 physical column groups
  BasicTableOutputFormat.setStorageHint(jobConf, "[word];[count]");
  BasicTableOutputFormat.setSortInfo(jobConf, sortKey);
  System.out.println("in runMR, sortkey: " + sortKey);

  // run with a single reduce task
  jobConf.setNumReduceTasks(1);
  JobClient.runJob(jobConf);
  BasicTableOutputFormat.close(jobConf);
}
/**
 * {@inheritDoc}
 *
 * @see org.apache.hadoop.util.Tool#run(java.lang.String[])
 */
@Override
public int run(String[] args) throws Exception {
  JobConf configuration = new JobConf(getConf(), WordCountExtended.class);
  configuration.setJobName(JOB_NAME);

  configuration.setOutputKeyClass(Text.class);
  configuration.setOutputValueClass(IntWritable.class);

  configuration.setMapperClass(Map.class);
  configuration.setCombinerClass(Reduce.class);
  configuration.setReducerClass(Reduce.class);

  configuration.setInputFormat(TextInputFormat.class);
  configuration.setOutputFormat(TextOutputFormat.class);

  List<String> otherArgs = new ArrayList<String>();
  for (int i = 0; i < args.length; ++i) {
    if (JOB_SKIP_ARGUMENT.equals(args[i])) {
      DistributedCache.addCacheFile(new Path(args[++i]).toUri(), configuration);
      configuration.setBoolean(JOB_PARAMETER_SKIP_PATTERNS, true);
    } else {
      otherArgs.add(args[i]);
    }
  }

  FileInputFormat.setInputPaths(configuration, new Path(otherArgs.get(0)));
  FileOutputFormat.setOutputPath(configuration, new Path(otherArgs.get(1)));

  JobClient.runJob(configuration);
  return 0;
}
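The driver above distributes a skip-pattern file and flips JOB_PARAMETER_SKIP_PATTERNS, in the style of the classic WordCount v2.0 example. A hedged sketch of how the Map class might consume that flag and file is shown below; the patternsToSkip field and the exact reading logic are assumptions, not taken from the original listing.

// Hedged, illustrative fragment: loading skip patterns from the distributed cache.
private Set<String> patternsToSkip = new HashSet<String>();

public void configure(JobConf job) {
  if (!job.getBoolean(JOB_PARAMETER_SKIP_PATTERNS, false)) {
    return;
  }
  try {
    Path[] patternFiles = DistributedCache.getLocalCacheFiles(job);
    if (patternFiles == null) {
      return;
    }
    for (Path patternFile : patternFiles) {
      BufferedReader reader = new BufferedReader(new FileReader(patternFile.toString()));
      try {
        String pattern;
        while ((pattern = reader.readLine()) != null) {
          patternsToSkip.add(pattern); // one pattern per line
        }
      } finally {
        reader.close();
      }
    }
  } catch (IOException e) {
    throw new RuntimeException("Unable to read cached skip-pattern file", e);
  }
}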
public static void main(String[] args) throws Exception { String input = "hdfs://192.168.0.110:9000/input/access.log"; String output = "hdfs://192.168.0.110:9000/user/hdfs/pv"; JobConf conf = new JobConf(KPIPV.class); conf.setJobName("KPIPV"); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(IntWritable.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setMapperClass(KPIPVMapper.class); conf.setCombinerClass(KPIPVReducer.class); conf.setReducerClass(KPIPVReducer.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(input)); FileOutputFormat.setOutputPath(conf, new Path(output)); JobClient.runJob(conf); System.exit(0); }
/**
 * Run the job
 *
 * @param params The Job parameters containing the gramSize, input output folders, defaultCat,
 *     encoding
 */
public static void runJob(Parameters params) throws IOException {
  Configurable client = new JobClient();
  JobConf conf = new JobConf(BayesClassifierDriver.class);
  conf.setJobName("Bayes Classifier Driver running over input: " + params.get("testDirPath"));

  conf.setOutputKeyClass(StringTuple.class);
  conf.setOutputValueClass(DoubleWritable.class);

  FileInputFormat.setInputPaths(conf, new Path(params.get("testDirPath")));
  Path outPath = new Path(params.get("testDirPath") + "-output");
  FileOutputFormat.setOutputPath(conf, outPath);

  conf.setInputFormat(KeyValueTextInputFormat.class);
  conf.setMapperClass(BayesClassifierMapper.class);
  conf.setCombinerClass(BayesClassifierReducer.class);
  conf.setReducerClass(BayesClassifierReducer.class);
  conf.setOutputFormat(SequenceFileOutputFormat.class);

  conf.set(
      "io.serializations",
      "org.apache.hadoop.io.serializer.JavaSerialization,"
          + "org.apache.hadoop.io.serializer.WritableSerialization");

  HadoopUtil.overwriteOutput(outPath);
  conf.set("bayes.parameters", params.toString());

  client.setConf(conf);
  JobClient.runJob(conf);

  Path outputFiles = new Path(outPath, "part*");
  FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
  ConfusionMatrix matrix = readResult(dfs, outputFiles, conf, params);
  log.info("{}", matrix.summarize());
}
/* (non-Javadoc)
 * @see org.apache.hadoop.chukwa.analysis.HiTune.AnalysisProcessor#run()
 */
@Override
public void run() {
  long timestamp = System.currentTimeMillis();

  JobConf conf = new JobConf(this.conf, InstrumentDataflow.class);
  try {
    conf.setJobName(this.getClass().getSimpleName() + timestamp);
    conf.setInputFormat(MultiSequenceFileInputFormat.class);
    conf.setMapperClass(InstrumentDataflow.MapClass.class);
    conf.setReducerClass(InstrumentDataflow.ReduceClass.class);
    conf.setOutputKeyClass(Text.class);

    Class<? extends WritableComparable> outputKeyClass =
        Class.forName(conf.get(AnalysisProcessorConfiguration.mapoutputKeyClass))
            .asSubclass(WritableComparable.class);
    Class<? extends Writable> outputValueClass =
        Class.forName(conf.get(AnalysisProcessorConfiguration.mapoutputValueClass))
            .asSubclass(Writable.class);
    conf.setMapOutputKeyClass(outputKeyClass);
    conf.setMapOutputValueClass(outputValueClass);

    conf.setOutputValueClass(TextArrayWritable.class);
    conf.setOutputFormat(CSVFileOutputFormat.class);

    String outputPaths =
        conf.get(AnalysisProcessorConfiguration.reportfolder)
            + "/"
            + conf.get(AnalysisProcessorConfiguration.reportfile);
    String temp_outputPaths = getTempOutputDir(outputPaths);

    if (this.inputfiles != null) {
      log.debug("inputPaths:" + inputfiles);
      FileInputFormat.setInputPaths(conf, inputfiles);
      FileOutputFormat.setOutputPath(conf, new Path(temp_outputPaths));
      // FileInputFormat.setInputPathFilter(conf, evtFileFilter.class);
      // conf.setNumReduceTasks(1);

      try {
        JobClient.runJob(conf);
        moveResults(conf, outputPaths, temp_outputPaths);
      } catch (IOException e) {
        log.warn("For " + getOutputFileName() + " :JOB fails!");
        log.warn(e);
        e.printStackTrace();
        this.MOVE_DONE = false;
      }
    } else {
      log.warn("For " + getOutputFileName() + " :No input path!");
    }
  } catch (Exception e) {
    log.warn("Job preparation failure!");
    log.warn(e);
    e.printStackTrace();
  }
}
public void inject(Path crawlDb, Path urlDir) throws IOException {
  SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
  long start = System.currentTimeMillis();
  if (LOG.isInfoEnabled()) {
    LOG.info("Injector: starting at " + sdf.format(start));
    LOG.info("Injector: crawlDb: " + crawlDb);
    LOG.info("Injector: urlDir: " + urlDir);
  }

  Path tempDir =
      new Path(
          getConf().get("mapred.temp.dir", ".")
              + "/inject-temp-"
              + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  // map text input file to a <url,CrawlDatum> file
  if (LOG.isInfoEnabled()) {
    LOG.info("Injector: Converting injected urls to crawl db entries.");
  }
  JobConf sortJob = new NutchJob(getConf());
  sortJob.setJobName("inject " + urlDir);
  FileInputFormat.addInputPath(sortJob, urlDir);
  sortJob.setMapperClass(InjectMapper.class);

  FileOutputFormat.setOutputPath(sortJob, tempDir);
  sortJob.setOutputFormat(SequenceFileOutputFormat.class);
  sortJob.setOutputKeyClass(Text.class);
  sortJob.setOutputValueClass(CrawlDatum.class);
  sortJob.setLong("injector.current.time", System.currentTimeMillis());
  RunningJob mapJob = JobClient.runJob(sortJob);

  long urlsInjected = mapJob.getCounters().findCounter("injector", "urls_injected").getValue();
  long urlsFiltered = mapJob.getCounters().findCounter("injector", "urls_filtered").getValue();
  LOG.info("Injector: total number of urls rejected by filters: " + urlsFiltered);
  LOG.info(
      "Injector: total number of urls injected after normalization and filtering: "
          + urlsInjected);

  // merge with existing crawl db
  if (LOG.isInfoEnabled()) {
    LOG.info("Injector: Merging injected urls into crawl db.");
  }
  JobConf mergeJob = CrawlDb.createJob(getConf(), crawlDb);
  FileInputFormat.addInputPath(mergeJob, tempDir);
  mergeJob.setReducerClass(InjectReducer.class);
  JobClient.runJob(mergeJob);
  CrawlDb.install(mergeJob, crawlDb);

  // clean up
  FileSystem fs = FileSystem.get(getConf());
  fs.delete(tempDir, true);

  long end = System.currentTimeMillis();
  LOG.info(
      "Injector: finished at "
          + sdf.format(end)
          + ", elapsed: "
          + TimingUtil.elapsedTime(start, end));
}
/**
 * This is the main routine for launching a distributed random write job. It runs 10 maps/node and
 * each node writes 1 gig of data to a DFS file. The reduce doesn't do anything.
 *
 * @throws IOException
 */
public int run(String[] args) throws Exception {
  if (args.length == 0) {
    System.out.println("Usage: writer <out-dir>");
    ToolRunner.printGenericCommandUsage(System.out);
    return -1;
  }

  Path outDir = new Path(args[0]);
  JobConf job = new JobConf(getConf());

  job.setJarByClass(RandomWriter.class);
  job.setJobName("random-writer");
  FileOutputFormat.setOutputPath(job, outDir);

  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(BytesWritable.class);

  job.setInputFormat(RandomInputFormat.class);
  job.setMapperClass(Map.class);
  job.setReducerClass(IdentityReducer.class);
  job.setOutputFormat(SequenceFileOutputFormat.class);

  JobClient client = new JobClient(job);
  ClusterStatus cluster = client.getClusterStatus();

  // If the property is not set, the default value is returned.
  int numMapsPerHost = job.getInt("test.randomwriter.maps_per_host", 10);
  long numBytesToWritePerMap =
      job.getLong("test.randomwrite.bytes_per_map", 1 * 1024 * 1024 * 1024);
  if (numBytesToWritePerMap == 0) {
    System.err.println("Cannot have test.randomwrite.bytes_per_map set to 0");
    return -2;
  }

  long totalBytesToWrite =
      job.getLong(
          "test.randomwrite.total_bytes",
          numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    numMaps = 1;
    job.setLong("test.randomwrite.bytes_per_map", totalBytesToWrite);
  }

  // The number of map tasks is only a hint to the framework.
  job.setNumMapTasks(numMaps);
  System.out.println("Running " + numMaps + " maps.");

  // reducer NONE
  job.setNumReduceTasks(0);

  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  JobClient.runJob(job);
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println(
      "The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");
  return 0;
}
/**
 * The main driver for word count map/reduce program. Invoke this method to submit the map/reduce
 * job.
 *
 * @throws IOException When there are communication problems with the job tracker.
 */
public int run(String[] args) throws Exception {
  JobConf conf = new JobConf(getConf(), WordCountSeqOutput.class);
  conf.setJobName("wordcount_seqOF");

  conf.setMapOutputKeyClass(Text.class);
  conf.setMapOutputValueClass(IntWritable.class);
  // the keys are words (strings)
  conf.setOutputKeyClass(Text.class);
  // the values are counts (ints)
  // conf.setOutputValueClass(IntWritable.class);
  conf.setOutputValueClass(Text.class);

  conf.setMapperClass(MapClass.class);
  conf.setCombinerClass(Combiner.class);
  conf.setReducerClass(Reduce.class);

  conf.setOutputFormat(SequenceFileOutputFormat.class);

  // // compress Mapper output
  // conf.setCompressMapOutput(true);
  // conf.setMapOutputCompressorClass(org.apache.hadoop.io.compress.GzipCodec.class);

  // compress final output
  conf.set("mapred.output.compress", conf.get("mapred.output.compress", "true"));
  conf.set("mapred.output.compression.type", conf.get("mapred.output.compression.type", "BLOCK"));
  conf.set(
      "mapred.output.compression.codec",
      conf.get("mapred.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec"));

  List<String> other_args = new ArrayList<String>();
  for (int i = 0; i < args.length; ++i) {
    try {
      if ("-m".equals(args[i])) {
        conf.setNumMapTasks(Integer.parseInt(args[++i]));
      } else if ("-r".equals(args[i])) {
        conf.setNumReduceTasks(Integer.parseInt(args[++i]));
      } else {
        other_args.add(args[i]);
      }
    } catch (NumberFormatException except) {
      System.out.println("ERROR: Integer expected instead of " + args[i]);
      return printUsage();
    } catch (ArrayIndexOutOfBoundsException except) {
      System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
      return printUsage();
    }
  }

  // Make sure there are exactly 2 parameters left.
  if (other_args.size() != 2) {
    System.out.println(
        "ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
    return printUsage();
  }

  FileInputFormat.setInputPaths(conf, other_args.get(0));
  FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

  JobClient.runJob(conf);
  return 0;
}
public int run(String[] args) throws Exception {
  if (args.length != 5) {
    printUsage();
    return -1;
  }

  String inputPath = args[0];
  String outputPath = args[1];
  int mapTasks = Integer.parseInt(args[2]);
  int reduceTasks = Integer.parseInt(args[3]);
  String stoplistPath = args[4];

  sLogger.info("Tool: AFormatter");
  sLogger.info(" - input path: " + inputPath);
  sLogger.info(" - output path: " + outputPath);
  sLogger.info(" - number of mappers: " + mapTasks);
  sLogger.info(" - number of reducers: " + reduceTasks);

  JobConf conf = new JobConf(AFormatterWG.class);
  conf.setJobName("Authority Formatter -- Web Graph");

  conf.setNumMapTasks(mapTasks);
  conf.setNumReduceTasks(reduceTasks);

  FileInputFormat.setInputPaths(conf, new Path(inputPath));
  FileOutputFormat.setOutputPath(conf, new Path(outputPath));
  FileOutputFormat.setCompressOutput(conf, false);

  // conf.setInputFormat(SequenceFileInputFormat.class);
  conf.setOutputKeyClass(IntWritable.class);
  conf.setOutputValueClass(HITSNode.class);
  conf.setOutputFormat(SequenceFileOutputFormat.class);
  conf.setCompressMapOutput(true);
  conf.setSpeculativeExecution(false);
  // InputSampler.Sampler<IntWritable, Text> sampler = new
  // InputSampler.RandomSampler<IntWritable, Text>(0.1, 10, 10);
  // InputSampler.writePartitionFile(conf, sampler);
  // conf.setPartitionerClass(TotalOrderPartitioner.class);

  conf.setMapperClass(AFormatMapperIMC.class);
  conf.setCombinerClass(AFormatReducer.class);
  conf.setReducerClass(AFormatReducer.class);

  // Delete the output directory if it exists already
  Path outputDir = new Path(outputPath);
  Path stopList = new Path(stoplistPath);
  FileSystem.get(conf).delete(outputDir, true);

  long startTime = System.currentTimeMillis();
  sLogger.info("Starting job");
  DistributedCache.addCacheFile(stopList.toUri(), conf);
  JobClient.runJob(conf);
  sLogger.info(
      "Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

  return 0;
}
// Load the Hadoop configuration files.
public static JobConf getConfig() {
  JobConf conf = new JobConf(HDFSDao.class);
  conf.setJobName("HdfsDAO");
  conf.addResource("classpath:/hadoop/core-site.xml");
  conf.addResource("classpath:/hadoop/hdfs-site.xml");
  conf.addResource("classpath:/hadoop/mapred-site.xml");
  return conf;
}
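A hedged usage sketch of the configuration returned by getConfig(): obtaining a FileSystem handle and listing a directory. The ls helper shown here is an assumption for illustration, not a method documented in the original snippet.

// Hedged, illustrative usage of getConfig().
public static void ls(String folder) throws IOException {
  JobConf conf = getConfig();
  FileSystem fs = FileSystem.get(conf);
  FileStatus[] list = fs.listStatus(new Path(folder));
  for (FileStatus f : list) {
    System.out.printf("name: %s, folder: %s, size: %d%n", f.getPath(), f.isDir(), f.getLen());
  }
  fs.close();
}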
public JobBuilder(String jobName, Configuration conf) {
  _jobConf = new JobConf(conf);
  // legacy crap
  _jobConf.addResource("nutch-default.xml");
  _jobConf.addResource("nutch-site.xml");
  // defaults
  _jobConf.setNumTasksToExecutePerJvm(1000);
  _jobConf.setJobName(jobName);
}
public static int main(String[] args) throws Exception {
  int i;
  String outPath;
  int numMaps = 0, numReds = 0;

  List<String> other_args = new ArrayList<String>();
  for (i = 0; i < args.length; ++i) {
    try {
      if ("-m".equals(args[i])) {
        numMaps = Integer.parseInt(args[++i]);
      } else if ("-r".equals(args[i])) {
        numReds = Integer.parseInt(args[++i]);
      } else {
        other_args.add(args[i]);
      }
    } catch (NumberFormatException except) {
      System.out.println("ERROR: Integer expected instead of " + args[i]);
      printUsage();
    } catch (ArrayIndexOutOfBoundsException except) {
      System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
      printUsage(); // exits
    }
  }

  // Make sure there are exactly 2 parameters left.
  if (other_args.size() != 2) {
    System.out.println(
        "ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
    printUsage();
  }

  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  Date startIteration;
  Date endIteration;

  JobConf conf = new JobConf(Kmeans.class);
  conf.setJobName("kmeans");
  conf.setOutputKeyClass(IntWritable.class);
  conf.setOutputValueClass(Text.class);
  conf.setMapOutputKeyClass(IntWritable.class);
  conf.setMapOutputValueClass(ClusterWritable.class);
  conf.setMapperClass(MapClass.class);
  conf.setReducerClass(Reduce.class);
  conf.setNumMapTasks(numMaps);
  conf.setNumReduceTasks(numReds);

  FileInputFormat.setInputPaths(conf, new Path(other_args.get(0)));
  outPath = other_args.get(1);
  FileOutputFormat.setOutputPath(conf, new Path(outPath));

  startIteration = new Date();
  JobClient.runJob(conf);
  endIteration = new Date();
  System.out.println(
      "The iteration took "
          + (endIteration.getTime() - startIteration.getTime()) / 1000
          + " seconds.");
  return 0;
}
@Override public int run(String[] args) throws Exception { if (args.length != 2) { System.err.printf( "Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName()); ToolRunner.printGenericCommandUsage(System.err); return -1; } // get configuration Configuration conf = getConf(); CommonUtility.printConfiguration(conf); // create a JobConf JobConf jobconf = new JobConf(conf); // set name jobconf.setJobName("CommonTestJob"); // set input/output path Path in = new Path(args[0]); Path out = new Path(args[1]); FileInputFormat.setInputPaths(jobconf, in); FileOutputFormat.setOutputPath(jobconf, out); // set input/output format jobconf.setInputFormat(SequenceFileInputFormat.class); jobconf.setOutputFormat(SequenceFileOutputFormat.class); // set output key/value jobconf.setOutputKeyClass(Text.class); jobconf.setOutputValueClass(RRIntervalWritable.class); // set mapper/reducer class jobconf.setMapperClass(CommonTestMapper.class); jobconf.setReducerClass(IdentityReducer.class); // MultipleInputs.addInputPath(jobconf, new // Path("hdfs://localhost/work/lab/ecg/rrSeqMulti"), // SequenceFileInputFormat.class, CommonTestMapper.class); // // MultipleInputs.addInputPath(jobconf, new // Path("hdfs://localhost/work/lab/ecg/rrSeqSingle"), // SequenceFileInputFormat.class, IdentityMapper.class); jobconf.setNumReduceTasks(0); JobClient.runJob(jobconf); // --- end--- CommonUtility.printConfiguration(jobconf); return 0; }
@Override
public int run(String[] args) throws Exception {
  System.out.println("\n\nConvolutionJob\n");

  JobConf conf = new JobConf(getConf(), ConvolutionJob.class);
  conf.setJobName("ConvolutionJob");

  this.cacheKernel(conf);
  this.CreateRats(conf);
  conf.setMapperClass(ConvolutionMapper.class);

  List<String> other_args = new ArrayList<String>();
  for (int i = 0; i < args.length; ++i) {
    try {
      if ("-m".equals(args[i])) {
        conf.setNumMapTasks(Integer.parseInt(args[++i]));
      } else if ("-r".equals(args[i])) {
        conf.setNumReduceTasks(Integer.parseInt(args[++i]));
      } else {
        other_args.add(args[i]);
      }
    } catch (NumberFormatException except) {
      System.out.println("ERROR: Integer expected instead of " + args[i]);
      return printUsage();
    } catch (ArrayIndexOutOfBoundsException except) {
      System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
      return printUsage();
    }
  }

  // Make sure there are exactly 2 parameters left.
  if (other_args.size() != 2) {
    System.out.println(
        "ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
    return printUsage();
  }

  conf.setNumReduceTasks(0);
  conf.setInputFormat(NonSplittableTextInputFormat.class);
  conf.setOutputFormat(MultiFileOutput.class);

  conf.setOutputKeyClass(NullWritable.class);
  conf.setOutputValueClass(Text.class);

  conf.setCompressMapOutput(true);
  conf.set("mapred.output.compression.codec", "org.apache.hadoop.io.compress.SnappyCodec");
  conf.set("mapred.output.compression.type", "BLOCK");

  FileInputFormat.setInputPaths(conf, other_args.get(0));
  FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));
  // FileOutputFormat.setCompressOutput(conf, true);

  JobClient.runJob(conf);
  return 0;
}
public static void main(String[] args) throws Exception {
  JobConf conf = new JobConf(ExtractorTask.class);
  conf.setJobName("Wikipedia Extractor");

  conf.setMapperClass(MyMapper.class);
  conf.setMapOutputKeyClass(Text.class);
  conf.setMapOutputValueClass(Text.class);
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(CsvOutputFormat.class);
  conf.setNumReduceTasks(0);

  FileInputFormat.setInputPaths(conf, args[1]);
  CsvOutputFormat.setOutputPath(conf, new Path(args[2]));

  JobClient.runJob(conf);
}
public void run(String outputPath) throws Exception {
  JobConf conf = new JobConf(CopyOfLogGenerator.class);
  conf.setJobName("log-generator");

  conf.setOutputKeyClass(NullWritable.class);
  conf.setOutputValueClass(Text.class);

  conf.setMapperClass(CopyOfLogGenerator.class);
  conf.setNumReduceTasks(0);

  FileOutputFormat.setOutputPath(conf, new Path(outputPath));

  JobClient.runJob(conf);
}
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    String message =
        "Incorrect arguments -- requires 2 arguments.\n\t "
            + "1) directory containing N-triples \n\t"
            + "2) output directory \n\t";
    throw new Exception(message);
  }

  String triples = args[0];
  String outputDir = args[1];

  Path outputPath = new Path(outputDir);
  Configuration fconf = new Configuration();
  FileSystem fs = FileSystem.get(fconf);
  if (fs.exists(outputPath)) {
    fs.delete(outputPath, true);
  }

  JobConf jobConf = new JobConf(this.getClass());
  jobConf.setJobName("MongoHadoopMR");

  Path pOutput = new Path(outputDir);
  FileOutputFormat.setOutputPath(jobConf, pOutput);

  jobConf.setInputFormat(KeyValueTextInputFormat.class);
  // jobConf.setOutputFormat(MongoOutputFormat.class);

  jobConf.setOutputKeyClass(Text.class);
  jobConf.setOutputValueClass(Text.class);

  FileInputFormat.setInputPaths(jobConf, new Path(triples));

  jobConf.setMapperClass(Map.class);
  jobConf.setNumReduceTasks(0);
  // jobConf.setReducerClass(Reduce.class);
  jobConf.setMapOutputKeyClass(Text.class);
  jobConf.setMapOutputValueClass(Text.class);

  RunningJob job = JobClient.runJob(jobConf);
  if (!job.isSuccessful()) {
    System.out.println("Hadoop Job Failed");
  }
  return 0;
}