public static void runSortJob(String... args) throws Exception { Path input = new Path(args[0]); Path output = new Path(args[1]); JobConf job = new JobConf(); job.setNumReduceTasks(2); job.setInputFormat(KeyValueTextInputFormat.class); job.setOutputFormat(TextOutputFormat.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); FileInputFormat.setInputPaths(job, input); FileOutputFormat.setOutputPath(job, output); job.setJarByClass(SampleJob.class); output.getFileSystem(job).delete(output, true); JobClient jc = new JobClient(job); JobClient.setTaskOutputFilter(job, JobClient.TaskStatusFilter.ALL); RunningJob rj = jc.submitJob(job); try { if (!jc.monitorAndPrintJob(job, rj)) { System.out.println("Job Failed: " + rj.getFailureInfo()); throw new IOException("Job failed!"); } } catch (InterruptedException ie) { Thread.currentThread().interrupt(); } }
public RunningJob run(String inputPath, String outputPath) throws Exception { sLogger.info("Tool name: Compressible"); sLogger.info(" - input: " + inputPath); sLogger.info(" - output: " + outputPath); // JobConf conf = new JobConf(Stats.class); JobConf conf = new JobConf(Compressible.class); conf.setJobName("Compressible " + inputPath); BrushConfig.initializeConfiguration(conf); FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(CompressibleMapper.class); conf.setReducerClass(CompressibleReducer.class); // delete the output directory if it exists already FileSystem.get(conf).delete(new Path(outputPath), true); return JobClient.runJob(conf); }
public RunningJob run(String inputPath, String outputPath) throws Exception { JobConf conf = new JobConf(BuildIndex.class); conf.setJobName("BuildIndex"); FileInputFormat.addInputPath(conf, new Path(inputPath)); // multiple path FileOutputFormat.setOutputPath(conf, new Path(outputPath)); conf.setOutputFormat(TextOutputFormat.class); conf.setMapOutputKeyClass(LongWritable.class); conf.setMapOutputValueClass(LongWritable.class); conf.set("delim", delim); conf.setOutputKeyClass(LongWritable.class); conf.setOutputValueClass(LongWritable.class); conf.setInt("keyFieldIndexTwo", keyFieldIndexTwo); conf.setMapperClass(BuildIndexMapper.class); conf.setNumReduceTasks(1); conf.setReducerClass(BuildIndexReducer.class); conf.setInputFormat(TextInputFormat.class); // conf.setInputFormat(CustomInputFormat.class); // FileOutputFormat.setCompressOutput(conf,true); // delete the output directory if it exists already FileSystem.get(conf).delete(new Path(outputPath), true); return JobClient.runJob(conf); }
public void runMR(String myMultiLocs, String sortKey) throws ParseException, IOException, Exception, org.apache.hadoop.zebra.parser.ParseException { JobConf jobConf = new JobConf(conf); jobConf.setJobName("TestMultipleOutputs4"); jobConf.setJarByClass(TestMultipleOutputs4.class); jobConf.set("table.output.tfile.compression", "gz"); jobConf.set("sortKey", sortKey); // input settings jobConf.setInputFormat(TextInputFormat.class); jobConf.setMapperClass(TestMultipleOutputs4.MapClass.class); jobConf.setMapOutputKeyClass(BytesWritable.class); jobConf.setMapOutputValueClass(ZebraTuple.class); FileInputFormat.setInputPaths(jobConf, inputPath); jobConf.setNumMapTasks(1); // output settings jobConf.setOutputFormat(BasicTableOutputFormat.class); BasicTableOutputFormat.setMultipleOutputs( jobConf, myMultiLocs, TestMultipleOutputs4.OutputPartitionerClass.class); // set the logical schema with 2 columns BasicTableOutputFormat.setSchema(jobConf, "word:string, count:int"); // for demo purposes, create 2 physical column groups BasicTableOutputFormat.setStorageHint(jobConf, "[word];[count]"); BasicTableOutputFormat.setSortInfo(jobConf, sortKey); System.out.println("in runMR, sortkey: " + sortKey); // use a single reduce task so the sorted output is written as one part per table jobConf.setNumReduceTasks(1); JobClient.runJob(jobConf); BasicTableOutputFormat.close(jobConf); }
public int run(final String[] args) throws Exception { log.info("run starting"); final Configuration conf = getConf(); JobConf job = new JobConf(conf, ExternalJoin.class); job.setJobName("AerospikeExternalJoin"); job.setMapperClass(Map.class); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(LongWritable.class); // job.setCombinerClass(Reduce.class); // Reduce changes format. job.setReducerClass(Reduce.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Session.class); job.setOutputFormat(SessionOutputFormat.class); for (int ii = 0; ii < args.length; ++ii) FileInputFormat.addInputPath(job, new Path(args[ii])); JobClient.runJob(job); log.info("finished"); return 0; }
public static void main(String[] args) throws Exception { String input = "hdfs://192.168.0.110:9000/input/access.log"; String output = "hdfs://192.168.0.110:9000/user/hdfs/pv"; JobConf conf = new JobConf(KPIPV.class); conf.setJobName("KPIPV"); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(IntWritable.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setMapperClass(KPIPVMapper.class); conf.setCombinerClass(KPIPVReducer.class); conf.setReducerClass(KPIPVReducer.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(input)); FileOutputFormat.setOutputPath(conf, new Path(output)); JobClient.runJob(conf); System.exit(0); }
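The KPIPVMapper and KPIPVReducer classes wired in above are not shown. A minimal, hypothetical sketch of the mapper follows; only the Text/IntWritable contract is taken from the configuration above, and the field index used to pick the request path out of the access-log line is an assumption:

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

// Hypothetical sketch; the real KPIPVMapper may parse the log line differently.
public class KPIPVMapperSketch extends MapReduceBase
    implements Mapper<LongWritable, Text, Text, IntWritable> {
  private static final IntWritable ONE = new IntWritable(1);
  private final Text outKey = new Text();

  public void map(LongWritable key, Text value,
      OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
    // Emit the request path of a space-separated access-log line with a count of 1;
    // the reducer (also used as the combiner above) then sums the counts per path.
    String[] fields = value.toString().split(" ");
    if (fields.length > 6) {
      outKey.set(fields[6]);
      output.collect(outKey, ONE);
    }
  }
}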
public RunningJob run(String inputPath, String outputPath) throws Exception { sLogger.info("Tool name: BuildGraph"); sLogger.info(" - input: " + inputPath); sLogger.info(" - output: " + outputPath); JobConf conf = new JobConf(BuildGraph.class); conf.setJobName("BuildGraph " + inputPath + " " + ContrailConfig.K); ContrailConfig.initializeConfiguration(conf); FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(BuildGraphMapper.class); conf.setReducerClass(BuildGraphReducer.class); // delete the output directory if it exists already FileSystem.get(conf).delete(new Path(outputPath), true); return JobClient.runJob(conf); }
/** * The main driver for word count map/reduce program. Invoke this method to submit the map/reduce * job. * * @throws IOException When there is communication problems with the job tracker. */ public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf(), WordCountSeqOutput.class); conf.setJobName("wordcount_seqOF"); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(IntWritable.class); // the keys are words (strings) conf.setOutputKeyClass(Text.class); // the values are counts (ints) // conf.setOutputValueClass(IntWritable.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(MapClass.class); conf.setCombinerClass(Combiner.class); conf.setReducerClass(Reduce.class); conf.setOutputFormat(SequenceFileOutputFormat.class); // // compress Mapper output // conf.setCompressMapOutput(true); // conf.setMapOutputCompressorClass(org.apache.hadoop.io.compress.GzipCodec.class); // compress final output conf.set("mapred.output.compress", conf.get("mapred.output.compress", "true")); conf.set("mapred.output.compression.type", conf.get("mapred.output.compression.type", "BLOCK")); conf.set( "mapred.output.compression.codec", conf.get("mapred.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec")); List<String> other_args = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { try { if ("-m".equals(args[i])) { conf.setNumMapTasks(Integer.parseInt(args[++i])); } else if ("-r".equals(args[i])) { conf.setNumReduceTasks(Integer.parseInt(args[++i])); } else { other_args.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); return printUsage(); } } // Make sure there are exactly 2 parameters left. if (other_args.size() != 2) { System.out.println( "ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2."); return printUsage(); } FileInputFormat.setInputPaths(conf, other_args.get(0)); FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1))); JobClient.runJob(conf); return 0; }
/* (non-Javadoc) * @see org.apache.hadoop.chukwa.analysis.HiTune.AnalysisProcessor#run() */ @Override public void run() { // TODO Auto-generated method stub long timestamp = System.currentTimeMillis(); JobConf conf = new JobConf(this.conf, InstrumentDataflow.class); try { conf.setJobName(this.getClass().getSimpleName() + timestamp); conf.setInputFormat(MultiSequenceFileInputFormat.class); conf.setMapperClass(InstrumentDataflow.MapClass.class); conf.setReducerClass(InstrumentDataflow.ReduceClass.class); conf.setOutputKeyClass(Text.class); Class<? extends WritableComparable> outputKeyClass = Class.forName(conf.get(AnalysisProcessorConfiguration.mapoutputKeyClass)) .asSubclass(WritableComparable.class); Class<? extends Writable> outputValueClass = Class.forName(conf.get(AnalysisProcessorConfiguration.mapoutputValueClass)) .asSubclass(Writable.class); conf.setMapOutputKeyClass(outputKeyClass); conf.setMapOutputValueClass(outputValueClass); conf.setOutputValueClass(TextArrayWritable.class); conf.setOutputFormat(CSVFileOutputFormat.class); String outputPaths = conf.get(AnalysisProcessorConfiguration.reportfolder) + "/" + conf.get(AnalysisProcessorConfiguration.reportfile); String temp_outputPaths = getTempOutputDir(outputPaths); if (this.inputfiles != null) { log.debug("inputPaths:" + inputfiles); FileInputFormat.setInputPaths(conf, inputfiles); FileOutputFormat.setOutputPath(conf, new Path(temp_outputPaths)); // FileInputFormat.setInputPathFilter(conf, evtFileFilter.class); // conf.setNumReduceTasks(1); try { JobClient.runJob(conf); moveResults(conf, outputPaths, temp_outputPaths); } catch (IOException e) { // TODO Auto-generated catch block log.warn("For " + getOutputFileName() + " :JOB fails!"); log.warn(e); e.printStackTrace(); this.MOVE_DONE = false; } } else { log.warn("For " + getOutputFileName() + " :No input path!"); } } catch (Exception e) { log.warn("Job preparation failure!"); log.warn(e); e.printStackTrace(); } }
public JobBuilder keyValue( Class<? extends WritableComparable> key, Class<? extends Writable> value) throws IOException { _jobConf.setMapOutputKeyClass(key); _jobConf.setMapOutputValueClass(value); _jobConf.setOutputKeyClass(key); _jobConf.setOutputValueClass(value); return this; }
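A hedged usage sketch of the fluent setter above; the JobBuilder(JobConf) constructor and the MyJob driver class are assumptions made only for illustration:

JobConf conf = new JobConf(MyJob.class);        // MyJob is hypothetical
new JobBuilder(conf)                            // assumed constructor wrapping the JobConf
    .keyValue(Text.class, LongWritable.class);  // map output and final output now both use Text keys and LongWritable values (throws IOException)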
public static void main(String[] args) throws Exception { int i; String outPath; int numMaps = 0, numReds = 0; List<String> other_args = new ArrayList<String>(); for (i = 0; i < args.length; ++i) { try { if ("-m".equals(args[i])) { numMaps = Integer.parseInt(args[++i]); } else if ("-r".equals(args[i])) { numReds = Integer.parseInt(args[++i]); } else { other_args.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); printUsage(); // exits } } // Make sure there are exactly 2 parameters left. if (other_args.size() != 2) { System.out.println( "ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2."); printUsage(); } Date startTime = new Date(); System.out.println("Job started: " + startTime); Date startIteration; Date endIteration; JobConf conf = new JobConf(Kmeans.class); conf.setJobName("kmeans"); conf.setOutputKeyClass(IntWritable.class); conf.setOutputValueClass(Text.class); conf.setMapOutputKeyClass(IntWritable.class); conf.setMapOutputValueClass(ClusterWritable.class); conf.setMapperClass(MapClass.class); conf.setReducerClass(Reduce.class); conf.setNumMapTasks(numMaps); conf.setNumReduceTasks(numReds); FileInputFormat.setInputPaths(conf, new Path(other_args.get(0))); outPath = other_args.get(1); FileOutputFormat.setOutputPath(conf, new Path(outPath)); startIteration = new Date(); JobClient.runJob(conf); endIteration = new Date(); System.out.println( "The iteration took " + (endIteration.getTime() - startIteration.getTime()) / 1000 + " seconds."); }
protected void configJob(JobConf conf) { conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setPartitionerClass(PKPartitioner.class); conf.setOutputValueGroupingComparator(PVComparator.class); }
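The partitioner and value-grouping comparator configured above form the usual secondary-sort setup. A minimal sketch of a primary-key partitioner follows, assuming the composite Text key has the form "primary\u0001secondary"; the project's actual PKPartitioner may differ:

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Partitioner;

// Sketch only: route records by the primary-key prefix of a composite Text key.
public class PrimaryKeyPartitionerSketch implements Partitioner<Text, Text> {
  @Override
  public void configure(JobConf job) {}

  @Override
  public int getPartition(Text key, Text value, int numPartitions) {
    // Hash only the primary-key part so every secondary key of one primary key
    // reaches the same reducer; the grouping comparator then groups them together.
    String primary = key.toString().split("\u0001", 2)[0];
    return (primary.hashCode() & Integer.MAX_VALUE) % numPartitions;
  }
}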
public static void main(String[] args) throws Exception { JobConf conf = new JobConf(ExtractorTask.class); conf.setJobName("Wikipedia Extrator"); conf.setMapperClass(MyMapper.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(CsvOutputFormat.class); conf.setNumReduceTasks(0); FileInputFormat.setInputPaths(conf, args[1]); CsvOutputFormat.setOutputPath(conf, new Path(args[2])); JobClient.runJob(conf); }
@Override public int run(String[] args) throws Exception { if (args.length != 2) { String message = "Incorrect arguments -- requires 2 arguments.\n\t " + "1) directory containing N-triples \n\t" + "2) output directory \n\t"; throw new Exception(message); } String triples = args[0]; String outputDir = args[1]; Path outputPath = new Path(outputDir); Configuration fconf = new Configuration(); FileSystem fs = FileSystem.get(fconf); if (fs.exists(outputPath)) { fs.delete(outputPath, true); } JobConf jobConf = new JobConf(this.getClass()); jobConf.setJobName("MongoHadoopMR"); Path pOutput = new Path(outputDir); FileOutputFormat.setOutputPath(jobConf, pOutput); jobConf.setInputFormat(KeyValueTextInputFormat.class); // jobConf.setOutputFormat(MongoOutputFormat.class); jobConf.setOutputKeyClass(Text.class); jobConf.setOutputValueClass(Text.class); FileInputFormat.setInputPaths(jobConf, new Path(triples)); jobConf.setMapperClass(Map.class); jobConf.setNumReduceTasks(0); // jobConf.setReducerClass(Reduce.class); jobConf.setMapOutputKeyClass(Text.class); jobConf.setMapOutputValueClass(Text.class); RunningJob job = JobClient.runJob(jobConf); if (!job.isSuccessful()) System.out.println("Hadoop Job Failed"); return 0; }
@Override public int run(String[] args) throws Exception { final int ret = parseArgs(args); if (ret < 0) { return ret; } JobConf config = new JobConf(getConf(), TfIdfNovelty.class); config.setJobName("Influence-TfIdfNovelty"); config.set(Fields.BASIS.get(), basisPath); if (datesPath != null) { config.set(Fields.DOC_DATES.get(), datesPath); } config.setBoolean(Fields.IGNORE.get(), ignoreDocs); if (bands > 0) { config.setInt(Fields.BANDS.get(), bands); } if (rows > 0) { config.setInt(Fields.ROWS.get(), rows); } SetupHelper.getInstance() .setSequenceInput(config, inputPath) .setSequenceOutput(config, outputPath); config.setMapOutputKeyClass(HashBandWritable.class); config.setMapOutputValueClass(DocumentWithVectorWritable.class); config.setMapperClass(TfIdfNoveltyLshMapper.class); if (outputBuckets) { config.setOutputKeyClass(HashBandWritable.class); config.setOutputValueClass(IntArrayWritable.class); config.setReducerClass(TfIdfNoveltyIdentityReducer.class); } else { config.setOutputKeyClass(Text.class); config.setOutputValueClass(VectorWritable.class); config.setReducerClass(TfIdfNoveltyReducer.class); } // Delete the output directory if it exists already. FileSystem.get(getConf()).delete(new Path(outputPath), true); JobClient.runJob(config); return 0; }
/** Runs this tool. */ public int run(String[] args) throws Exception { if (args.length != 3) { printUsage(); return -1; } String inputPath = args[0]; String outputPath = args[1]; int n = Integer.parseInt(args[2]); sLogger.info("Tool name: BuildPageRankRecords"); sLogger.info(" - inputDir: " + inputPath); sLogger.info(" - outputDir: " + outputPath); sLogger.info(" - numNodes: " + n); JobConf conf = new JobConf(BuildPageRankRecords.class); conf.setJobName("PackageLinkGraph"); conf.setNumMapTasks(1); conf.setNumReduceTasks(0); conf.setInt("NodeCnt", n); conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024); TextInputFormat.addInputPath(conf, new Path(inputPath)); SequenceFileOutputFormat.setOutputPath(conf, new Path(outputPath)); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(SequenceFileOutputFormat.class); conf.setMapOutputKeyClass(IntWritable.class); conf.setMapOutputValueClass(PageRankNode.class); conf.setOutputKeyClass(IntWritable.class); conf.setOutputValueClass(PageRankNode.class); conf.setMapperClass(MyMapper.class); conf.setReducerClass(IdentityReducer.class); // delete the output directory if it exists already FileSystem.get(conf).delete(new Path(outputPath), true); JobClient.runJob(conf); return 0; }
@Override public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf(), getClass()); conf.setMapperClass(SortByDepartmentAndAgeMapper.class); conf.setMapOutputKeyClass(IntPair.class); conf.setPartitionerClass(FirstPartitioner.class); conf.setOutputValueGroupingComparator(FirstGroupingComparator.class); conf.setReducerClass(SortByDepartmentAndAgeReducer.class); conf.setOutputKeyClass(IntWritable.class); conf.setOutputValueClass(Text.class); FileInputFormat.addInputPath(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf); return 0; }
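This is the classic secondary-sort arrangement: partition and group on the first component of the composite key so each reduce call sees one department. A minimal sketch of such a partitioner follows, assuming the project's IntPair exposes getFirst() and the map output value is Text; the actual FirstPartitioner may differ:

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Partitioner;

// Sketch only: IntPair is the project's composite (department, age) key.
public class FirstPartitionerSketch implements Partitioner<IntPair, Text> {
  @Override
  public void configure(JobConf job) {}

  @Override
  public int getPartition(IntPair key, Text value, int numPartitions) {
    // Partition on the department id only, so the FirstGroupingComparator can
    // group all ages of one department into a single reduce call.
    return (key.getFirst() & Integer.MAX_VALUE) % numPartitions;
  }
}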
public static void run(Map<String, String> path) throws IOException { JobConf conf = Recommend.config(); conf.setJarByClass(Step1.class); String input = "hdfs://192.168.201.11:9000/user/hdfs/recommend/data1"; // directory of the training data to be processed String input1 = path.get("Step1Input"); String output = path.get("Step1Output"); HdfsDAO hdfs = new HdfsDAO(Recommend.HDFS, conf); hdfs.rmr(output); hdfs.rmr(input1); hdfs.mkdirs(input1); // hdfs.copyFile(path.get("data"), input); conf.setMapOutputKeyClass(IntWritable.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(IntWritable.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(Step1_ToUserPreMapper.class); conf.setCombinerClass(Step1_ToItemVectorReducer.class); conf.setReducerClass(Step1_ToItemVectorReducer.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); // int maxCurrentReduceTasks = conf.getInt("mapred.tasktracker.reduce.tasks.maximum", 1); // int ReduceTasks = (int) (2 * maxCurrentReduceTasks * 0.95); // conf.setNumReduceTasks(ReduceTasks); /* * The number of reduces is best set to 0.95 or 1.75 times the number of worker nodes, multiplied by mapred.tasktracker.reduce.tasks.maximum (usually the number of CPUs per node). * With 0.95 all reduce tasks launch immediately and copy map outputs as the maps finish. With 1.75 some reduce tasks launch immediately while the rest wait; faster nodes finish * their first round of reduces earlier and start a second round, and the slowest nodes get no second-round reduces, which gives better load balancing. */ // conf.setNumReduceTasks(3); // testing on VMs here, so simply set it to the number of nodes FileInputFormat.setInputPaths(conf, new Path(input)); FileOutputFormat.setOutputPath(conf, new Path(output)); /* runJob blocks until the job completes, so no extra wait loop is needed */ JobClient.runJob(conf); }
@Override public int run(String[] args) throws IOException { JobConf conf = JobBuilder.parseInputAndOutput(this, getConf(), args); if (conf == null) { return -1; } conf.setMapperClass(StationMapper.class); conf.setMapOutputKeyClass(Text.class); conf.setReducerClass(MultipleOutputsReducer.class); conf.setOutputKeyClass(NullWritable.class); conf.setOutputFormat(NullOutputFormat.class); // suppress empty part file MultipleOutputs.addMultiNamedOutput( conf, "station", TextOutputFormat.class, NullWritable.class, Text.class); JobClient.runJob(conf); return 0; }
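The reducer named above is expected to fan records out through the MultipleOutputs instance registered here. A hedged sketch of such a reducer follows, assuming Text/Text map output and one named output per station id (getCollector's multi-name must be alphanumeric, hence the stripping):

import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.lib.MultipleOutputs;

// Sketch only; the project's MultipleOutputsReducer may differ.
public class MultipleOutputsReducerSketch extends MapReduceBase
    implements Reducer<Text, Text, NullWritable, Text> {
  private MultipleOutputs multipleOutputs;

  @Override
  public void configure(JobConf conf) {
    multipleOutputs = new MultipleOutputs(conf);
  }

  @SuppressWarnings("unchecked")
  public void reduce(Text key, Iterator<Text> values,
      OutputCollector<NullWritable, Text> output, Reporter reporter) throws IOException {
    // Write every record to the "station" named output, one file per station id,
    // e.g. station_011990999991-r-00000 (multi-names may only contain letters and digits).
    String part = key.toString().replaceAll("[^A-Za-z0-9]", "");
    while (values.hasNext()) {
      multipleOutputs.getCollector("station", part, reporter)
          .collect(NullWritable.get(), values.next());
    }
  }

  @Override
  public void close() throws IOException {
    multipleOutputs.close();
  }
}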
public int run(String[] args) throws Exception { if (args.length < 4) { System.out.println("ERROR: Please enter args: input output type(text|seq) splitChar(9=\t)"); return JobClient.SUCCESS; } String input = args[0]; String output = args[1]; String type = args[2]; String splitChar = args[3]; JobConf config = new JobConf(getConf(), getClass()); config.set("user.split", splitChar); config.setJobName("File Filter -" + System.currentTimeMillis()); config.setNumReduceTasks(10); config.setReducerClass(IdentityReducer.class); config.setMapperClass(FileTestMapper.class); if ("text".equals(type)) { config.setInputFormat(TextInputFormat.class); TextInputFormat.addInputPath(config, new Path(input)); } else { config.setInputFormat(SequenceFileInputFormat.class); SequenceFileInputFormat.addInputPath(config, new Path(input)); } config.setMapOutputKeyClass(Text.class); config.setMapOutputValueClass(Text.class); config.setOutputKeyClass(Text.class); config.setOutputValueClass(Text.class); // if output path exists then return FileSystem fs = FileSystem.get(config); Path outputPath = new Path(output); FileOutputFormat.setOutputPath(config, outputPath); if (!fs.exists(outputPath)) { JobClient.runJob(config); } else { System.out.println("You have already finished this job today! " + outputPath); } return JobClient.SUCCESS; }
public int run(String[] args) throws Exception { if (args.length < 1) { args = new String[] {DateStringUtils.now()}; System.out.println( "ERROR: Please enter a date, e.g. 20101010! Now using default => " + DateStringUtils.now()); } JobConf config = new JobConf(getConf(), getClass()); config.set("user.args", Utils.asString(args)); config.setJobName(getClass() + "-" + System.currentTimeMillis()); config.setNumReduceTasks(100); config.setMapperClass(getClass()); config.setReducerClass(getClass()); config.setInputFormat(getInputFormat()); config.setMapOutputKeyClass(Text.class); config.setMapOutputValueClass(Text.class); // add input paths for (String path : getInputPath(args)) { if (TextInputFormat.class.equals(getInputFormat())) { TextInputFormat.addInputPath(config, new Path(path)); } else if (SequenceFileInputFormat.class.equals(getInputFormat())) { SequenceFileInputFormat.addInputPath(config, new Path(path)); } } config.setOutputKeyClass(Text.class); config.setOutputValueClass(Text.class); // if output path exists then return FileSystem fs = FileSystem.get(config); Path outputPath = new Path(getOutputPath(args)); FileOutputFormat.setOutputPath(config, outputPath); if (!fs.exists(outputPath)) { JobClient.runJob(config); } else { System.out.println("You have already finished this job today! " + outputPath); } return JobClient.SUCCESS; }
@SuppressWarnings("deprecation") public int run(String[] args) throws Exception { JobConf job = new JobConf(super.getConf(), this.getClass()); job.setJarByClass(this.getClass()); job.setJobName("Generate Neighbourhood for BFS"); job.setJobPriority(JobPriority.VERY_HIGH); job.setMapperClass(CreateNeighbourhoodForBFSMap.class); job.setReducerClass(CreateNeighbourhoodForBFSReduce.class); // job.setNumMapTasks(100); job.setNumReduceTasks(1); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); FileInputFormat.setInputPaths(job, args[0]); FileSystem.get(job).delete(new Path(args[1])); FileOutputFormat.setOutputPath(job, new Path(args[1])); JobClient.runJob(job); System.out.println("***********DONE********"); return 0; }
@Override public int run(String[] args) { if (args.length != 2) { System.err.println("need an input path argument and an output path argument."); return 1; } JobConf conf = new JobConf(getConf(), AccessLogJob.class); conf.setJobName("csp-access-log-job"); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(Map.class); conf.setReducerClass(Reduce.class); conf.setNumReduceTasks(10); conf.setInputFormat(SequenceFileInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); try { FileSystem fstm = FileSystem.get(conf); Path outDir = new Path(args[1]); fstm.delete(outDir, true); List<Path> inpaths = new LinkedList<Path>(); Path inputPath = new Path(args[0]); LocalUtil.getAllFiles(conf, inputPath, inpaths); // collect all input files FileInputFormat.setInputPaths(conf, inpaths.toArray(new Path[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf); } catch (Exception e) { System.err.print("job run error:\n" + LocalUtil.getThrowableTrace(e)); } return 0; }
@SuppressWarnings("deprecation") public int run(String[] args) throws Exception { JobConf job = new JobConf(super.getConf(), this.getClass()); job.setJarByClass(this.getClass()); job.setJobName("Create Files of Each Network Type"); job.setJobPriority(JobPriority.VERY_HIGH); job.setMapperClass(CreateNetworkLinkFilesMap.class); job.setReducerClass(CreateNetworkLinkFilesReduce.class); job.set("output", args[1]); job.setNumMapTasks(50); job.setNumReduceTasks(30); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); FileInputFormat.setInputPaths(job, args[0]); FileSystem.get(job).delete(new Path(args[1])); FileOutputFormat.setOutputPath(job, new Path("/tmp/DeleteThisDirectory1")); JobClient.runJob(job); System.out.println("***********DONE********"); return 0; }
public void testTotalOrderMemCmp() throws Exception { TotalOrderPartitioner<Text, NullWritable> partitioner = new TotalOrderPartitioner<Text, NullWritable>(); JobConf job = new JobConf(); Path p = TestTotalOrderPartitioner.<Text>writePartitionFile("totalordermemcmp", job, splitStrings); job.setMapOutputKeyClass(Text.class); try { partitioner.configure(job); NullWritable nw = NullWritable.get(); for (Check<Text> chk : testStrings) { assertEquals( chk.data.toString(), chk.part, partitioner.getPartition(chk.data, nw, splitStrings.length + 1)); } } finally { p.getFileSystem(job).delete(p); } }
@Override public int run(String[] args) throws Exception { // TODO Auto-generated method stub JobConf jobConf = new JobConf(getConf(), DailyDedupMr.class); jobConf.setInputFormat(SequenceFileInputFormat.class); jobConf.setMapperClass(IdentityMapper.class); jobConf.setReducerClass(UniqueKeyReduce.class); jobConf.setJobName("DailyDedup"); jobConf.setPartitionerClass(DailyPartitioner.class); jobConf.setOutputFormat(org.apache.hadoop.mapred.DailyOutputFormat.class); jobConf.setOutputKeyClass(Text.class); jobConf.setOutputValueClass(LogRecord.class); jobConf.setMapOutputValueClass(LogRecord.class); jobConf.setMapOutputKeyClass(Text.class); jobConf.setNumReduceTasks(3); FileInputFormat.setInputPaths(jobConf, args[0]); FileOutputFormat.setOutputPath(jobConf, new Path(args[1])); JobClient.runJob(jobConf); return 0; }
/** Driver for the actual MapReduce process */ private void runJob() throws IOException { JobConf conf = new JobConf(getConf(), IpAddrJoin.class); FileInputFormat.addInputPath(conf, new Path(IP_ADDRESS_INPUTS)); FileInputFormat.addInputPath(conf, new Path(LOCATION_DATA_INPUTS)); FileOutputFormat.setOutputPath(conf, new Path(OUTPUT_PATH)); conf.setMapperClass(IpAddrJoinMapper.class); conf.setReducerClass(IpAddrJoinReducer.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapOutputKeyClass(JoinRecordKey.class); conf.setMapOutputValueClass(JoinRecordValue.class); conf.setNumReduceTasks(5); JobClient.runJob(conf); }
public int run(String[] args) throws Exception { if (args.length < 1) { args = new String[] {TaobaoPath.now()}; System.out.println( "ERROR: Please enter a date, e.g. 20100507. Now using default: " + TaobaoPath.now()); } JobConf conf = new JobConf(getConf(), NewItemDailyFrom.class); conf.setJobName("NewItemDailyFrom-" + System.currentTimeMillis()); String date = args[0]; FileSystem fs = FileSystem.get(conf); if (fs.exists(TaobaoPath.getOutput("new_item_daily_from", date))) { System.out.println( "ERROR: You have already finished this job for this date: " + date + " [ " + TaobaoPath.getOutput("new_item_daily_from", date) + " ] "); return -1; } conf.set("user.date", date); conf.setNumReduceTasks(1); conf.setMapperClass(MapClass.class); conf.setReducerClass(LongSumReducer.class); conf.setInputFormat(TextInputFormat.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(LongWritable.class); TextInputFormat.addInputPath(conf, TaobaoPath.hiveAuctionAuctions(date)); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(LongWritable.class); FileOutputFormat.setOutputPath(conf, TaobaoPath.getOutput("new_item_daily_from", date)); JobClient.runJob(conf); return JobClient.SUCCESS; }
public static void main(String[] args) throws Exception { JobConf conf = new JobConf(MapReduceFour.class); conf.setJobName("step-Four"); conf.setMapOutputKeyClass(VarLongWritable.class); conf.setMapOutputValueClass(VectorWritable.class); conf.setOutputKeyClass(RecommendResult.class); conf.setOutputValueClass(NullWritable.class); FileInputFormat.setInputPaths( conf, new Path("/Users/bai/Documents/mapreduce/output-third/part-00000")); // DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver", // "jdbc:mysql://localhost:3306/hotelrecommend","root","root"); // DBOutputFormat.setOutput(conf,"recommend","uid","hid","recommendValue"); conf.setMapperClass(PartialMultiplyMapper.class); conf.setCombinerClass(AggregateCombiner.class); conf.setReducerClass(AggregateAndRecommendReducer.class); conf.setInputFormat(SequenceFileInputFormat.class); // conf.setOutputFormat(DBOutputFormat.class); JobClient.runJob(conf); }
public int run(String[] args) throws Exception { String driverClassName = "org.hsqldb.jdbcDriver"; String url = "jdbc:hsqldb:hsql://localhost/URLAccess"; if (args.length > 1) { driverClassName = args[0]; url = args[1]; } initialize(driverClassName, url); JobConf job = new JobConf(getConf(), DBCountPageView.class); job.setJobName("Count Pageviews of URLs"); job.setMapperClass(PageviewMapper.class); job.setCombinerClass(LongSumReducer.class); job.setReducerClass(PageviewReducer.class); DBConfiguration.configureDB(job, driverClassName, url); DBInputFormat.setInput(job, AccessRecord.class, "Access", null, "url", AccessFieldNames); DBOutputFormat.setOutput(job, "Pageview", PageviewFieldNames); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(LongWritable.class); job.setOutputKeyClass(PageviewRecord.class); job.setOutputValueClass(NullWritable.class); try { JobClient.runJob(job); boolean correct = verify(); if (!correct) throw new RuntimeException("Evaluation was not correct!"); } finally { shutdown(); } return 0; }
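DBOutputFormat writes the reduce output key (PageviewRecord here) through the DBWritable interface. A sketch of what such a record class needs to provide follows, assuming a Pageview(url, pageview) table; field names and column order are illustrative:

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.lib.db.DBWritable;

// Sketch only; mirrors what a record written by DBOutputFormat must implement.
public class PageviewRecordSketch implements Writable, DBWritable {
  private String url;
  private long pageview;

  public void write(DataOutput out) throws IOException {    // Writable: serialize between map and reduce
    Text.writeString(out, url);
    out.writeLong(pageview);
  }

  public void readFields(DataInput in) throws IOException {
    url = Text.readString(in);
    pageview = in.readLong();
  }

  public void write(PreparedStatement statement) throws SQLException {  // DBWritable: bind to the INSERT statement
    statement.setString(1, url);
    statement.setLong(2, pageview);
  }

  public void readFields(ResultSet resultSet) throws SQLException {
    url = resultSet.getString(1);
    pageview = resultSet.getLong(2);
  }
}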