public static void getData(CloudataConf conf, Path keyPath) throws IOException { JobConf jobConf = new JobConf(TeraReadJob.class); jobConf.set("user.name", conf.getUserId()); String libDir = CloudataMapReduceUtil.initMapReduce(jobConf); Path tempOutputPath = new Path("ManyTableJob_Get_" + System.currentTimeMillis()); jobConf.setJobName("ManyTableJob_Get_" + "(" + new Date() + ")"); TextOutputFormat.setOutputPath(jobConf, tempOutputPath); // <MAP> jobConf.setMapperClass(ManyTableGetMap.class); jobConf.setInputFormat(TextInputFormat.class); TextInputFormat.addInputPath(jobConf, keyPath); jobConf.setMapSpeculativeExecution(false); jobConf.setMaxMapAttempts(0); // </MAP> // <REDUCE> jobConf.setNumReduceTasks(0); // </REDUCE> try { // Run Job JobClient.runJob(jobConf); } finally { // delete temp output path FileSystem fs = FileSystem.get(jobConf); FileUtil.delete(fs, tempOutputPath, true); CloudataMapReduceUtil.clearMapReduce(libDir); } }
/** Runs this tool. */ public int run(String[] args) throws Exception { if (args.length != 3) { printUsage(); return -1; } String inputPath = args[0]; String outputPath = args[1]; int n = Integer.parseInt(args[2]); sLogger.info("Tool name: BuildPageRankRecords"); sLogger.info(" - inputDir: " + inputPath); sLogger.info(" - outputDir: " + outputPath); sLogger.info(" - numNodes: " + n); JobConf conf = new JobConf(BuildPageRankRecords.class); conf.setJobName("PackageLinkGraph"); conf.setNumMapTasks(1); conf.setNumReduceTasks(0); conf.setInt("NodeCnt", n); conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024); TextInputFormat.addInputPath(conf, new Path(inputPath)); SequenceFileOutputFormat.setOutputPath(conf, new Path(outputPath)); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(SequenceFileOutputFormat.class); conf.setMapOutputKeyClass(IntWritable.class); conf.setMapOutputValueClass(PageRankNode.class); conf.setOutputKeyClass(IntWritable.class); conf.setOutputValueClass(PageRankNode.class); conf.setMapperClass(MyMapper.class); conf.setReducerClass(IdentityReducer.class); // delete the output directory if it exists already FileSystem.get(conf).delete(new Path(outputPath), true); JobClient.runJob(conf); return 0; }
public int run(String[] args) throws Exception { if (args.length < 4) { System.out.println("ERROR: Please Enter args : input output type(text|seq) splitChar(9=\t)"); return JobClient.SUCCESS; } String input = args[0]; String output = args[1]; String type = args[2]; String splitChar = args[3]; JobConf config = new JobConf(getConf(), getClass()); config.set("user.split", splitChar); config.setJobName("File Filter -" + System.currentTimeMillis()); config.setNumReduceTasks(10); config.setReducerClass(IdentityReducer.class); config.setMapperClass(FileTestMapper.class); if ("text".equals(type)) { config.setInputFormat(TextInputFormat.class); TextInputFormat.addInputPath(config, new Path(input)); } else { config.setInputFormat(SequenceFileInputFormat.class); SequenceFileInputFormat.addInputPath(config, new Path(input)); } config.setMapOutputKeyClass(Text.class); config.setMapOutputValueClass(Text.class); config.setOutputKeyClass(Text.class); config.setOutputValueClass(Text.class); // if output path exists then return FileSystem fs = FileSystem.get(config); Path outputPath = new Path(output); FileOutputFormat.setOutputPath(config, outputPath); if (!fs.exists(outputPath)) { JobClient.runJob(config); } else { System.out.println("You has finished this job today ! " + outputPath); } return JobClient.SUCCESS; }
public int run(String[] args) throws Exception { if (args.length < 1) { args = new String[] {DateStringUtils.now()}; System.out.println( "ERROR: Please Enter Date , eg. 20101010 ! now use default => " + DateStringUtils.now()); } JobConf config = new JobConf(getConf(), getClass()); config.set("user.args", Utils.asString(args)); config.setJobName(getClass() + "-" + System.currentTimeMillis()); config.setNumReduceTasks(100); config.setMapperClass(getClass()); config.setReducerClass(getClass()); config.setInputFormat(getInputFormat()); config.setMapOutputKeyClass(Text.class); config.setMapOutputValueClass(Text.class); // add input paths for (String path : getInputPath(args)) { if (TextInputFormat.class.equals(getInputFormat())) { TextInputFormat.addInputPath(config, new Path(path)); } else if (SequenceFileInputFormat.class.equals(getInputFormat())) { SequenceFileInputFormat.addInputPath(config, new Path(path)); } } config.setOutputKeyClass(Text.class); config.setOutputValueClass(Text.class); // if output path exists then return FileSystem fs = FileSystem.get(config); Path outputPath = new Path(getOutputPath(args)); FileOutputFormat.setOutputPath(config, outputPath); if (!fs.exists(outputPath)) { JobClient.runJob(config); } else { System.out.println("You has finished this job today ! " + outputPath); } return JobClient.SUCCESS; }
/**
 * Configures and runs the {@code NewItemDailyFrom} job for one date.
 *
 * <p>The date is the first argument; when no arguments are given, {@code TaobaoPath.now()} is
 * used instead. If the output location for that date already exists, the job is not submitted.
 *
 * @param args optional; {@code args[0]} is the date, e.g. {@code 20100507}
 * @return {@code JobClient.SUCCESS} after the job has run, {@code -1} when the output for the
 *     date already exists
 * @throws Exception if the file system cannot be reached or the job fails
 */
public int run(String[] args) throws Exception {
  if (args.length < 1) {
    args = new String[] {TaobaoPath.now()};
    System.out.println(
        "ERROR: Please Enter Date , eg. 20100507 now use default!" + TaobaoPath.now());
  }

  JobConf job = new JobConf(getConf(), NewItemDailyFrom.class);
  job.setJobName("NewItemDailyFrom-" + System.currentTimeMillis());

  String date = args[0];

  // Refuse to rerun when this date's output is already present.
  FileSystem fileSystem = FileSystem.get(job);
  if (fileSystem.exists(TaobaoPath.getOutput("new_item_daily_from", date))) {
    System.out.println(
        "ERROR: You has finish this job at this day : "
            + date
            + " [ "
            + TaobaoPath.getOutput("new_item_daily_from", date)
            + " ] ");
    return -1;
  }

  job.set("user.date", date);

  // Map and reduce stages.
  job.setNumReduceTasks(1);
  job.setMapperClass(MapClass.class);
  job.setReducerClass(LongSumReducer.class);

  // Input side.
  job.setInputFormat(TextInputFormat.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(LongWritable.class);
  TextInputFormat.addInputPath(job, TaobaoPath.hiveAuctionAuctions(date));

  // Output side.
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  FileOutputFormat.setOutputPath(job, TaobaoPath.getOutput("new_item_daily_from", date));

  JobClient.runJob(job);
  return JobClient.SUCCESS;
}