public void testStep0Mapper() throws Exception { Random rng = RandomUtils.getRandom(); // create a dataset large enough to be split up String descriptor = Utils.randomDescriptor(rng, numAttributes); double[][] source = Utils.randomDoubles(rng, descriptor, numInstances); String[] sData = Utils.double2String(source); // write the data to a file Path dataPath = Utils.writeDataToTestFile(sData); JobConf job = new JobConf(); job.setNumMapTasks(numMaps); FileInputFormat.setInputPaths(job, dataPath); // retrieve the splits TextInputFormat input = (TextInputFormat) job.getInputFormat(); InputSplit[] splits = input.getSplits(job, numMaps); InputSplit[] sorted = Arrays.copyOf(splits, splits.length); Builder.sortSplits(sorted); Step0OutputCollector collector = new Step0OutputCollector(numMaps); Reporter reporter = Reporter.NULL; for (int p = 0; p < numMaps; p++) { InputSplit split = sorted[p]; RecordReader<LongWritable, Text> reader = input.getRecordReader(split, job, reporter); LongWritable key = reader.createKey(); Text value = reader.createValue(); Step0Mapper mapper = new Step0Mapper(); mapper.configure(p); Long firstKey = null; int size = 0; while (reader.next(key, value)) { if (firstKey == null) { firstKey = key.get(); } mapper.map(key, value, collector, reporter); size++; } mapper.close(); // validate the mapper's output assertEquals(p, collector.keys[p]); assertEquals(firstKey.longValue(), collector.values[p].getFirstId()); assertEquals(size, collector.values[p].getSize()); } }
public static void getData(CloudataConf conf, Path keyPath) throws IOException { JobConf jobConf = new JobConf(TeraReadJob.class); jobConf.set("user.name", conf.getUserId()); String libDir = CloudataMapReduceUtil.initMapReduce(jobConf); Path tempOutputPath = new Path("ManyTableJob_Get_" + System.currentTimeMillis()); jobConf.setJobName("ManyTableJob_Get_" + "(" + new Date() + ")"); TextOutputFormat.setOutputPath(jobConf, tempOutputPath); // <MAP> jobConf.setMapperClass(ManyTableGetMap.class); jobConf.setInputFormat(TextInputFormat.class); TextInputFormat.addInputPath(jobConf, keyPath); jobConf.setMapSpeculativeExecution(false); jobConf.setMaxMapAttempts(0); // </MAP> // <REDUCE> jobConf.setNumReduceTasks(0); // </REDUCE> try { // Run Job JobClient.runJob(jobConf); } finally { // delete temp output path FileSystem fs = FileSystem.get(jobConf); FileUtil.delete(fs, tempOutputPath, true); CloudataMapReduceUtil.clearMapReduce(libDir); } }
@Override public void reset() { // TODO Auto-generated method stub try { this.reader = input_format.getRecordReader(this.split, this.jobConf, voidReporter); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
/** Runs this tool. */ public int run(String[] args) throws Exception { if (args.length != 3) { printUsage(); return -1; } String inputPath = args[0]; String outputPath = args[1]; int n = Integer.parseInt(args[2]); sLogger.info("Tool name: BuildPageRankRecords"); sLogger.info(" - inputDir: " + inputPath); sLogger.info(" - outputDir: " + outputPath); sLogger.info(" - numNodes: " + n); JobConf conf = new JobConf(BuildPageRankRecords.class); conf.setJobName("PackageLinkGraph"); conf.setNumMapTasks(1); conf.setNumReduceTasks(0); conf.setInt("NodeCnt", n); conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024); TextInputFormat.addInputPath(conf, new Path(inputPath)); SequenceFileOutputFormat.setOutputPath(conf, new Path(outputPath)); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(SequenceFileOutputFormat.class); conf.setMapOutputKeyClass(IntWritable.class); conf.setMapOutputValueClass(PageRankNode.class); conf.setOutputKeyClass(IntWritable.class); conf.setOutputValueClass(PageRankNode.class); conf.setMapperClass(MyMapper.class); conf.setReducerClass(IdentityReducer.class); // delete the output directory if it exists already FileSystem.get(conf).delete(new Path(outputPath), true); JobClient.runJob(conf); return 0; }
@Override public void setFile(String file, long offset, long length) { JobConf defaultConf = new JobConf(); this.split = new FileSplit(new Path(file), offset, length, defaultConf); this.jobConf = defaultConf; // this.split = split; this.input_format = new TextInputFormat(); try { this.reader = input_format.getRecordReader(this.split, this.jobConf, voidReporter); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } this.key = reader.createKey(); }
public int run(String[] args) throws Exception { if (args.length < 4) { System.out.println("ERROR: Please Enter args : input output type(text|seq) splitChar(9=\t)"); return JobClient.SUCCESS; } String input = args[0]; String output = args[1]; String type = args[2]; String splitChar = args[3]; JobConf config = new JobConf(getConf(), getClass()); config.set("user.split", splitChar); config.setJobName("File Filter -" + System.currentTimeMillis()); config.setNumReduceTasks(10); config.setReducerClass(IdentityReducer.class); config.setMapperClass(FileTestMapper.class); if ("text".equals(type)) { config.setInputFormat(TextInputFormat.class); TextInputFormat.addInputPath(config, new Path(input)); } else { config.setInputFormat(SequenceFileInputFormat.class); SequenceFileInputFormat.addInputPath(config, new Path(input)); } config.setMapOutputKeyClass(Text.class); config.setMapOutputValueClass(Text.class); config.setOutputKeyClass(Text.class); config.setOutputValueClass(Text.class); // if output path exists then return FileSystem fs = FileSystem.get(config); Path outputPath = new Path(output); FileOutputFormat.setOutputPath(config, outputPath); if (!fs.exists(outputPath)) { JobClient.runJob(config); } else { System.out.println("You has finished this job today ! " + outputPath); } return JobClient.SUCCESS; }
public int run(String[] args) throws Exception { if (args.length < 1) { args = new String[] {DateStringUtils.now()}; System.out.println( "ERROR: Please Enter Date , eg. 20101010 ! now use default => " + DateStringUtils.now()); } JobConf config = new JobConf(getConf(), getClass()); config.set("user.args", Utils.asString(args)); config.setJobName(getClass() + "-" + System.currentTimeMillis()); config.setNumReduceTasks(100); config.setMapperClass(getClass()); config.setReducerClass(getClass()); config.setInputFormat(getInputFormat()); config.setMapOutputKeyClass(Text.class); config.setMapOutputValueClass(Text.class); // add input paths for (String path : getInputPath(args)) { if (TextInputFormat.class.equals(getInputFormat())) { TextInputFormat.addInputPath(config, new Path(path)); } else if (SequenceFileInputFormat.class.equals(getInputFormat())) { SequenceFileInputFormat.addInputPath(config, new Path(path)); } } config.setOutputKeyClass(Text.class); config.setOutputValueClass(Text.class); // if output path exists then return FileSystem fs = FileSystem.get(config); Path outputPath = new Path(getOutputPath(args)); FileOutputFormat.setOutputPath(config, outputPath); if (!fs.exists(outputPath)) { JobClient.runJob(config); } else { System.out.println("You has finished this job today ! " + outputPath); } return JobClient.SUCCESS; }
/**
 * Configures and runs the {@code NewItemDailyFrom} job for one date.
 *
 * <p>The date is the first argument, or {@code TaobaoPath.now()} when no argument is given. The
 * job is refused when its output path for that date already exists; otherwise it runs with a
 * single reducer ({@code LongSumReducer}) over the text input returned by
 * {@code TaobaoPath.hiveAuctionAuctions(date)}.
 *
 * @param args optional date, e.g. {@code 20100507}
 * @return {@code -1} when the output for that date already exists, {@code JobClient.SUCCESS}
 *     after the job has run
 * @throws Exception if the file system cannot be accessed or the job fails
 */
public int run(String[] args) throws Exception {
  final String outputName = "new_item_daily_from";

  final String day;
  if (args.length < 1) {
    day = TaobaoPath.now();
    System.out.println(
        "ERROR: Please Enter Date , eg. 20100507 now use default!" + TaobaoPath.now());
  } else {
    day = args[0];
  }

  JobConf job = new JobConf(getConf(), NewItemDailyFrom.class);
  job.setJobName("NewItemDailyFrom-" + System.currentTimeMillis());

  // refuse to run twice for the same day
  FileSystem fileSystem = FileSystem.get(job);
  boolean alreadyDone = fileSystem.exists(TaobaoPath.getOutput(outputName, day));
  if (alreadyDone) {
    String message =
        "ERROR: You has finish this job at this day : "
            + day
            + " [ "
            + TaobaoPath.getOutput(outputName, day)
            + " ] ";
    System.out.println(message);
    return -1;
  }

  job.set("user.date", day);
  job.setNumReduceTasks(1);

  job.setMapperClass(MapClass.class);
  job.setReducerClass(LongSumReducer.class);

  // intermediate and final records are Text / LongWritable
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(LongWritable.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);

  job.setInputFormat(TextInputFormat.class);
  TextInputFormat.addInputPath(job, TaobaoPath.hiveAuctionAuctions(day));
  FileOutputFormat.setOutputPath(job, TaobaoPath.getOutput(outputName, day));

  JobClient.runJob(job);
  return JobClient.SUCCESS;
}
/**
 * Configures this instance by delegating directly to the superclass implementation; no
 * additional settings are read from the job configuration here.
 *
 * @param jobConf the job configuration passed through to {@code super.configure}
 */
public void configure(JobConf jobConf) { super.configure(jobConf); }
public void testProcessOutput() throws Exception { Random rng = RandomUtils.getRandom(); // create a dataset large enough to be split up String descriptor = Utils.randomDescriptor(rng, numAttributes); double[][] source = Utils.randomDoubles(rng, descriptor, numInstances); // each instance label is its index in the dataset int labelId = Utils.findLabel(descriptor); for (int index = 0; index < numInstances; index++) { source[index][labelId] = index; } String[] sData = Utils.double2String(source); // write the data to a file Path dataPath = Utils.writeDataToTestFile(sData); // prepare a data converter Dataset dataset = DataLoader.generateDataset(descriptor, sData); DataConverter converter = new DataConverter(dataset); JobConf job = new JobConf(); job.setNumMapTasks(numMaps); FileInputFormat.setInputPaths(job, dataPath); // retrieve the splits TextInputFormat input = (TextInputFormat) job.getInputFormat(); InputSplit[] splits = input.getSplits(job, numMaps); InputSplit[] sorted = Arrays.copyOf(splits, splits.length); Builder.sortSplits(sorted); Reporter reporter = Reporter.NULL; int[] keys = new int[numMaps]; Step0Output[] values = new Step0Output[numMaps]; int[] expectedIds = new int[numMaps]; for (int p = 0; p < numMaps; p++) { InputSplit split = sorted[p]; RecordReader<LongWritable, Text> reader = input.getRecordReader(split, job, reporter); LongWritable key = reader.createKey(); Text value = reader.createValue(); Long firstKey = null; int size = 0; while (reader.next(key, value)) { if (firstKey == null) { firstKey = key.get(); expectedIds[p] = converter.convert(0, value.toString()).label; } size++; } keys[p] = p; values[p] = new Step0Output(firstKey, size); } Step0Output[] partitions = Step0Job.processOutput(keys, values); int[] actualIds = Step0Output.extractFirstIds(partitions); assertTrue( "Expected: " + Arrays.toString(expectedIds) + " But was: " + Arrays.toString(actualIds), Arrays.equals(expectedIds, actualIds)); }