@Override
public int run(String[] args) throws Exception {
  JobConf conf = new JobConf(ImagePreProcessor.class);
  conf.setJobName("preprocessor");

  String baseOutputPath = args[1];
  FileSystem fs = FileSystem.get(conf);

  Path interPath = new Path(baseOutputPath + "_inter");
  fs.mkdirs(interPath);

  // HDFS paths always use "/" as the separator, regardless of platform,
  // so build child paths with the Path(parent, child) constructor rather
  // than concatenating "\\" by hand.
  Path imageFilesPath = new Path(interPath, "imageFiles");
  fs.mkdirs(imageFilesPath);
  System.out.println("imageFilesPath: " + imageFilesPath);

  conf.setStrings("working.directory", imageFilesPath.toString());
  copyFilesFromFolder(conf, args[0], imageFilesPath);

  Path fileNamesPath = new Path(interPath, "fileNames");
  System.out.println("fileNamesPath: " + fileNamesPath);
  writeImageNamesToFile(conf, imageFilesPath, fileNamesPath);

  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);

  conf.setMapperClass(ImagePreprocessorMap.class);
  conf.setReducerClass(ImagePreprocessorReduce.class);

  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(outputClass.class); // custom OutputFormat

  // The job reads the list of image file names and writes its results
  // under a temporary directory inside the intermediate path.
  FileInputFormat.setInputPaths(conf, fileNamesPath);
  FileOutputFormat.setOutputPath(conf, new Path(interPath, "temp"));

  JobClient.runJob(conf);
  return 0;
}
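The @Override on run(String[]) indicates this method comes from org.apache.hadoop.util.Tool. A minimal driver sketch for launching the job, assuming ImagePreProcessor extends Configured and implements Tool (the main method itself is not part of the original source):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

public static void main(String[] args) throws Exception {
  // args[0] = source folder of raw images, args[1] = base output path
  int exitCode = ToolRunner.run(new Configuration(), new ImagePreProcessor(), args);
  System.exit(exitCode);
}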
public int run(String[] args) throws Exception {
  if (args.length != 3) {
    System.err.println("Usage: " + getClass().getName() + " <input> <output> <nPopulation>");
    return -1;
  }

  // Create a JobConf using the processed conf
  final JobConf jobConf = new JobConf(getConf(), getClass());

  // Specify various job-specific parameters
  jobConf.setJobName(MapreduceStringFinder.class.getSimpleName());

  // Set the input format (a custom format that emits individuals)
  jobConf.setInputFormat(Individuals.class);

  // Set the output key and value classes
  jobConf.setOutputKeyClass(Text.class);
  jobConf.setOutputValueClass(BooleanWritable.class);

  // Set the mapper class and the number of map tasks
  jobConf.setMapperClass(CIMapper.class);
  jobConf.setNumMapTasks(3);

  // Set the reducer class
  jobConf.setReducerClass(CIReducer.class);

  // Set up the input/output directories
  final String dataset = args[0];
  FileInputFormat.setInputPaths(jobConf, new Path(dataset));
  FileOutputFormat.setOutputPath(jobConf, new Path(args[1]));

  final int pop = Integer.parseInt(args[2]);

  // If the task tracker offers only a single map slot, run the mappers
  // multithreaded inside that slot instead
  if (jobConf.getInt("mapred.tasktracker.map.tasks.maximum", 2) == 1) {
    jobConf.setMapRunnerClass(MultithreadedMapRunner.class);
    jobConf.setInt("mapred.map.multithreadedrunner.threads", 100);
  }

  // For computation-intensive work, raise the task timeout so that tasks
  // which run a long time without reporting progress (no heartbeat) are
  // not killed prematurely
  final int timeout = 9000000;
  jobConf.setInt("mapred.task.timeout", timeout);

  // Make these parameters available to the mapper before it runs
  jobConf.setInt("popsize", pop);
  jobConf.setStrings("dataset", dataset);

  // Start the map/reduce job
  System.out.println("Starting Job");
  final long startTime = System.currentTimeMillis();

  // Submit the job, then poll for progress until the job is complete
  JobClient.runJob(jobConf);

  final long endTime = System.currentTimeMillis();
  final double duration = (endTime - startTime) / 1000.0;
  System.out.println("Job Finished in " + duration + " seconds");

  return 0;
}
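The values stored with setInt("popsize", ...) and setStrings("dataset", ...) are read back on the mapper side in configure(JobConf). A hypothetical sketch of that pattern: CIMapper's real input types come from the custom Individuals format and are not shown in the source, so Text/Text are assumed, and the map body is placeholder logic only. Note that when MultithreadedMapRunner is used, a single mapper instance is shared across threads, so the map method must be thread-safe.

import java.io.IOException;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class CIMapperSketch extends MapReduceBase
    implements Mapper<Text, Text, Text, BooleanWritable> {

  private int popSize;
  private String dataset;

  @Override
  public void configure(JobConf job) {
    // Retrieve the values the driver stored with setInt/setStrings
    popSize = job.getInt("popsize", 0);
    dataset = job.get("dataset");
  }

  @Override
  public void map(Text key, Text value,
      OutputCollector<Text, BooleanWritable> output, Reporter reporter)
      throws IOException {
    // Placeholder: the real fitness evaluation would happen here
    boolean matches = value.toString().contains(dataset);
    output.collect(key, new BooleanWritable(matches));
  }
}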
/** @param args */
public static void main(String[] args) {
  File inputFile = new File(args[0]);
  File frameFile = new File(args[1]);
  File tempDir = new File(args[2]);
  String dbPath = args[3];

  try {
    JobControl jobControl = new JobControl("jsonld-entities");
    JobConf defaultConf = new JobConf();

    // Map the triples into JSON-LD fragments
    JobConf initialLoadConf = new JobConf(defaultConf);
    initialLoadConf.setInt("rank", 0);
    initialLoadConf.setStrings("frame-file", frameFile.toString());
    initialLoadConf.setMapperClass(TripleMapper.class);
    initialLoadConf.setReducerClass(EntityReducer.class);
    initialLoadConf.setInputFormat(TextInputFormat.class);
    initialLoadConf.setOutputFormat(TextOutputFormat.class);
    initialLoadConf.setMapOutputKeyClass(Text.class);
    initialLoadConf.setMapOutputValueClass(Text.class);
    initialLoadConf.setOutputKeyClass(Text.class);
    initialLoadConf.setOutputValueClass(Text.class);
    FileInputFormat.setInputPaths(initialLoadConf, new Path(inputFile.toString()));
    Path outputPath = new Path(tempDir.toString() + "/stage0");
    FileOutputFormat.setOutputPath(initialLoadConf, outputPath);
    Path prevOutput = outputPath;
    Job initialLoad = new Job(initialLoadConf);
    jobControl.addJob(initialLoad);

    // Aggregate JSON-LD fragments into a nested structure, one job per rank
    EntityFrame entityFrame = new EntityFrame();
    entityFrame.parse(frameFile);
    Job prevJob = initialLoad;
    for (int rank = 1; rank <= entityFrame.getMaxRank(); rank++) {
      JobConf conf = new JobConf(defaultConf);
      conf.setInt("rank", rank);
      conf.setStrings("frame-file", frameFile.toString());
      conf.setMapperClass(IdentityMapper.class);
      conf.setReducerClass(EntityReducer.class);
      conf.setInputFormat(KeyValueTextInputFormat.class);
      conf.setOutputFormat(TextOutputFormat.class);
      conf.setMapOutputKeyClass(Text.class);
      conf.setMapOutputValueClass(Text.class);
      conf.setOutputKeyClass(Text.class);
      conf.setOutputValueClass(Text.class);
      FileInputFormat.setInputPaths(conf, prevOutput);
      outputPath = new Path(tempDir.toString() + "/stage" + rank);
      FileOutputFormat.setOutputPath(conf, outputPath);
      prevOutput = outputPath;
      Job buildEntityJob = new Job(conf);
      jobControl.addJob(buildEntityJob);
      buildEntityJob.addDependingJob(prevJob);
      prevJob = buildEntityJob;
    }

    // Frame the nested data and write it to MongoDB
    JobConf frameConf = new JobConf(defaultConf);
    frameConf.setStrings("frame-file", frameFile.toString());
    frameConf.setMapperClass(IdentityMapper.class);
    frameConf.setReducerClass(EntityFrameReducer.class);
    frameConf.setInputFormat(KeyValueTextInputFormat.class);
    frameConf.setOutputFormat(MongoOutputFormat.class);
    frameConf.set("mongo.output.uri", dbPath);
    frameConf.set("stream.io.identifier.resolver.class",
        "com.mongodb.hadoop.mapred.MongoOutputFormat");
    frameConf.setMapOutputKeyClass(Text.class);
    frameConf.setMapOutputValueClass(Text.class);
    frameConf.setOutputKeyClass(NullWritable.class);
    frameConf.setOutputValueClass(MongoUpdateWritable.class);
    FileInputFormat.setInputPaths(frameConf, prevOutput);
    Job frameEntitiesJob = new Job(frameConf);
    jobControl.addJob(frameEntitiesJob);
    frameEntitiesJob.addDependingJob(prevJob);

    // Clear any intermediate output left over from a previous run
    FileSystem fs = FileSystem.get(defaultConf);
    fs.delete(new Path(tempDir.toString()), true);

    // Run the pipeline
    jobControl.run();
  } catch (IOException e) {
    // TODO(simister): Auto-generated catch block
    e.printStackTrace();
  }
}
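Calling jobControl.run() directly blocks the main thread until every job in the chain has finished or failed, with no opportunity to report progress. A common alternative (a sketch only, not in the original source) exploits the fact that the old-API JobControl implements Runnable: run the control loop on its own thread and poll until all jobs complete, then check for failures.

// Replace the plain jobControl.run() call above with:
Thread controlThread = new Thread(jobControl, "jsonld-entities-control");
controlThread.start();
try {
  while (!jobControl.allFinished()) {
    Thread.sleep(5000); // poll every 5 seconds
  }
} catch (InterruptedException e) {
  Thread.currentThread().interrupt();
}
if (!jobControl.getFailedJobs().isEmpty()) {
  System.err.println("Failed jobs: " + jobControl.getFailedJobs());
}
jobControl.stop();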