public static JobControl createValueAggregatorJobs(String args[],
    Class<? extends ValueAggregatorDescriptor>[] descriptors)
    throws IOException {
  JobControl theControl = new JobControl("ValueAggregatorJobs");
  ArrayList<Job> dependingJobs = new ArrayList<Job>();
  JobConf aJobConf = createValueAggregatorJob(args);
  if (descriptors != null) {
    setAggregatorDescriptors(aJobConf, descriptors);
  }
  // The aggregator job has no prerequisites, so it is registered with an
  // empty depending-jobs list.
  Job aJob = new Job(aJobConf, dependingJobs);
  theControl.addJob(aJob);
  return theControl;
}
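// A minimal usage sketch, not part of the original source: JobControl
// implements Runnable, so the usual pattern is to drive it from its own
// thread, poll allFinished(), and then call stop(). The helper name
// "runValueAggregatorJobs" is an assumption for illustration; the args
// layout is whatever createValueAggregatorJob expects.
public static void runValueAggregatorJobs(String args[],
    Class<? extends ValueAggregatorDescriptor>[] descriptors)
    throws IOException {
  JobControl control = createValueAggregatorJobs(args, descriptors);
  new Thread(control).start();
  while (!control.allFinished()) {
    try {
      Thread.sleep(1000);
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      break;
    }
  }
  control.stop();
}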
public static void main(String[] args) throws IOException {
  if (args.length != 3) {
    System.out.println("Parameters: inputDir outputDir parallel");
    System.exit(1);
  }
  String inputDir = args[0];
  String outputDir = args[1];
  String parallel = args[2];

  JobConf lp = new JobConf(L10.class);
  lp.setJobName("L10 Load Page Views");
  lp.setInputFormat(TextInputFormat.class);
  lp.setOutputKeyClass(MyType.class);
  lp.setOutputValueClass(Text.class);
  lp.setMapperClass(ReadPageViews.class);
  lp.setReducerClass(Group.class);
  lp.setPartitionerClass(MyPartitioner.class);
  // Propagate all system properties into the job configuration.
  Properties props = System.getProperties();
  for (Map.Entry<Object, Object> entry : props.entrySet()) {
    lp.set((String) entry.getKey(), (String) entry.getValue());
  }
  FileInputFormat.addInputPath(lp, new Path(inputDir + "/page_views"));
  FileOutputFormat.setOutputPath(lp, new Path(outputDir + "/L10out"));
  // Hardcode the parallelism to 40 since MyPartitioner assumes it; the
  // "parallel" argument is accepted but currently unused.
  lp.setNumReduceTasks(40);
  Job group = new Job(lp);

  JobControl jc = new JobControl("L10 join");
  jc.addJob(group);
  new Thread(jc).start();

  int i = 0;
  while (!jc.allFinished()) {
    ArrayList<Job> failures = jc.getFailedJobs();
    if (failures != null && failures.size() > 0) {
      for (Job failure : failures) {
        System.err.println(failure.getMessage());
      }
      break;
    }
    try {
      Thread.sleep(5000);
    } catch (InterruptedException e) {
    }
    // Periodically dump the state of every job in the control.
    if (i % 10000 == 0) {
      System.out.println("Running jobs");
      ArrayList<Job> running = jc.getRunningJobs();
      if (running != null && running.size() > 0) {
        for (Job r : running) {
          System.out.println(r.getJobName());
        }
      }
      System.out.println("Ready jobs");
      ArrayList<Job> ready = jc.getReadyJobs();
      if (ready != null && ready.size() > 0) {
        for (Job r : ready) {
          System.out.println(r.getJobName());
        }
      }
      System.out.println("Waiting jobs");
      ArrayList<Job> waiting = jc.getWaitingJobs();
      if (waiting != null && waiting.size() > 0) {
        for (Job r : waiting) {
          System.out.println(r.getJobName());
        }
      }
      System.out.println("Successful jobs");
      ArrayList<Job> success = jc.getSuccessfulJobs();
      if (success != null && success.size() > 0) {
        for (Job r : success) {
          System.out.println(r.getJobName());
        }
      }
    }
    i++;
  }
  ArrayList<Job> failures = jc.getFailedJobs();
  if (failures != null && failures.size() > 0) {
    for (Job failure : failures) {
      System.err.println(failure.getMessage());
    }
  }
  jc.stop();
}
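// MyPartitioner is referenced above but not shown. A hypothetical sketch of
// what an old-API (org.apache.hadoop.mapred) partitioner looks like; the real
// PigMix partitioner assumes exactly 40 reducers, whereas this stand-in
// simply hashes the key:
public static class MyPartitioner implements Partitioner<MyType, Text> {
  @Override
  public int getPartition(MyType key, Text value, int numPartitions) {
    // Mask the sign bit so the modulo result is never negative.
    return (key.hashCode() & Integer.MAX_VALUE) % numPartitions;
  }

  @Override
  public void configure(JobConf conf) {
    // Nothing to configure in this sketch.
  }
}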
public static void main(String[] args) throws IOException {
  JobConf lp = new JobConf(L4.class);
  lp.setJobName("Load Page Views");
  lp.setInputFormat(TextInputFormat.class);
  lp.setOutputKeyClass(Text.class);
  lp.setOutputValueClass(Text.class);
  lp.setMapperClass(ReadPageViews.class);
  lp.setCombinerClass(Combiner.class);
  lp.setReducerClass(Group.class);
  // Propagate all system properties into the job configuration; PIGMIX_DIR
  // selects the data directory, with a default location.
  Properties props = System.getProperties();
  String dataDir = props.getProperty("PIGMIX_DIR", "/user/pig/tests/data/pigmix");
  for (Map.Entry<Object, Object> entry : props.entrySet()) {
    lp.set((String) entry.getKey(), (String) entry.getValue());
  }
  FileInputFormat.addInputPath(lp, new Path(dataDir, "page_views"));
  FileOutputFormat.setOutputPath(
      lp, new Path("/user/" + System.getProperty("user.name") + "/L4out"));
  lp.setNumReduceTasks(40);
  Job group = new Job(lp);

  JobControl jc = new JobControl("L4 join");
  jc.addJob(group);
  new Thread(jc).start();

  int i = 0;
  while (!jc.allFinished()) {
    ArrayList<Job> failures = jc.getFailedJobs();
    if (failures != null && failures.size() > 0) {
      for (Job failure : failures) {
        System.err.println(failure.getMessage());
      }
      break;
    }
    try {
      Thread.sleep(5000);
    } catch (InterruptedException e) {
    }
    // Periodically dump the state of every job in the control.
    if (i % 10000 == 0) {
      System.out.println("Running jobs");
      ArrayList<Job> running = jc.getRunningJobs();
      if (running != null && running.size() > 0) {
        for (Job r : running) {
          System.out.println(r.getJobName());
        }
      }
      System.out.println("Ready jobs");
      ArrayList<Job> ready = jc.getReadyJobs();
      if (ready != null && ready.size() > 0) {
        for (Job r : ready) {
          System.out.println(r.getJobName());
        }
      }
      System.out.println("Waiting jobs");
      ArrayList<Job> waiting = jc.getWaitingJobs();
      if (waiting != null && waiting.size() > 0) {
        for (Job r : waiting) {
          System.out.println(r.getJobName());
        }
      }
      System.out.println("Successful jobs");
      ArrayList<Job> success = jc.getSuccessfulJobs();
      if (success != null && success.size() > 0) {
        for (Job r : success) {
          System.out.println(r.getJobName());
        }
      }
    }
    i++;
  }
  ArrayList<Job> failures = jc.getFailedJobs();
  if (failures != null && failures.size() > 0) {
    for (Job failure : failures) {
      System.err.println(failure.getMessage());
    }
  }
  jc.stop();
}
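// The polling loop above is duplicated verbatim across these drivers. A
// hypothetical helper that factors it out, using only the JobControl API
// already exercised here (allFinished(), getFailedJobs(), stop()):
static void waitForJobControl(JobControl jc) {
  new Thread(jc).start();
  while (!jc.allFinished()) {
    try {
      Thread.sleep(5000);
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      break;
    }
  }
  // Report any failures once the control reports all jobs finished.
  ArrayList<Job> failures = jc.getFailedJobs();
  if (failures != null) {
    for (Job failure : failures) {
      System.err.println(failure.getMessage());
    }
  }
  jc.stop();
}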
/** @param args inputFile, frameFile, tempDir, dbPath */
public static void main(String[] args) {
  File inputFile = new File(args[0]);
  File frameFile = new File(args[1]);
  File tempDir = new File(args[2]);
  String dbPath = args[3];
  try {
    JobControl jobControl = new JobControl("jsonld-entities");
    JobConf defaultConf = new JobConf();

    // Stage 0: map the triples into JSON-LD fragments.
    JobConf initialLoadConf = new JobConf(defaultConf);
    initialLoadConf.setInt("rank", 0);
    initialLoadConf.setStrings("frame-file", frameFile.toString());
    initialLoadConf.setMapperClass(TripleMapper.class);
    initialLoadConf.setReducerClass(EntityReducer.class);
    initialLoadConf.setInputFormat(TextInputFormat.class);
    initialLoadConf.setOutputFormat(TextOutputFormat.class);
    initialLoadConf.setMapOutputKeyClass(Text.class);
    initialLoadConf.setMapOutputValueClass(Text.class);
    initialLoadConf.setOutputKeyClass(Text.class);
    initialLoadConf.setOutputValueClass(Text.class);
    FileInputFormat.setInputPaths(initialLoadConf, new Path(inputFile.toString()));
    Path outputPath = new Path(tempDir.toString() + "/stage0");
    FileOutputFormat.setOutputPath(initialLoadConf, outputPath);
    Path prevOutput = outputPath;
    Job initialLoad = new Job(initialLoadConf);
    jobControl.addJob(initialLoad);

    // Stages 1..maxRank: aggregate JSON-LD fragments into a nested structure.
    // Each stage reads the previous stage's output and depends on its job.
    EntityFrame entityFrame = new EntityFrame();
    entityFrame.parse(frameFile);
    Job prevJob = initialLoad;
    for (int rank = 1; rank <= entityFrame.getMaxRank(); rank++) {
      JobConf conf = new JobConf(defaultConf);
      conf.setInt("rank", rank);
      conf.setStrings("frame-file", frameFile.toString());
      conf.setMapperClass(IdentityMapper.class);
      conf.setReducerClass(EntityReducer.class);
      conf.setInputFormat(KeyValueTextInputFormat.class);
      conf.setOutputFormat(TextOutputFormat.class);
      conf.setMapOutputKeyClass(Text.class);
      conf.setMapOutputValueClass(Text.class);
      conf.setOutputKeyClass(Text.class);
      conf.setOutputValueClass(Text.class);
      FileInputFormat.setInputPaths(conf, prevOutput);
      outputPath = new Path(tempDir.toString() + "/stage" + rank);
      FileOutputFormat.setOutputPath(conf, outputPath);
      prevOutput = outputPath;
      Job buildEntityJob = new Job(conf);
      jobControl.addJob(buildEntityJob);
      buildEntityJob.addDependingJob(prevJob);
      prevJob = buildEntityJob;
    }

    // Final stage: frame the nested data and write it to MongoDB.
    JobConf frameConf = new JobConf(defaultConf);
    frameConf.setStrings("frame-file", frameFile.toString());
    frameConf.setMapperClass(IdentityMapper.class);
    frameConf.setReducerClass(EntityFrameReducer.class);
    frameConf.setInputFormat(KeyValueTextInputFormat.class);
    frameConf.setOutputFormat(MongoOutputFormat.class);
    frameConf.set("mongo.output.uri", dbPath);
    frameConf.set("stream.io.identifier.resolver.class",
        "com.mongodb.hadoop.mapred.MongoOutputFormat");
    frameConf.setMapOutputKeyClass(Text.class);
    frameConf.setMapOutputValueClass(Text.class);
    frameConf.setOutputKeyClass(NullWritable.class);
    frameConf.setOutputValueClass(MongoUpdateWritable.class);
    FileInputFormat.setInputPaths(frameConf, prevOutput);
    Job frameEntitiesJob = new Job(frameConf);
    jobControl.addJob(frameEntitiesJob);
    frameEntitiesJob.addDependingJob(prevJob);

    // Clear any stage output left over from a previous run before starting.
    FileSystem fs = FileSystem.get(defaultConf);
    fs.delete(new Path(tempDir.toString()), true);

    // Run the pipeline. JobControl.run() loops until stop() is called, so it
    // is driven from a separate thread and stopped once all jobs finish.
    new Thread(jobControl).start();
    while (!jobControl.allFinished()) {
      try {
        Thread.sleep(5000);
      } catch (InterruptedException e) {
      }
    }
    jobControl.stop();
  } catch (IOException e) {
    // TODO(simister): Auto-generated catch block
    e.printStackTrace();
  }
}
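// Hypothetical invocation, for illustration only (jar, class, and path names
// are assumptions); the argument order matches main(): inputFile, frameFile,
// tempDir, dbPath.
//
//   hadoop jar jsonld-entities.jar com.example.JsonLdEntities \
//       /data/triples.nt /conf/frame.json /tmp/jsonld-stages \
//       mongodb://localhost:27017/graph.entities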