public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException { // TODO Auto-generated method stub Configuration conf = new Configuration(); conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", ","); Job job = Job.getInstance(conf); job.setJarByClass(MapsideJoinDriver.class); job.setMapperClass(MapsideJoinMapper.class); job.setInputFormatClass(CompositeInputFormat.class); String expr = CompositeInputFormat.compose( "inner", KeyValueTextInputFormat.class, new Path(args[0]), new Path(args[1])); // String expr = CompositeInputFormat.compose("outer",KeyValueTextInputFormat.class , new // Path(args[0]),new Path(args[1])); job.getConfiguration().set("mapreduce.join.expr", expr); job.setNumReduceTasks(0); TextOutputFormat.setOutputPath(job, new Path(args[2])); job.waitForCompletion(true); }
/**
 * Configures and runs the "Rank Vector Normalize" job over the inner join of two sequence-file
 * inputs and waits for it to finish.
 *
 * <p>The current values of {@code rightHandSideSum} and {@code rankVectorSum} are passed to the
 * tasks as string-valued configuration entries of the same names.
 *
 * @param oldRankVectorInput first input path of the inner join
 * @param newRankVectorInput second input path of the inner join
 * @param output output directory of the job
 * @param l1NormOutput name under which an additional named output (sequence file of {@code
 *     NullWritable} keys and {@code DoubleWritable} values) is registered
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if job setup, submission or execution fails
 */
public int run(
    String oldRankVectorInput, String newRankVectorInput, String output, String l1NormOutput)
    throws Exception {
  Configuration conf = new Configuration();
  conf.set("rightHandSideSum", rightHandSideSum + "");
  conf.set("rankVectorSum", rankVectorSum + "");

  Job job = new Job(conf, "Rank Vector Normalize");
  job.setJarByClass(RankVectorNormalize.class);

  job.setMapperClass(RankVectorNormalize.RankVectorNormalizeMapper.class);
  job.setReducerClass(RankVectorNormalize.RankVectorNormalizeReducer.class);

  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(BlockVectorElement.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(BlockVectorElement.class);

  job.setInputFormatClass(CompositeInputFormat.class);
  String joinStatement =
      CompositeInputFormat.compose(
          "inner", SequenceFileInputFormat.class, oldRankVectorInput, newRankVectorInput);
  // The Job works on its own copy of the Configuration handed to its constructor, so the
  // join expression has to be written to the job's configuration. Setting it on `conf` at
  // this point would never reach the submitted job.
  job.getConfiguration().set("mapred.join.expr", joinStatement);

  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(output));
  MultipleOutputs.addNamedOutput(
      job, l1NormOutput, SequenceFileOutputFormat.class, NullWritable.class, DoubleWritable.class);

  return job.waitForCompletion(true) ? 0 : 1;
}
/** * The main driver for sort program. Invoke this method to submit the map/reduce job. * * @throws Exception When there is communication problems with the job tracker. */ public int run(String[] args) throws Exception { Configuration conf = getConf(); JobClient client = new JobClient(conf); ClusterStatus cluster = client.getClusterStatus(); int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9); String join_reduces = conf.get(REDUCES_PER_HOST); if (join_reduces != null) { num_reduces = cluster.getTaskTrackers() * Integer.parseInt(join_reduces); } Job job = new Job(conf); job.setJobName("join"); job.setJarByClass(Sort.class); job.setMapperClass(Mapper.class); job.setReducerClass(Reducer.class); Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class; Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class; Class<? extends WritableComparable> outputKeyClass = BytesWritable.class; Class<? extends Writable> outputValueClass = TupleWritable.class; String op = "inner"; List<String> otherArgs = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { try { if ("-r".equals(args[i])) { num_reduces = Integer.parseInt(args[++i]); } else if ("-inFormat".equals(args[i])) { inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class); } else if ("-outFormat".equals(args[i])) { outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class); } else if ("-outKey".equals(args[i])) { outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class); } else if ("-outValue".equals(args[i])) { outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class); } else if ("-joinOp".equals(args[i])) { op = args[++i]; } else { otherArgs.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing 
from " + args[i - 1]); return printUsage(); // exits } } // Set user-supplied (possibly default) job configs job.setNumReduceTasks(num_reduces); if (otherArgs.size() < 2) { System.out.println("ERROR: Wrong number of parameters: "); return printUsage(); } FileOutputFormat.setOutputPath(job, new Path(otherArgs.remove(otherArgs.size() - 1))); List<Path> plist = new ArrayList<Path>(otherArgs.size()); for (String s : otherArgs) { plist.add(new Path(s)); } job.setInputFormatClass(CompositeInputFormat.class); job.getConfiguration() .set( CompositeInputFormat.JOIN_EXPR, CompositeInputFormat.compose(op, inputFormatClass, plist.toArray(new Path[0]))); job.setOutputFormatClass(outputFormatClass); job.setOutputKeyClass(outputKeyClass); job.setOutputValueClass(outputValueClass); Date startTime = new Date(); System.out.println("Job started: " + startTime); int ret = job.waitForCompletion(true) ? 0 : 1; Date end_time = new Date(); System.out.println("Job ended: " + end_time); System.out.println( "The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds."); return ret; }