private void configureMapperTypesIfPossible(Job j, Class<? extends Mapper> mapper) {
  // Walk up the class hierarchy to find the generic superclass that is Mapper itself
  Class<?> targetClass = mapper;
  Type targetType = mapper;
  do {
    targetType = targetClass.getGenericSuperclass();
    targetClass = targetClass.getSuperclass();
  } while (targetClass != null && targetClass != Object.class && !Mapper.class.equals(targetClass));

  if (targetType instanceof ParameterizedType) {
    Type[] params = ((ParameterizedType) targetType).getActualTypeArguments();
    if (params.length == 4) {
      // set the map output key class (if possible)
      if (params[2] instanceof Class) {
        Class<?> clz = (Class<?>) params[2];
        if (!clz.isInterface()) {
          j.setMapOutputKeyClass(clz);
        }
      }
      // set the map output value class (if possible)
      if (params[3] instanceof Class) {
        Class<?> clz = (Class<?>) params[3];
        if (!clz.isInterface()) {
          j.setMapOutputValueClass(clz);
        }
      }
    }
  }
}
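For illustration, a hypothetical mapper such as the one below would have its third and fourth type parameters picked up by the helper above; the class name and types are assumptions, not part of the original source.

// Hypothetical mapper used only to illustrate the generic-type inspection above.
public static class TokenCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
  // map() implementation omitted
}

// configureMapperTypesIfPossible(job, TokenCountMapper.class) would then call
// job.setMapOutputKeyClass(Text.class) and job.setMapOutputValueClass(IntWritable.class).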
@Override
public int run(String[] args) throws Exception {
  Configuration conf = getConf();
  // conf.set("stat_date", dateString);

  Job job = new Job(conf, "DayhslogUserDateNewMac");
  job.setJarByClass(DayhslogUserDate.class);

  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileInputFormat.addInputPath(job, new Path(args[1]));
  FileOutputFormat.setOutputPath(job, new Path(args[2] + "tmp"));
  FileSystem.get(conf).delete(new Path(args[2] + "tmp"), true);

  job.setMapperClass(DayhslogUserDateNewMacMapper.class);
  job.setReducerClass(DayhslogUserDateNewMacReducer.class);
  // job.setInputFormatClass(LzoTextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setNumReduceTasks(10);

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  int code = job.waitForCompletion(true) ? 0 : 1;

  if (code == 0) {
    Job resultJob = new Job(conf, "DayhslogUserDate");
    resultJob.setJarByClass(DayhslogUserDate.class);

    FileInputFormat.addInputPath(resultJob, new Path(args[0]));
    FileInputFormat.addInputPath(resultJob, new Path(args[2] + "tmp"));
    FileOutputFormat.setOutputPath(resultJob, new Path(args[2]));
    FileSystem.get(conf).delete(new Path(args[2]), true);

    resultJob.setMapperClass(DayhslogUserDateMapper.class);
    resultJob.setReducerClass(DayhslogUserDateReducer.class);
    resultJob.setNumReduceTasks(10);

    resultJob.setMapOutputKeyClass(Text.class);
    resultJob.setMapOutputValueClass(Text.class);
    resultJob.setOutputKeyClass(Text.class);
    resultJob.setOutputValueClass(Text.class);

    code = resultJob.waitForCompletion(true) ? 0 : 1;
  }

  // Clean up the intermediate output before returning
  Path tmpPath = new Path(args[2] + "tmp");
  FileSystem.get(conf).delete(tmpPath, true);

  System.exit(code);
  return code;
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  conf.set("mapred.textoutputformat.separator", ",");
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  // Logger log = Logger.getLogger("sds");

  Job job = new Job(conf, "Max ");
  job.setJarByClass(map_reduce.class);

  // Secondary-sort plumbing: composite key with custom partitioner and comparators
  job.setPartitionerClass(ActualKeyPartitioner.class);
  job.setGroupingComparatorClass(ActualKeyGroupingComparator.class);
  job.setSortComparatorClass(CompositeKeyComparator.class);

  job.setMapperClass(MyMapper.class);
  job.setReducerClass(MyReducer.class);
  job.setNumReduceTasks(27);

  job.setMapOutputKeyClass(CompositeKey.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
@Override
public int run(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length < 2) {
    System.err.println("Usage: topreviews <in> [<in>...] <out>");
    System.exit(2);
  }

  Job job = Job.getInstance(conf, "Top Five Reviews");
  job.setJarByClass(TopFive.class);

  // Secondary sort: natural-key partitioning/grouping with composite-key ordering
  job.setPartitionerClass(NaturalKeyPartitioner.class);
  job.setGroupingComparatorClass(NaturalKeyGroupingComparator.class);
  job.setSortComparatorClass(CompositeKeyComparator.class);

  job.setMapperClass(TopFiveMapper.class);
  job.setReducerClass(TopFiveReducer.class);

  job.setMapOutputKeyClass(TextPair.class);
  job.setMapOutputValueClass(TextPair.class);
  job.setOutputKeyClass(TextPair.class);
  job.setOutputValueClass(TextPair.class);

  FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

  System.exit(job.waitForCompletion(true) ? 0 : 1);
  return 0;
}
/**
 * Create a map-only Hadoop Job out of the passed in parameters. Does not set the
 * Job name.
 *
 * @see #getCustomJobName(String, org.apache.hadoop.mapreduce.JobContext, Class, Class)
 */
@SuppressWarnings("rawtypes")
public static Job prepareJob(Path inputPath,
                             Path outputPath,
                             Class<? extends InputFormat> inputFormat,
                             Class<? extends Mapper> mapper,
                             Class<? extends Writable> mapperKey,
                             Class<? extends Writable> mapperValue,
                             Class<? extends OutputFormat> outputFormat,
                             Configuration conf) throws IOException {

  // Job job = new Job(new Configuration(conf));
  Job job = Job.getInstance(conf);
  Configuration jobConf = job.getConfiguration();

  if (mapper.equals(Mapper.class)) {
    throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
  }
  job.setJarByClass(mapper);

  job.setInputFormatClass(inputFormat);
  jobConf.set("mapred.input.dir", inputPath.toString());

  job.setMapperClass(mapper);
  job.setMapOutputKeyClass(mapperKey);
  job.setMapOutputValueClass(mapperValue);
  job.setOutputKeyClass(mapperKey);
  job.setOutputValueClass(mapperValue);
  jobConf.setBoolean("mapred.compress.map.output", true);
  job.setNumReduceTasks(0);

  job.setOutputFormatClass(outputFormat);
  jobConf.set("mapred.output.dir", outputPath.toString());

  return job;
}
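A plausible call site for this helper might look like the following minimal sketch; the mapper class and the paths are illustrative assumptions, not taken from the original code.

// Hypothetical usage of prepareJob for a map-only pass over sequence files.
// ItemExtractorMapper and the paths are assumptions for illustration.
Job job = prepareJob(new Path("/data/in"),
                     new Path("/data/out"),
                     SequenceFileInputFormat.class,
                     ItemExtractorMapper.class,
                     Text.class,
                     IntWritable.class,
                     SequenceFileOutputFormat.class,
                     getConf());
job.setJobName("item extraction");
boolean succeeded = job.waitForCompletion(true);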
public static void main(String[] args)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (args.length < 1) {
    System.out.println("USAGE: RFDSReasoner [pool path] [options]");
    return;
  }

  Job job = new Job(conf, "reasoner");
  job.setJarByClass(TCMReasoner.class);
  System.out.println(args[0]);

  job.setMapperClass(TCMMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Triple.class);

  job.setReducerClass(TCMReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Triple.class);

  FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
  job.waitForCompletion(true);

  Counter derivedTriples = job.getCounters()
      .findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS");
  System.out.println(derivedTriples.getValue());
  return;
}
public static void main(String[] args)
    throws IOException, InterruptedException, ClassNotFoundException {
  if (args.length != 2) {
    throw new IllegalArgumentException(args.length + " usage: ... ");
  }
  String bitvectorpath = args[0], outputPath = args[1];

  Configuration conf = new Configuration();
  Job job = Job.getInstance(new Cluster(conf), conf);
  job.setJobName("int key replace phase1");
  job.setJarByClass(OutlinkGrowthAnalysis.class);

  job.setMapperClass(BVIdentitiyMapper.class);
  job.setReducerClass(AnaylseOLGrowthReducer.class);

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  job.setInputFormatClass(TabSeperatedTextInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);

  FileOutputFormat.setOutputPath(job, new Path(outputPath));
  FileInputFormat.setInputPaths(job, new Path(bitvectorpath));
  job.setNumReduceTasks(1);

  job.waitForCompletion(true);
}
public int run(String[] args) throws Exception {
  // Check input arguments
  if (args.length != 2) {
    System.out.println("Usage: firstprog <input HIB> <output directory>");
    System.exit(0);
  }

  // Initialize and configure MapReduce job
  Job job = Job.getInstance();

  // Set input format class which parses the input HIB and spawns map tasks
  // job.setInputFormatClass(ImageBundleInputFormat.class);
  job.setInputFormatClass(HibInputFormat.class);

  // Set the driver, mapper, and reducer classes which express the computation
  job.setJarByClass(SampleProgram.class);
  job.setMapperClass(SampleProgramMapper.class);
  job.setReducerClass(SampleProgramReducer.class);

  // Set the types for the key/value pairs passed to/from map and reduce layers
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(FloatImage.class);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(Text.class);

  // Set the input and output paths on the HDFS
  FileInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  // Execute the MapReduce job and block until it completes
  boolean success = job.waitForCompletion(true);

  // Return success or failure
  return success ? 0 : 1;
}
@Override
public int run(String[] args) throws Exception {
  final int ret = parseArgs(args);
  if (ret < 0) {
    return ret;
  }

  Job job = Job.getInstance(getConf());
  job.setJarByClass(GreeDiFirst.class);
  job.setJobName(String.format("Coverage-GreeDiFirst[%s %s]", partitionCount, selectCount));

  job.getConfiguration().setInt(Fields.PARTITION_COUNT.get(), partitionCount);
  job.getConfiguration().setInt(Fields.SELECT_COUNT.get(), selectCount);
  job.setNumReduceTasks(partitionCount);

  SetupHelper.getInstance().setSequenceInput(job, inputPath).setTextOutput(job, outputPath);

  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(DocumentWithVectorWritable.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(IntWritable.class);

  job.setMapperClass(Map.class);
  job.setReducerClass(GreeDiReducer.class);

  // Delete the output directory if it exists already.
  FileSystem.get(getConf()).delete(new Path(outputPath), true);

  job.waitForCompletion(true);

  return 0;
}
public static void main(String[] args) throws Exception {
  // Job 1: convert all the flight data into NODE : Graph Structure.
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length != 3) {
    System.err.println("Usage: hubsandspokesload <in> <out> <finalout>");
    System.exit(2);
  }

  Job job = new Job(conf, "hubsandspokesload");
  job.setJarByClass(HubsAndSpokes.class);
  job.setMapperClass(HubSpokeLoadMapper.class);
  job.setReducerClass(HubSpokeLoadReducer.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputKeyClass(NodeWritable.class);
  job.setOutputValueClass(Text.class);

  FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

  boolean b = job.waitForCompletion(true);
  if (!b) {
    System.exit(2);
  }

  // Job 2: run the HITS algorithm to calculate hub and spoke values
  // at each node in the graph iteratively.
  dijkstra(otherArgs[1], otherArgs[2]);
  // dijkstra("output1", "finaloutput");
}
public Job getJob(Configuration conf) throws IOException {
  Job job = new Job(conf, "pivoting");
  job.setJarByClass(PivotingReducer.class);

  job.setMapperClass(Mapper.class);
  job.setReducerClass(PivotingReducer.class);

  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setMapOutputKeyClass(RuleWritable.class);
  job.setMapOutputValueClass(MapWritable.class);
  job.setOutputKeyClass(RuleWritable.class);
  job.setOutputValueClass(MapWritable.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setPartitionerClass(RuleWritable.SourcePartitioner.class);

  FileInputFormat.setInputPaths(job, new Path(conf.get("thrax.work-dir") + "collected"));

  int maxSplitSize = conf.getInt("thrax.max-split-size", 0);
  if (maxSplitSize != 0) {
    FileInputFormat.setMaxInputSplitSize(job, maxSplitSize);
  }

  int numReducers = conf.getInt("thrax.reducers", 4);
  job.setNumReduceTasks(numReducers);

  FileOutputFormat.setOutputPath(job, new Path(conf.get("thrax.work-dir") + "pivoted"));
  FileOutputFormat.setCompressOutput(job, true);

  return job;
}
/**
 * Run a local map reduce job to read records from an HCatalog table and verify
 * that the count is as expected.
 *
 * @param readCount the expected number of records to be read
 * @param filter partition filter passed to HCatInputFormat
 * @return the records that were read
 * @throws Exception
 */
List<HCatRecord> runMRRead(int readCount, String filter) throws Exception {
  MapRead.readCount = 0;
  readRecords.clear();

  Configuration conf = new Configuration();
  conf.set(HiveConf.ConfVars.METASTORE_INTEGER_JDO_PUSHDOWN.varname, "true");
  Job job = new Job(conf, "hcat mapreduce read test");
  job.setJarByClass(this.getClass());
  job.setMapperClass(HCatMapReduceTest.MapRead.class);

  // input/output settings
  job.setInputFormatClass(HCatInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  HCatInputFormat.setInput(job, dbName, tableName, filter);

  job.setMapOutputKeyClass(BytesWritable.class);
  job.setMapOutputValueClass(Text.class);
  job.setNumReduceTasks(0);

  Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceOutput");
  if (fs.exists(path)) {
    fs.delete(path, true);
  }
  TextOutputFormat.setOutputPath(job, path);

  job.waitForCompletion(true);

  Assert.assertEquals(readCount, MapRead.readCount);
  return readRecords;
}
public int run(String[] args) throws Exception {
  if (args.length < 1) {
    System.out.println("missing input ");
    System.exit(1);
  }

  Configuration conf = new Configuration();
  Job job = new Job(new JobConf(conf));
  job.setJarByClass(InstallApp.class);
  job.setMapperClass(InstallMapper.class);
  job.setReducerClass(InstalleReduce.class);
  job.setJobName("installtaotaosou");

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntWritable.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  // Add every file in the input directory as an input path
  FileSystem fs = FileSystem.get(URI.create(args[0]), conf);
  FileStatus fileList[] = fs.listStatus(new Path(args[0]));
  int length = fileList.length;
  for (int i = 0; i < length; i++) {
    FileInputFormat.addInputPath(job, fileList[i].getPath());
  }

  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setNumReduceTasks(1);

  job.waitForCompletion(true);
  return 0;
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, "simple feature writer");

  job.setJarByClass(FeatureWriterJob.class);
  job.setMapperClass(MyMapper.class);
  job.setInputFormatClass(GeoMesaInputFormat.class);
  job.setOutputFormatClass(GeoMesaOutputFormat.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(ScalaSimpleFeature.class);
  job.setNumReduceTasks(0);

  // Input data store connection parameters
  Map<String, String> params = new HashMap<String, String>();
  params.put("instanceId", "myinstance");
  params.put("zookeepers", "zoo1,zoo2,zoo3");
  params.put("user", "myuser");
  params.put("password", "mypassword");
  params.put("tableName", "mycatalog");

  Query query = new Query("myfeature", ECQL.toFilter("BBOX(geom, -165,5,-50,75)"));
  GeoMesaInputFormat.configure(job, params, query);

  // Output data store connection parameters
  Map<String, String> outParams = new HashMap<String, String>();
  outParams.put("instanceId", "myinstance");
  outParams.put("zookeepers", "zoo1,zoo2,zoo3");
  outParams.put("user", "myuser");
  outParams.put("password", "mypassword");
  outParams.put("tableName", "mycatalog_2");
  GeoMesaOutputFormat.configureDataStore(job, outParams);

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
@Override
public int run(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, AnalysisMapReduce.class.getName());
  job.setJarByClass(AnalysisMapReduce.class);

  FileInputFormat.addInputPath(job, new Path(args[0]));
  job.setInputFormatClass(TextInputFormat.class);

  job.setMapperClass(AnalysicMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(NullWritable.class);
  // job.setNumReduceTasks(0);

  job.setNumReduceTasks(1);
  job.setReducerClass(AnalysicReducer.class);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(NullWritable.class);

  Path out = new Path(args[1]);
  FileSystem fs = FileSystem.get(conf);
  if (fs.exists(out)) {
    fs.delete(out, true);
  }
  FileOutputFormat.setOutputPath(job, out);

  return job.waitForCompletion(true) ? 0 : 1;
}
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("Usage: WhiteHouseVisitorDriver <input path> <output path>");
    ToolRunner.printGenericCommandUsage(System.err);
    return -1;
  }

  Configuration conf = new Configuration();
  Job job = new Job(conf);
  job.setJarByClass(WhiteHouseVisitorDriver.class);

  // input and output paths
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  // map/combine/reduce class definition
  job.setMapperClass(WhiteHouseVisitorMapper.class);
  job.setCombinerClass(IntSumReducer.class);
  job.setReducerClass(IntSumReducer.class);

  // key and value type definition
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntWritable.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);

  return job.waitForCompletion(true) ? 0 : 1;
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  // This must be set before the Job is created, otherwise the job classes
  // will not be found (ClassNotFoundException).
  conf.set("mapreduce.job.jar", "wcwin.jar");

  Job job = Job.getInstance(conf, "winjob");
  getConf(conf);
  job.setJarByClass(WordCount.class);
  job.setMapperClass(WMapper.class);
  job.setReducerClass(WReducer.class);

  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntWritable.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  String path = "/wc/output";
  // FileSystem fs = FileSystem.get(conf);
  Path p = new Path(path);
  // if (fs.exists(p)) {
  //   fs.delete(p, true);
  //   System.out.println("Output path already existed and has been deleted!");
  // }

  FileInputFormat.setInputPaths(job, "/wc/srcdata");
  FileOutputFormat.setOutputPath(job, p);

  printEnv(job);
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
private void configureMapTasks(String arg, Job job) throws IOException {
  job.setMapperClass(IdentityMapper.class);
  job.setMapOutputKeyClass(keyClass);
  job.setMapOutputValueClass(valueClass);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  SequenceFileInputFormat.setInputPaths(job, arg);
}
public static void dijkstra(String input, String output) throws Exception {
  String temp = output;

  // Run the HITS algorithm (Job 2) for 32 iterations (k = 32)
  for (int i = 0; i < 32; i++) {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "hubsandspokes");
    job.setJarByClass(HubsAndSpokes.class);
    job.setMapperClass(HubSpokeMapper.class);
    job.setReducerClass(HubSpokeReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NodeWritable.class);
    job.setOutputKeyClass(NodeWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1);

    FileInputFormat.addInputPath(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));

    // Swap the input and output paths for the next iteration
    input = output;
    output = temp + Integer.toString(i);

    // Wait for the job to complete
    boolean b = job.waitForCompletion(true);
    if (!b) {
      System.exit(2);
    }
    // System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}
/** RUN */
@Override
public int run(String[] args) throws Exception {
  if (args.length != 3) {
    System.err.println("Usage: wordcount <input_dir> <output_dir> <reducers>");
    return -1;
  }

  Job job = new Job(getConf(), "PigMix L13");
  job.setJarByClass(L13.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(Text.class);
  job.setMapperClass(ReadInput.class);
  job.setReducerClass(Join.class);

  // Copy all system properties into the job configuration
  Properties props = System.getProperties();
  Configuration conf = job.getConfiguration();
  for (Map.Entry<Object, Object> entry : props.entrySet()) {
    conf.set((String) entry.getKey(), (String) entry.getValue());
  }

  FileInputFormat.addInputPath(job, new Path(args[0] + "/pigmix_page_views"));
  FileInputFormat.addInputPath(job, new Path(args[0] + "/pigmix_power_users_samples"));
  FileOutputFormat.setOutputPath(job, new Path(args[1] + "/L13out"));
  job.setNumReduceTasks(Integer.parseInt(args[2]));

  return job.waitForCompletion(true) ? 0 : -1;
}
@Override
public int run(String[] args) throws Exception {
  Job job = new Job(getConf());
  String jobName = "Rating predictor MR";
  job.setJobName(jobName);
  job.setJarByClass(UtilityPredictor.class);

  FileInputFormat.addInputPaths(job, args[0]);
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  job.setMapperClass(UtilityPredictor.PredictionMapper.class);
  job.setReducerClass(UtilityPredictor.PredictorReducer.class);

  job.setMapOutputKeyClass(TextInt.class);
  job.setMapOutputValueClass(Tuple.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(Text.class);

  job.setGroupingComparatorClass(ItemIdGroupComprator.class);
  job.setPartitionerClass(ItemIdPartitioner.class);

  Utility.setConfiguration(job.getConfiguration());

  // Prefer the job-specific reducer count, falling back to the generic setting
  int numReducer = job.getConfiguration().getInt("utp.num.reducer", -1);
  numReducer = -1 == numReducer ? job.getConfiguration().getInt("num.reducer", 1) : numReducer;
  job.setNumReduceTasks(numReducer);

  int status = job.waitForCompletion(true) ? 0 : 1;
  return status;
}
@Override
public int run(String[] args) throws Exception {
  Job job = new Job(getConf());
  String jobName = "Implicit rating estimator MR";
  job.setJobName(jobName);
  job.setJarByClass(ImplicitRatingEstimator.class);

  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  job.setMapperClass(ImplicitRatingEstimator.RatingEstimatorMapper.class);
  job.setReducerClass(ImplicitRatingEstimator.RatingEstimatorReducer.class);

  job.setMapOutputKeyClass(Tuple.class);
  job.setMapOutputValueClass(Tuple.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(Text.class);

  job.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class);
  job.setPartitionerClass(SecondarySort.TuplePairPartitioner.class);

  Utility.setConfiguration(job.getConfiguration());
  job.setNumReduceTasks(job.getConfiguration().getInt("num.reducer", 1));

  int status = job.waitForCompletion(true) ? 0 : 1;
  return status;
}
public int run(String[] args) throws Exception {
  Job job = Job.getInstance(
      getConf(),
      "Import vessel locations from files in " + args[0] + " into table cdb_vessel:vessel_location");

  FileInputFormat.addInputPath(job, new Path(args[0]));

  job.setJarByClass(ImportVTLocationFromFileWithReducer.class);
  job.setJobName("Vessel_location_injection");

  job.setInputFormatClass(VTVesselLocationFileInputFormat.class);
  job.setMapOutputKeyClass(Key_IMOAndRecordTime.class);
  job.setMapOutputValueClass(TextArrayWritable.class);

  job.setPartitionerClass(Partitioner_IMO.class);
  job.setGroupingComparatorClass(GroupComparator_IMO.class);

  job.setReducerClass(ImportReducer.class);
  job.setNumReduceTasks(Integer.parseInt(args[1]));

  job.setOutputFormatClass(NullOutputFormat.class);

  return job.waitForCompletion(true) ? 0 : 1;
}
/**
 * Driver for InputSampler from the command line. Configures a Job instance and
 * calls {@link #writePartitionFile}.
 */
public int run(String[] args) throws Exception {
  Job job = new Job(getConf());
  ArrayList<String> otherArgs = new ArrayList<String>();
  Sampler<K, V> sampler = null;
  for (int i = 0; i < args.length; ++i) {
    try {
      if ("-r".equals(args[i])) {
        job.setNumReduceTasks(Integer.parseInt(args[++i]));
      } else if ("-inFormat".equals(args[i])) {
        job.setInputFormatClass(Class.forName(args[++i]).asSubclass(InputFormat.class));
      } else if ("-keyClass".equals(args[i])) {
        job.setMapOutputKeyClass(Class.forName(args[++i]).asSubclass(WritableComparable.class));
      } else if ("-splitSample".equals(args[i])) {
        int numSamples = Integer.parseInt(args[++i]);
        int maxSplits = Integer.parseInt(args[++i]);
        if (0 >= maxSplits) {
          maxSplits = Integer.MAX_VALUE;
        }
        sampler = new SplitSampler<K, V>(numSamples, maxSplits);
      } else if ("-splitRandom".equals(args[i])) {
        double pcnt = Double.parseDouble(args[++i]);
        int numSamples = Integer.parseInt(args[++i]);
        int maxSplits = Integer.parseInt(args[++i]);
        if (0 >= maxSplits) {
          maxSplits = Integer.MAX_VALUE;
        }
        sampler = new RandomSampler<K, V>(pcnt, numSamples, maxSplits);
      } else if ("-splitInterval".equals(args[i])) {
        double pcnt = Double.parseDouble(args[++i]);
        int maxSplits = Integer.parseInt(args[++i]);
        if (0 >= maxSplits) {
          maxSplits = Integer.MAX_VALUE;
        }
        sampler = new IntervalSampler<K, V>(pcnt, maxSplits);
      } else {
        otherArgs.add(args[i]);
      }
    } catch (NumberFormatException except) {
      System.out.println("ERROR: Integer expected instead of " + args[i]);
      return printUsage();
    } catch (ArrayIndexOutOfBoundsException except) {
      System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
      return printUsage();
    }
  }

  if (job.getNumReduceTasks() <= 1) {
    System.err.println("Sampler requires more than one reducer");
    return printUsage();
  }
  if (otherArgs.size() < 2) {
    System.out.println("ERROR: Wrong number of parameters: ");
    return printUsage();
  }
  if (null == sampler) {
    sampler = new RandomSampler<K, V>(0.1, 10000, 10);
  }

  // The last remaining argument is the partition file; the rest are input paths.
  Path outf = new Path(otherArgs.remove(otherArgs.size() - 1));
  TotalOrderPartitioner.setPartitionFile(getConf(), outf);
  for (String s : otherArgs) {
    FileInputFormat.addInputPath(job, new Path(s));
  }
  InputSampler.<K, V>writePartitionFile(job, sampler);

  return 0;
}
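The partition file written above is typically consumed by a separate sorting job through TotalOrderPartitioner. A minimal sketch follows; the paths, job name, and reducer count are illustrative assumptions.

// Sketch: a sorting job that reads the partition file produced by the sampler above.
// The paths and the reducer count are assumptions for illustration.
Job sortJob = Job.getInstance(getConf(), "total order sort");
sortJob.setNumReduceTasks(10);
sortJob.setPartitionerClass(TotalOrderPartitioner.class);
TotalOrderPartitioner.setPartitionFile(sortJob.getConfiguration(), new Path("_partitions"));
FileInputFormat.addInputPath(sortJob, new Path("/data/in"));
FileOutputFormat.setOutputPath(sortJob, new Path("/data/sorted"));
sortJob.waitForCompletion(true);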
private void doMapReduce() {
  try {
    Job job = Job.getInstance();
    job.getConfiguration().set(OutputFormat.NAMESPACE, "/");
    job.getConfiguration().set(OutputFormat.TABLE, "LoadTest");
    job.getConfiguration().setInt(OutputFormat.MUTATOR_FLAGS, MutatorFlag.NO_LOG_SYNC.getValue());
    job.getConfiguration().setInt(OutputFormat.MUTATOR_FLUSH_INTERVAL, 0);
    job.getConfiguration().setInt("LoadSplit.TOTAL_ROWS", this.totalRows);
    job.getConfiguration().setInt("LoadSplit.CLIENTS", this.clients);

    job.setJarByClass(LoadTest.class);
    job.setJobName("Hypertable MapReduce connector LoadTest");

    job.setInputFormatClass(LoadInputFormat.class);
    job.setOutputFormatClass(OutputFormat.class);
    job.setMapOutputKeyClass(KeyWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);
    job.setMapperClass(LoadMapper.class);
    job.setReducerClass(LoadReducer.class);
    job.setNumReduceTasks(this.clients);

    job.waitForCompletion(true);
  } catch (Exception e) {
    e.printStackTrace();
  }
}
@Override
public int run(String[] args) throws Exception {
  Job job = new Job(conf);
  job.setJarByClass(StatisticsGenerator.class);

  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  SequenceFileInputFormat.addInputPath(job, new Path(args[0]));
  SequenceFileOutputFormat.setOutputPath(job, new Path(args[1]));

  job.setMapperClass(StatisticsMap.class);
  job.setReducerClass(StatisticsReduce.class);

  job.setMapOutputKeyClass(SortedMapWritableComparable.class);
  job.setMapOutputValueClass(BytesWritable.class);
  job.setNumReduceTasks(16);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(BytesWritable.class);

  // Measure the driver thread's CPU time spent while the job runs
  long startTime = ManagementFactory.getThreadMXBean().getThreadCpuTime(Thread.currentThread().getId());
  boolean success = job.waitForCompletion(true);
  long endTime = ManagementFactory.getThreadMXBean().getThreadCpuTime(Thread.currentThread().getId());
  double duration = (endTime - startTime) / Math.pow(10, 9);

  logger.info("=== Job Finished in " + duration + " seconds " + (success ? "(success)" : "(failure)"));

  return success ? 0 : 1;
}
private boolean runJob(Configuration conf)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job job = Job.getInstance(conf, "WordCount");
  job.setJarByClass(WordCount.class);

  // Configure input format and files
  job.setInputFormatClass(TextInputFormat.class);
  FileInputFormat.addInputPath(job, new Path(inputDir));

  // Configure output format and files
  job.setOutputFormatClass(TextOutputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(outputDir));

  // Set up mapper, combiner and reducer
  job.setMapperClass(WordCountMapper.class);
  job.setReducerClass(WordCountReducer.class);
  job.setCombinerClass(WordCountReducer.class);

  // Set key and value types
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntWritable.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);

  return job.waitForCompletion(true);
}
public static void main(String[] args)
    throws IOException, InterruptedException, ClassNotFoundException {
  JobConf conf = new JobConf();
  conf.setNumMapTasks(1);
  conf.setNumReduceTasks(5);

  // Count the number of input files
  FileSystem fs = FileSystem.get(conf);
  Path dir = new Path(args[0]);
  FileStatus[] stats = fs.listStatus(dir);
  numFiles = stats.length;

  Job job = new Job(conf);
  job.setJarByClass(FileCombiner.class);
  job.setJobName("File Combiner");

  job.setMapperClass(FileCombinerMapper.class);
  job.setReducerClass(FileCombinerReducer.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  // LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(Text.class);

  FileInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  job.waitForCompletion(true);
}
@Override
public int run(String[] args) throws Exception {
  Job job = new Job(getConf());
  String jobName = "Running aggregates for numerical attributes";
  job.setJobName(jobName);
  job.setJarByClass(RunningAggregator.class);

  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  Utility.setConfiguration(job.getConfiguration(), "chombo");

  job.setMapperClass(RunningAggregator.AggrMapper.class);
  job.setReducerClass(RunningAggregator.AggrReducer.class);

  job.setMapOutputKeyClass(Tuple.class);
  job.setMapOutputValueClass(Tuple.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(Text.class);

  job.setNumReduceTasks(job.getConfiguration().getInt("num.reducer", 1));

  int status = job.waitForCompletion(true) ? 0 : 1;
  return status;
}
@Override
public int run(String[] args) throws Exception {
  Job job = Job.getInstance(getConf());
  job.setJobName("TeraSortCloud");
  job.setJarByClass(this.getClass());

  Opts opts = new Opts();
  opts.parseArgs(TeraSortIngest.class.getName(), args);

  job.setInputFormatClass(RangeInputFormat.class);
  job.setMapperClass(SortGenMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Mutation.class);
  job.setNumReduceTasks(0);

  job.setOutputFormatClass(AccumuloOutputFormat.class);
  opts.setAccumuloConfigs(job);
  BatchWriterConfig bwConfig = new BatchWriterConfig().setMaxMemory(10L * 1000 * 1000);
  AccumuloOutputFormat.setBatchWriterOptions(job, bwConfig);

  Configuration conf = job.getConfiguration();
  conf.setLong(NUMROWS, opts.numRows);
  conf.setInt("cloudgen.minkeylength", opts.minKeyLength);
  conf.setInt("cloudgen.maxkeylength", opts.maxKeyLength);
  conf.setInt("cloudgen.minvaluelength", opts.minValueLength);
  conf.setInt("cloudgen.maxvaluelength", opts.maxValueLength);
  conf.set("cloudgen.tablename", opts.getTableName());

  if (args.length > 10) {
    conf.setInt(NUMSPLITS, opts.splits);
  }

  job.waitForCompletion(true);
  return job.isSuccessful() ? 0 : 1;
}