public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: test.icde12.HadoopJoin <in> <out>");
        System.exit(2);
    }

    Job job = new Job(conf, "hadoop join");
    job.setJarByClass(HadoopJoin.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    job.setPartitionerClass(ICDEPartitioner.class);
    // WritableComparator.define(Text.class, new ICDEComparator());
    job.setSortComparatorClass(ICDEComparator.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(8);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
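// A minimal sketch of what the custom sort comparator wired in above might look like.
// ICDEComparator is not shown in the original, so this is an assumption: judging from
// the commented-out WritableComparator.define(...) call, it is a WritableComparator
// over Text, registered via setSortComparatorClass so map output keys are ordered by
// a custom rule before reaching the reducers. The real comparison logic is unknown;
// a plain lexicographic comparison stands in for it here.
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

class ICDEComparator extends WritableComparator {
    protected ICDEComparator() {
        super(Text.class, true); // true = instantiate keys for object-level comparison
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        // Placeholder ordering: plain Text comparison.
        return ((Text) a).compareTo((Text) b);
    }
}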
public static void main(String[] args) throws Exception {
    String dir1 = "/user/miyuru/wcout";
    String dir2 = "/user/miyuru/notinverts";

    // We first delete the temporary output directory if it exists on HDFS.
    FileSystem fs1 = FileSystem.get(new JobConf());
    if (fs1.exists(new Path(dir2))) {
        fs1.delete(new Path(dir2), true);
    }

    JobConf conf = new JobConf();
    conf.setNumMapTasks(96);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(LongWritable.class);
    conf.setMapperClass(TokenizerMapper.class);
    conf.setReducerClass(IntSumReducer.class);
    conf.setCombinerClass(IntSumReducer.class);
    conf.setInputFormat(NLinesInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    FileInputFormat.setInputPaths(conf, new Path(dir1));
    FileOutputFormat.setOutputPath(conf, new Path(dir2));

    Job job = new Job(conf, "NotInFinder");
    job.setJarByClass(WordCount.class);
    // job.setMapperClass(TokenizerMapper.class);
    // job.setCombinerClass(IntSumReducer.class);
    // job.setReducerClass(IntSumReducer.class);
    // job.setOutputKeyClass(LongWritable.class);
    // job.setOutputValueClass(LongWritable.class);
    job.setSortComparatorClass(SortComparator.class);
    job.waitForCompletion(true);
}
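// The delete-if-exists dance above recurs in several of these drivers. A small hedged
// helper capturing the pattern (the name deleteIfExists is hypothetical, not from the
// original code): recursively remove an HDFS path so a rerun starts from a clean
// directory. Requires org.apache.hadoop.fs.{FileSystem, Path} and java.io.IOException.
private static void deleteIfExists(FileSystem fs, String dir) throws IOException {
    Path p = new Path(dir);
    if (fs.exists(p)) {
        fs.delete(p, true); // true = recursive delete
    }
}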
@Override
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: topreviews <in> [<in>...] <out>");
        return 2;
    }

    Job job = Job.getInstance(conf, "Top Five Reviews");
    job.setJarByClass(TopFive.class);

    // Secondary sort: partition and group on the natural key, sort on the composite key.
    job.setPartitionerClass(NaturalKeyPartitioner.class);
    job.setGroupingComparatorClass(NaturalKeyGroupingComparator.class);
    job.setSortComparatorClass(CompositeKeyComparator.class);

    job.setMapperClass(TopFiveMapper.class);
    job.setReducerClass(TopFiveReducer.class);
    job.setMapOutputKeyClass(TextPair.class);
    job.setMapOutputValueClass(TextPair.class);
    job.setOutputKeyClass(TextPair.class);
    job.setOutputValueClass(TextPair.class);

    // The usage string allows multiple inputs: all but the last argument are input
    // paths, the last is the output path.
    for (int i = 0; i < otherArgs.length - 1; i++) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));

    // Return the exit code from run() rather than calling System.exit(), as is
    // idiomatic for a Tool implementation driven by ToolRunner.
    return job.waitForCompletion(true) ? 0 : 1;
}
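// A minimal sketch of the secondary-sort trio wired up above. None of these classes
// appear in the original, so everything here is an assumption: TextPair is taken to
// be a WritableComparable exposing getFirst() (the natural key) and getSecond() (the
// secondary key), both as Text. Partition and group on the natural key only, so all
// records sharing it reach a single reduce call; sort on the full composite key, so
// values arrive at the reducer already ordered by the secondary key.
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Partitioner;

class NaturalKeyPartitioner extends Partitioner<TextPair, TextPair> {
    @Override
    public int getPartition(TextPair key, TextPair value, int numPartitions) {
        // Route by the natural key only, ignoring the secondary key.
        return (key.getFirst().hashCode() & Integer.MAX_VALUE) % numPartitions;
    }
}

class NaturalKeyGroupingComparator extends WritableComparator {
    protected NaturalKeyGroupingComparator() {
        super(TextPair.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        // Group reduce input by the natural key only.
        return ((TextPair) a).getFirst().compareTo(((TextPair) b).getFirst());
    }
}

class CompositeKeyComparator extends WritableComparator {
    protected CompositeKeyComparator() {
        super(TextPair.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        // Sort by natural key first, then by secondary key.
        TextPair p1 = (TextPair) a;
        TextPair p2 = (TextPair) b;
        int cmp = p1.getFirst().compareTo(p2.getFirst());
        return cmp != 0 ? cmp : p1.getSecond().compareTo(p2.getSecond());
    }
}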
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("mapred.textoutputformat.separator", ",");
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // Logger log = Logger.getLogger("sds");

    Job job = new Job(conf, "Max");
    job.setJarByClass(map_reduce.class);
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);
    job.setNumReduceTasks(27);

    // Secondary sort: partition and group on the actual (natural) key, sort on the
    // composite key.
    job.setPartitionerClass(ActualKeyPartitioner.class);
    job.setGroupingComparatorClass(ActualKeyGroupingComparator.class);
    job.setSortComparatorClass(CompositeKeyComparator.class);

    // The original set the map output key class twice; once is enough.
    job.setMapOutputKeyClass(CompositeKey.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
public static Job startJob(String[] args) throws IOException {
    // args[0] = HBase table name
    // args[1] = ZooKeeper quorum
    Configuration hConf = HBaseConfiguration.create(new Configuration());
    hConf.set("hbase.zookeeper.quorum", args[1]);
    hConf.set("scan.table", args[0]);
    hConf.set("hbase.zookeeper.property.clientPort", "2181");

    Scan scan = new Scan();
    // scan.setFilter(rowColBloomFilter());
    // Change caching to speed up the scan.
    scan.setCaching(500);
    scan.setMaxVersions(200);
    scan.setCacheBlocks(false);

    Job job = new Job(hConf);
    job.setJobName("BSBM-Q11-RepartitionJoin");
    job.setJarByClass(RepartitionJoinQ11.class);

    // Mapper settings
    TableMapReduceUtil.initTableMapperJob(
        args[0],                    // input HBase table name
        scan,                       // Scan instance to control CF and attribute selection
        RepartitionMapper.class,    // mapper
        CompositeKeyWritable.class, // mapper output key
        KeyValueArrayWritable.class, // mapper output value
        job);

    // Repartition settings
    job.setPartitionerClass(CompositePartitioner.class);
    job.setSortComparatorClass(CompositeSortComparator.class);
    job.setGroupingComparatorClass(CompositeGroupingComparator.class);

    // Reducer settings
    job.setReducerClass(SharedServices.RepartitionJoin_Reducer.class);
    job.setNumReduceTasks(1); // at least one; adjust as required

    FileOutputFormat.setOutputPath(job, new Path("output/BSBMQ11"));

    // Run the job and return it to the caller rather than calling System.exit(),
    // which would make the declared return value unreachable.
    try {
        job.waitForCompletion(true);
    } catch (ClassNotFoundException | InterruptedException e) {
        e.printStackTrace();
    }
    return job;
}
public Job call() throws IOException, InterruptedException, ClassNotFoundException {
    job.setMapperClass(GridmixMapper.class);
    job.setReducerClass(GridmixReducer.class);
    job.setNumReduceTasks(jobdesc.getNumberReduces());
    job.setMapOutputKeyClass(GridmixKey.class);
    job.setMapOutputValueClass(GridmixRecord.class);
    job.setSortComparatorClass(GridmixKey.Comparator.class);
    job.setGroupingComparatorClass(SpecGroupingComparator.class);
    job.setInputFormatClass(GridmixInputFormat.class);
    job.setOutputFormatClass(RawBytesOutputFormat.class);
    job.setPartitionerClass(DraftPartitioner.class);
    job.setJarByClass(GridmixJob.class);
    job.getConfiguration().setInt("gridmix.job.seq", seq);
    job.getConfiguration()
        .set(ORIGNAME, null == jobdesc.getJobID() ? "<unknown>" : jobdesc.getJobID().toString());
    job.getConfiguration().setBoolean("mapred.used.genericoptionsparser", true);
    FileInputFormat.addInputPath(job, new Path("ignored"));
    FileOutputFormat.setOutputPath(job, outdir);
    job.submit();
    return job;
}
@SuppressWarnings("rawtypes")
public void afterPropertiesSet() throws Exception {
    final Configuration cfg = ConfigurationUtils.createFrom(configuration, properties);
    buildGenericOptions(cfg);

    if (StringUtils.hasText(user)) {
        UserGroupInformation ugi =
            UserGroupInformation.createProxyUser(user, UserGroupInformation.getLoginUser());
        ugi.doAs(new PrivilegedExceptionAction<Void>() {
            @Override
            public Void run() throws Exception {
                job = new Job(cfg);
                return null;
            }
        });
    } else {
        job = new Job(cfg);
    }

    ClassLoader loader =
        (beanClassLoader != null
            ? beanClassLoader
            : org.springframework.util.ClassUtils.getDefaultClassLoader());

    if (jar != null) {
        JobConf conf = (JobConf) job.getConfiguration();
        conf.setJar(jar.getURI().toString());
        loader = ExecutionUtils.createParentLastClassLoader(jar, beanClassLoader, cfg);
        conf.setClassLoader(loader);
    }

    // Set the mapper/reducer first to enable auto-detection of the key/value types,
    // so they do not have to be specified explicitly.
    if (mapper != null) {
        Class<? extends Mapper> mapperClass = resolveClass(mapper, loader, Mapper.class);
        job.setMapperClass(mapperClass);
        configureMapperTypesIfPossible(job, mapperClass);
    }
    if (reducer != null) {
        Class<? extends Reducer> reducerClass = resolveClass(reducer, loader, Reducer.class);
        job.setReducerClass(reducerClass);
        configureReducerTypesIfPossible(job, reducerClass);
    }

    if (StringUtils.hasText(name)) {
        job.setJobName(name);
    }
    if (combiner != null) {
        job.setCombinerClass(resolveClass(combiner, loader, Reducer.class));
    }
    if (groupingComparator != null) {
        job.setGroupingComparatorClass(resolveClass(groupingComparator, loader, RawComparator.class));
    }
    if (inputFormat != null) {
        job.setInputFormatClass(resolveClass(inputFormat, loader, InputFormat.class));
    }
    if (mapKey != null) {
        job.setMapOutputKeyClass(resolveClass(mapKey, loader, Object.class));
    }
    if (mapValue != null) {
        job.setMapOutputValueClass(resolveClass(mapValue, loader, Object.class));
    }
    if (numReduceTasks != null) {
        job.setNumReduceTasks(numReduceTasks);
    }
    if (key != null) {
        job.setOutputKeyClass(resolveClass(key, loader, Object.class));
    }
    if (value != null) {
        job.setOutputValueClass(resolveClass(value, loader, Object.class));
    }
    if (outputFormat != null) {
        job.setOutputFormatClass(resolveClass(outputFormat, loader, OutputFormat.class));
    }
    if (partitioner != null) {
        job.setPartitionerClass(resolveClass(partitioner, loader, Partitioner.class));
    }
    if (sortComparator != null) {
        job.setSortComparatorClass(resolveClass(sortComparator, loader, RawComparator.class));
    }
    if (StringUtils.hasText(workingDir)) {
        job.setWorkingDirectory(new Path(workingDir));
    }
    if (jarClass != null) {
        job.setJarByClass(jarClass);
    }

    if (!CollectionUtils.isEmpty(inputPaths)) {
        for (String path : inputPaths) {
            FileInputFormat.addInputPath(job, new Path(path));
        }
    }
    if (StringUtils.hasText(outputPath)) {
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
    }
    if (compressOutput != null) {
        FileOutputFormat.setCompressOutput(job, compressOutput);
    }
    if (codecClass != null) {
        FileOutputFormat.setOutputCompressorClass(
            job, resolveClass(codecClass, loader, CompressionCodec.class));
    }

    processJob(job);
}
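// The resolveClass(...) helper called throughout the factory method above is not shown
// in the original. A minimal sketch of what it plausibly does (hypothetical; the real
// Spring for Apache Hadoop implementation may differ): load the named class through
// the job's class loader and verify it is assignable to the expected type before the
// unchecked cast.
@SuppressWarnings("unchecked")
private static <T> Class<? extends T> resolveClass(
        String className, ClassLoader loader, Class<T> type) throws ClassNotFoundException {
    Class<?> clazz = Class.forName(className, true, loader);
    if (!type.isAssignableFrom(clazz)) {
        throw new IllegalArgumentException(clazz.getName() + " is not a " + type.getName());
    }
    return (Class<? extends T>) clazz;
}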
public static void main(String[] args) throws Exception {
    if (!validArgs(args)) {
        printUsage();
        return;
    }

    // These are the temp paths that are created on HDFS.
    String dir1 = "/user/miyuru/csrconverter-output";
    String dir2 = "/user/miyuru/csrconverter-output-sorted";

    // We first delete the temporary directories if they exist on HDFS.
    FileSystem fs1 = FileSystem.get(new JobConf());

    System.out.println("Deleting the dir : " + dir1);
    if (fs1.exists(new Path(dir1))) {
        fs1.delete(new Path(dir1), true);
    }
    System.out.println("Done deleting the dir : " + dir1);

    System.out.println("Deleting the dir : " + dir2);
    if (fs1.exists(new Path(dir2))) {
        fs1.delete(new Path(dir2), true);
    }
    System.out.println("Done deleting the dir : " + dir2);

    Path notinPath = new Path("/user/miyuru/notinverts/notinverts");
    if (!fs1.exists(notinPath)) {
        fs1.create(notinPath).close(); // close the stream so the empty file is materialized
    }

    // Note on Aug 23 2014: sometimes the MapReduce job hangs after this; need to see why.
    VertexCounterClient.setDefaultGraphID(args[3], args[2]);

    // First job creates the inverted index.
    JobConf conf = new JobConf(CSRConverter.class);
    conf.set("org.acacia.partitioner.hbase.zookeeper.quorum", args[1]);
    conf.set("org.acacia.partitioner.hbase.table", args[2]);
    conf.set("org.acacia.partitioner.hbase.contacthost", args[3]);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);
    // conf.setMapperClass(InvertedMapper.class);
    conf.setReducerClass(InvertedReducer.class);
    // conf.setInputFormat(TextInputFormat.class);
    conf.setInputFormat(NLinesInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    // FileInputFormat.setInputPaths(conf, new Path(args[0]));
    MultipleInputs.addInputPath(
        conf, new Path(args[0]), NLinesInputFormat.class, InvertedMapper.class);
    MultipleInputs.addInputPath(
        conf, notinPath, TextInputFormat.class, InvertedMapper.class);
    FileOutputFormat.setOutputPath(conf, new Path(dir1));

    // For the moment we turn off speculative execution.
    conf.setBoolean("mapred.map.tasks.speculative.execution", false);
    conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
    conf.setNumMapTasks(96);
    conf.setNumReduceTasks(96);
    conf.setPartitionerClass(VertexPartitioner.class);
    conf.set("vertex-count", args[4]);
    conf.set("zero-flag", args[5]);

    Job job = new Job(conf, "csr_inverter");
    job.setSortComparatorClass(SortComparator.class);
    job.waitForCompletion(true);
}
private int setJobParameters(Job job, AminoJob aj) throws Exception {
    final Configuration conf = job.getConfiguration();
    final Class<? extends DataLoader> dataLoaderClass = aj.getDataLoaderClass();
    AminoInputFormat.setDataLoader(job.getConfiguration(), dataLoaderClass.newInstance());

    if (aj instanceof AminoEnrichmentJob) {
        String output = "";
        int returnType = JOB_TYPE_ENRICHMENT;

        if (aj instanceof AminoReuseEnrichmentJob) {
            System.out.println("Running REUSE Enrichment Join Job");
            AminoReuseEnrichmentJob reuseJob = (AminoReuseEnrichmentJob) aj;
            AminoInputFormat.setDataLoader(
                job.getConfiguration(), reuseJob.getFirstPhaseDataLoaderClass().newInstance());

            String root = conf.get(AminoDriverUtils.ENRICHMENT_ROOT_OUTPUT);
            if (!root.endsWith("/")) {
                root += "/";
            }
            String dir = reuseJob.getOutputSubDirectory(conf);
            output += root + dir;

            returnType = JOB_TYPE_REUSE_ENRICHMENT;
        } else {
            System.out.println("Running Enrichment Join Job");
        }

        int numReducers = conf.getInt(
            AMINO_NUM_REDUCERS_ENRICH_PHASE1,
            conf.getInt(AMINO_NUM_REDUCERS, DEFAULT_NUM_REDUCERS));
        job.setNumReduceTasks(numReducers);

        // Our framework mapper and reducer
        job.setMapperClass(FrameworkEnrichmentJoinMapper.class);
        job.setCombinerClass(FrameworkEnrichmentJoinCombiner.class);
        job.setReducerClass(FrameworkEnrichmentJoinReducer.class);

        job.setMapOutputKeyClass(EnrichmentJoinKey.class); // differs from the plain Amino job
        job.setMapOutputValueClass(MapWritable.class);
        job.setOutputKeyClass(BucketStripped.class);
        job.setOutputValueClass(MapWritable.class); // differs from the plain Amino job

        // Secondary sort: partition and group on the natural key, order by the composite key.
        job.setPartitionerClass(NaturalKeyPartitioner.class);
        job.setGroupingComparatorClass(NaturalKeyGroupingComparator.class);
        job.setSortComparatorClass(CompositeKeyComparator.class);

        job.setInputFormatClass(AminoMultiInputFormat.class);

        AminoEnrichmentJob aej = (AminoEnrichmentJob) aj;
        // AminoMultiInputFormat.setJoinDataLoader(conf, aej.getEnrichmentDataLoader().newInstance());
        AminoMultiInputFormat.setJoinDataLoaders(conf, aej.getEnrichmentDataLoaders());
        AminoMultiInputFormat.setEnrichWorker(conf, aej.getEnrichWorker().newInstance());

        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        // TODO If the output already exists and its age is less than the job running
        // frequency, just reuse it instead of running the job above...
        if (output.length() == 0) {
            output = getEnrichmentOutputPath(aej, conf);
        }
        System.out.println("Output will be written to: " + PathUtils.getJobDataPath(output));

        SequenceFileOutputFormat.setOutputPath(job, new Path(PathUtils.getJobDataPath(output)));
        JobUtilities.deleteDirectory(conf, output);
        CacheBuilder.buildCaches(AminoDataUtils.getDataLoader(conf), aj, output, conf);

        return returnType;
    } else {
        System.out.println("\n==================== Running Amino Job =================\n");

        // Our framework mapper and reducer
        job.setMapperClass(FrameworkMapper.class);
        job.setReducerClass(FrameworkReducer.class);

        job.setMapOutputKeyClass(BucketStripped.class);
        job.setMapOutputValueClass(MapWritable.class);
        job.setOutputKeyClass(BucketStripped.class);
        job.setOutputValueClass(AminoWritable.class);

        job.setInputFormatClass(AminoInputFormat.class);
        job.setOutputFormatClass(AminoOutputFormat.class);
        job.setNumReduceTasks(conf.getInt(AMINO_NUM_REDUCERS, DEFAULT_NUM_REDUCERS));

        AminoOutputFormat.setAminoConfigPath(
            job, job.getConfiguration().get(AminoConfiguration.DEFAULT_CONFIGURATION_PATH_KEY));

        String output = conf.get("amino.output");
        System.out.println("Output will be written to: " + PathUtils.getJobDataPath(output));

        AminoOutputFormat.setOutputPath(job, new Path(PathUtils.getJobDataPath(output)));
        JobUtilities.deleteDirectory(conf, output);
        CacheBuilder.buildCaches(AminoDataUtils.getDataLoader(conf), aj, output, conf);

        return JOB_TYPE_NORMAL;
    }
}
@SuppressWarnings("unchecked")
public int run() throws ClassNotFoundException, InterruptedException, IOException {
    Configuration conf = getConf();

    String inputPath = MavunoUtils.getRequiredParam("Mavuno.ExtractGlobalStats.InputPath", conf);
    String corpusPath = MavunoUtils.getRequiredParam("Mavuno.ExtractGlobalStats.CorpusPath", conf);
    String corpusClass = MavunoUtils.getRequiredParam("Mavuno.ExtractGlobalStats.CorpusClass", conf);
    String extractorClass = MavunoUtils.getRequiredParam("Mavuno.ExtractGlobalStats.ExtractorClass", conf);
    String extractorArgs = MavunoUtils.getRequiredParam("Mavuno.ExtractGlobalStats.ExtractorArgs", conf);
    String extractorTarget =
        MavunoUtils.getRequiredParam("Mavuno.ExtractGlobalStats.ExtractorTarget", conf).toLowerCase();
    String outputPath = MavunoUtils.getRequiredParam("Mavuno.ExtractGlobalStats.OutputPath", conf);

    // Split the examples.
    conf.set("Mavuno.Split.InputPath", inputPath);
    conf.set("Mavuno.Split.OutputPath", outputPath + "/../split");
    conf.set("Mavuno.Split.SplitKey", extractorTarget);
    new Split(conf).run();

    // Get the splits and run one job per split.
    FileStatus[] files = MavunoUtils.getDirectoryListing(conf, outputPath + "/../split");
    int split = 0;
    for (FileStatus file : files) {
        if (!file.getPath().getName().endsWith(".examples")) {
            continue;
        }
        conf.set("Mavuno.ExtractGlobalStats.ExamplesPath", file.getPath().toString());

        sLogger.info("Tool name: ExtractGlobalStats");
        sLogger.info(" - Input path: " + inputPath);
        sLogger.info(" - Examples path: " + file.getPath());
        sLogger.info(" - Example split: " + split);
        sLogger.info(" - Corpus path: " + corpusPath);
        sLogger.info(" - Corpus class: " + corpusClass);
        sLogger.info(" - Extractor class: " + extractorClass);
        sLogger.info(" - Extractor args: " + extractorArgs);
        sLogger.info(" - Extractor target: " + extractorTarget);
        sLogger.info(" - Output path: " + outputPath);

        Job job = new Job(conf);
        job.setJobName("ExtractGlobalStats");
        job.setJarByClass(ExtractGlobalStats.class);

        MavunoUtils.recursivelyAddInputPaths(job, corpusPath);
        FileOutputFormat.setOutputPath(job, new Path(outputPath + "/../split/" + split));

        job.setInputFormatClass((Class<? extends InputFormat>) Class.forName(corpusClass));
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        FileOutputFormat.setCompressOutput(job, true);
        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

        job.setMapOutputKeyClass(ContextPatternWritable.class);
        job.setSortComparatorClass(ContextPatternWritable.Comparator.class);
        job.setPartitionerClass(ContextPatternWritable.FullPartitioner.class);
        job.setMapOutputValueClass(ContextPatternStatsWritable.class);

        job.setOutputKeyClass(ContextPatternWritable.class);
        job.setOutputValueClass(ContextPatternStatsWritable.class);

        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);

        job.waitForCompletion(true);
        split++;
    }

    // Combine the splits.
    conf.setInt("Mavuno.CombineGlobalStats.TotalSplits", split);
    conf.set("Mavuno.CombineGlobalStats.InputPath", outputPath + "/../split/");
    conf.set("Mavuno.CombineGlobalStats.OutputPath", outputPath);
    new CombineGlobalStats(conf).run();

    MavunoUtils.removeDirectory(conf, outputPath + "/../split");
    return 0;
}