public int run(String[] args) throws Exception {
  Path inputPath = new Path("weblog_entries.txt");
  Path outputPath = new Path("output");
  Schema schema = ReflectData.get().getSchema(WeblogRecord.class);
  Configuration conf = getConf();

  Job weblogJob = Job.getInstance(conf);
  weblogJob.setJobName("Avro Writer");
  weblogJob.setJarByClass(getClass());
  weblogJob.setNumReduceTasks(0);
  weblogJob.setMapperClass(WeblogMapper_Ex_5.class);
  weblogJob.setMapOutputKeyClass(AvroWrapper.class);
  weblogJob.setMapOutputValueClass(NullWritable.class);
  weblogJob.setInputFormatClass(TextInputFormat.class);

  AvroJob.setOutputKeySchema(weblogJob, schema);

  FileInputFormat.setInputPaths(weblogJob, inputPath);
  FileOutputFormat.setOutputPath(weblogJob, outputPath);

  if (weblogJob.waitForCompletion(true)) {
    return 0;
  }
  return 1;
}
private boolean runJob(Configuration conf)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job job = Job.getInstance(conf, "WordCount");
  job.setJarByClass(WordCount.class);

  // Configure input format and files
  job.setInputFormatClass(TextInputFormat.class);
  FileInputFormat.addInputPath(job, new Path(inputDir));

  // Configure output format and files
  job.setOutputFormatClass(TextOutputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(outputDir));

  // Set up mapper, combiner and reducer
  job.setMapperClass(WordCountMapper.class);
  job.setReducerClass(WordCountReducer.class);
  job.setCombinerClass(WordCountReducer.class);

  // Set sorting, grouping and partitioning
  // Set key and value types
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntWritable.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);

  return job.waitForCompletion(true);
}
/**
 * Get job corresponding to jobid.
 *
 * @param jobId
 * @return object of {@link Job}
 * @throws IOException
 * @throws InterruptedException
 */
public Job getJob(JobID jobId) throws IOException, InterruptedException {
  JobStatus status = client.getJobStatus(jobId);
  if (status != null) {
    return Job.getInstance(this, status, new JobConf(status.getJobFile()));
  }
  return null;
}
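// Usage sketch (not part of the original source): looking up a job by id through this
// method, assuming it lives on org.apache.hadoop.mapreduce.Cluster and a cluster
// connection is available. The job id string below is a hypothetical example.
Cluster cluster = new Cluster(new Configuration());
Job job = cluster.getJob(JobID.forName("job_1408700001000_0001"));
if (job != null) {
  System.out.println("State: " + job.getStatus().getState());
}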
public int run(String[] args) throws Exception {
  Job job =
      Job.getInstance(
          getConf(),
          "Import vessel locations from files in "
              + args[0]
              + " into table cdb_vessel:vessel_location");
  FileInputFormat.addInputPath(job, new Path(args[0]));
  job.setJarByClass(ImportVTLocationFromFileWithReducer.class);
  job.setJobName("Vessel_location_injection");
  job.setInputFormatClass(VTVesselLocationFileInputFormat.class);
  job.setMapOutputKeyClass(Key_IMOAndRecordTime.class);
  job.setMapOutputValueClass(TextArrayWritable.class);
  job.setPartitionerClass(Partitioner_IMO.class);
  job.setGroupingComparatorClass(GroupComparator_IMO.class);
  job.setReducerClass(ImportReducer.class);
  job.setNumReduceTasks(Integer.parseInt(args[1]));
  job.setOutputFormatClass(NullOutputFormat.class);
  return job.waitForCompletion(true) ? 0 : 1;
}
private void doMapReduce() {
  try {
    Job job = Job.getInstance();
    job.getConfiguration().set(OutputFormat.NAMESPACE, "/");
    job.getConfiguration().set(OutputFormat.TABLE, "LoadTest");
    job.getConfiguration().setInt(OutputFormat.MUTATOR_FLAGS, MutatorFlag.NO_LOG_SYNC.getValue());
    job.getConfiguration().setInt(OutputFormat.MUTATOR_FLUSH_INTERVAL, 0);
    job.getConfiguration().setInt("LoadSplit.TOTAL_ROWS", this.totalRows);
    job.getConfiguration().setInt("LoadSplit.CLIENTS", this.clients);
    job.setJarByClass(LoadTest.class);
    job.setJobName("Hypertable MapReduce connector LoadTest");
    job.setInputFormatClass(LoadInputFormat.class);
    job.setOutputFormatClass(OutputFormat.class);
    job.setMapOutputKeyClass(KeyWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);
    job.setMapperClass(LoadMapper.class);
    job.setReducerClass(LoadReducer.class);
    job.setNumReduceTasks(this.clients);
    job.waitForCompletion(true);
  } catch (Exception e) {
    e.printStackTrace();
  }
}
@Override
public int run(String[] args) throws Exception {
  if (args.length < 7) {
    System.exit(-1);
  }
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, "OSM-Gridding");
  job.setJarByClass(OSMGrid.class);
  job.setOutputKeyClass(WritablePoint.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(OSMMapper.class);
  job.setPartitionerClass(GridPartitioner.class);
  job.setReducerClass(OSMReducer.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.getConfiguration().set(OSMMapper.MINLAT, args[2]);
  job.getConfiguration().set(OSMMapper.MINLON, args[3]);
  job.getConfiguration().set(OSMMapper.MAXLAT, args[4]);
  job.getConfiguration().set(OSMMapper.MAXLON, args[5]);
  job.getConfiguration().set(OSMReducer.GRID, args[6]);
  job.setNumReduceTasks(Integer.parseInt(args[6]) * Integer.parseInt(args[6]));
  boolean succ = job.waitForCompletion(true);
  return succ ? 0 : 1;
}
/** Job configuration. */
public static Job configureJob(Configuration conf, String[] args) throws IOException {
  String tableName = args[0];
  String columnFamily = args[1];
  String outputPath = args[2];
  String rowKeyType = args[3];
  conf.set("row.key.type", rowKeyType);
  conf.set("table.name", tableName);
  Scan scan = new Scan();
  scan.addFamily(Bytes.toBytes(columnFamily));
  scan.setBatch(ConstantsTruthy.TRUTHY_TABLE_SCAN_BATCH);
  conf.set("mapred.map.tasks.speculative.execution", "false");
  conf.set("mapred.reduce.tasks.speculative.execution", "false");
  Job job =
      Job.getInstance(
          conf, "Count the column count and indexRecordSize for each row in " + tableName);
  job.setJarByClass(TruthyIndexFeatureCounter.class);
  TableMapReduceUtil.initTableMapperJob(
      tableName, scan, TfcMapper.class, Text.class, Text.class, job, true);
  job.setNumReduceTasks(0);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  FileOutputFormat.setOutputPath(job, new Path(outputPath));
  TableMapReduceUtil.addDependencyJars(job);
  return job;
}
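// Hedged usage sketch for configureJob above: build a configuration, configure the job,
// and submit it. Assumes args = { tableName, columnFamily, outputPath, rowKeyType };
// the concrete values here are illustrative placeholders only.
Configuration conf = HBaseConfiguration.create();
Job job =
    configureJob(conf, new String[] {"truthy_table", "cf", "/output/feature-counts", "string"});
System.exit(job.waitForCompletion(true) ? 0 : 1);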
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length != 2) {
    System.err.println("Usage: MapRedAWithIP <in> <out>");
    System.exit(2);
  }
  Job job = Job.getInstance(conf, "A RRs with IP Addresses");
  job.setInputFormatClass(DNSFileInputFormat.class);
  job.setJarByClass(MapRedARRs.class);
  job.setMapperClass(MapRecords.class);
  job.setReducerClass(ReduceRecords.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(DNSRecordIO.class);
  FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
  MapRedFileUtils.deleteDir(otherArgs[1]);
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  // This setting must come before the Job is created, otherwise a
  // "class not found" exception is thrown.
  conf.set("mapreduce.job.jar", "wcwin.jar");
  Job job = Job.getInstance(conf, "winjob");
  getConf(conf);
  job.setJarByClass(WordCount.class);
  job.setMapperClass(WMapper.class);
  job.setReducerClass(WReducer.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntWritable.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  String path = "/wc/output";
  // FileSystem fs = FileSystem.get(conf);
  Path p = new Path(path);
  // if (fs.exists(p)) {
  //   fs.delete(p, true);
  //   System.out.println("Output path already existed and was deleted.");
  // }
  FileInputFormat.setInputPaths(job, "/wc/srcdata");
  FileOutputFormat.setOutputPath(job, p);
  printEnv(job);
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
public int run(String[] args) throws Exception {
  String input = "hdfs://master:9000/tmp/logs/merge_data/";
  String output = "hdfs://master:9000/tmp/logs/merge_data/result";
  Configuration conf = getConf();
  Job job = Job.getInstance(conf, MergeExample.class.getSimpleName());
  // Set the job jar
  job.setJarByClass(MergeExample.class);
  job.setInputFormatClass(TextInputFormat.class);

  // Configure the mapper
  job.setMapperClass(MergeExampleMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(NullWritable.class);
  FileInputFormat.setInputPaths(job, input);

  // Configure the reducer
  // job.setReducerClass(MergeExampleReducer.class);
  // job.setOutputKeyClass(Text.class);
  // job.setOutputValueClass(LongWritable.class);

  FileOutputFormat.setOutputPath(job, new Path(output));

  // Submit the job
  job.setOutputFormatClass(TextOutputFormat.class);
  job.waitForCompletion(true);
  return job.isSuccessful() ? 0 : 1;
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, "Repartition Join (projection by text)");
  job.setJarByClass(ReplicatedJoinBasic.class);

  // Input parameters
  Path donationsPath = new Path(args[0]);
  Path projectsPath = new Path(args[1]);
  Path outputPath = new Path(args[2]);

  // Mappers configuration
  MultipleInputs.addInputPath(
      job, donationsPath, SequenceFileInputFormat.class, DonationsMapper.class);
  MultipleInputs.addInputPath(
      job, projectsPath, SequenceFileInputFormat.class, ProjectsMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);

  // Reducer configuration
  job.setNumReduceTasks(3);
  job.setReducerClass(JoinReducer.class);
  FileOutputFormat.setOutputPath(job, outputPath);

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
public static void runDailyTrendEstimationJob(String inputPath) throws Exception {
  Configuration conf = new Configuration();
  Job job;
  try {
    conf.set("mongo.output.uri", "mongodb://52.33.93.221:27017/mongo_hadoop.page_trends");
    job = Job.getInstance(conf, "dataclean");
    job.setJarByClass(DataCleanJob.class);
    job.setMapperClass(DataCleanMapper.class);
    job.setReducerClass(DataCleanReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(PageDataValue.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(MongoUpdateWritable.class);
    FileInputFormat.setInputDirRecursive(job, true);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    MongoConfig mongoConfig = new MongoConfig(conf);
    mongoConfig.setOutputFormat(MongoOutputFormat.class);
    job.setOutputFormatClass(MongoOutputFormat.class);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  } catch (IOException e) {
    e.printStackTrace();
  } catch (ClassNotFoundException e) {
    e.printStackTrace();
  } catch (InterruptedException e) {
    e.printStackTrace();
  }
}
public static void runTrendCalcJob(String inputPath, String outputPath)
    throws IOException, ClassNotFoundException, InterruptedException {
  Configuration conf = new Configuration();
  Job job;
  try {
    job = Job.getInstance(conf, "Trend Calc");
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(TrendMapper.class);
    job.setReducerClass(TrendTopCalcReducer.class);
    job.setMapOutputKeyClass(TrendKey.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setJarByClass(DataCleanJob.class);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    job.waitForCompletion(true);
  } catch (IOException e) {
    e.printStackTrace();
  } catch (ClassNotFoundException e) {
    e.printStackTrace();
  } catch (InterruptedException e) {
    e.printStackTrace();
  }
}
/** Job configuration. */
public static Job configureJob(Configuration conf, String[] args) throws IOException {
  Scan scan = new Scan();
  scan.addFamily(Cw09Constants.CF_FREQUENCIES_BYTES);
  scan.setBatch(Cw09Constants.CW09_INDEX_SCAN_BATCH);
  conf.set("mapred.map.tasks.speculative.execution", "false");
  conf.set("mapred.reduce.tasks.speculative.execution", "false");
  Job job = Job.getInstance(conf, "Count the total frequency of each term in the index table");
  job.setJarByClass(TermHitsCounter.class);
  // TableMapReduceUtil.initTableMapperJob(Constants.CLUEWEB09_INDEX_TABLE_NAME, scan,
  //     ThcMapper.class, Text.class, LongWritable.class, job);
  TableMapReduceUtil.initTableMapperJob(
      Cw09Constants.CLUEWEB09_INDEX_TABLE_NAME,
      scan,
      ThcMapper.class,
      Text.class,
      LongWritable.class,
      job,
      true,
      CustomizedSplitTableInputFormat.class);
  job.setCombinerClass(ThcCombiner.class);
  TableMapReduceUtil.initTableReducerJob(
      Cw09Constants.CLUEWEB09_TERM_COUNT_TABLE_NAME, ThcReducer.class, job);
  job.setNumReduceTasks(40);
  return job;
}
private static void StartingJob()
    throws IOException, InterruptedException, ClassNotFoundException {
  conf = new Configuration();
  fs = FileSystem.get(conf);
  conf.setLong("my.vertex.num", num);
  job = Job.getInstance(conf, "Levelized Nested Dissection Starting");
  job.setJarByClass(LevNestDissectJob.class);
  job.setMapperClass(StartVertexMapper.class);
  job.setReducerClass(StartVertexReducer.class);
  in = out.suffix("/" + outPath_count);
  FileInputFormat.addInputPath(job, in);
  out_start = out.suffix("/" + outPath_start);
  if (fs.exists(out_start)) {
    fs.delete(out_start, true);
  }
  FileOutputFormat.setOutputPath(job, out_start);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(VertexWritable.class);
  job.setMapOutputValueClass(Text.class);
  job.waitForCompletion(true);
  depth = depth == 0 ? depth + 1 : depth;
  wasStart = true;
}
@Override
public int run(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, AnalysisMapReduce.class.getName());
  job.setJarByClass(AnalysisMapReduce.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  job.setInputFormatClass(TextInputFormat.class);
  job.setMapperClass(AnalysicMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(NullWritable.class);
  // job.setNumReduceTasks(0);
  job.setNumReduceTasks(1);
  job.setReducerClass(AnalysicReducer.class);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(NullWritable.class);
  Path out = new Path(args[1]);
  FileSystem fs = FileSystem.get(conf);
  if (fs.exists(out)) {
    fs.delete(out, true);
  }
  FileOutputFormat.setOutputPath(job, out);
  return job.waitForCompletion(true) ? 0 : 1;
}
@Override
public int run(String[] args) throws Exception {
  String instance = args[0];
  String zookeepers = args[1];
  String user = args[2];
  String tokenFile = args[3];
  String input = args[4];
  String tableName = args[5];

  Job job = Job.getInstance(getConf());
  job.setJobName(TokenFileWordCount.class.getName());
  job.setJarByClass(this.getClass());

  job.setInputFormatClass(TextInputFormat.class);
  TextInputFormat.setInputPaths(job, input);

  job.setMapperClass(MapClass.class);
  job.setNumReduceTasks(0);

  job.setOutputFormatClass(AccumuloOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Mutation.class);

  // AccumuloInputFormat not used here, but it uses the same functions.
  AccumuloOutputFormat.setZooKeeperInstance(
      job, ClientConfiguration.loadDefault().withInstance(instance).withZkHosts(zookeepers));
  AccumuloOutputFormat.setConnectorInfo(job, user, tokenFile);
  AccumuloOutputFormat.setCreateTables(job, true);
  AccumuloOutputFormat.setDefaultTableName(job, tableName);

  job.waitForCompletion(true);
  return 0;
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, "simple feature writer");

  job.setJarByClass(FeatureWriterJob.class);
  job.setMapperClass(MyMapper.class);
  job.setInputFormatClass(GeoMesaInputFormat.class);
  job.setOutputFormatClass(GeoMesaOutputFormat.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(ScalaSimpleFeature.class);
  job.setNumReduceTasks(0);

  Map<String, String> params = new HashMap<String, String>();
  params.put("instanceId", "myinstance");
  params.put("zookeepers", "zoo1,zoo2,zoo3");
  params.put("user", "myuser");
  params.put("password", "mypassword");
  params.put("tableName", "mycatalog");

  Query query = new Query("myfeature", ECQL.toFilter("BBOX(geom, -165,5,-50,75)"));

  GeoMesaInputFormat.configure(job, params, query);

  Map<String, String> outParams = new HashMap<String, String>();
  outParams.put("instanceId", "myinstance");
  outParams.put("zookeepers", "zoo1,zoo2,zoo3");
  outParams.put("user", "myuser");
  outParams.put("password", "mypassword");
  outParams.put("tableName", "mycatalog_2");

  GeoMesaOutputFormat.configureDataStore(job, outParams);

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
/**
 * Create a map-only Hadoop Job out of the passed in parameters. Does not set the Job name.
 *
 * @see #getCustomJobName(String, org.apache.hadoop.mapreduce.JobContext, Class, Class)
 */
@SuppressWarnings("rawtypes")
public static Job prepareJob(
    Path inputPath,
    Path outputPath,
    Class<? extends InputFormat> inputFormat,
    Class<? extends Mapper> mapper,
    Class<? extends Writable> mapperKey,
    Class<? extends Writable> mapperValue,
    Class<? extends OutputFormat> outputFormat,
    Configuration conf)
    throws IOException {

  // Job job = new Job(new Configuration(conf));
  Job job = Job.getInstance(conf);
  Configuration jobConf = job.getConfiguration();

  if (mapper.equals(Mapper.class)) {
    throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
  }
  job.setJarByClass(mapper);

  job.setInputFormatClass(inputFormat);
  jobConf.set("mapred.input.dir", inputPath.toString());

  job.setMapperClass(mapper);
  job.setMapOutputKeyClass(mapperKey);
  job.setMapOutputValueClass(mapperValue);
  job.setOutputKeyClass(mapperKey);
  job.setOutputValueClass(mapperValue);
  jobConf.setBoolean("mapred.compress.map.output", true);
  job.setNumReduceTasks(0);

  job.setOutputFormatClass(outputFormat);
  jobConf.set("mapred.output.dir", outputPath.toString());

  return job;
}
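// Hedged usage sketch for prepareJob above: a map-only job over text input. The mapper
// class and paths are illustrative placeholders, not part of the original code; note that
// the helper reuses the map output key/value classes as the job output classes.
Configuration conf = new Configuration();
Job job =
    prepareJob(
        new Path("/data/in"),
        new Path("/data/out"),
        TextInputFormat.class,
        MyTokenizingMapper.class,
        Text.class,
        IntWritable.class,
        TextOutputFormat.class,
        conf);
job.setJobName("tokenize");
job.waitForCompletion(true);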
@Override
public int run(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length < 2) {
    System.err.println("Usage: topreviews <in> [<in>...] <out>");
    System.exit(2);
  }
  Job job = Job.getInstance(conf, "Top Five Reviews");
  job.setJarByClass(TopFive.class);
  job.setPartitionerClass(NaturalKeyPartitioner.class);
  job.setGroupingComparatorClass(NaturalKeyGroupingComparator.class);
  job.setSortComparatorClass(CompositeKeyComparator.class);
  job.setMapperClass(TopFiveMapper.class);
  job.setReducerClass(TopFiveReducer.class);
  job.setMapOutputKeyClass(TextPair.class);
  job.setMapOutputValueClass(TextPair.class);
  job.setOutputKeyClass(TextPair.class);
  job.setOutputValueClass(TextPair.class);
  FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
  System.exit(job.waitForCompletion(true) ? 0 : 1);
  return 0;
}
public int run(String[] args) throws Exception {
  // Check input arguments
  if (args.length != 2) {
    System.out.println("Usage: firstprog <input HIB> <output directory>");
    System.exit(0);
  }

  // Initialize and configure MapReduce job
  Job job = Job.getInstance();

  // Set input format class which parses the input HIB and spawns map tasks
  // job.setInputFormatClass(ImageBundleInputFormat.class);
  job.setInputFormatClass(HibInputFormat.class);

  // Set the driver, mapper, and reducer classes which express the computation
  job.setJarByClass(SampleProgram.class);
  job.setMapperClass(SampleProgramMapper.class);
  job.setReducerClass(SampleProgramReducer.class);

  // Set the types for the key/value pairs passed to/from map and reduce layers
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(FloatImage.class);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(Text.class);

  // Set the input and output paths on the HDFS
  FileInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  // Execute the MapReduce job and block until it completes
  boolean success = job.waitForCompletion(true);

  // Return success or failure
  return success ? 0 : 1;
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  conf.set(nameNode, hdfsURL);
  Job job = Job.getInstance(conf, "AccessProcessJob");
  job.setJarByClass(AccessProcessJob.class);
  new Path(outputPath).getFileSystem(conf).delete(new Path(outputPath), true);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setMapperClass(AccessProcessMap.class);
  job.setReducerClass(AccessProcessReduce.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  FileInputFormat.setInputPaths(job, new Path(inputPath));
  FileOutputFormat.setOutputPath(job, new Path(outputPath));
  job.getConfiguration().setInt("mapreduce.job.maps", 1);
  job.setNumReduceTasks(1);
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
@Override
public int run(String[] args) throws Exception {
  final int ret = parseArgs(args);
  if (ret < 0) {
    return ret;
  }

  Job job = Job.getInstance(getConf());
  job.setJarByClass(GreeDiFirst.class);
  job.setJobName(String.format("Coverage-GreeDiFirst[%s %s]", partitionCount, selectCount));

  job.getConfiguration().setInt(Fields.PARTITION_COUNT.get(), partitionCount);
  job.getConfiguration().setInt(Fields.SELECT_COUNT.get(), selectCount);
  job.setNumReduceTasks(partitionCount);

  SetupHelper.getInstance().setSequenceInput(job, inputPath).setTextOutput(job, outputPath);

  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(DocumentWithVectorWritable.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(IntWritable.class);

  job.setMapperClass(Map.class);
  job.setReducerClass(GreeDiReducer.class);

  // Delete the output directory if it exists already.
  FileSystem.get(getConf()).delete(new Path(outputPath), true);

  job.waitForCompletion(true);

  return 0;
}
@Before
public void setup() throws IOException {
  job = Job.getInstance();
  conf = job.getConfiguration();
  attemptId = new TaskAttemptID();
  Path inputPath = new Path(TABLE_PATH_STR);
  inputSplit = new FileSplit(inputPath, 0, 1, null);
  Descriptor desc = new Descriptor(new File(TABLE_PATH_STR), "keyspace", "columnFamily", 1, false);

  doReturn(desc).when(ssTableColumnRecordReader).getDescriptor();
  doReturn(desc).when(ssTableRowRecordReader).getDescriptor();

  doNothing()
      .when(ssTableColumnRecordReader)
      .copyTablesToLocal(any(FileSplit.class), any(TaskAttemptContext.class));
  doNothing()
      .when(ssTableRowRecordReader)
      .copyTablesToLocal(any(FileSplit.class), any(TaskAttemptContext.class));

  doReturn(ssTableReader)
      .when(ssTableColumnRecordReader)
      .openSSTableReader(any(IPartitioner.class), any(CFMetaData.class));
  doReturn(ssTableReader)
      .when(ssTableRowRecordReader)
      .openSSTableReader(any(IPartitioner.class), any(CFMetaData.class));

  when(ssTableReader.getDirectScanner(null)).thenReturn(tableScanner);
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length < 2) {
    System.err.println("Usage: RemoveDup <in> [<in>...] <out>");
    System.exit(2);
  }

  // Delete the output directory (optional; avoids the "output directory
  // already exists" error on repeated runs).
  // HDFSUtil.deleteFile(conf, otherArgs[otherArgs.length - 1]);

  Job job = Job.getInstance(conf, "RemoveDup");
  job.setJarByClass(RemoveDup.class);
  job.setMapperClass(RemoveDupMapper.class);
  job.setCombinerClass(RemoveDupReducer.class);
  job.setReducerClass(RemoveDupReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(NullWritable.class);

  for (int i = 0; i < otherArgs.length - 1; ++i) {
    FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
  }
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
@Override
public int run(String[] arg0) throws Exception {
  Job job = Job.getInstance(getConf(), "PopulationJob");
  Configuration conf = job.getConfiguration();
  job.setJarByClass(Population.class);

  Path out = new Path("totalorder");
  FileInputFormat.setInputPaths(job, "populations");
  FileOutputFormat.setOutputPath(job, out);
  out.getFileSystem(conf).delete(out, true);

  job.setMapperClass(PopulationMapper.class);
  job.setReducerClass(PopulationReducer.class);
  job.setInputFormatClass(KeyValueTextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(5);

  // Configure the TotalOrderPartitioner here...
  job.setPartitionerClass(TotalOrderPartitioner.class);
  InputSampler.Sampler<Text, Text> sampler =
      new InputSampler.RandomSampler<Text, Text>(0.1, 200, 3);
  InputSampler.writePartitionFile(job, sampler);
  String partitionFile = TotalOrderPartitioner.getPartitionFile(conf);
  URI partitionURI = new URI(partitionFile + "#" + TotalOrderPartitioner.DEFAULT_PATH);
  job.addCacheFile(partitionURI);

  return job.waitForCompletion(true) ? 0 : 1;
}
private Job[] getJobs(JobStatus[] stats) throws IOException {
  List<Job> jobs = new ArrayList<Job>();
  for (JobStatus stat : stats) {
    jobs.add(Job.getInstance(this, stat, new JobConf(stat.getJobFile())));
  }
  return jobs.toArray(new Job[0]);
}
@Override
public int run(String[] args) throws Exception {
  Job job = Job.getInstance(getConf());
  job.setJobName("TeraSortCloud");
  job.setJarByClass(this.getClass());
  Opts opts = new Opts();
  opts.parseArgs(TeraSortIngest.class.getName(), args);

  job.setInputFormatClass(RangeInputFormat.class);
  job.setMapperClass(SortGenMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Mutation.class);
  job.setNumReduceTasks(0);

  job.setOutputFormatClass(AccumuloOutputFormat.class);
  opts.setAccumuloConfigs(job);
  BatchWriterConfig bwConfig = new BatchWriterConfig().setMaxMemory(10L * 1000 * 1000);
  AccumuloOutputFormat.setBatchWriterOptions(job, bwConfig);

  Configuration conf = job.getConfiguration();
  conf.setLong(NUMROWS, opts.numRows);
  conf.setInt("cloudgen.minkeylength", opts.minKeyLength);
  conf.setInt("cloudgen.maxkeylength", opts.maxKeyLength);
  conf.setInt("cloudgen.minvaluelength", opts.minValueLength);
  conf.setInt("cloudgen.maxvaluelength", opts.maxValueLength);
  conf.set("cloudgen.tablename", opts.getTableName());

  if (args.length > 10) {
    conf.setInt(NUMSPLITS, opts.splits);
  }

  job.waitForCompletion(true);
  return job.isSuccessful() ? 0 : 1;
}
public static void main(String[] args)
    throws IOException, InterruptedException, ClassNotFoundException {
  if (args.length != 2) {
    throw new IllegalArgumentException(args.length + " usage: ... ");
  }
  String bitvectorpath = args[0], outputPath = args[1];

  Configuration conf = new Configuration();
  Job job = Job.getInstance(new Cluster(conf), conf);
  job.setJobName("int key replace phase1");
  job.setJarByClass(OutlinkGrowthAnalysis.class);

  job.setMapperClass(BVIdentitiyMapper.class);
  job.setReducerClass(AnaylseOLGrowthReducer.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  job.setInputFormatClass(TabSeperatedTextInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);

  FileOutputFormat.setOutputPath(job, new Path(outputPath));
  FileInputFormat.setInputPaths(job, new Path(bitvectorpath));
  job.setNumReduceTasks(1);
  job.waitForCompletion(true);
}
public static void main(String[] args) throws Exception {
  final String NAME_NODE = "hdfs://sandbox.hortonworks.com:8020";
  Configuration conf = new Configuration();

  Job job = Job.getInstance(conf);
  job.setJarByClass(WordCount.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntWritable.class);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(NullWritable.class);
  if (args.length > 2) {
    job.setNumReduceTasks(Integer.parseInt(args[2]));
  }

  job.setMapperClass(CountMapper.class);
  job.setReducerClass(CountReducer.class);
  job.setNumReduceTasks(1);

  FileInputFormat.addInputPath(job, new Path(args[0] + "data/plot_summaries.txt"));
  FileSystem fs = FileSystem.get(conf);

  // Handle (e.g. delete) existing output path
  Path outputDestination = new Path(args[0] + args[1]);
  if (fs.exists(outputDestination)) {
    fs.delete(outputDestination, true);
  }

  // Set output path & start job1
  FileOutputFormat.setOutputPath(job, outputDestination);
  int jobCompletionStatus = job.waitForCompletion(true) ? 0 : 1;
}