// Map-only job: scans an HBase table and bulk-indexes the rows into Elasticsearch
// through BulkProcessorOutputFormat.
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  conf.set("es.nodes", ES_NODES);
  conf.set("es.resource", ES_RESOURCE);
  conf.set("es.mapping.id", HBaseTableMapper.ID_FIELD.toString());
  conf.set("es.batch.size.bytes", "10mb");
  conf.set("es.batch.size.entries", "10000");
  conf.set("es.batch.write.refresh", "false");

  Job job = new Job(conf);
  job.setJarByClass(BulkIndex.class);
  job.setMapperClass(HBaseTableMapper.class);
  job.setNumReduceTasks(0);
  // Run each map task exactly once (no speculative attempts).
  job.setSpeculativeExecution(false);
  job.setOutputFormatClass(BulkProcessorOutputFormat.class);
  job.setMapOutputValueClass(Text.class);

  Scan scan = new Scan();
  scan.setCaching(1000);
  scan.setCacheBlocks(false);

  TableMapReduceUtil.initTableMapperJob(
      BulkLoad.HBASE_TABLE_NAME,
      scan,
      HBaseTableMapper.class,
      NullWritable.class,
      MapWritable.class,
      job);

  job.waitForCompletion(true);
}
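// A minimal sketch of what a TableMapper like the HBaseTableMapper referenced above could look
// like: it turns each HBase Result into a MapWritable document for the output format to index.
// This is an illustrative assumption, not the original class; the ID_FIELD name "id" and the
// decision to copy every cell as a string field are placeholders.
import java.io.IOException;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;

public class HBaseTableMapper extends TableMapper<NullWritable, MapWritable> {
  public static final Text ID_FIELD = new Text("id"); // assumed document-id field name

  @Override
  protected void map(ImmutableBytesWritable row, Result result, Context context)
      throws IOException, InterruptedException {
    MapWritable doc = new MapWritable();
    doc.put(ID_FIELD, new Text(Bytes.toString(row.get(), row.getOffset(), row.getLength())));
    // Copy every cell into the document keyed by its qualifier (an illustrative mapping).
    for (Cell cell : result.rawCells()) {
      doc.put(
          new Text(Bytes.toString(CellUtil.cloneQualifier(cell))),
          new Text(Bytes.toString(CellUtil.cloneValue(cell))));
    }
    context.write(NullWritable.get(), doc);
  }
}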
// Bulk-load driver: reads text input, turns each line into a Put, and writes HFiles that can
// later be handed to HBase with the completebulkload tool.
public static void main(String[] args) throws Exception {
  Configuration conf = HBaseConfiguration.create();
  String inputPath = args[0];
  String outputPath = args[1];
  HTable hTable = new HTable(conf, "bigd24-hbase-sample");

  Job job = new Job(conf, "HBase_Bulk_loader");
  job.setMapOutputKeyClass(ImmutableBytesWritable.class);
  job.setMapOutputValueClass(Put.class);
  // Disable speculative task attempts for both the map and reduce phases.
  job.setSpeculativeExecution(false);
  job.setReduceSpeculativeExecution(false);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(HFileOutputFormat.class);
  job.setJarByClass(HBaseBulkLoad.class);
  job.setMapperClass(HBaseBulkLoad.BulkLoadMap.class);

  FileInputFormat.setInputPaths(job, inputPath);
  FileOutputFormat.setOutputPath(job, new Path(outputPath));

  // Sets up the partitioner, sort reducer, and output key/value classes to match the
  // table's region boundaries.
  HFileOutputFormat.configureIncrementalLoad(job, hTable);

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
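// A minimal sketch of the kind of mapper the driver above expects from HBaseBulkLoad.BulkLoadMap
// (shown here as a top-level class; it is not the original implementation). It assumes
// comma-separated lines of the form "rowkey,value" and a column family/qualifier of "cf:data";
// those names are illustrative placeholders.
import java.io.IOException;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class BulkLoadMap extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
  private static final byte[] CF = Bytes.toBytes("cf");    // assumed column family
  private static final byte[] COL = Bytes.toBytes("data"); // assumed qualifier

  @Override
  protected void map(LongWritable offset, Text line, Context context)
      throws IOException, InterruptedException {
    String[] fields = line.toString().split(",", 2);
    if (fields.length < 2) {
      return; // skip malformed lines
    }
    byte[] rowKey = Bytes.toBytes(fields[0]);
    Put put = new Put(rowKey);
    // Old-style HBase API, matching the HTable/HFileOutputFormat usage in the driver.
    put.add(CF, COL, Bytes.toBytes(fields[1]));
    context.write(new ImmutableBytesWritable(rowKey), put);
  }
}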
/**
 * Map-only ingest job: reads text input and writes n-gram Mutations into an Accumulo table,
 * creating and pre-splitting the table on first use.
 */
@Override
public int run(String[] args) throws Exception {
  Opts opts = new Opts();
  opts.parseArgs(getClass().getName(), args);

  Job job = JobUtil.getJob(getConf());
  job.setJobName(getClass().getSimpleName());
  job.setJarByClass(getClass());

  opts.setAccumuloConfigs(job);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(AccumuloOutputFormat.class);

  job.setMapperClass(NGramMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Mutation.class);

  job.setNumReduceTasks(0);
  job.setSpeculativeExecution(false);

  // Create the output table if it does not exist, pre-split on single digits and letters
  // so ingest spreads across tablet servers.
  if (!opts.getConnector().tableOperations().exists(opts.tableName)) {
    log.info("Creating table " + opts.tableName);
    opts.getConnector().tableOperations().create(opts.tableName);
    SortedSet<Text> splits = new TreeSet<Text>();
    String[] numbers = "1 2 3 4 5 6 7 8 9".split("\\s");
    String[] lower = "a b c d e f g h i j k l m n o p q r s t u v w x y z".split("\\s");
    String[] upper = "A B C D E F G H I J K L M N O P Q R S T U V W X Y Z".split("\\s");
    for (String[] array : new String[][] {numbers, lower, upper}) {
      for (String s : array) {
        splits.add(new Text(s));
      }
    }
    opts.getConnector().tableOperations().addSplits(opts.tableName, splits);
  }

  TextInputFormat.addInputPath(job, new Path(opts.inputDirectory));
  job.waitForCompletion(true);
  return job.isSuccessful() ? 0 : 1;
}
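// A rough sketch of a mapper shaped like the NGramMapper referenced above (not the original
// implementation): it splits each line into words and writes one Mutation per word bigram,
// letting AccumuloOutputFormat route the Mutations to the job's default table (hence the null
// key). The column family "count" and the value "1" are illustrative assumptions.
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.Value;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class NGramMapper extends Mapper<LongWritable, Text, Text, Mutation> {
  @Override
  protected void map(LongWritable offset, Text line, Context context)
      throws IOException, InterruptedException {
    String[] words = line.toString().toLowerCase().split("\\s+");
    for (int i = 0; i + 1 < words.length; i++) {
      Mutation m = new Mutation(new Text(words[i] + " " + words[i + 1]));
      m.put(new Text("count"), new Text(""), new Value("1".getBytes(StandardCharsets.UTF_8)));
      // A null key means "use the default table" configured on AccumuloOutputFormat; this works
      // here because the job is map-only, so the key is never serialized for a shuffle.
      context.write(null, m);
    }
  }
}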
@Override
public int run(String[] args) throws Exception {
  final Configuration conf = getConf();
  conf.set("fs.defaultFS", "file:///");

  final Job job = Job.getInstance(conf, JOB_NAME);
  job.setJarByClass(getClass());

  FileInputFormat.setInputPaths(job, new Path(TEST_DATA_LOCATION));
  FileOutputFormat.setOutputPath(job, cleanPathForReuse(conf, OUTPUT_PATH));

  job.setMapperClass(SimpleFeatureToAccumuloKeyValueMapper.class);
  job.setReducerClass(Reducer.class); // (Identity Reducer)
  job.setInputFormatClass(GeonamesDataFileInputFormat.class);
  job.setOutputFormatClass(AccumuloFileOutputFormat.class);
  job.setMapOutputKeyClass(Key.class);
  job.setMapOutputValueClass(Value.class);
  job.setOutputKeyClass(Key.class);
  job.setOutputValueClass(Value.class);
  job.setNumReduceTasks(1);
  job.setSpeculativeExecution(false);

  boolean result = job.waitForCompletion(true);

  mapInputRecords =
      job.getCounters().findCounter(TASK_COUNTER_GROUP_NAME, MAP_INPUT_RECORDS).getValue();
  mapOutputRecords =
      job.getCounters().findCounter(TASK_COUNTER_GROUP_NAME, MAP_OUTPUT_RECORDS).getValue();

  return result ? 0 : 1;
}
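// A hedged launch sketch, not taken from the original source: a Tool whose run(String[]) is shown
// above is typically started through ToolRunner, which also parses generic Hadoop options. The
// class name GeonamesIngestTool is purely illustrative; substitute the class that declares run().
public static void main(String[] args) throws Exception {
  System.exit(ToolRunner.run(new Configuration(), new GeonamesIngestTool(), args));
}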
@VisibleForTesting
public Job createJob(
    int numMapper,
    int numReducer,
    int iReduceStagesCount,
    int numIReducer,
    long mapSleepTime,
    int mapSleepCount,
    long reduceSleepTime,
    int reduceSleepCount,
    long iReduceSleepTime,
    int iReduceSleepCount)
    throws IOException {
  Configuration conf = getConf();
  conf.setLong(MAP_SLEEP_TIME, mapSleepTime);
  conf.setLong(REDUCE_SLEEP_TIME, reduceSleepTime);
  conf.setLong(IREDUCE_SLEEP_TIME, iReduceSleepTime);
  conf.setInt(MAP_SLEEP_COUNT, mapSleepCount);
  conf.setInt(REDUCE_SLEEP_COUNT, reduceSleepCount);
  conf.setInt(IREDUCE_SLEEP_COUNT, iReduceSleepCount);
  conf.setInt(MRJobConfig.NUM_MAPS, numMapper);
  conf.setInt(IREDUCE_STAGES_COUNT, iReduceStagesCount);
  conf.setInt(IREDUCE_TASKS_COUNT, numIReducer);

  // Configure intermediate reduces
  conf.setInt(
      org.apache.tez.mapreduce.hadoop.MRJobConfig.MRR_INTERMEDIATE_STAGES, iReduceStagesCount);
  LOG.info("Running MRR with " + iReduceStagesCount + " IR stages");

  for (int i = 1; i <= iReduceStagesCount; ++i) {
    // Set reducer class for intermediate reduce
    conf.setClass(
        MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(
            i, "mapreduce.job.reduce.class"),
        ISleepReducer.class,
        Reducer.class);
    // Set reducer output key class
    conf.setClass(
        MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(
            i, "mapreduce.map.output.key.class"),
        IntWritable.class,
        Object.class);
    // Set reducer output value class
    conf.setClass(
        MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(
            i, "mapreduce.map.output.value.class"),
        IntWritable.class,
        Object.class);
    conf.setInt(
        MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(i, "mapreduce.job.reduces"),
        numIReducer);
  }

  Job job = Job.getInstance(conf, "sleep");
  job.setNumReduceTasks(numReducer);
  job.setJarByClass(MRRSleepJob.class);
  job.setMapperClass(SleepMapper.class);
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(IntWritable.class);
  job.setReducerClass(SleepReducer.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setInputFormatClass(SleepInputFormat.class);
  job.setPartitionerClass(MRRSleepJobPartitioner.class);
  job.setSpeculativeExecution(false);
  job.setJobName("Sleep job");
  FileInputFormat.addInputPath(job, new Path("ignored"));
  return job;
}
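// A hedged usage sketch (the helper name and every parameter value are illustrative, not from the
// original job): builds the sleep job through createJob above and submits it synchronously.
private int runExampleSleepJob() throws Exception {
  Job job =
      createJob(
          4,     // numMapper
          1,     // numReducer
          2,     // iReduceStagesCount
          1,     // numIReducer
          1000L, // mapSleepTime (ms)
          1,     // mapSleepCount
          1000L, // reduceSleepTime (ms)
          1,     // reduceSleepCount
          500L,  // iReduceSleepTime (ms)
          1);    // iReduceSleepCount
  return job.waitForCompletion(true) ? 0 : 1;
}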
/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimatePi(int numMaps, long numPoints, Path tmpDir, Configuration conf)
    throws IOException, ClassNotFoundException, InterruptedException {
  Job job = Job.getInstance(conf);
  // setup job conf
  job.setJobName(QuasiMonteCarlo.class.getSimpleName());
  job.setJarByClass(QuasiMonteCarlo.class);

  job.setInputFormatClass(SequenceFileInputFormat.class);

  job.setOutputKeyClass(BooleanWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);

  job.setMapperClass(QmcMapper.class);
  job.setReducerClass(QmcReducer.class);
  job.setNumReduceTasks(1);

  // turn off speculative execution, because DFS doesn't handle
  // multiple writers to the same file.
  job.setSpeculativeExecution(false);

  // setup input/output directories
  final Path inDir = new Path(tmpDir, "in");
  final Path outDir = new Path(tmpDir, "out");
  FileInputFormat.setInputPaths(job, inDir);
  FileOutputFormat.setOutputPath(job, outDir);

  final FileSystem fs = FileSystem.get(conf);
  if (fs.exists(tmpDir)) {
    throw new IOException(
        "Tmp directory " + fs.makeQualified(tmpDir) + " already exists. Please remove it first.");
  }
  if (!fs.mkdirs(inDir)) {
    throw new IOException("Cannot create input directory " + inDir);
  }

  try {
    // generate an input file for each map task
    for (int i = 0; i < numMaps; ++i) {
      final Path file = new Path(inDir, "part" + i);
      final LongWritable offset = new LongWritable(i * numPoints);
      final LongWritable size = new LongWritable(numPoints);
      final SequenceFile.Writer writer =
          SequenceFile.createWriter(
              fs, conf, file, LongWritable.class, LongWritable.class, CompressionType.NONE);
      try {
        writer.append(offset, size);
      } finally {
        writer.close();
      }
      System.out.println("Wrote input for Map #" + i);
    }

    // start a map/reduce job
    System.out.println("Starting Job");
    final long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
    System.out.println("Job Finished in " + duration + " seconds");

    // read outputs
    Path inFile = new Path(outDir, "reduce-out");
    LongWritable numInside = new LongWritable();
    LongWritable numOutside = new LongWritable();
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf);
    try {
      reader.next(numInside, numOutside);
    } finally {
      reader.close();
    }

    // compute estimated value
    final BigDecimal numTotal =
        BigDecimal.valueOf(numMaps).multiply(BigDecimal.valueOf(numPoints));
    return BigDecimal.valueOf(4)
        .setScale(20)
        .multiply(BigDecimal.valueOf(numInside.get()))
        .divide(numTotal, RoundingMode.HALF_UP);
  } finally {
    fs.delete(tmpDir, true);
  }
}
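// A hypothetical, minimal driver for estimatePi above (not the original main): runs 4 map tasks
// with 1,000 sample points each against a scratch directory and prints the result. The path and
// the counts are illustrative values.
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Path tmpDir = new Path("/tmp/pi-estimate-" + System.currentTimeMillis()); // assumed scratch dir
  System.out.println("Estimated value of Pi is " + estimatePi(4, 1000L, tmpDir, conf));
}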