private void runIncrementalPELoad(
    Configuration conf, HTableDescriptor tableDescriptor, RegionLocator regionLocator, Path outDir)
    throws IOException, UnsupportedEncodingException, InterruptedException,
        ClassNotFoundException {
  Job job = new Job(conf, "testLocalMRIncrementalLoad");
  job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
  job.getConfiguration()
      .setStrings(
          "io.serializations",
          conf.get("io.serializations"),
          MutationSerialization.class.getName(),
          ResultSerialization.class.getName(),
          KeyValueSerialization.class.getName());
  setupRandomGeneratorMapper(job);
  HFileOutputFormat2.configureIncrementalLoad(job, tableDescriptor, regionLocator);
  FileOutputFormat.setOutputPath(job, outDir);

  assertFalse(util.getTestFileSystem().exists(outDir));
  assertEquals(regionLocator.getAllRegionLocations().size(), job.getNumReduceTasks());
  assertTrue(job.waitForCompletion(true));
}
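// Hedged sketch (not part of the test above): how a non-test driver would
// typically obtain the Table and RegionLocator that configureIncrementalLoad
// expects, using the org.apache.hadoop.hbase.client API. The table name is
// illustrative. configureIncrementalLoad sets the job's reduce-task count to
// the table's current region count, which is what the assertEquals above
// verifies.
try (Connection connection = ConnectionFactory.createConnection(conf);
    Table table = connection.getTable(TableName.valueOf("myTable"));
    RegionLocator locator = connection.getRegionLocator(TableName.valueOf("myTable"))) {
  HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), locator);
}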
/**
 * Driver for InputSampler from the command line. Configures a {@code Job} instance and calls
 * {@link #writePartitionFile}.
 */
public int run(String[] args) throws Exception {
  Job job = new Job(getConf());
  ArrayList<String> otherArgs = new ArrayList<String>();
  Sampler<K, V> sampler = null;
  for (int i = 0; i < args.length; ++i) {
    try {
      if ("-r".equals(args[i])) {
        job.setNumReduceTasks(Integer.parseInt(args[++i]));
      } else if ("-inFormat".equals(args[i])) {
        job.setInputFormatClass(Class.forName(args[++i]).asSubclass(InputFormat.class));
      } else if ("-keyClass".equals(args[i])) {
        job.setMapOutputKeyClass(Class.forName(args[++i]).asSubclass(WritableComparable.class));
      } else if ("-splitSample".equals(args[i])) {
        int numSamples = Integer.parseInt(args[++i]);
        int maxSplits = Integer.parseInt(args[++i]);
        if (0 >= maxSplits) maxSplits = Integer.MAX_VALUE;
        sampler = new SplitSampler<K, V>(numSamples, maxSplits);
      } else if ("-splitRandom".equals(args[i])) {
        double pcnt = Double.parseDouble(args[++i]);
        int numSamples = Integer.parseInt(args[++i]);
        int maxSplits = Integer.parseInt(args[++i]);
        if (0 >= maxSplits) maxSplits = Integer.MAX_VALUE;
        sampler = new RandomSampler<K, V>(pcnt, numSamples, maxSplits);
      } else if ("-splitInterval".equals(args[i])) {
        double pcnt = Double.parseDouble(args[++i]);
        int maxSplits = Integer.parseInt(args[++i]);
        if (0 >= maxSplits) maxSplits = Integer.MAX_VALUE;
        sampler = new IntervalSampler<K, V>(pcnt, maxSplits);
      } else {
        otherArgs.add(args[i]);
      }
    } catch (NumberFormatException except) {
      System.out.println("ERROR: Integer expected instead of " + args[i]);
      return printUsage();
    } catch (ArrayIndexOutOfBoundsException except) {
      System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
      return printUsage();
    }
  }
  if (job.getNumReduceTasks() <= 1) {
    System.err.println("Sampler requires more than one reducer");
    return printUsage();
  }
  if (otherArgs.size() < 2) {
    System.out.println("ERROR: Wrong number of parameters");
    return printUsage();
  }
  if (null == sampler) {
    sampler = new RandomSampler<K, V>(0.1, 10000, 10);
  }

  // The last path argument is the destination for the partition file; all
  // preceding paths are inputs to sample.
  Path outf = new Path(otherArgs.remove(otherArgs.size() - 1));
  TotalOrderPartitioner.setPartitionFile(getConf(), outf);
  for (String s : otherArgs) {
    FileInputFormat.addInputPath(job, new Path(s));
  }
  InputSampler.<K, V>writePartitionFile(job, sampler);

  return 0;
}
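// Hedged sketch (not part of the driver above): invoking the driver
// programmatically through ToolRunner, with flags matching the argument
// parsing in run(). The paths and reducer count are illustrative; the last
// path is the partition-file destination, the rest are sampled inputs.
int exitCode =
    ToolRunner.run(
        new InputSampler<Text, Text>(new Configuration()),
        new String[] {"-r", "4", "-splitRandom", "0.1", "10000", "10", "/in", "/partitions"});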
/**
 * Write a partition file for the given job, using the Sampler provided. Queries the sampler for a
 * sample keyset, sorts by the output key comparator, selects the keys for each rank, and writes
 * to the destination returned from {@link TotalOrderPartitioner#getPartitionFile}.
 */
@SuppressWarnings("unchecked") // getInputFormat, getOutputKeyComparator
public static <K, V> void writePartitionFile(Job job, Sampler<K, V> sampler)
    throws IOException, ClassNotFoundException, InterruptedException {
  Configuration conf = job.getConfiguration();
  final InputFormat inf = ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
  int numPartitions = job.getNumReduceTasks();
  K[] samples = sampler.getSample(inf, job);
  RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
  Arrays.sort(samples, comparator);
  Path dst = new Path(TotalOrderPartitioner.getPartitionFile(conf));
  FileSystem fs = dst.getFileSystem(conf);
  if (fs.exists(dst)) {
    fs.delete(dst, false);
  }
  SequenceFile.Writer writer =
      SequenceFile.createWriter(fs, conf, dst, job.getMapOutputKeyClass(), NullWritable.class);
  NullWritable nullValue = NullWritable.get();
  float stepSize = samples.length / (float) numPartitions;
  int last = -1;
  // Emit numPartitions - 1 split keys, one per rank boundary. If the sample at
  // a boundary equals the previously written key, advance past the duplicates
  // so split points stay strictly increasing.
  for (int i = 1; i < numPartitions; ++i) {
    int k = Math.round(stepSize * i);
    while (last >= k && comparator.compare(samples[last], samples[k]) == 0) {
      ++k;
    }
    writer.append(samples[k], nullValue);
    last = k;
  }
  writer.close();
}
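// Hedged sketch (not part of the method above): the usual pairing of
// writePartitionFile with TotalOrderPartitioner for a globally sorted job,
// assuming Text map-output keys. Paths and sampler parameters are
// illustrative.
Job job = Job.getInstance(new Configuration(), "totalOrderSort");
job.setNumReduceTasks(4); // the partition file then holds numReduceTasks - 1 split keys
job.setPartitionerClass(TotalOrderPartitioner.class);
TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("/tmp/partitions"));
FileInputFormat.addInputPath(job, new Path("/in"));
FileOutputFormat.setOutputPath(job, new Path("/out"));
InputSampler.writePartitionFile(
    job, new InputSampler.RandomSampler<Text, Text>(0.1, 10000, 10));
job.waitForCompletion(true);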
@Test
public void testJobConfiguration() throws Exception {
  Job job = new Job(util.getConfiguration());
  job.setWorkingDirectory(util.getDataTestDir("testJobConfiguration"));
  Table table = Mockito.mock(Table.class);
  RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
  setupMockStartKeys(regionLocator);
  HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
  assertEquals(job.getNumReduceTasks(), 4);
}
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test public void testJobConfiguration() throws Exception { Configuration conf = new Configuration(this.util.getConfiguration()); conf.set("hbase.fs.tmp.dir", util.getDataTestDir("testJobConfiguration").toString()); Job job = new Job(conf); job.setWorkingDirectory(util.getDataTestDir("testJobConfiguration")); Table table = Mockito.mock(Table.class); RegionLocator regionLocator = Mockito.mock(RegionLocator.class); setupMockStartKeys(regionLocator); setupMockTableName(regionLocator); HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator); assertEquals(job.getNumReduceTasks(), 4); }
public Job run() throws Exception {
  Job job = Job.getInstance(getConf());
  job.setJobName(name);
  job.setJarByClass(TopKRollupPhaseOneJob.class);

  // Map config
  job.setMapperClass(TopKRollupPhaseOneMapper.class);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setMapOutputKeyClass(BytesWritable.class);
  job.setMapOutputValueClass(BytesWritable.class);

  // Reduce config
  job.setReducerClass(TopKRollupPhaseOneReducer.class);
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(BytesWritable.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);

  // Use the configured reducer count, falling back to a default of 10.
  String numReducers = props.getProperty("num.reducers");
  if (numReducers != null) {
    job.setNumReduceTasks(Integer.parseInt(numReducers));
  } else {
    job.setNumReduceTasks(10);
  }
  LOGGER.info("Setting number of reducers: " + job.getNumReduceTasks());

  // topk_rollup_phase1 phase config
  Configuration configuration = job.getConfiguration();
  String inputPathDir = getAndSetConfiguration(configuration, TOPK_ROLLUP_PHASE1_INPUT_PATH);
  getAndSetConfiguration(configuration, TOPK_ROLLUP_PHASE1_CONFIG_PATH);
  getAndSetConfiguration(configuration, TOPK_ROLLUP_PHASE1_OUTPUT_PATH);
  getAndSetConfiguration(configuration, TOPK_ROLLUP_PHASE1_METRIC_SUMS_PATH);
  LOGGER.info("Input path dir: " + inputPathDir);
  for (String inputPath : inputPathDir.split(",")) {
    LOGGER.info("Adding input: " + inputPath);
    Path input = new Path(inputPath);
    FileInputFormat.addInputPath(job, input);
  }
  FileOutputFormat.setOutputPath(
      job, new Path(getAndCheck(TOPK_ROLLUP_PHASE1_OUTPUT_PATH.toString())));

  job.waitForCompletion(true);
  return job;
}