static void configureIncrementalLoad( Job job, HTableDescriptor tableDescriptor, RegionLocator regionLocator, Class<? extends OutputFormat<?, ?>> cls) throws IOException, UnsupportedEncodingException { Configuration conf = job.getConfiguration(); job.setOutputKeyClass(ImmutableBytesWritable.class); job.setOutputValueClass(KeyValue.class); job.setOutputFormatClass(cls); // Based on the configured map output class, set the correct reducer to properly // sort the incoming values. // TODO it would be nice to pick one or the other of these formats. if (KeyValue.class.equals(job.getMapOutputValueClass())) { job.setReducerClass(KeyValueSortReducer.class); } else if (Put.class.equals(job.getMapOutputValueClass())) { job.setReducerClass(PutSortReducer.class); } else if (Text.class.equals(job.getMapOutputValueClass())) { job.setReducerClass(TextSortReducer.class); } else { LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass()); } conf.setStrings( "io.serializations", conf.get("io.serializations"), MutationSerialization.class.getName(), ResultSerialization.class.getName(), KeyValueSerialization.class.getName()); if (conf.getBoolean(LOCALITY_SENSITIVE_CONF_KEY, DEFAULT_LOCALITY_SENSITIVE)) { // record this table name for creating writer by favored nodes LOG.info("bulkload locality sensitive enabled"); conf.set(OUTPUT_TABLE_NAME_CONF_KEY, regionLocator.getName().getNameAsString()); } // Use table's region boundaries for TOP split points. LOG.info("Looking up current regions for table " + regionLocator.getName()); List<ImmutableBytesWritable> startKeys = getRegionStartKeys(regionLocator); LOG.info( "Configuring " + startKeys.size() + " reduce partitions " + "to match current region count"); job.setNumReduceTasks(startKeys.size()); configurePartitioner(job, startKeys); // Set compression algorithms based on column families configureCompression(conf, tableDescriptor); configureBloomType(tableDescriptor, conf); configureBlockSize(tableDescriptor, conf); configureDataBlockEncoding(tableDescriptor, conf); TableMapReduceUtil.addDependencyJars(job); TableMapReduceUtil.initCredentials(job); LOG.info("Incremental table " + regionLocator.getName() + " output configured."); }
public static void configureIncrementalLoadMap(Job job, HTableDescriptor tableDescriptor) throws IOException { Configuration conf = job.getConfiguration(); job.setOutputKeyClass(ImmutableBytesWritable.class); job.setOutputValueClass(KeyValue.class); job.setOutputFormatClass(HFileOutputFormat2.class); // Set compression algorithms based on column families configureCompression(conf, tableDescriptor); configureBloomType(tableDescriptor, conf); configureBlockSize(tableDescriptor, conf); configureDataBlockEncoding(tableDescriptor, conf); TableMapReduceUtil.addDependencyJars(job); TableMapReduceUtil.initCredentials(job); LOG.info("Incremental table " + tableDescriptor.getTableName() + " output configured."); }