@Override
  public int run(String[] args) throws Exception {
    Job job = JobBuilder.parseInputAndOutput(this, getConf(), args);
    if (job == null) {
      return -1;
    }

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    job.setPartitionerClass(TotalOrderPartitioner.class);

    InputSampler.Sampler<IntWritable, Text> sampler =
        new InputSampler.RandomSampler<IntWritable, Text>(0.1, 10000, 10);

    InputSampler.writePartitionFile(job, sampler);

    // Add to DistributedCache
    Configuration conf = job.getConfiguration();
    String partitionFile = TotalOrderPartitioner.getPartitionFile(conf);
    URI partitionUri = new URI(partitionFile);
    job.addCacheFile(partitionUri);

    return job.waitForCompletion(true) ? 0 : 1;
  }
  @Override
  public int run(String[] arg0) throws Exception {
    Job job = Job.getInstance(getConf(), "PopulationJob");
    Configuration conf = job.getConfiguration();
    job.setJarByClass(Population.class);

    Path out = new Path("totalorder");
    FileInputFormat.setInputPaths(job, "populations");
    FileOutputFormat.setOutputPath(job, out);
    out.getFileSystem(conf).delete(out, true);

    job.setMapperClass(PopulationMapper.class);
    job.setReducerClass(PopulationReducer.class);
    job.setInputFormatClass(KeyValueTextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setNumReduceTasks(5);

    // Configure the TotalOrderPartitioner here...
    job.setPartitionerClass(TotalOrderPartitioner.class);
    InputSampler.Sampler<Text, Text> sampler =
        new InputSampler.RandomSampler<Text, Text>(0.1, 200, 3);
    InputSampler.writePartitionFile(job, sampler);
    String partitionFile = TotalOrderPartitioner.getPartitionFile(conf);
    URI partitionURI = new URI(partitionFile + "#" + TotalOrderPartitioner.DEFAULT_PATH);
    job.addCacheFile(partitionURI);

    return job.waitForCompletion(true) ? 0 : 1;
  }
예제 #3
0
  /**
   * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against <code>splitPoints
   * </code>. Cleans up the partitions file after job exists.
   */
  static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints)
      throws IOException {
    Configuration conf = job.getConfiguration();
    // create the partitions file
    FileSystem fs = FileSystem.get(conf);
    Path partitionsPath = new Path(conf.get("hbase.fs.tmp.dir"), "partitions_" + UUID.randomUUID());
    fs.makeQualified(partitionsPath);
    writePartitions(conf, partitionsPath, splitPoints);
    fs.deleteOnExit(partitionsPath);

    // configure job to use it
    job.setPartitionerClass(TotalOrderPartitioner.class);
    TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
  }
예제 #4
0
  /**
   * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against <code>splitPoints
   * </code>. Cleans up the partitions file after job exists.
   */
  static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints)
      throws IOException {
    Configuration conf = job.getConfiguration();
    // create the partitions file
    FileSystem fs = FileSystem.get(conf);
    String hbaseTmpFsDir =
        conf.get(
            HConstants.TEMPORARY_FS_DIRECTORY_KEY, HConstants.DEFAULT_TEMPORARY_HDFS_DIRECTORY);
    Path partitionsPath = new Path(hbaseTmpFsDir, "partitions_" + UUID.randomUUID());
    fs.makeQualified(partitionsPath);
    writePartitions(conf, partitionsPath, splitPoints);
    fs.deleteOnExit(partitionsPath);

    // configure job to use it
    job.setPartitionerClass(TotalOrderPartitioner.class);
    TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
  }