Example #1
File: BulkIndex.java  Project: ept/hbase-es
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
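    // elasticsearch-hadoop connector settings: target nodes, index/type,
    // document id field, and bulk request batching.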
    conf.set("es.nodes", ES_NODES);
    conf.set("es.resource", ES_RESOURCE);
    conf.set("es.mapping.id", HBaseTableMapper.ID_FIELD.toString());
    conf.set("es.batch.size.bytes", "10mb");
    conf.set("es.batch.size.entries", "10000");
    conf.set("es.batch.write.refresh", "false");

    Job job = new Job(conf);
    job.setJarByClass(BulkIndex.class);
    job.setMapperClass(HBaseTableMapper.class);
    job.setNumReduceTasks(0);
    job.setSpeculativeExecution(false);
    job.setOutputFormatClass(BulkProcessorOutputFormat.class);
    job.setMapOutputValueClass(Text.class);

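    // Full-table scan tuned for MapReduce: large scanner caching, block cache disabled.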
    Scan scan = new Scan();
    scan.setCaching(1000);
    scan.setCacheBlocks(false);

    TableMapReduceUtil.initTableMapperJob(
        BulkLoad.HBASE_TABLE_NAME,
        scan,
        HBaseTableMapper.class,
        NullWritable.class,
        MapWritable.class,
        job);

    job.waitForCompletion(true);
  }
Example #2
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    String inputPath = args[0];
    String outputPath = args[1];
    HTable hTable = new HTable(conf, "bigd24-hbase-sample");
    Job job = new Job(conf, "HBase_Bulk_loader");
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(Put.class);
    // Turn off speculative execution for both map and reduce tasks.
    job.setSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(HFileOutputFormat.class);
    job.setJarByClass(HBaseBulkLoad.class);
    job.setMapperClass(HBaseBulkLoad.BulkLoadMap.class);
    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    // Set up partitioning, reducer, and output settings for an incremental
    // HFile bulk load into the target table.
    HFileOutputFormat.configureIncrementalLoad(job, hTable);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
Example #3
  /** @param args */
  @Override
  public int run(String[] args) throws Exception {
    Opts opts = new Opts();
    opts.parseArgs(getClass().getName(), args);

    Job job = JobUtil.getJob(getConf());
    job.setJobName(getClass().getSimpleName());
    job.setJarByClass(getClass());

    opts.setAccumuloConfigs(job);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(AccumuloOutputFormat.class);

    job.setMapperClass(NGramMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Mutation.class);

    job.setNumReduceTasks(0);
    job.setSpeculativeExecution(false);

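    // Create the Accumulo table on first run and pre-split it on digit and
    // letter prefixes so ingest is spread across tablet servers.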
    if (!opts.getConnector().tableOperations().exists(opts.tableName)) {
      log.info("Creating table " + opts.tableName);
      opts.getConnector().tableOperations().create(opts.tableName);
      SortedSet<Text> splits = new TreeSet<Text>();
      String[] numbers = "1 2 3 4 5 6 7 8 9".split("\\s");
      String[] lower = "a b c d e f g h i j k l m n o p q r s t u v w x y z".split("\\s");
      String[] upper = "A B C D E F G H I J K L M N O P Q R S T U V W X Y Z".split("\\s");
      for (String[] array : new String[][] {numbers, lower, upper}) {
        for (String s : array) {
          splits.add(new Text(s));
        }
      }
      opts.getConnector().tableOperations().addSplits(opts.tableName, splits);
    }

    TextInputFormat.addInputPath(job, new Path(opts.inputDirectory));
    job.waitForCompletion(true);
    return job.isSuccessful() ? 0 : 1;
  }
Example #4
    @Override
    public int run(String[] args) throws Exception {

      final Configuration conf = getConf();
      conf.set("fs.defaultFS", "file:///");

      final Job job = Job.getInstance(conf, JOB_NAME);
      job.setJarByClass(getClass());

      FileInputFormat.setInputPaths(job, new Path(TEST_DATA_LOCATION));
      FileOutputFormat.setOutputPath(job, cleanPathForReuse(conf, OUTPUT_PATH));

      job.setMapperClass(SimpleFeatureToAccumuloKeyValueMapper.class);
      job.setReducerClass(Reducer.class); // (Identity Reducer)

      job.setInputFormatClass(GeonamesDataFileInputFormat.class);
      job.setOutputFormatClass(AccumuloFileOutputFormat.class);

      job.setMapOutputKeyClass(Key.class);
      job.setMapOutputValueClass(Value.class);
      job.setOutputKeyClass(Key.class);
      job.setOutputValueClass(Value.class);

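      // A single, non-speculative reduce task so the job writes exactly one sorted output file.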
      job.setNumReduceTasks(1);
      job.setSpeculativeExecution(false);

      boolean result = job.waitForCompletion(true);

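      // Capture the map input/output record counters from the completed job.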
      mapInputRecords =
          job.getCounters().findCounter(TASK_COUNTER_GROUP_NAME, MAP_INPUT_RECORDS).getValue();

      mapOutputRecords =
          job.getCounters().findCounter(TASK_COUNTER_GROUP_NAME, MAP_OUTPUT_RECORDS).getValue();

      return result ? 0 : 1;
    }
Example #5
  @VisibleForTesting
  public Job createJob(
      int numMapper,
      int numReducer,
      int iReduceStagesCount,
      int numIReducer,
      long mapSleepTime,
      int mapSleepCount,
      long reduceSleepTime,
      int reduceSleepCount,
      long iReduceSleepTime,
      int iReduceSleepCount)
      throws IOException {
    Configuration conf = getConf();
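    // Pass sleep durations, sleep counts, and task counts to the tasks via the Configuration.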
    conf.setLong(MAP_SLEEP_TIME, mapSleepTime);
    conf.setLong(REDUCE_SLEEP_TIME, reduceSleepTime);
    conf.setLong(IREDUCE_SLEEP_TIME, iReduceSleepTime);
    conf.setInt(MAP_SLEEP_COUNT, mapSleepCount);
    conf.setInt(REDUCE_SLEEP_COUNT, reduceSleepCount);
    conf.setInt(IREDUCE_SLEEP_COUNT, iReduceSleepCount);
    conf.setInt(MRJobConfig.NUM_MAPS, numMapper);
    conf.setInt(IREDUCE_STAGES_COUNT, iReduceStagesCount);
    conf.setInt(IREDUCE_TASKS_COUNT, numIReducer);

    // Configure intermediate reduces
    conf.setInt(
        org.apache.tez.mapreduce.hadoop.MRJobConfig.MRR_INTERMEDIATE_STAGES, iReduceStagesCount);
    LOG.info("Running MRR with " + iReduceStagesCount + " IR stages");

    for (int i = 1; i <= iReduceStagesCount; ++i) {
      // Set reducer class for intermediate reduce
      conf.setClass(
          MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(
              i, "mapreduce.job.reduce.class"),
          ISleepReducer.class,
          Reducer.class);
      // Set reducer output key class
      conf.setClass(
          MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(
              i, "mapreduce.map.output.key.class"),
          IntWritable.class,
          Object.class);
      // Set reducer output value class
      conf.setClass(
          MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(
              i, "mapreduce.map.output.value.class"),
          IntWritable.class,
          Object.class);
      conf.setInt(
          MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(i, "mapreduce.job.reduces"),
          numIReducer);
    }

    Job job = Job.getInstance(conf, "sleep");
    job.setNumReduceTasks(numReducer);
    job.setJarByClass(MRRSleepJob.class);
    job.setMapperClass(SleepMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setReducerClass(SleepReducer.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setInputFormatClass(SleepInputFormat.class);
    job.setPartitionerClass(MRRSleepJobPartitioner.class);
    job.setSpeculativeExecution(false);
    job.setJobName("Sleep job");

    FileInputFormat.addInputPath(job, new Path("ignored"));
    return job;
  }
Example #6
  /**
   * Run a map/reduce job for estimating Pi.
   *
   * @return the estimated value of Pi
   */
  public static BigDecimal estimatePi(int numMaps, long numPoints, Path tmpDir, Configuration conf)
      throws IOException, ClassNotFoundException, InterruptedException {
    Job job = Job.getInstance(conf);
    // setup job conf
    job.setJobName(QuasiMonteCarlo.class.getSimpleName());
    job.setJarByClass(QuasiMonteCarlo.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setOutputKeyClass(BooleanWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(QmcMapper.class);

    job.setReducerClass(QmcReducer.class);
    job.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    job.setSpeculativeExecution(false);

    // setup input/output directories
    final Path inDir = new Path(tmpDir, "in");
    final Path outDir = new Path(tmpDir, "out");
    FileInputFormat.setInputPaths(job, inDir);
    FileOutputFormat.setOutputPath(job, outDir);

    final FileSystem fs = FileSystem.get(conf);
    if (fs.exists(tmpDir)) {
      throw new IOException(
          "Tmp directory "
              + fs.makeQualified(tmpDir)
              + " already exists.  Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
      throw new IOException("Cannot create input directory " + inDir);
    }

    try {
      // generate an input file for each map task
      for (int i = 0; i < numMaps; ++i) {
        final Path file = new Path(inDir, "part" + i);
        final LongWritable offset = new LongWritable(i * numPoints);
        final LongWritable size = new LongWritable(numPoints);
        final SequenceFile.Writer writer =
            SequenceFile.createWriter(
                fs, conf, file, LongWritable.class, LongWritable.class, CompressionType.NONE);
        try {
          writer.append(offset, size);
        } finally {
          writer.close();
        }
        System.out.println("Wrote input for Map #" + i);
      }

      // start a map/reduce job
      System.out.println("Starting Job");
      final long startTime = System.currentTimeMillis();
      job.waitForCompletion(true);
      final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
      System.out.println("Job Finished in " + duration + " seconds");

      // read outputs
      Path inFile = new Path(outDir, "reduce-out");
      LongWritable numInside = new LongWritable();
      LongWritable numOutside = new LongWritable();
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf);
      try {
        reader.next(numInside, numOutside);
      } finally {
        reader.close();
      }

      // compute estimated value
      final BigDecimal numTotal =
          BigDecimal.valueOf(numMaps).multiply(BigDecimal.valueOf(numPoints));
      return BigDecimal.valueOf(4)
          .setScale(20)
          .multiply(BigDecimal.valueOf(numInside.get()))
          .divide(numTotal, RoundingMode.HALF_UP);
    } finally {
      fs.delete(tmpDir, true);
    }
  }