コード例 #1
0
  /**
   * Configures and runs the bulk ingest MapReduce job, then hands the generated files to the
   * target table via {@code importDirectory}.
   *
   * <p>Steps, in order:
   *
   * <ol>
   *   <li>Configure the job (text input, {@code MapClass}/{@code ReduceClass}, Accumulo file
   *       output under {@code <workDir>/files}).
   *   <li>Fetch the table's split points (passing 100 as the limit argument) and write them,
   *       Base64-encoded one per line, to {@code <workDir>/splits.txt}.
   *   <li>Use {@code RangePartitioner} with that split file and one reducer per range
   *       ({@code splits.size() + 1}).
   *   <li>Run the job; on success recreate {@code <workDir>/failures} and import
   *       {@code <workDir>/files} into the table.
   * </ol>
   *
   * @param args command-line arguments, parsed by {@code Opts}
   * @return {@code 0} when the job succeeded and the import call returned normally; {@code 1}
   *     when the MapReduce job reported failure (nothing is imported in that case)
   * @throws RuntimeException wrapping any exception raised during setup, the job run, or the
   *     import
   */
  @Override
  public int run(String[] args) {
    Opts opts = new Opts();
    opts.parseArgs(BulkIngestExample.class.getName(), args);

    Configuration conf = getConf();
    try {
      Job job = JobUtil.getJob(conf);
      job.setJobName("bulk ingest example");
      job.setJarByClass(this.getClass());

      job.setInputFormatClass(TextInputFormat.class);

      job.setMapperClass(MapClass.class);
      job.setMapOutputKeyClass(Text.class);
      job.setMapOutputValueClass(Text.class);

      job.setReducerClass(ReduceClass.class);
      job.setOutputFormatClass(AccumuloFileOutputFormat.class);
      opts.setAccumuloConfigs(job);

      Connector connector = opts.getConnector();

      TextInputFormat.setInputPaths(job, new Path(opts.inputDir));
      AccumuloFileOutputFormat.setOutputPath(job, new Path(opts.workDir + "/files"));

      FileSystem fs = FileSystem.get(conf);
      String splitFile = opts.workDir + "/splits.txt";

      Collection<Text> splits = connector.tableOperations().listSplits(opts.getTableName(), 100);

      // try-with-resources guarantees the split file is flushed and closed before the job reads it.
      try (PrintStream out =
          new PrintStream(new BufferedOutputStream(fs.create(new Path(splitFile))))) {
        for (Text split : splits) {
          // Base64 output is plain ASCII; decode with an explicit charset rather than the
          // platform default.
          out.println(
              new String(
                  Base64.encodeBase64(TextUtil.getBytes(split)),
                  java.nio.charset.StandardCharsets.UTF_8));
        }
        // PrintStream never throws on write errors; surface them so the job is not
        // partitioned from a truncated split file.
        if (out.checkError()) {
          throw new java.io.IOException("Failed to write split file " + splitFile);
        }
      }

      // One reducer per range delimited by the split points.
      job.setNumReduceTasks(splits.size() + 1);

      job.setPartitionerClass(RangePartitioner.class);
      RangePartitioner.setSplitFile(job, splitFile);

      // Do not import partial output from a failed job; report failure to the caller instead.
      if (!job.waitForCompletion(true)) {
        return 1;
      }

      // Start from a fresh, empty failures directory before the import.
      Path failures = new Path(opts.workDir, "failures");
      fs.delete(failures, true);
      fs.mkdirs(failures);
      connector
          .tableOperations()
          .importDirectory(
              opts.getTableName(), opts.workDir + "/files", opts.workDir + "/failures", false);

    } catch (InterruptedException e) {
      // Restore the interrupt flag so callers further up can observe the interruption.
      Thread.currentThread().interrupt();
      throw new RuntimeException(e);
    } catch (Exception e) {
      throw new RuntimeException(e);
    }

    return 0;
  }