@Override
  public int run(String[] args) throws Exception {

    String instance = args[0];
    String zookeepers = args[1];
    String user = args[2];
    String tokenFile = args[3];
    String input = args[4];
    String tableName = args[5];

    Job job = Job.getInstance(getConf());
    job.setJobName(TokenFileWordCount.class.getName());
    job.setJarByClass(this.getClass());

    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.setInputPaths(job, input);

    job.setMapperClass(MapClass.class);

    job.setNumReduceTasks(0);

    job.setOutputFormatClass(AccumuloOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Mutation.class);

    // AccumuloInputFormat not used here, but it uses the same functions.
    AccumuloOutputFormat.setZooKeeperInstance(
        job, ClientConfiguration.loadDefault().withInstance(instance).withZkHosts(zookeepers));
    AccumuloOutputFormat.setConnectorInfo(job, user, tokenFile);
    AccumuloOutputFormat.setCreateTables(job, true);
    AccumuloOutputFormat.setDefaultTableName(job, tableName);

    job.waitForCompletion(true);
    return 0;
  }
Esempio n. 2
0
  @Override
  public int run(String[] args) throws Exception {
    Job job = new Job(getConf(), this.getClass().getSimpleName());
    job.setJarByClass(this.getClass());

    if (job.getJar() == null) {
      log.error("M/R requires a jar file!  Run mvn package.");
      return 1;
    }

    job.setInputFormatClass(AccumuloInputFormat.class);
    AccumuloInputFormat.setConnectorInfo(job, args[0], new PasswordToken(args[1]));
    AccumuloInputFormat.setInputTableName(job, args[2]);
    AccumuloInputFormat.setScanAuthorizations(job, Authorizations.EMPTY);
    AccumuloInputFormat.setZooKeeperInstance(job, args[3], args[4]);

    job.setMapperClass(SeqMapClass.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Mutation.class);

    job.setNumReduceTasks(0);

    job.setOutputFormatClass(AccumuloOutputFormat.class);
    AccumuloOutputFormat.setConnectorInfo(job, args[0], new PasswordToken(args[1]));
    AccumuloOutputFormat.setCreateTables(job, true);
    AccumuloOutputFormat.setDefaultTableName(job, args[5]);
    AccumuloOutputFormat.setZooKeeperInstance(job, args[3], args[4]);

    job.waitForCompletion(true);
    return job.isSuccessful() ? 0 : 1;
  }
  /** The run method which sets the configuration and starts the MapReduce job */
  public int run(String[] args) throws Exception {

    if (USE_MINI_ACCUMULO) {
      Connector connector = LocalEnvUtil.getConnector(userPass);
      userName = "******";
      instanceName = connector.getInstance().getInstanceName();
      zookeepers = connector.getInstance().getZooKeepers();
    }

    // Create and initialize a MapReduce Job
    Job job = Job.getInstance(getConf(), "tweetIndexer");
    job.setJarByClass(IndexedDocIndexer.class);

    // Set the AccumuloInputFormat so the mapper can read from Accumulo

    AccumuloInputFormat.setConnectorInfo(job, userName, new PasswordToken(userPass));

    AccumuloInputFormat.setInputTableName(job, twitterDataTable);

    AccumuloInputFormat.setScanAuthorizations(job, new Authorizations());

    ClientConfiguration clientConfig = new ClientConfiguration();
    clientConfig.withInstance(instanceName);
    clientConfig.withZkHosts(zookeepers);

    AccumuloInputFormat.setZooKeeperInstance(job, clientConfig);

    AccumuloOutputFormat.setConnectorInfo(job, userName, new PasswordToken(userPass));

    AccumuloOutputFormat.setCreateTables(job, createTables);

    AccumuloOutputFormat.setDefaultTableName(job, tweetDocIndex);

    AccumuloOutputFormat.setZooKeeperInstance(job, clientConfig);

    // Set the map and reduce classes
    job.setMapperClass(TweetMapper.class);
    job.setReducerClass(TweetReducer.class);

    // Set the output key and value class for the mapper
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // Set the output key and value class for the reducer
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Mutation.class);

    // Set the InputFormat and OutputFormat for the job
    job.setInputFormatClass(AccumuloInputFormat.class);
    job.setOutputFormatClass(AccumuloOutputFormat.class);

    // Run the MapReduce job and return 0 for success, 1 otherwise
    return job.waitForCompletion(true) ? 0 : 1;
  }
  /**
   * Instantiate a RecordWriter as required. This will create an RecordWriter from the internal
   * AccumuloOutputFormat
   */
  @Override
  public RecordWriter getRecordWriter(TaskAttemptContext context)
      throws IOException, InterruptedException {

    if (zoomLevel == -1) {
      zoomLevel =
          Integer.parseInt(
              context.getConfiguration().get(MrGeoAccumuloConstants.MRGEO_ACC_KEY_ZOOMLEVEL));
    }

    if (_innerFormat == null) {
      initialize(context);
    }

    if (_innerRecordWriter == null) {
      _innerRecordWriter = _innerFormat.getRecordWriter(context);
    }
    String pl = context.getConfiguration().get(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ);
    if (colViz == null) {
      colViz = new ColumnVisibility(pl);
    }
    AccumuloMrGeoRecordWriter outRW =
        new AccumuloMrGeoRecordWriter(
            zoomLevel, table, _innerRecordWriter, new String(colViz.getExpression()));

    return outRW;
  } // end getRecordWriter
Esempio n. 5
0
  @Override
  public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJobName("TeraSortCloud");
    job.setJarByClass(this.getClass());
    Opts opts = new Opts();
    opts.parseArgs(TeraSortIngest.class.getName(), args);

    job.setInputFormatClass(RangeInputFormat.class);
    job.setMapperClass(SortGenMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Mutation.class);

    job.setNumReduceTasks(0);

    job.setOutputFormatClass(AccumuloOutputFormat.class);
    opts.setAccumuloConfigs(job);
    BatchWriterConfig bwConfig = new BatchWriterConfig().setMaxMemory(10L * 1000 * 1000);
    AccumuloOutputFormat.setBatchWriterOptions(job, bwConfig);

    Configuration conf = job.getConfiguration();
    conf.setLong(NUMROWS, opts.numRows);
    conf.setInt("cloudgen.minkeylength", opts.minKeyLength);
    conf.setInt("cloudgen.maxkeylength", opts.maxKeyLength);
    conf.setInt("cloudgen.minvaluelength", opts.minValueLength);
    conf.setInt("cloudgen.maxvaluelength", opts.maxValueLength);
    conf.set("cloudgen.tablename", opts.getTableName());

    if (args.length > 10) conf.setInt(NUMSPLITS, opts.splits);

    job.waitForCompletion(true);
    return job.isSuccessful() ? 0 : 1;
  }
  @Override
  public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {

    // make sure the inner format is created
    if (_innerFormat == null) {
      initialize(context);
    }

    // make sure output specs are dealt with
    _innerFormat.checkOutputSpecs(context);
  } // end checkOutputSpecs
  // this is a tool because when you run a mapreduce, you will need to use the
  // ToolRunner
  // if you want libjars to be passed properly to the map and reduce tasks
  // even though this class isn't a mapreduce
  @Override
  public int run(String[] args) throws Exception {
    if (args.length != 5) {
      System.out.println(
          "Usage: bin/tool.sh "
              + this.getClass().getName()
              + " <instance name> <zoo keepers> <username> <password> <tablename>");
      return 1;
    }
    Text tableName = new Text(args[4]);
    Job job = new Job(getConf());
    Configuration conf = job.getConfiguration();
    AccumuloOutputFormat.setZooKeeperInstance(conf, args[0], args[1]);
    AccumuloOutputFormat.setOutputInfo(conf, args[2], args[3].getBytes(), true, null);
    job.setOutputFormatClass(AccumuloOutputFormat.class);

    // when running a mapreduce, you won't need to instantiate the output
    // format and record writer
    // mapreduce will do that for you, and you will just use
    // output.collect(tableName, mutation)
    TaskAttemptContext context = new TaskAttemptContext(conf, new TaskAttemptID());
    RecordWriter<Text, Mutation> rw = new AccumuloOutputFormat().getRecordWriter(context);

    Text colf = new Text("colfam");
    System.out.println("writing ...");
    for (int i = 0; i < 10000; i++) {
      Mutation m = new Mutation(new Text(String.format("row_%d", i)));
      for (int j = 0; j < 5; j++) {
        m.put(
            colf,
            new Text(String.format("colqual_%d", j)),
            new Value((String.format("value_%d_%d", i, j)).getBytes()));
      }
      rw.write(tableName, m); // repeat until done
      if (i % 100 == 0) System.out.println(i);
    }

    rw.close(context); // close when done
    return 0;
  }