@Override
public int run(String[] args) throws Exception {
  String instance = args[0];
  String zookeepers = args[1];
  String user = args[2];
  String tokenFile = args[3];
  String input = args[4];
  String tableName = args[5];

  Job job = Job.getInstance(getConf());
  job.setJobName(TokenFileWordCount.class.getName());
  job.setJarByClass(this.getClass());

  job.setInputFormatClass(TextInputFormat.class);
  TextInputFormat.setInputPaths(job, input);

  job.setMapperClass(MapClass.class);
  job.setNumReduceTasks(0);

  job.setOutputFormatClass(AccumuloOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Mutation.class);

  // AccumuloInputFormat not used here, but it uses the same functions.
  AccumuloOutputFormat.setZooKeeperInstance(job,
      ClientConfiguration.loadDefault().withInstance(instance).withZkHosts(zookeepers));
  AccumuloOutputFormat.setConnectorInfo(job, user, tokenFile);
  AccumuloOutputFormat.setCreateTables(job, true);
  AccumuloOutputFormat.setDefaultTableName(job, tableName);

  job.waitForCompletion(true);
  return 0;
}
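The run(String[]) method above comes from Hadoop's Tool interface, so a job written this way is normally launched through ToolRunner, which applies generic options such as -libjars and -D settings to the configuration before run() is invoked. Below is a minimal driver sketch, assuming TokenFileWordCount extends Configured and implements Tool; the main method shown here is illustrative and not part of the snippet above.

import org.apache.hadoop.util.ToolRunner;

public class TokenFileWordCountDriver {
  public static void main(String[] args) throws Exception {
    // ToolRunner strips the generic Hadoop options from args before delegating to run()
    System.exit(ToolRunner.run(new TokenFileWordCount(), args));
  }
}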
@Override
public int run(String[] args) throws Exception {
  Job job = new Job(getConf(), this.getClass().getSimpleName());
  job.setJarByClass(this.getClass());

  if (job.getJar() == null) {
    log.error("M/R requires a jar file! Run mvn package.");
    return 1;
  }

  job.setInputFormatClass(AccumuloInputFormat.class);
  AccumuloInputFormat.setConnectorInfo(job, args[0], new PasswordToken(args[1]));
  AccumuloInputFormat.setInputTableName(job, args[2]);
  AccumuloInputFormat.setScanAuthorizations(job, Authorizations.EMPTY);
  AccumuloInputFormat.setZooKeeperInstance(job, args[3], args[4]);

  job.setMapperClass(SeqMapClass.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Mutation.class);
  job.setNumReduceTasks(0);

  job.setOutputFormatClass(AccumuloOutputFormat.class);
  AccumuloOutputFormat.setConnectorInfo(job, args[0], new PasswordToken(args[1]));
  AccumuloOutputFormat.setCreateTables(job, true);
  AccumuloOutputFormat.setDefaultTableName(job, args[5]);
  AccumuloOutputFormat.setZooKeeperInstance(job, args[3], args[4]);

  job.waitForCompletion(true);
  return job.isSuccessful() ? 0 : 1;
}
/** The run method which sets the configuration and starts the MapReduce job */
public int run(String[] args) throws Exception {
  if (USE_MINI_ACCUMULO) {
    Connector connector = LocalEnvUtil.getConnector(userPass);
    userName = "******";
    instanceName = connector.getInstance().getInstanceName();
    zookeepers = connector.getInstance().getZooKeepers();
  }

  // Create and initialize a MapReduce Job
  Job job = Job.getInstance(getConf(), "tweetIndexer");
  job.setJarByClass(IndexedDocIndexer.class);

  // Set the AccumuloInputFormat so the mapper can read from Accumulo
  AccumuloInputFormat.setConnectorInfo(job, userName, new PasswordToken(userPass));
  AccumuloInputFormat.setInputTableName(job, twitterDataTable);
  AccumuloInputFormat.setScanAuthorizations(job, new Authorizations());

  ClientConfiguration clientConfig = new ClientConfiguration();
  clientConfig.withInstance(instanceName);
  clientConfig.withZkHosts(zookeepers);
  AccumuloInputFormat.setZooKeeperInstance(job, clientConfig);

  AccumuloOutputFormat.setConnectorInfo(job, userName, new PasswordToken(userPass));
  AccumuloOutputFormat.setCreateTables(job, createTables);
  AccumuloOutputFormat.setDefaultTableName(job, tweetDocIndex);
  AccumuloOutputFormat.setZooKeeperInstance(job, clientConfig);

  // Set the map and reduce classes
  job.setMapperClass(TweetMapper.class);
  job.setReducerClass(TweetReducer.class);

  // Set the output key and value class for the mapper
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);

  // Set the output key and value class for the reducer
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Mutation.class);

  // Set the InputFormat and OutputFormat for the job
  job.setInputFormatClass(AccumuloInputFormat.class);
  job.setOutputFormatClass(AccumuloOutputFormat.class);

  // Run the MapReduce job and return 0 for success, 1 otherwise
  return job.waitForCompletion(true) ? 0 : 1;
}
/**
 * Instantiate a RecordWriter as required. This will create a RecordWriter from the internal
 * AccumuloOutputFormat.
 */
@Override
public RecordWriter getRecordWriter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  if (zoomLevel == -1) {
    zoomLevel = Integer.parseInt(
        context.getConfiguration().get(MrGeoAccumuloConstants.MRGEO_ACC_KEY_ZOOMLEVEL));
  }

  if (_innerFormat == null) {
    initialize(context);
  }

  if (_innerRecordWriter == null) {
    _innerRecordWriter = _innerFormat.getRecordWriter(context);
  }

  String pl = context.getConfiguration().get(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ);
  if (colViz == null) {
    colViz = new ColumnVisibility(pl);
  }

  AccumuloMrGeoRecordWriter outRW = new AccumuloMrGeoRecordWriter(
      zoomLevel, table, _innerRecordWriter, new String(colViz.getExpression()));
  return outRW;
} // end getRecordWriter
@Override
public int run(String[] args) throws Exception {
  Job job = Job.getInstance(getConf());
  job.setJobName("TeraSortCloud");
  job.setJarByClass(this.getClass());

  Opts opts = new Opts();
  opts.parseArgs(TeraSortIngest.class.getName(), args);

  job.setInputFormatClass(RangeInputFormat.class);
  job.setMapperClass(SortGenMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Mutation.class);
  job.setNumReduceTasks(0);

  job.setOutputFormatClass(AccumuloOutputFormat.class);
  opts.setAccumuloConfigs(job);
  BatchWriterConfig bwConfig = new BatchWriterConfig().setMaxMemory(10L * 1000 * 1000);
  AccumuloOutputFormat.setBatchWriterOptions(job, bwConfig);

  Configuration conf = job.getConfiguration();
  conf.setLong(NUMROWS, opts.numRows);
  conf.setInt("cloudgen.minkeylength", opts.minKeyLength);
  conf.setInt("cloudgen.maxkeylength", opts.maxKeyLength);
  conf.setInt("cloudgen.minvaluelength", opts.minValueLength);
  conf.setInt("cloudgen.maxvaluelength", opts.maxValueLength);
  conf.set("cloudgen.tablename", opts.getTableName());

  if (args.length > 10)
    conf.setInt(NUMSPLITS, opts.splits);

  job.waitForCompletion(true);
  return job.isSuccessful() ? 0 : 1;
}
@Override
public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
  // make sure the inner format is created
  if (_innerFormat == null) {
    initialize(context);
  }

  // make sure output specs are dealt with
  _innerFormat.checkOutputSpecs(context);
} // end checkOutputSpecs
// this is a tool because when you run a mapreduce, you will need to use the ToolRunner
// if you want libjars to be passed properly to the map and reduce tasks
// even though this class isn't a mapreduce
@Override
public int run(String[] args) throws Exception {
  if (args.length != 5) {
    System.out.println("Usage: bin/tool.sh " + this.getClass().getName()
        + " <instance name> <zoo keepers> <username> <password> <tablename>");
    return 1;
  }

  Text tableName = new Text(args[4]);
  Job job = new Job(getConf());
  Configuration conf = job.getConfiguration();
  AccumuloOutputFormat.setZooKeeperInstance(conf, args[0], args[1]);
  AccumuloOutputFormat.setOutputInfo(conf, args[2], args[3].getBytes(), true, null);
  job.setOutputFormatClass(AccumuloOutputFormat.class);

  // when running a mapreduce, you won't need to instantiate the output format and record writer
  // mapreduce will do that for you, and you will just use output.collect(tableName, mutation)
  TaskAttemptContext context = new TaskAttemptContext(conf, new TaskAttemptID());
  RecordWriter<Text, Mutation> rw = new AccumuloOutputFormat().getRecordWriter(context);

  Text colf = new Text("colfam");
  System.out.println("writing ...");
  for (int i = 0; i < 10000; i++) {
    Mutation m = new Mutation(new Text(String.format("row_%d", i)));
    for (int j = 0; j < 5; j++) {
      m.put(colf, new Text(String.format("colqual_%d", j)),
          new Value((String.format("value_%d_%d", i, j)).getBytes()));
    }
    rw.write(tableName, m); // repeat until done
    if (i % 100 == 0)
      System.out.println(i);
  }

  rw.close(context); // close when done
  return 0;
}
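The snippet above instantiates TaskAttemptContext directly, which only compiles against the older Hadoop API where TaskAttemptContext was a concrete class. On Hadoop 2.x, where it is an interface, a standalone context would be built roughly as follows; this is a sketch assuming the Hadoop 2 mapreduce classes are on the classpath, and the helper class name is hypothetical.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

class TaskContexts {
  // Builds a TaskAttemptContext for using an OutputFormat outside of a running MapReduce job
  static TaskAttemptContext newContext(Configuration conf) {
    return new TaskAttemptContextImpl(conf, new TaskAttemptID());
  }
}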