private static void createMapReduceJob( String tableNameToIndex, Configuration conf, int caching, int versions) throws IOException, InterruptedException, ClassNotFoundException { // Set the details to TableInputFormat Scan s = new Scan(); s.setCaching(caching); s.setMaxVersions(versions); conf.set(TableInputFormat.INPUT_TABLE, tableNameToIndex); Set<Entry<String, List<String>>> entrySet = cfs.entrySet(); for (Entry<String, List<String>> entry : entrySet) { List<String> quals = entry.getValue(); addColumn(quals, Bytes.toBytes(entry.getKey()), s); } Job job = new Job(conf, "CreateIndex"); String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY); TableMapReduceUtil.initTableMapperJob( tableNameToIndex, // input table s, // Scan instance to control CF and attribute selection IndexCreationMapper.class, // mapper class ImmutableBytesWritable.class, // mapper output key Put.class, // mapper output value job); TableMapReduceUtil.initTableReducerJob( IndexUtils.getIndexTableName(tableNameToIndex), // output // table null, // reducer class job); if (hfileOutPath != null) { HTable table = new HTable(conf, tableNameToIndex); job.setReducerClass(KeyValueSortReducer.class); Path outputDir = new Path(hfileOutPath); FileOutputFormat.setOutputPath(job, outputDir); HFileOutputFormat.configureIncrementalLoad(job, table); } else { job.setNumReduceTasks(0); } TableMapReduceUtil.addDependencyJars( job.getConfiguration(), com.google.common.base.Preconditions.class); job.waitForCompletion(true); assert job.isComplete() == true; }
/**
 * Command-line entry point: creates the index table for a user table and then launches
 * the MapReduce job that populates it.
 *
 * <p>After Hadoop generic options are consumed, the first remaining argument is read as the
 * scanner caching value and the second as the max versions value; a value that is not a
 * valid integer falls back to {@code DEFAULT_CACHING} / {@code DEFAULT_VERSIONS}. The table
 * to index is the first value of the {@code TABLE_NAME_TO_INDEX} configuration property.
 *
 * @param args command-line arguments (generic options followed by caching and versions)
 * @throws Exception if index table creation or the MapReduce job fails
 */
public static void main(String[] args) throws Exception {
  Configuration conf = HBaseConfiguration.create();
  String[] remaining = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (remaining.length < 2) {
    System.out.println("Caching and Versions not specified");
    System.exit(-1);
  }

  int caching = parseIntArgOrDefault(remaining[0], DEFAULT_CACHING);
  int versions = parseIntArgOrDefault(remaining[1], DEFAULT_VERSIONS);

  String[] configuredTables = conf.getStrings(TABLE_NAME_TO_INDEX);
  if (configuredTables == null) {
    printIndexUsage();
    System.exit(-1);
  }

  String tableNameToIndex = configuredTables[0];
  IndexUtils.createIndexTable(tableNameToIndex, conf, cfs);
  createMapReduceJob(tableNameToIndex, conf, caching, versions);
}

/** Parses {@code raw} as an int, returning {@code fallback} when it is not a valid integer. */
private static int parseIntArgOrDefault(String raw, int fallback) {
  try {
    return Integer.parseInt(raw);
  } catch (NumberFormatException nfe) {
    return fallback;
  }
}

/** Prints the usage text describing the index specification format to standard output. */
private static void printIndexUsage() {
  String[] usageLines = {
    "Wrong usage.  Usage is pass the table -Dindex.tablename='table1' "
        + "-Dtable.columns.index='IDX1=>cf1:[q1->datatype& length],[q2],"
        + "[q3];cf2:[q1->datatype&length],[q2->datatype&length],[q3->datatype& lenght]#IDX2=>cf1:q5,q5'",
    "The format used here is: ",
    "IDX1 - Index name",
    "cf1 - Columnfamilyname",
    "q1 - qualifier",
    "datatype - datatype  (Int, String, Double, Float)",
    "length - length of the value",
    "The columnfamily should be seperated by ';'",
    "The qualifier and the datatype and its length should be enclosed in '[]'."
        + "  The qualifier details are specified using '->' following qualifer name and the details are seperated by '&'",
    "If the qualifier details are not specified default values are used.",
    "# is used to seperate between two index details",
    "Pass the scanner caching and maxversions as arguments."
  };
  for (String usageLine : usageLines) {
    System.out.println(usageLine);
  }
}