@Override
public void searchDB(String keyword) {
  long t0 = System.nanoTime();
  try {
    // First MapReduce phase setup
    HBaseConfiguration conf = config;
    // Hand the search keyword to the mappers; the property name "search.keyword"
    // is an assumption -- the original code never passed the keyword to the jobs.
    conf.set("search.keyword", keyword);
    Job job = new Job(conf, "MapReducePhase1");
    job.setJarByClass(MapReduceHbaseDB.class);
    Scan scan = new Scan();
    scan.addColumns("myColumnFamily");
    scan.setCaching(10000);

    // Second MapReduce phase setup
    HBaseConfiguration conf2 = new HBaseConfiguration();
    conf2.set("search.keyword", keyword);
    Job job2 = new Job(conf2, "MapReducePhase2");
    job2.setJarByClass(MapReduceHbaseDB.class);
    Scan scan2 = new Scan();
    scan2.addColumns("resultF");
    scan2.setCaching(10000);

    // Execution of the first MapReduce phase
    TableMapReduceUtil.initTableMapperJob(
        "myTable", scan, Mapper1.class, Text.class, Text.class, job);
    TableMapReduceUtil.initTableReducerJob("result", Reducer1.class, job);
    job.waitForCompletion(true);
    long t2 = System.nanoTime();

    // Execution of the second MapReduce phase
    TableMapReduceUtil.initTableMapperJob(
        "result", scan2, Mapper2.class, Text.class, IntWritable.class, job2);
    TableMapReduceUtil.initTableReducerJob("result2", Reducer2.class, job2);
    job2.waitForCompletion(true);
    long t1 = System.nanoTime();

    double totalTime = (t1 - t0) / 1000000000.0;
    System.out.println("Total time for the search : " + totalTime + " seconds");
    double firstPhaseTime = (t2 - t0) / 1000000000.0;
    System.out.println("Time for the first mapreduce phase : " + firstPhaseTime + " seconds");
    double secondPhaseTime = (t1 - t2) / 1000000000.0;
    // Fixed label: the original printed "first" here for the second phase.
    System.out.println("Time for the second mapreduce phase : " + secondPhaseTime + " seconds");
  } catch (IOException e) {
    e.printStackTrace();
  } catch (InterruptedException e) {
    e.printStackTrace();
  } catch (ClassNotFoundException e) {
    e.printStackTrace();
  }
}
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
  // First argument is the table name.
  String tableName = args[0];
  Job job = new Job(conf, NAME + "_" + tableName);
  job.setJarByClass(CachingRowCounter.class);
  // Columns are space delimited
  StringBuilder sb = new StringBuilder();
  final int columnoffset = 1;
  for (int i = columnoffset; i < args.length; i++) {
    if (i > columnoffset) {
      sb.append(" ");
    }
    sb.append(args[i]);
  }

  Scan scan = new Scan();
  scan.setFilter(new FirstKeyOnlyFilter());
  if (sb.length() > 0) {
    for (String columnName : sb.toString().split(" ")) {
      String[] fields = columnName.split(":");
      if (fields.length == 1) {
        scan.addFamily(Bytes.toBytes(fields[0]));
      } else {
        scan.addColumn(Bytes.toBytes(fields[0]), Bytes.toBytes(fields[1]));
      }
    }
  }
  scan.setCaching(100);

  job.setOutputFormatClass(NullOutputFormat.class);
  TableMapReduceUtil.initTableMapperJob(
      tableName, scan, RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
  job.setNumReduceTasks(0);
  return job;
}
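Several of these drivers (here, and the test snippets further down that read RowCounterMapper.Counters.ROWS) wire in a RowCounterMapper with ImmutableBytesWritable/Result output types but never show its body. A minimal sketch, modeled on the mapper shipped with HBase's own RowCounter tool; only the counter name is taken from the surrounding snippets, the rest is an assumption:

import java.io.IOException;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;

// Sketch: counts rows by bumping a counter and emits no map output, which is
// why the drivers pair it with NullOutputFormat and zero reduce tasks.
public static class RowCounterMapper extends TableMapper<ImmutableBytesWritable, Result> {
  public static enum Counters { ROWS }

  @Override
  public void map(ImmutableBytesWritable row, Result values, Context context)
      throws IOException {
    // One increment per row; the FirstKeyOnlyFilter on the Scan keeps the data
    // shipped per row down to a single cell.
    context.getCounter(Counters.ROWS).increment(1);
  }
}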
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  conf.set("es.nodes", ES_NODES);
  conf.set("es.resource", ES_RESOURCE);
  conf.set("es.mapping.id", HBaseTableMapper.ID_FIELD.toString());
  conf.set("es.batch.size.bytes", "10mb");
  conf.set("es.batch.size.entries", "10000");
  conf.set("es.batch.write.refresh", "false");

  Job job = new Job(conf);
  job.setJarByClass(BulkIndex.class);
  job.setMapperClass(HBaseTableMapper.class);
  job.setNumReduceTasks(0);
  // The original also called setSpeculativeExecution(false) on a throwaway
  // JobConf, which had no effect; disabling it on the job itself is enough.
  job.setSpeculativeExecution(false);
  job.setOutputFormatClass(BulkProcessorOutputFormat.class);
  job.setMapOutputValueClass(Text.class);

  Scan scan = new Scan();
  scan.setCaching(1000);
  scan.setCacheBlocks(false);

  TableMapReduceUtil.initTableMapperJob(
      BulkLoad.HBASE_TABLE_NAME,
      scan,
      HBaseTableMapper.class,
      NullWritable.class,
      MapWritable.class,
      job);

  job.waitForCompletion(true);
}
/** Job configuration. */
public static Job configureJob(Configuration conf, String[] args) throws IOException {
  Scan scan = new Scan();
  scan.addFamily(Cw09Constants.CF_FREQUENCIES_BYTES);
  scan.setBatch(Cw09Constants.CW09_INDEX_SCAN_BATCH);
  conf.set("mapred.map.tasks.speculative.execution", "false");
  conf.set("mapred.reduce.tasks.speculative.execution", "false");

  Job job = Job.getInstance(conf, "Count the total frequency of each term in the index table");
  job.setJarByClass(TermHitsCounter.class);
  // TableMapReduceUtil.initTableMapperJob(Constants.CLUEWEB09_INDEX_TABLE_NAME, scan,
  //     ThcMapper.class, Text.class, LongWritable.class, job);
  TableMapReduceUtil.initTableMapperJob(
      Cw09Constants.CLUEWEB09_INDEX_TABLE_NAME,
      scan,
      ThcMapper.class,
      Text.class,
      LongWritable.class,
      job,
      true,
      CustomizedSplitTableInputFormat.class);
  job.setCombinerClass(ThcCombiner.class);
  TableMapReduceUtil.initTableReducerJob(
      Cw09Constants.CLUEWEB09_TERM_COUNT_TABLE_NAME, ThcReducer.class, job);
  job.setNumReduceTasks(40);
  return job;
}
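The driver above registers ThcCombiner between the Text/LongWritable map output and the table reducer, but the class body is not shown in the source. A plausible minimal sketch, assuming the combiner simply pre-sums per-term hit counts on the map side:

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Sketch: pre-aggregates term counts locally to cut shuffle volume. The real
// ThcCombiner is not in the source; plain summing is an assumption.
public static class ThcCombiner extends Reducer<Text, LongWritable, Text, LongWritable> {
  private final LongWritable total = new LongWritable();

  @Override
  protected void reduce(Text term, Iterable<LongWritable> counts, Context context)
      throws IOException, InterruptedException {
    long sum = 0;
    for (LongWritable count : counts) {
      sum += count.get();
    }
    total.set(sum);
    context.write(term, total);
  }
}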
private void testMapReduceInternal(
    long origKeyPrefix,
    Scan scan,
    int numValues,
    int startWithValue,
    int seekIntervalMinValue,
    int seekIntervalMaxValue)
    throws IOException, InterruptedException, ClassNotFoundException {
  int valuesCountInSeekInterval = writeTestData(
      origKeyPrefix, numValues, startWithValue, seekIntervalMinValue, seekIntervalMaxValue);

  // Reading data
  Configuration conf = testingUtility.getConfiguration();
  Job job = new Job(conf, "testMapReduceInternal()-Job");
  job.setJarByClass(this.getClass());
  TableMapReduceUtil.initTableMapperJob(
      TABLE_NAME, scan, RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);

  // Substituting standard TableInputFormat which was set in
  // TableMapReduceUtil.initTableMapperJob(...)
  job.setInputFormatClass(WdTableInputFormat.class);
  keyDistributor.addInfo(job.getConfiguration());

  job.setOutputFormatClass(NullOutputFormat.class);
  job.setNumReduceTasks(0);

  boolean succeeded = job.waitForCompletion(true);
  Assert.assertTrue(succeeded);

  long mapInputRecords = job.getCounters().findCounter(RowCounterMapper.Counters.ROWS).getValue();
  Assert.assertEquals(valuesCountInSeekInterval, mapInputRecords);
}
@Test
public void shouldJoinTables() throws Exception {
  // given
  Job job = new Job(configuration, "Joins");
  job.setJarByClass(AverageRatingMapper.class);

  List<Scan> scans = new ArrayList<>();
  Scan scan1 = new Scan();
  scan1.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(LoadMovieData.TABLE_NAME));
  scans.add(scan1);
  Scan scan2 = new Scan();
  scan2.setAttribute(
      Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(LoadMovieRatingData.TABLE_NAME));
  scans.add(scan2);

  TableMapReduceUtil.initTableMapperJob(scans, FilterMapper.class, null, null, job);
  // FileOutputFormat.setOutputPath(job, new Path("/tmp/sages/movies_with_ratings_" +
  //     System.currentTimeMillis()));
  TableMapReduceUtil.initTableReducerJob(TABLE_NAME, null, job);
  job.setNumReduceTasks(0);

  // when
  boolean succeeded = job.waitForCompletion(true);

  // then
  assertThat(succeeded).isTrue();
}
public static void main(String[] args) throws Exception {
  Configuration con = new Configuration();
  String[] otherArgs = new GenericOptionsParser(con, args).getRemainingArgs();
  // Build the HBase configuration on top of the parsed one, so generic options
  // (-D, -conf, ...) are not silently dropped as they were in the original.
  HBaseConfiguration conf = new HBaseConfiguration(con);
  Job job = new Job(conf, "AverageCalc");
  job.setJarByClass(AverageCalculator.class);

  Scan scan = new Scan();
  scan.setCaching(500);
  scan.setCacheBlocks(false);
  scan.addFamily(Bytes.toBytes("Post"));
  FilterList li = new FilterList(FilterList.Operator.MUST_PASS_ALL);
  SingleColumnValueFilter filter = new SingleColumnValueFilter(
      Bytes.toBytes("Post"), Bytes.toBytes("PostTypeId"), CompareOp.EQUAL, Bytes.toBytes("1"));
  li.addFilter(filter);
  scan.setFilter(li);

  FileOutputFormat.setOutputPath(job, new Path(otherArgs[0]));
  job.setOutputKeyClass(Text.class);
  TableMapReduceUtil.initTableMapperJob(
      "bigd24-hbase-sample", scan, Mapper1.class, Text.class, IntWritable.class, job);
  job.setReducerClass(Reducer1.class);
  job.setOutputValueClass(FloatWritable.class);
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
public static void htableFile() throws Exception {
  Job job = new Job(conf, "ExampleSummaryToFile");
  job.setJarByClass(HbaseMR.class); // class that contains mapper

  Scan scan = new Scan();
  scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
  scan.setCacheBlocks(false); // don't set to true for MR jobs
  // set other scan attrs

  TableMapReduceUtil.initTableMapperJob(
      "sourceTable", // input table
      scan, // Scan instance to control CF and attribute selection
      MyMapper.class, // mapper class
      Text.class, // mapper output key
      IntWritable.class, // mapper output value
      job);
  job.setReducerClass(MyReducer4.class); // reducer class
  job.setNumReduceTasks(1); // at least one, adjust as required
  // The original passed a fresh JobConf here, which configured the wrong object;
  // the output path must be set on the job being submitted.
  FileOutputFormat.setOutputPath(job, new Path("/tmp/mr/mySummaryFile")); // adjust directories as required

  boolean b = job.waitForCompletion(true);
  if (!b) {
    throw new IOException("error with job!");
  }
}
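MyReducer4 is referenced above but not shown. In the stock summary-to-file pattern this snippet follows, the reducer just sums the per-key counts emitted by the mapper and writes them to the output file; a minimal sketch under that assumption:

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Sketch: totals the IntWritable counts per key and writes "key <TAB> total"
// records. The real MyReducer4 body is not in the source.
public static class MyReducer4 extends Reducer<Text, IntWritable, Text, IntWritable> {
  @Override
  protected void reduce(Text key, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {
    int sum = 0;
    for (IntWritable val : values) {
      sum += val.get();
    }
    context.write(key, new IntWritable(sum));
  }
}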
public static void readWriteHtable() throws Exception {
  Job job = new Job(conf, "ExampleReadWrite");
  job.setJarByClass(HbaseMR.class); // class that contains mapper

  Scan scan = new Scan();
  scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
  scan.setCacheBlocks(false); // don't set to true for MR jobs
  // set other scan attrs

  TableMapReduceUtil.initTableMapperJob(
      "sourceTable", // input table
      scan, // Scan instance to control CF and attribute selection
      MyMapper2.class, // mapper class
      null, // mapper output key
      null, // mapper output value
      job);
  TableMapReduceUtil.initTableReducerJob(
      "targetTable", // output table
      null, // reducer class
      job);
  job.setNumReduceTasks(0);

  boolean b = job.waitForCompletion(true);
  if (!b) {
    throw new IOException("error with job!");
  }
}
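For a map-only copy job like this one, the mapper has to emit Puts keyed by row so the TableOutputFormat installed by initTableReducerJob can write them to targetTable. A sketch along the lines of the standard read/write example, using the same old-style API (KeyValue, Result.raw()) as the rest of this snippet; the actual MyMapper2 body is not in the source:

import java.io.IOException;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;

// Sketch: copies every cell of each scanned row into a Put for the target table.
public static class MyMapper2 extends TableMapper<ImmutableBytesWritable, Put> {
  @Override
  public void map(ImmutableBytesWritable row, Result value, Context context)
      throws IOException, InterruptedException {
    Put put = new Put(row.get());
    for (KeyValue kv : value.raw()) {
      put.add(kv); // keep family, qualifier, timestamp and value as-is
    }
    context.write(row, put);
  }
}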
/** Job configuration. */
public static Job configureJob(Configuration conf, String[] args) throws IOException {
  String tableName = args[0];
  String columnFamily = args[1];
  String outputPath = args[2];
  String rowKeyType = args[3];
  conf.set("row.key.type", rowKeyType);
  conf.set("table.name", tableName);

  Scan scan = new Scan();
  scan.addFamily(Bytes.toBytes(columnFamily));
  scan.setBatch(ConstantsTruthy.TRUTHY_TABLE_SCAN_BATCH);
  conf.set("mapred.map.tasks.speculative.execution", "false");
  conf.set("mapred.reduce.tasks.speculative.execution", "false");

  Job job = Job.getInstance(
      conf, "Count the column count and indexRecordSize for each row in " + tableName);
  job.setJarByClass(TruthyIndexFeatureCounter.class);
  TableMapReduceUtil.initTableMapperJob(
      tableName, scan, TfcMapper.class, Text.class, Text.class, job, true);
  job.setNumReduceTasks(0);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  FileOutputFormat.setOutputPath(job, new Path(outputPath));
  TableMapReduceUtil.addDependencyJars(job);
  return job;
}
@Test
public void shouldRunMapReduce() throws Exception {
  // given
  Job job = new Job(configuration, "Average Rating");
  job.setJarByClass(AverageRatingMapper.class);

  Scan scan = new Scan();
  scan.setCaching(500);
  scan.setCacheBlocks(false);
  scan.addFamily(Bytes.toBytes(LoadMovieRatingData.FAMILY_NAME));

  TableMapReduceUtil.initTableMapperJob(
      LoadMovieRatingData.TABLE_NAME,
      scan,
      AverageRatingMapper.class,
      Text.class,
      DoubleWritable.class,
      job);
  job.setReducerClass(RatingExportReducer.class);
  job.setNumReduceTasks(1);
  FileOutputFormat.setOutputPath(
      job, new Path("/tmp/mr/mySummaryFile_" + System.currentTimeMillis()));

  // when
  boolean succeeded = job.waitForCompletion(true);

  // then
  assertThat(succeeded).isTrue();
}
public int run(String[] args) throws Exception {
  if (args.length != 1) {
    System.out.println("usage: CountRows <table name>");
    return 1;
  }
  Configuration conf = getConf();
  try {
    String tableName = args[0];
    LOG.info("Before map/reduce startup");
    Job job = new Job(conf, "query: count rows");
    job.setJarByClass(this.getClass());
    job.getConfiguration().set(TABLE_NAME, args[0]);

    Scan scan = new Scan();
    TableMapReduceUtil.initTableMapperJob(
        tableName, scan, CountRowMapper.class, ImmutableBytesWritable.class, Put.class, job);
    // TableMapReduceUtil.initTableReducerJob(tableName,
    //     IdentityTableReducer.class, job);
    job.setNumReduceTasks(0);

    LOG.info("Started " + tableName);
    job.waitForCompletion(true);
    LOG.info("After map/reduce completion");
  } catch (Exception e) {
    e.printStackTrace();
    return 1;
  }
  return 0;
}
public static void main(String[] args) throws Exception {
  Configuration conf = HBaseConfiguration.create();
  Job job = new Job(conf, SensorMR.class.getName() + "--<your name>"); // TODO
  job.setJarByClass(SensorMR.class);

  Scan scan = new Scan();
  scan.setFilter(new FirstKeyOnlyFilter());

  TableMapReduceUtil.initTableMapperJob(
      tableRawData, scan, Mapper1.class, ImmutableBytesWritable.class, FloatWritable.class, job);
  TableMapReduceUtil.initTableReducerJob(tableSummaryData, Reducer1.class, job);
  TableMapReduceUtil.addDependencyJars(job);
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
public static Job startJob(String[] args) throws IOException {
  // args[0] = hbase table name
  // args[1] = zookeeper

  Configuration hConf = HBaseConfiguration.create(new Configuration());
  hConf.set("hbase.zookeeper.quorum", args[1]);
  hConf.set("scan.table", args[0]);
  hConf.set("hbase.zookeeper.property.clientPort", "2181");

  Scan scan = new Scan();
  // scan.setFilter(rowColBloomFilter());

  Job job = new Job(hConf);
  job.setJobName("BSBM-Q11-RepartitionJoin");
  job.setJarByClass(RepartitionJoinQ11.class);
  // Change caching to speed up the scan
  scan.setCaching(500);
  scan.setMaxVersions(200);
  scan.setCacheBlocks(false);

  // Mapper settings
  TableMapReduceUtil.initTableMapperJob(
      args[0], // input HBase table name
      scan, // Scan instance to control CF and attribute selection
      RepartitionMapper.class, // mapper class
      CompositeKeyWritable.class, // mapper output key
      KeyValueArrayWritable.class, // mapper output value
      job);

  // Repartition settings
  job.setPartitionerClass(CompositePartitioner.class);
  job.setSortComparatorClass(CompositeSortComparator.class);
  job.setGroupingComparatorClass(CompositeGroupingComparator.class);

  // Reducer settings
  job.setReducerClass(SharedServices.RepartitionJoin_Reducer.class); // reducer class
  job.setNumReduceTasks(1); // at least one, adjust as required

  FileOutputFormat.setOutputPath(job, new Path("output/BSBMQ11"));

  try {
    // The original wrapped this in System.exit(...), which made the trailing
    // return unreachable; waiting and returning matches the method signature.
    job.waitForCompletion(true);
  } catch (ClassNotFoundException e) {
    e.printStackTrace();
  } catch (InterruptedException e) {
    e.printStackTrace();
  }
  return job;
}
/**
 * @param args the command line arguments
 * @throws java.io.IOException
 */
public static void main(String[] args) throws Exception {
  // The original declared "Conf conf = new Conf()", which is not a Hadoop type;
  // an HBase-aware Configuration is presumably what was meant.
  Configuration conf = HBaseConfiguration.create();
  Job job = new Job(conf, "TweetsLanguage");
  Scan scan = new Scan();
  TableMapReduceUtil.initTableMapperJob(
      "hhscyber:tweets", scan, LanguageMapper.class, null, null, job);
  job.setNumReduceTasks(0);
  TableMapReduceUtil.initTableReducerJob("hhscyber:tweets_lang", null, job);
  job.waitForCompletion(true);
}
private static void createMapReduceJob(
    String tableNameToIndex, Configuration conf, int caching, int versions)
    throws IOException, InterruptedException, ClassNotFoundException {
  // Set the details to TableInputFormat
  Scan s = new Scan();
  s.setCaching(caching);
  s.setMaxVersions(versions);
  conf.set(TableInputFormat.INPUT_TABLE, tableNameToIndex);

  Set<Entry<String, List<String>>> entrySet = cfs.entrySet();
  for (Entry<String, List<String>> entry : entrySet) {
    List<String> quals = entry.getValue();
    addColumn(quals, Bytes.toBytes(entry.getKey()), s);
  }

  Job job = new Job(conf, "CreateIndex");
  String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);

  TableMapReduceUtil.initTableMapperJob(
      tableNameToIndex, // input table
      s, // Scan instance to control CF and attribute selection
      IndexCreationMapper.class, // mapper class
      ImmutableBytesWritable.class, // mapper output key
      Put.class, // mapper output value
      job);
  TableMapReduceUtil.initTableReducerJob(
      IndexUtils.getIndexTableName(tableNameToIndex), // output table
      null, // reducer class
      job);

  if (hfileOutPath != null) {
    HTable table = new HTable(conf, tableNameToIndex);
    job.setReducerClass(KeyValueSortReducer.class);
    Path outputDir = new Path(hfileOutPath);
    FileOutputFormat.setOutputPath(job, outputDir);
    HFileOutputFormat.configureIncrementalLoad(job, table);
  } else {
    job.setNumReduceTasks(0);
  }

  TableMapReduceUtil.addDependencyJars(
      job.getConfiguration(), com.google.common.base.Preconditions.class);
  job.waitForCompletion(true);
  assert job.isComplete();
}
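When the hfileOutPath branch is taken, the job only writes HFiles; they still have to be handed to the region servers afterwards. A hedged sketch of that follow-up step using the stock bulk-load tool from the same API era (this step is not present in the source, and the snippet assumes the conf, hfileOutPath, and tableNameToIndex variables from the method above, inside a context that may throw Exception):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;

// Sketch: after waitForCompletion, move the generated HFiles into the index table.
LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
loader.doBulkLoad(
    new Path(hfileOutPath),
    new HTable(conf, IndexUtils.getIndexTableName(tableNameToIndex)));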
@Override
public int run(String[] strings) throws Exception {
  Configuration conf = new Configuration();
  // String inputFileName = "/cluster/gmm.seq";
  String outputFileName = "/cluster/matrix_intermediate_" + level + ".seq";
  int result;
  System.out.println("level:" + level);
  conf.set("level", level + "");

  String table = "ClusterDatabase";
  // String seqFileName = "/cluster/gmm.seq";

  Scan scan = new Scan();
  scan.setStartRow((level + "|").getBytes());
  scan.setStopRow(
      Bytes.add((level + "|").getBytes(), Bytes.toBytes("ffffffffffffffffffffffffffffffff")));
  scan.addColumn("Cluster".getBytes(), "GMM".getBytes());

  // try (FileSystem fileSystem = FileSystem.get(conf)) {
  FileSystem fileSystem = FileSystem.get(conf);
  Path outputpath = new Path(outputFileName);
  if (fileSystem.exists(outputpath)) {
    fileSystem.delete(outputpath, true);
  }

  Job job = new Job(conf, "Matrix Creation I From HBase");
  job.setJarByClass(MatrixCreationI.class);
  TableMapReduceUtil.initTableMapperJob(
      table, scan, MatrixMapper.class, IntWritable.class, Text.class, job);
  job.setReducerClass(MatrixReducer.class);
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  // job.setInputFormatClass(TableInputFormat.class);
  // job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setNumReduceTasks(1);
  // FileInputFormat.addInputPath(job, new Path(inputFileName + "/part*"));
  FileOutputFormat.setOutputPath(job, outputpath);
  result = job.waitForCompletion(true) ? 0 : 1;
  // }
  return result;
}
@Override
public int run(String[] args) throws Exception {
  if (args.length != 1) {
    System.err.println("Usage: SimpleRowCounter <tablename>");
    return -1;
  }
  String tableName = args[0];

  Scan scan = new Scan();
  scan.setFilter(new FirstKeyOnlyFilter());

  Job job = new Job(getConf(), getClass().getSimpleName());
  job.setJarByClass(getClass()); // Set the jar by finding where a given class came from.
  TableMapReduceUtil.initTableMapperJob(
      tableName, scan, RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
  job.setNumReduceTasks(0); // no reduce phase is needed
  job.setOutputFormatClass(NullOutputFormat.class); // no output is needed
  return job.waitForCompletion(true) ? 0 : 1;
}
// MapReduce Stage-1 Job
public static Job startJob_Stage1(String[] args, Configuration hConf) throws IOException {
  // args[0] = hbase table name
  // args[1] = zookeeper

  /*
   * MapReduce Stage-1 Job
   * Retrieve a list of subjects and their attributes
   */
  Scan scan1 = new Scan();
  Job job1 = new Job(hConf);
  job1.setJobName("BSBM-Q8-RepartitionJoin");
  job1.setJarByClass(RepartitionJoinQ8.class);
  // Change caching and number of time stamps to speed up the scan
  scan1.setCaching(500);
  scan1.setMaxVersions(200);
  scan1.setCacheBlocks(false);

  // Mapper settings
  TableMapReduceUtil.initTableMapperJob(
      args[0], // input HBase table name
      scan1, // Scan instance to control CF and attribute selection
      RepartitionMapper.class, // mapper class
      CompositeKeyWritable.class, // mapper output key
      KeyValueArrayWritable.class, // mapper output value
      job1);

  // Reducer settings
  job1.setReducerClass(RepartitionReducer.class);
  job1.setOutputFormatClass(TextOutputFormat.class);
  // job1.setNumReduceTasks(1); // Uncomment this if running into problems on 2+ node cluster
  FileOutputFormat.setOutputPath(job1, new Path("output/BSBMQ8"));

  try {
    job1.waitForCompletion(true);
  } catch (ClassNotFoundException e) {
    e.printStackTrace();
  } catch (InterruptedException e) {
    e.printStackTrace();
  }
  return job1;
}
public static void readHtable() throws Exception {
  Job job = new Job(conf, "ExampleRead");
  job.setJarByClass(HbaseMR.class);

  Scan scan = new Scan();
  scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
  scan.setCacheBlocks(false); // don't set to true for MR jobs

  TableMapReduceUtil.initTableMapperJob(
      "", // input HBase table name
      scan, // Scan instance to control CF and attribute selection
      MyMapper.class, // mapper class
      null, // mapper output key
      null, // mapper output value
      job);
  job.setOutputFormatClass(NullOutputFormat.class); // because we aren't emitting anything from mapper

  boolean b = job.waitForCompletion(true);
  if (!b) {
    throw new IOException("error with job!");
  }
}
@Test
public void shouldRunMapReduce() throws Exception {
  // given
  Configuration configuration = HBaseConfiguration.create();
  TableFactory.recreateTable(
      configuration, Bytes.toString(UsersDao.TABLE_NAME), Bytes.toString(UsersDao.FAMILY_NAME));
  UserDataFactory.insertTestData();

  // map reduce
  Job job = new Job(configuration, "Count Users");
  job.setJarByClass(CountUsersMapper.class);

  Scan scan = new Scan();
  scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
  scan.setCacheBlocks(false); // don't set to true for MR jobs
  scan.addColumn(UsersDao.FAMILY_NAME, UsersDao.FORENAME_COL);

  // mapper
  TableMapReduceUtil.initTableMapperJob(
      Bytes.toString(UsersDao.TABLE_NAME),
      scan,
      CountUsersMapper.class,
      ImmutableBytesWritable.class,
      Result.class,
      job);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setNumReduceTasks(0);

  // when
  boolean succeeded = job.waitForCompletion(true);

  // then
  assertThat(succeeded).isTrue();
  assertThat(job.getCounters().findCounter(CountUsersMapper.Counters.USER_COUNT).getValue())
      .isGreaterThan(99);
}
public static void main(String[] args)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration hbaseConf = HBaseConfiguration.create();
  // ZooKeeper quorum; replace with your own servers' addresses.
  hbaseConf.set("hbase.zookeeper.quorum", "DamHadoop1,DamHadoop2,DamHadoop3");
  // Job jobConf = new Job(hbaseConf, "FreqCounter");
  Job jobConf = Job.getInstance(hbaseConf, "FreqCounter");
  jobConf.setJobName("Hbase_FreqCounter");
  jobConf.setJarByClass(FreqCounter.class);

  Scan scan = new Scan();
  String columns = "details";
  scan.addFamily(Bytes.toBytes(columns));
  scan.setFilter(new FirstKeyOnlyFilter());

  TableMapReduceUtil.initTableMapperJob(
      "import_tests", scan, CountMapper.class, ImmutableBytesWritable.class, IntWritable.class,
      jobConf);
  TableMapReduceUtil.initTableReducerJob("summary_user", CountReducer.class, jobConf);
  System.exit(jobConf.waitForCompletion(true) ? 0 : 1);
}
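CountMapper and CountReducer are referenced above but not shown. Under the usual frequency-counter pattern, the mapper emits a constant 1 per scanned row and the reducer sums into the summary table; a hedged sketch of the mapper side only (how the real CountMapper derives its output key from the row key is not in the source, so this simply reuses the row):

import java.io.IOException;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.io.IntWritable;

// Sketch: emits (row key, 1) for every row; the FirstKeyOnlyFilter on the scan
// means each row ships a single cell, which is all a counting job needs.
public static class CountMapper extends TableMapper<ImmutableBytesWritable, IntWritable> {
  private static final IntWritable ONE = new IntWritable(1);

  @Override
  public void map(ImmutableBytesWritable row, Result values, Context context)
      throws IOException, InterruptedException {
    context.write(row, ONE);
  }
}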
public static void main(String[] args)
    throws IOException, InterruptedException, ClassNotFoundException {
  if (args.length == 0) {
    System.out.println(
        "ExportHBaseTableToDelimiteredSeq {tableName} {ColumnFamily} {outputPath} "
            + "{compressionCodec} {schemaLocationOnLocal} {delimiter} {rowKeyColumn.optional}");
    return;
  }
  String table = args[0];
  String columnFamily = args[1];
  String outputPath = args[2];
  String compressionCodec = args[3];
  String schemaFilePath = args[4];
  String delimiter = args[5];
  String rowKeyColumn = "";
  if (args.length > 6) {
    rowKeyColumn = args[6];
  }

  Job job = Job.getInstance();
  job.getConfiguration().set(ROW_KEY_COLUMN_CONF, rowKeyColumn);
  HBaseConfiguration.addHbaseResources(job.getConfiguration());
  job.getConfiguration().set(SCHEMA_FILE_LOCATION_CONF, schemaFilePath);
  job.getConfiguration().set(OUTPUT_PATH_CONF, outputPath);
  job.getConfiguration().set(DELIMITER_CONF, delimiter);
  job.setJarByClass(ExportHBaseTableToDelimiteredSeq.class);
  job.setJobName("ExportHBaseTableToDelimiteredSeq");

  Scan scan = new Scan();
  scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
  scan.setCacheBlocks(false); // don't set to true for MR jobs
  scan.addFamily(Bytes.toBytes(columnFamily));

  TableMapReduceUtil.initTableMapperJob(
      table, // input HBase table name
      scan, // Scan instance to control CF and attribute selection
      MyMapper.class, // mapper class
      null, // mapper output key
      null, // mapper output value
      job);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  SequenceFileOutputFormat.setOutputPath(job, new Path(outputPath));
  if (compressionCodec.equals("snappy")) {
    SequenceFileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
  } else if (compressionCodec.equals("gzip")) {
    SequenceFileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  } // else: leave the output uncompressed

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(NullWritable.class);
  job.setNumReduceTasks(0);

  boolean b = job.waitForCompletion(true);
}
public boolean doTest()
    throws InstantiationException, IllegalAccessException, ClassNotFoundException, SQLException,
        IOException, InterruptedException {
  /* create config */
  conf_h = HBaseConfiguration.create();
  conf_h.set("hbase.zookeeper.quorum", "localhost");
  conf_h.set("hbase.zookeeper.property.clientPort", "2181");
  Connection con_h = null;
  try {
    con_h = ConnectionFactory.createConnection(conf_h);
  } catch (IOException e) {
    e.printStackTrace();
  }
  Admin admin = con_h.getAdmin();
  HTableDescriptor tableDesc = new HTableDescriptor(tableName_chi);
  HColumnDescriptor colFamDesc = new HColumnDescriptor("count");
  colFamDesc.setMaxVersions(1);
  tableDesc.addFamily(colFamDesc);
  admin.createTable(tableDesc);

  /* count and insert into chiTable */
  Scan scan = new Scan();
  scan.addColumn(Bytes.toBytes("products"), Bytes.toBytes("product_category_id"));
  scan.addColumn(Bytes.toBytes("orders"), Bytes.toBytes("order_date"));
  Job job = Job.getInstance(conf_h, "Count"); // creates a new Job with no particular cluster
  job.setJarByClass(ChiSquaredTest2_abc.class);
  TableMapReduceUtil.initTableMapperJob(
      "retail_order", scan, Map1.class, Text.class, IntWritable.class, job);
  TableMapReduceUtil.initTableReducerJob("chiTable", Reduce1.class, job);
  job.waitForCompletion(true); // submit the job to the cluster and wait for it to finish

  /* extract values from chiTable */
  int totalY = 0;
  int totalN = 0;
  ArrayList<CellOfHTable> chiTable = new ArrayList<CellOfHTable>();
  Table table_h = con_h.getTable(tableName_chi);
  Scan s = new Scan();
  s.addFamily(Bytes.toBytes("count"));
  ResultScanner results = table_h.getScanner(s);
  for (Result r : results) {
    // (id, count_Y, count_N); missing cells count as zero
    CellOfHTable c = new CellOfHTable(
        r.getRow(),
        r.getValue(Bytes.toBytes("count"), Bytes.toBytes("Y")) == null
            ? Bytes.toBytes(0)
            : r.getValue(Bytes.toBytes("count"), Bytes.toBytes("Y")),
        r.getValue(Bytes.toBytes("count"), Bytes.toBytes("N")) == null
            ? Bytes.toBytes(0)
            : r.getValue(Bytes.toBytes("count"), Bytes.toBytes("N")));
    chiTable.add(c);
    totalY = totalY + c.countY;
    totalN = totalN + c.countN;
  }
  results.close();
  table_h.close();
  admin.disableTable(tableName_chi);
  admin.deleteTable(tableName_chi);

  /* compute the chi-squared statistic */
  double chisquare = 0.0;
  for (int i = 0; i < chiTable.size(); i++) {
    CellOfHTable c = chiTable.get(i);
    double expectY =
        (double) (c.countY + c.countN) * (double) totalY / (double) (totalY + totalN);
    chisquare += ((double) c.countY - expectY) * ((double) c.countY - expectY) / expectY;
    double expectN =
        (double) (c.countY + c.countN) * (double) totalN / (double) (totalY + totalN);
    chisquare += ((double) c.countN - expectN) * ((double) c.countN - expectN) / expectN;
  }

  System.out.println(chisquare);
  ChiSquareDist csd = new ChiSquareDist(chiTable.size() - 1);
  return chisquare > csd.inverseF(1.0 - alpha);
}
public static void main(String args[]) throws Exception {
  try {
    Configuration config = HBaseConfiguration.create();
    config.set("mapred.output.dir", "/mnt/data/workspace/weatheranalysis/mapred/monthly");
    Job job = new Job(config, "MonthlySummary");
    String sourceTable = DailyWeatherHbaseOperations.TABLE_NAME;

    Scan scan = new Scan();
    scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
    scan.setCacheBlocks(false); // don't set to true for MR jobs
    // set other scan attrs

    TableMapReduceUtil.initTableMapperJob(
        sourceTable, // input table
        scan, // Scan instance to control CF and attribute selection
        MonthlyWeatherMapper.class, // mapper class
        Text.class, // mapper output key
        Text.class, // mapper output value
        job);
    job.setReducerClass(MonthlyWeatherReducer.class); // reducer class
    job.setNumReduceTasks(1); // at least one, adjust as required

    Path out = new Path("/mnt/data/workspace/weatheranalysis/mapred/monthly");
    File outDir = new File(out.toString());
    FileUtil.fullyDelete(outDir);
    FileOutputFormat.setOutputPath(job, out);

    MonthlyWeatherHbaseOperations.useTable();

    // The required total precipitation for a particular station for a year
    /*
    List<Float> ForAllMonthsPpt = new ArrayList<Float>();
    List<MonthlyWeather> lr = MonthlyWeatherHbaseOperations.get(425010, 2013);
    while (lr.isEmpty()) {
      ForAllMonthsPpt.add(lr.get(4));
    }
    */

    // The original called waitForCompletion twice; a single call is enough.
    boolean b = job.waitForCompletion(true);
    if (!b) {
      throw new IOException("error with job!");
    }
    System.out.println("Job Completed.");
  } catch (IOException e) {
    e.printStackTrace();
  } catch (ClassNotFoundException e) {
    e.printStackTrace();
  } catch (InterruptedException e) {
    e.printStackTrace();
  }
}