@Override
public void searchDB(String keyword) {
  long t0 = System.nanoTime();
  try {
    // First MapReduce phase setup
    Configuration conf = config;
    Job job = new Job(conf, "MapReducePhase1");
    job.setJarByClass(MapReduceHbaseDB.class);
    Scan scan = new Scan();
    scan.addFamily(Bytes.toBytes("myColumnFamily"));
    scan.setCaching(10000);

    // Second MapReduce phase setup
    Configuration conf2 = HBaseConfiguration.create();
    Job job2 = new Job(conf2, "MapReducePhase2");
    job2.setJarByClass(MapReduceHbaseDB.class);
    Scan scan2 = new Scan();
    scan2.addFamily(Bytes.toBytes("resultF"));
    scan2.setCaching(10000);

    // Execution of the first MapReduce phase
    TableMapReduceUtil.initTableMapperJob(
        "myTable", scan, Mapper1.class, Text.class, Text.class, job);
    TableMapReduceUtil.initTableReducerJob("result", Reducer1.class, job);
    job.waitForCompletion(true);
    long t1 = System.nanoTime();

    // Execution of the second MapReduce phase
    TableMapReduceUtil.initTableMapperJob(
        "result", scan2, Mapper2.class, Text.class, IntWritable.class, job2);
    TableMapReduceUtil.initTableReducerJob("result2", Reducer2.class, job2);
    job2.waitForCompletion(true);
    long t2 = System.nanoTime();

    double totalTime = (t2 - t0) / 1000000000.0;
    System.out.println("Total time for the search : " + totalTime + " seconds");
    double firstPhaseTime = (t1 - t0) / 1000000000.0;
    System.out.println("Time for the first mapreduce phase : " + firstPhaseTime + " seconds");
    double secondPhaseTime = (t2 - t1) / 1000000000.0;
    System.out.println("Time for the second mapreduce phase : " + secondPhaseTime + " seconds");
  } catch (IOException | InterruptedException | ClassNotFoundException e) {
    e.printStackTrace();
  }
}
/** Job configuration. */
public static Job configureJob(Configuration conf, String[] args) throws IOException {
  Scan scan = new Scan();
  scan.addFamily(Cw09Constants.CF_FREQUENCIES_BYTES);
  scan.setBatch(Cw09Constants.CW09_INDEX_SCAN_BATCH);
  conf.set("mapred.map.tasks.speculative.execution", "false");
  conf.set("mapred.reduce.tasks.speculative.execution", "false");
  Job job = Job.getInstance(conf, "Count the total frequency of each term in the index table");
  job.setJarByClass(TermHitsCounter.class);
  // TableMapReduceUtil.initTableMapperJob(Constants.CLUEWEB09_INDEX_TABLE_NAME, scan,
  //     ThcMapper.class, Text.class, LongWritable.class, job);
  TableMapReduceUtil.initTableMapperJob(
      Cw09Constants.CLUEWEB09_INDEX_TABLE_NAME, scan,
      ThcMapper.class, Text.class, LongWritable.class, job,
      true, CustomizedSplitTableInputFormat.class);
  job.setCombinerClass(ThcCombiner.class);
  TableMapReduceUtil.initTableReducerJob(
      Cw09Constants.CLUEWEB09_TERM_COUNT_TABLE_NAME, ThcReducer.class, job);
  job.setNumReduceTasks(40);
  return job;
}
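// configureJob only builds the job, so a driver still has to submit it. A
// minimal sketch of such a driver, assuming it lives on TermHitsCounter; the
// main method itself is an assumption, everything else is the standard API.
public static void main(String[] args) throws Exception {
  Configuration conf = HBaseConfiguration.create();
  Job job = configureJob(conf, args);
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}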
@Test
public void shouldJoinTables() throws Exception {
  // given
  Job job = new Job(configuration, "Joins");
  job.setJarByClass(AverageRatingMapper.class);

  List<Scan> scans = new ArrayList<>();
  Scan scan1 = new Scan();
  scan1.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(LoadMovieData.TABLE_NAME));
  scans.add(scan1);
  Scan scan2 = new Scan();
  scan2.setAttribute(
      Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(LoadMovieRatingData.TABLE_NAME));
  scans.add(scan2);

  TableMapReduceUtil.initTableMapperJob(scans, FilterMapper.class, null, null, job);
  // FileOutputFormat.setOutputPath(job, new Path("/tmp/sages/movies_with_ratings_" +
  //     System.currentTimeMillis()));
  TableMapReduceUtil.initTableReducerJob(TABLE_NAME, null, job);
  job.setNumReduceTasks(0);

  // when
  boolean succeeded = job.waitForCompletion(true);

  // then
  assertThat(succeeded).isTrue();
}
public static void summaryHtable() throws Exception {
  Job job = new Job(conf, "ExampleSummary");
  job.setJarByClass(HbaseMR.class); // class that contains the mapper

  Scan scan = new Scan();
  scan.setCaching(500); // the default of 1 in Scan is bad for MapReduce jobs
  scan.setCacheBlocks(false); // don't set to true for MR jobs
  // set other scan attrs

  TableMapReduceUtil.initTableMapperJob(
      "sourceTable", // input table
      scan, // Scan instance to control CF and attribute selection
      MyMapper3.class, // mapper class
      Text.class, // mapper output key
      IntWritable.class, // mapper output value
      job);
  TableMapReduceUtil.initTableReducerJob(
      "targetTable", // output table
      MyTableReducer3.class, // reducer class
      job);
  job.setNumReduceTasks(1); // at least one, adjust as required

  boolean b = job.waitForCompletion(true);
  if (!b) {
    throw new IOException("error with job!");
  }
}
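// MyMapper3 and MyTableReducer3 are not shown in the source; a minimal sketch
// of the usual shape of this summary pair. The "cf"/"attr1"/"count" column
// names are assumptions for illustration, not taken from the source.
public static class MyMapper3 extends TableMapper<Text, IntWritable> {
  private static final IntWritable ONE = new IntWritable(1);
  private final Text text = new Text();

  @Override
  protected void map(ImmutableBytesWritable row, Result value, Context context)
      throws IOException, InterruptedException {
    // Group rows by the value of a hypothetical cf:attr1 column.
    text.set(Bytes.toString(value.getValue(Bytes.toBytes("cf"), Bytes.toBytes("attr1"))));
    context.write(text, ONE);
  }
}

public static class MyTableReducer3
    extends TableReducer<Text, IntWritable, ImmutableBytesWritable> {
  @Override
  protected void reduce(Text key, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {
    int sum = 0;
    for (IntWritable val : values) {
      sum += val.get();
    }
    // One summary row per key, written to "targetTable" by TableOutputFormat.
    Put put = new Put(Bytes.toBytes(key.toString()));
    put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("count"), Bytes.toBytes(sum));
    context.write(null, put);
  }
}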
public static void main(String[] args) throws Exception {
  Configuration conf = HBaseConfiguration.create();
  Job job = new Job(conf, SensorMR.class.getName() + "--<your name>"); // TODO
  job.setJarByClass(SensorMR.class);
  Scan scan = new Scan();
  scan.setFilter(new FirstKeyOnlyFilter());
  TableMapReduceUtil.initTableMapperJob(
      tableRawData, scan, Mapper1.class, ImmutableBytesWritable.class, FloatWritable.class, job);
  TableMapReduceUtil.initTableReducerJob(tableSummaryData, Reducer1.class, job);
  TableMapReduceUtil.addDependencyJars(job);
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
/**
 * @param args the command line arguments
 * @throws Exception if the job setup or execution fails
 */
public static void main(String[] args) throws Exception {
  Conf conf = new Conf();
  Job job = new Job(conf, "TweetsLanguage");
  Scan scan = new Scan();
  TableMapReduceUtil.initTableMapperJob(
      "hhscyber:tweets", scan, LanguageMapper.class, null, null, job);
  job.setNumReduceTasks(0);
  TableMapReduceUtil.initTableReducerJob("hhscyber:tweets_lang", null, job);
  job.waitForCompletion(true);
}
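// With setNumReduceTasks(0) and a null reducer, the mapper's output goes
// straight to TableOutputFormat as mutations. A minimal sketch of what
// LanguageMapper could look like under that contract; the column names and
// the language-detection call are assumptions, not taken from the source.
public static class LanguageMapper extends TableMapper<ImmutableBytesWritable, Put> {
  @Override
  protected void map(ImmutableBytesWritable row, Result value, Context context)
      throws IOException, InterruptedException {
    // Hypothetical location of the tweet text.
    byte[] body = value.getValue(Bytes.toBytes("text"), Bytes.toBytes("body"));
    Put put = new Put(row.get());
    // detectLanguage is a stand-in for a real language detector.
    put.addColumn(Bytes.toBytes("meta"), Bytes.toBytes("lang"), detectLanguage(body));
    context.write(row, put); // applied to hhscyber:tweets_lang
  }

  private byte[] detectLanguage(byte[] body) {
    return Bytes.toBytes("und"); // stand-in: ISO 639 "undetermined"
  }
}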
/** Job configuration. */
public static Job configureJob(Configuration conf, String[] args) throws IOException {
  Path inputPath = new Path(args[0]);
  String tableName = args[1];
  Job job = new Job(conf, NAME + "_" + tableName);
  job.setJarByClass(Uploader.class);
  FileInputFormat.setInputPaths(job, inputPath);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setMapperClass(Uploader.class);
  // No reducers. Just write straight to table. Call initTableReducerJob
  // because it sets up the TableOutputFormat.
  TableMapReduceUtil.initTableReducerJob(tableName, null, job);
  job.setNumReduceTasks(0);
  return job;
}
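// Uploader doubles as the mapper (setMapperClass(Uploader.class)); a minimal
// sketch of the shape it would need, assuming the SequenceFile holds
// LongWritable/Text records with a "rowkey,value" layout. The record layout
// and column names are assumptions, not taken from the source.
public static class Uploader extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
  @Override
  protected void map(LongWritable key, Text line, Context context)
      throws IOException, InterruptedException {
    String[] parts = line.toString().split(",", 2);
    byte[] rowKey = Bytes.toBytes(parts[0]);
    Put put = new Put(rowKey);
    put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("val"), Bytes.toBytes(parts[1]));
    // With zero reducers, TableOutputFormat applies the Put directly.
    context.write(new ImmutableBytesWritable(rowKey), put);
  }
}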
public static Job configureJob(Configuration conf, String[] args)
    throws IOException, ClassNotFoundException, InterruptedException {
  Path inputPath = new Path("hdfs://Shrini:9000/stackexchange/stackexchange/PostQuestions.txt");
  createTable(tableName, colFamilyNames, conf);
  Job job = new Job(conf, "myjob");
  job.setJarByClass(MyHBaseMainClass.class);
  FileInputFormat.setInputPaths(job, inputPath);
  job.setInputFormatClass(TextInputFormat.class);
  job.setMapperClass(MyMapper.class);
  TableMapReduceUtil.initTableReducerJob(tableName, null, job);
  job.setNumReduceTasks(0);
  job.waitForCompletion(true);
  return job;
}
private static void createMapReduceJob(
    String tableNameToIndex, Configuration conf, int caching, int versions)
    throws IOException, InterruptedException, ClassNotFoundException {
  // Set the details to TableInputFormat
  Scan s = new Scan();
  s.setCaching(caching);
  s.setMaxVersions(versions);
  conf.set(TableInputFormat.INPUT_TABLE, tableNameToIndex);
  Set<Entry<String, List<String>>> entrySet = cfs.entrySet();
  for (Entry<String, List<String>> entry : entrySet) {
    List<String> quals = entry.getValue();
    addColumn(quals, Bytes.toBytes(entry.getKey()), s);
  }
  Job job = new Job(conf, "CreateIndex");
  String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
  TableMapReduceUtil.initTableMapperJob(
      tableNameToIndex, // input table
      s, // Scan instance to control CF and attribute selection
      IndexCreationMapper.class, // mapper class
      ImmutableBytesWritable.class, // mapper output key
      Put.class, // mapper output value
      job);
  TableMapReduceUtil.initTableReducerJob(
      IndexUtils.getIndexTableName(tableNameToIndex), // output table
      null, // reducer class
      job);
  if (hfileOutPath != null) {
    HTable table = new HTable(conf, tableNameToIndex);
    job.setReducerClass(KeyValueSortReducer.class);
    Path outputDir = new Path(hfileOutPath);
    FileOutputFormat.setOutputPath(job, outputDir);
    HFileOutputFormat.configureIncrementalLoad(job, table);
  } else {
    job.setNumReduceTasks(0);
  }
  TableMapReduceUtil.addDependencyJars(
      job.getConfiguration(), com.google.common.base.Preconditions.class);
  job.waitForCompletion(true);
  assert job.isComplete();
}
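// On the bulk-load path (hfileOutPath != null) the job only writes HFiles;
// they still have to be handed to the region servers afterwards. A minimal
// sketch using the same old-style HTable API as above; running this after
// waitForCompletion is an assumption about the surrounding driver.
LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
HTable indexTable = new HTable(conf, IndexUtils.getIndexTableName(tableNameToIndex));
loader.doBulkLoad(new Path(hfileOutPath), indexTable);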
public static void main(String[] args)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration hbaseConf = HBaseConfiguration.create();
  // ZooKeeper quorum addresses; replace with your own servers
  hbaseConf.set("hbase.zookeeper.quorum", "DamHadoop1,DamHadoop2,DamHadoop3");
  // Job jobConf = new Job(hbaseConf, "FreqCounter");
  Job jobConf = Job.getInstance(hbaseConf, "FreqCounter");
  jobConf.setJobName("Hbase_FreqCounter");
  jobConf.setJarByClass(FreqCounter.class);
  Scan scan = new Scan();
  String family = "details";
  scan.addFamily(Bytes.toBytes(family));
  scan.setFilter(new FirstKeyOnlyFilter());
  TableMapReduceUtil.initTableMapperJob(
      "import_tests", scan, CountMapper.class, ImmutableBytesWritable.class, IntWritable.class,
      jobConf);
  TableMapReduceUtil.initTableReducerJob("summary_user", CountReducer.class, jobConf);
  System.exit(jobConf.waitForCompletion(true) ? 0 : 1);
}
public boolean doTest()
    throws InstantiationException, IllegalAccessException, ClassNotFoundException, SQLException,
        IOException, InterruptedException {
  // Create the HBase configuration and connection
  conf_h = HBaseConfiguration.create();
  conf_h.set("hbase.zookeeper.quorum", "localhost");
  conf_h.set("hbase.zookeeper.property.clientPort", "2181");
  Connection con_h = null;
  try {
    con_h = ConnectionFactory.createConnection(conf_h);
  } catch (IOException e) {
    e.printStackTrace();
  }
  Admin admin = con_h.getAdmin();
  HTableDescriptor tableDesc = new HTableDescriptor(tableName_chi);
  HColumnDescriptor colFamDesc = new HColumnDescriptor("count");
  colFamDesc.setMaxVersions(1);
  tableDesc.addFamily(colFamDesc);
  admin.createTable(tableDesc);

  // Count co-occurrences and insert them into chiTable
  Scan scan = new Scan();
  scan.addColumn(Bytes.toBytes("products"), Bytes.toBytes("product_category_id"));
  scan.addColumn(Bytes.toBytes("orders"), Bytes.toBytes("order_date"));
  // Create a new Job (no particular cluster) and set the jar by class
  Job job = Job.getInstance(conf_h, "Count");
  job.setJarByClass(ChiSquaredTest2_abc.class);
  TableMapReduceUtil.initTableMapperJob(
      "retail_order", scan, Map1.class, Text.class, IntWritable.class, job);
  TableMapReduceUtil.initTableReducerJob("chiTable", Reduce1.class, job);
  job.waitForCompletion(true); // submit the job and wait for it to finish

  // Extract the counts from chiTable
  int totalY = 0;
  int totalN = 0;
  ArrayList<CellOfHTable> chiTable = new ArrayList<CellOfHTable>();
  Table table_h = con_h.getTable(tableName_chi);
  Scan s = new Scan();
  s.addFamily(Bytes.toBytes("count"));
  ResultScanner results = table_h.getScanner(s);
  for (Result r : results) {
    byte[] countY = r.getValue(Bytes.toBytes("count"), Bytes.toBytes("Y"));
    byte[] countN = r.getValue(Bytes.toBytes("count"), Bytes.toBytes("N"));
    // (id, count_Y, count_N); missing cells count as zero
    CellOfHTable c = new CellOfHTable(
        r.getRow(),
        countY == null ? Bytes.toBytes(0) : countY,
        countN == null ? Bytes.toBytes(0) : countN);
    chiTable.add(c);
    totalY += c.countY;
    totalN += c.countN;
  }
  results.close();
  table_h.close();
  admin.disableTable(tableName_chi);
  admin.deleteTable(tableName_chi);

  // Compute the chi-squared statistic over observed vs. expected counts
  double chisquare = 0.0;
  for (CellOfHTable c : chiTable) {
    double expectY = (double) (c.countY + c.countN) * totalY / (totalY + totalN);
    chisquare += (c.countY - expectY) * (c.countY - expectY) / expectY;
    double expectN = (double) (c.countY + c.countN) * totalN / (totalY + totalN);
    chisquare += (c.countN - expectN) * (c.countN - expectN) / expectN;
  }
  System.out.println(chisquare);

  // Reject the null hypothesis if the statistic exceeds the critical value
  ChiSquareDist csd = new ChiSquareDist(chiTable.size() - 1);
  return chisquare > csd.inverseF(1.0 - alpha);
}
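// For reference, the loop above computes Pearson's statistic
// chi^2 = sum_i [ (O_iY - E_iY)^2 / E_iY + (O_iN - E_iN)^2 / E_iN ],
// where E_iY = (O_iY + O_iN) * totalY / (totalY + totalN) and E_iN is the
// analogous N-column expectation; the test then compares chi^2 against the
// (1 - alpha) quantile of a chi-squared distribution with (#categories - 1)
// degrees of freedom, which is what csd.inverseF(1.0 - alpha) returns.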