@Override
public void searchDB(String keyword) {
    long start = System.nanoTime();
    try {
        // First MapReduce phase setup
        Configuration conf = config;
        Job job = new Job(conf, "MapReducePhase1");
        job.setJarByClass(MapReduceHbaseDB.class);
        Scan scan = new Scan();
        // Select every column of the family (addColumns(String) is long gone from the API).
        scan.addFamily(Bytes.toBytes("myColumnFamily"));
        scan.setCaching(10000);

        // Second MapReduce phase setup
        Configuration conf2 = HBaseConfiguration.create();
        Job job2 = new Job(conf2, "MapReducePhase2");
        job2.setJarByClass(MapReduceHbaseDB.class);
        Scan scan2 = new Scan();
        scan2.addFamily(Bytes.toBytes("resultF"));
        scan2.setCaching(10000);

        // Execution of the first MapReduce phase: scan "myTable", write to "result"
        TableMapReduceUtil.initTableMapperJob(
            "myTable", scan, Mapper1.class, Text.class, Text.class, job);
        TableMapReduceUtil.initTableReducerJob("result", Reducer1.class, job);
        job.waitForCompletion(true);
        long firstPhaseEnd = System.nanoTime();

        // Execution of the second MapReduce phase: scan "result", write to "result2"
        TableMapReduceUtil.initTableMapperJob(
            "result", scan2, Mapper2.class, Text.class, IntWritable.class, job2);
        TableMapReduceUtil.initTableReducerJob("result2", Reducer2.class, job2);
        job2.waitForCompletion(true);
        long end = System.nanoTime();

        double totalTime = (end - start) / 1_000_000_000.0;
        System.out.println("Total time for the search: " + totalTime + " seconds");
        double firstPhaseTime = (firstPhaseEnd - start) / 1_000_000_000.0;
        System.out.println("Time for the first mapreduce phase: " + firstPhaseTime + " seconds");
        double secondPhaseTime = (end - firstPhaseEnd) / 1_000_000_000.0;
        System.out.println("Time for the second mapreduce phase: " + secondPhaseTime + " seconds");
    } catch (IOException | InterruptedException | ClassNotFoundException e) {
        e.printStackTrace();
    }
}
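// Mapper1 and Reducer1 are referenced above but not shown in this snippet.
// A minimal sketch of what the first phase could look like, assuming the
// search keyword is passed through the job configuration under a hypothetical
// "search.keyword" property (e.g. conf.set("search.keyword", keyword) before
// submission) and matches land in the "result" table under the "resultF"
// family that the second-phase scan reads:
public static class Mapper1 extends TableMapper<Text, Text> {
    private String keyword;

    @Override
    protected void setup(Context context) {
        keyword = context.getConfiguration().get("search.keyword", "");
    }

    @Override
    protected void map(ImmutableBytesWritable row, Result value, Context context)
            throws IOException, InterruptedException {
        // Emit (matched value, source row key) for every cell containing the keyword.
        for (Cell cell : value.listCells()) {
            String cellValue = Bytes.toString(CellUtil.cloneValue(cell));
            if (cellValue.contains(keyword)) {
                context.write(new Text(cellValue), new Text(Bytes.toString(row.copyBytes())));
            }
        }
    }
}

public static class Reducer1 extends TableReducer<Text, Text, ImmutableBytesWritable> {
    @Override
    protected void reduce(Text key, Iterable<Text> rowKeys, Context context)
            throws IOException, InterruptedException {
        // One row per matched value; one column per source row that matched.
        Put put = new Put(Bytes.toBytes(key.toString()));
        for (Text rowKey : rowKeys) {
            put.addColumn(Bytes.toBytes("resultF"), Bytes.toBytes(rowKey.toString()),
                Bytes.toBytes(rowKey.toString()));
        }
        // TableOutputFormat ignores the key, so null is fine here.
        context.write(null, put);
    }
}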
private void runTestOnTable(Table table)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = null;
    try {
        LOG.info("Before map/reduce startup");
        job = new Job(table.getConfiguration(), "process column contents");
        job.setNumReduceTasks(1);
        Scan scan = new Scan();
        scan.addFamily(INPUT_FAMILY);
        // Wrap the real mapper in MultithreadedTableMapper so several
        // ProcessContentsMapper instances run concurrently per map task.
        TableMapReduceUtil.initTableMapperJob(
            table.getName(), scan,
            MultithreadedTableMapper.class, ImmutableBytesWritable.class,
            Put.class, job);
        MultithreadedTableMapper.setMapperClass(job, ProcessContentsMapper.class);
        MultithreadedTableMapper.setNumberOfThreads(job, NUMBER_OF_THREADS);
        TableMapReduceUtil.initTableReducerJob(
            table.getName().getNameAsString(),
            IdentityTableReducer.class, job);
        FileOutputFormat.setOutputPath(job, new Path("test"));
        LOG.info("Started " + table.getName());
        assertTrue(job.waitForCompletion(true));
        LOG.info("After map/reduce completion");

        // verify map-reduce results
        verify(table.getName());
    } finally {
        table.close();
        if (job != null) {
            FileUtil.fullyDelete(
                new File(job.getConfiguration().get("hadoop.tmp.dir")));
        }
    }
}
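// ProcessContentsMapper is referenced above but not shown. A minimal sketch of
// a mapper that works under MultithreadedTableMapper, assuming it reads the
// cells of INPUT_FAMILY, reverses each value, and emits a Put for the same row
// under a hypothetical OUTPUT_FAMILY constant defined alongside INPUT_FAMILY.
// Because MultithreadedTableMapper runs several instances of this class
// concurrently, it must not share mutable state across threads.
public static class ProcessContentsMapper
        extends TableMapper<ImmutableBytesWritable, Put> {
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context)
            throws IOException, InterruptedException {
        // Skip rows with no cells; an empty Put would be rejected downstream.
        if (value.isEmpty()) {
            return;
        }
        Put put = new Put(key.copyBytes());
        for (Cell cell : value.rawCells()) {
            // Reverse each cell value and write it back under the same qualifier.
            String reversed = new StringBuilder(
                Bytes.toString(CellUtil.cloneValue(cell))).reverse().toString();
            put.addColumn(OUTPUT_FAMILY, CellUtil.cloneQualifier(cell),
                Bytes.toBytes(reversed));
        }
        context.write(key, put);
    }
}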