コード例 #1
0
  /**
   * Runs two chained HBase MapReduce jobs and prints how long they took.
   *
   * <p>Phase 1 scans column family {@code myColumnFamily} of table {@code myTable} with
   * {@code Mapper1}/{@code Reducer1} and writes to table {@code result}. Phase 2 scans column
   * family {@code resultF} of table {@code result} with {@code Mapper2}/{@code Reducer2} and
   * writes to table {@code result2}. Phase 2 is skipped when phase 1 reports failure, because
   * its input is the output of phase 1.
   *
   * <p>Checked exceptions raised while running the jobs are reported on standard error and not
   * propagated to the caller.
   *
   * @param keyword the search keyword; this implementation does not read it
   */
  @Override
  public void searchDB(String keyword) {
    final double nanosPerSecond = 1000000000.0;
    long t0 = System.nanoTime();

    try {
      // First mapreduce phase setup
      HBaseConfiguration conf = config;
      Job job;
      job = new Job(conf, "MapReducePhase1");
      job.setJarByClass(MapReduceHbaseDB.class);
      Scan scan = new Scan();
      String columns = "myColumnFamily";
      scan.addColumns(columns);
      scan.setCaching(10000);

      // Second mapreduce phase setup
      HBaseConfiguration conf2 = new HBaseConfiguration();
      Job job2 = new Job(conf2, "MapReducePhase2");
      job2.setJarByClass(MapReduceHbaseDB.class);
      Scan scan2 = new Scan();
      String columns2 = "resultF";
      scan2.addColumns(columns2);
      scan2.setCaching(10000);

      // Execution of the first mapreduce phase
      TableMapReduceUtil.initTableMapperJob(
          "myTable", scan, Mapper1.class, Text.class, Text.class, job);
      TableMapReduceUtil.initTableReducerJob("result", Reducer1.class, job);

      // waitForCompletion returns false when the job fails; running phase 2 on a failed
      // phase 1 would process an incomplete "result" table.
      if (!job.waitForCompletion(true)) {
        System.err.println("First mapreduce phase failed; second phase was not started");
        return;
      }

      long t2 = System.nanoTime();

      // Execution of the second mapreduce phase
      TableMapReduceUtil.initTableMapperJob(
          "result", scan2, Mapper2.class, Text.class, IntWritable.class, job2);
      TableMapReduceUtil.initTableReducerJob("result2", Reducer2.class, job2);

      if (!job2.waitForCompletion(true)) {
        System.err.println("Second mapreduce phase failed");
      }

      long t1 = System.nanoTime();
      double totalTime = (t1 - t0) / nanosPerSecond;
      System.out.println("Total time for the search : " + totalTime + " seconds");

      // Measured from method entry, so this figure also includes the setup of both jobs.
      double firstPhaseTime = (t2 - t0) / nanosPerSecond;
      System.out.println("Time for the first mapreduce phase : " + firstPhaseTime + " seconds");

      double secondPhaseTime = (t1 - t2) / nanosPerSecond;
      System.out.println("Time for the second mapreduce phase : " + secondPhaseTime + " seconds");

    } catch (IOException e) {
      e.printStackTrace();
    } catch (InterruptedException e) {
      // Restore the interrupt flag so callers can still observe the interruption.
      Thread.currentThread().interrupt();
      e.printStackTrace();
    } catch (ClassNotFoundException e) {
      e.printStackTrace();
    }
  }
コード例 #2
0
 /**
  * Runs a single-reducer map/reduce job over {@code table} and verifies the outcome.
  *
  * <p>The job scans {@code INPUT_FAMILY}, maps with {@code MultithreadedTableMapper} delegating
  * to {@code ProcessContentsMapper} on {@code NUMBER_OF_THREADS} threads, and reduces back into
  * the same table through {@code IdentityTableReducer}. The job must complete successfully,
  * after which {@code verify} is called with the table name.
  *
  * <p>The table is always closed on exit; if the job object was created, the directory named
  * by its {@code hadoop.tmp.dir} setting is deleted as well.
  *
  * @param table the table used as both job input and job output; closed by this method
  * @throws IOException if thrown by job setup, job execution, verification or cleanup
  * @throws InterruptedException if thrown while waiting for the job to complete
  * @throws ClassNotFoundException if thrown while waiting for the job to complete
  */
 private void runTestOnTable(Table table)
     throws IOException, InterruptedException, ClassNotFoundException {
   Job job = null;
   try {
     LOG.info("Before map/reduce startup");

     // Job with a single reducer.
     job = new Job(table.getConfiguration(), "process column contents");
     job.setNumReduceTasks(1);

     // Map side: scan only the input family, with the multithreaded mapper wrapping the
     // real mapper class.
     Scan inputScan = new Scan();
     inputScan.addFamily(INPUT_FAMILY);
     TableMapReduceUtil.initTableMapperJob(
         table.getName(),
         inputScan,
         MultithreadedTableMapper.class,
         ImmutableBytesWritable.class,
         Put.class,
         job);
     MultithreadedTableMapper.setMapperClass(job, ProcessContentsMapper.class);
     MultithreadedTableMapper.setNumberOfThreads(job, NUMBER_OF_THREADS);

     // Reduce side: write back into the same table.
     String outputTableName = table.getName().getNameAsString();
     TableMapReduceUtil.initTableReducerJob(outputTableName, IdentityTableReducer.class, job);
     Path outputPath = new Path("test");
     FileOutputFormat.setOutputPath(job, outputPath);

     LOG.info("Started " + table.getName());
     boolean completed = job.waitForCompletion(true);
     assertTrue(completed);
     LOG.info("After map/reduce completion");

     // verify map-reduce results
     verify(table.getName());
   } finally {
     table.close();
     if (job != null) {
       String tmpDirName = job.getConfiguration().get("hadoop.tmp.dir");
       File tmpDir = new File(tmpDirName);
       FileUtil.fullyDelete(tmpDir);
     }
   }
 }