Code Example #1
 private void runTestOnTable(Table table)
     throws IOException, InterruptedException, ClassNotFoundException {
   Job job = null;
   try {
     LOG.info("Before map/reduce startup");
     job = new Job(table.getConfiguration(), "process column contents");
     job.setNumReduceTasks(1);
     Scan scan = new Scan();
     scan.addFamily(INPUT_FAMILY);
     TableMapReduceUtil.initTableMapperJob(
         table.getName(),
         scan,
         MultithreadedTableMapper.class,
         ImmutableBytesWritable.class,
         Put.class,
         job);
     MultithreadedTableMapper.setMapperClass(job, ProcessContentsMapper.class);
     MultithreadedTableMapper.setNumberOfThreads(job, NUMBER_OF_THREADS);
     TableMapReduceUtil.initTableReducerJob(
         table.getName().getNameAsString(), IdentityTableReducer.class, job);
     FileOutputFormat.setOutputPath(job, new Path("test"));
     LOG.info("Started " + table.getName());
     assertTrue(job.waitForCompletion(true));
     LOG.info("After map/reduce completion");
     // verify map-reduce results
     verify(table.getName());
   } finally {
     table.close();
     if (job != null) {
       FileUtil.fullyDelete(new File(job.getConfiguration().get("hadoop.tmp.dir")));
     }
   }
 }
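The ProcessContentsMapper referenced above is not shown in this excerpt. Below is a minimal sketch of a mapper with the matching output types (ImmutableBytesWritable key, Put value); the family and qualifier constants and the copy-through logic are assumptions for illustration, not the test's actual code:

 import java.io.IOException;
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 import org.apache.hadoop.hbase.mapreduce.TableMapper;
 import org.apache.hadoop.hbase.util.Bytes;

 // Sketch only: a TableMapper whose output types match the job configured
 // above. INPUT_FAMILY/OUTPUT_FAMILY/QUALIFIER are assumed names.
 public class ProcessContentsMapper
     extends TableMapper<ImmutableBytesWritable, Put> {

   static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
   static final byte[] OUTPUT_FAMILY = Bytes.toBytes("text");
   static final byte[] QUALIFIER = Bytes.toBytes("data");

   @Override
   protected void map(ImmutableBytesWritable key, Result value, Context context)
       throws IOException, InterruptedException {
     byte[] cellValue = value.getValue(INPUT_FAMILY, QUALIFIER);
     if (cellValue == null) {
       return; // nothing to copy for this row
     }
     // Re-emit the cell as a Put so the IdentityTableReducer writes it back.
     Put put = new Put(key.get());
     put.addColumn(OUTPUT_FAMILY, QUALIFIER, cellValue);
     context.write(key, put);
   }
 }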
Code Example #2
File: HFileOutputFormat2.java Project: mringg/hbase
  static void configureIncrementalLoad(
      Job job,
      HTableDescriptor tableDescriptor,
      RegionLocator regionLocator,
      Class<? extends OutputFormat<?, ?>> cls)
      throws IOException, UnsupportedEncodingException {
    Configuration conf = job.getConfiguration();
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(cls);

    // Based on the configured map output class, set the correct reducer to properly
    // sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    if (KeyValue.class.equals(job.getMapOutputValueClass())) {
      job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
      job.setReducerClass(PutSortReducer.class);
    } else if (Text.class.equals(job.getMapOutputValueClass())) {
      job.setReducerClass(TextSortReducer.class);
    } else {
      LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
    }

    conf.setStrings(
        "io.serializations",
        conf.get("io.serializations"),
        MutationSerialization.class.getName(),
        ResultSerialization.class.getName(),
        KeyValueSerialization.class.getName());

    if (conf.getBoolean(LOCALITY_SENSITIVE_CONF_KEY, DEFAULT_LOCALITY_SENSITIVE)) {
      // Record the table name so the writer can be created on favored nodes.
      LOG.info("bulkload locality sensitive enabled");
      conf.set(OUTPUT_TABLE_NAME_CONF_KEY, regionLocator.getName().getNameAsString());
    }

    // Use table's region boundaries for TOP split points.
    LOG.info("Looking up current regions for table " + regionLocator.getName());
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(regionLocator);
    LOG.info(
        "Configuring "
            + startKeys.size()
            + " reduce partitions "
            + "to match current region count");
    job.setNumReduceTasks(startKeys.size());

    configurePartitioner(job, startKeys);
    // Set compression algorithms based on column families
    configureCompression(conf, tableDescriptor);
    configureBloomType(tableDescriptor, conf);
    configureBlockSize(tableDescriptor, conf);
    configureDataBlockEncoding(tableDescriptor, conf);

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    LOG.info("Incremental table " + regionLocator.getName() + " output configured.");
  }
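This method is package-private; callers normally reach it through the public configureIncrementalLoad overloads. A minimal driver sketch follows; the table name and output path are placeholders, and the mapper/input setup is elided:

 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.Connection;
 import org.apache.hadoop.hbase.client.ConnectionFactory;
 import org.apache.hadoop.hbase.client.RegionLocator;
 import org.apache.hadoop.hbase.client.Table;
 import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

 // Sketch of a bulk-load driver that ends up in a method like the one
 // above via the public configureIncrementalLoad overload. "mytable" and
 // "/tmp/hfiles" are placeholders.
 public class BulkLoadDriver {
   public static void main(String[] args) throws Exception {
     Configuration conf = HBaseConfiguration.create();
     Job job = Job.getInstance(conf, "bulk load example");
     // ... set the job's mapper, map output key/value classes, and input here ...
     boolean success;
     try (Connection conn = ConnectionFactory.createConnection(conf);
          Table table = conn.getTable(TableName.valueOf("mytable"));
          RegionLocator locator = conn.getRegionLocator(TableName.valueOf("mytable"))) {
       // Configures the partitioner, reducer, and per-family settings shown above.
       HFileOutputFormat2.configureIncrementalLoad(job, table, locator);
       FileOutputFormat.setOutputPath(job, new Path("/tmp/hfiles"));
       success = job.waitForCompletion(true);
     }
     System.exit(success ? 0 : 1);
   }
 }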
Code Example #3
  @Override
  public void searchDB(String keyword) {
    long t0 = System.nanoTime();

    try {
      // First mapreduce phase setup
      Configuration conf = config;
      Job job = new Job(conf, "MapReducePhase1");
      job.setJarByClass(MapReduceHbaseDB.class);
      Scan scan = new Scan();
      scan.addFamily(Bytes.toBytes("myColumnFamily"));
      scan.setCaching(10000);

      // Second mapreduce phase setup
      Configuration conf2 = HBaseConfiguration.create();
      Job job2 = new Job(conf2, "MapReducePhase2");
      job2.setJarByClass(MapReduceHbaseDB.class);
      Scan scan2 = new Scan();
      scan2.addFamily(Bytes.toBytes("resultF"));
      scan2.setCaching(10000);

      // Execution of the first mapreduce phase
      TableMapReduceUtil.initTableMapperJob(
          "myTable", scan, Mapper1.class, Text.class, Text.class, job);
      TableMapReduceUtil.initTableReducerJob("result", Reducer1.class, job);

      job.waitForCompletion(true);

      // End of the first phase.
      long t2 = System.nanoTime();

      // Execution of the second mapreduce phase
      TableMapReduceUtil.initTableMapperJob(
          "result", scan2, Mapper2.class, Text.class, IntWritable.class, job2);
      TableMapReduceUtil.initTableReducerJob("result2", Reducer2.class, job2);

      job2.waitForCompletion(true);

      long t1 = System.nanoTime();
      double totalTime = (t1 - t0) / 1000000000.0;
      System.out.println("Total time for the search : " + totalTime + " seconds");

      double firstPhaseTime = (t2 - t0) / 1000000000.0;
      System.out.println("Time for the first mapreduce phase : " + firstPhaseTime + " seconds");

      double secondPhaseTime = (t1 - t2) / 1000000000.0;
      System.out.println("Time for the second mapreduce phase : " + secondPhaseTime + " seconds");

    } catch (IOException e) {
      e.printStackTrace();
    } catch (InterruptedException e) {
      e.printStackTrace();
    } catch (ClassNotFoundException e) {
      e.printStackTrace();
    }
  }
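The Mapper1/Reducer1 and Mapper2/Reducer2 classes are not shown. As a purely illustrative sketch, a phase-1 mapper emitting the declared (Text, Text) pairs could be structured as below; the family/qualifier names and the idea of keying by cell content are assumptions, not the project's actual logic:

 import java.io.IOException;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 import org.apache.hadoop.hbase.mapreduce.TableMapper;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.io.Text;

 // Sketch only: emits (cell content, row key) so the reducer can group
 // matching rows into the "result" table. FAMILY/QUALIFIER are assumed.
 public class Mapper1 extends TableMapper<Text, Text> {
   private static final byte[] FAMILY = Bytes.toBytes("myColumnFamily");
   private static final byte[] QUALIFIER = Bytes.toBytes("content");

   @Override
   protected void map(ImmutableBytesWritable row, Result value, Context context)
       throws IOException, InterruptedException {
     byte[] cell = value.getValue(FAMILY, QUALIFIER);
     if (cell != null) {
       context.write(new Text(Bytes.toString(cell)), new Text(row.get()));
     }
   }
 }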
Code Example #4
  /** Test that we add tmpjars correctly including the ZK jar. */
  public void testAddDependencyJars() throws Exception {
    Job job = new Job();
    TableMapReduceUtil.addDependencyJars(job);
    String tmpjars = job.getConfiguration().get("tmpjars");

    System.err.println("tmpjars: " + tmpjars);
    assertTrue(tmpjars.contains("zookeeper"));
    assertFalse(tmpjars.contains("guava"));

    System.err.println("appending guava jar");
    TableMapReduceUtil.addDependencyJars(
        job.getConfiguration(), com.google.common.base.Function.class);
    tmpjars = job.getConfiguration().get("tmpjars");
    assertTrue(tmpjars.contains("guava"));
  }
Code Example #5
File: GroupingTableMapper.java Project: Guavus/hbase
 /**
  * Use this before submitting a TableMap job. It will appropriately set up the job.
  *
  * @param table The table to be processed.
  * @param scan The scan with the columns etc.
  * @param groupColumns A space-separated list of columns used to form the key used in collect.
  * @param mapper The mapper class.
  * @param job The current job.
  * @throws IOException When setting up the job fails.
  */
 @SuppressWarnings("unchecked")
 public static void initJob(
     String table, Scan scan, String groupColumns, Class<? extends TableMapper> mapper, Job job)
     throws IOException {
   TableMapReduceUtil.initTableMapperJob(
       table, scan, mapper, ImmutableBytesWritable.class, Result.class, job);
   job.getConfiguration().set(GROUP_COLUMNS, groupColumns);
 }
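A hedged usage sketch follows; the table name, family, and grouping columns are placeholders, not values from the project. initJob replaces the direct initTableMapperJob call and records the grouping columns in the job configuration:

 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.mapreduce.GroupingTableMapper;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.mapreduce.Job;

 // Sketch: set up a job whose map output keys are built from two columns.
 public class GroupedScanSetup {
   public static void main(String[] args) throws Exception {
     Configuration conf = HBaseConfiguration.create();
     Job job = Job.getInstance(conf, "grouped scan example");
     Scan scan = new Scan();
     scan.addFamily(Bytes.toBytes("d"));
     // Keys emitted by GroupingTableMapper are formed from these two columns.
     GroupingTableMapper.initJob(
         "mytable", scan, "d:c1 d:c2", GroupingTableMapper.class, job);
   }
 }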
Code Example #6
File: HFileOutputFormat2.java Project: mringg/hbase
  public static void configureIncrementalLoadMap(Job job, HTableDescriptor tableDescriptor)
      throws IOException {
    Configuration conf = job.getConfiguration();

    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(HFileOutputFormat2.class);

    // Set compression algorithms based on column families
    configureCompression(conf, tableDescriptor);
    configureBloomType(tableDescriptor, conf);
    configureBlockSize(tableDescriptor, conf);
    configureDataBlockEncoding(tableDescriptor, conf);

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    LOG.info("Incremental table " + tableDescriptor.getTableName() + " output configured.");
  }
Code Example #7
 public static void initTableMapperJob(
     List<Scan> scans,
     Class<? extends Mapper> mapper,
     Class<? extends WritableComparable> outputKeyClass,
     Class<? extends Writable> outputValueClass,
     Job job)
     throws IOException {
   job.getConfiguration().setBoolean("table", true);
   TableMapReduceUtil.initTableMapperJob(scans, mapper, outputKeyClass, outputValueClass, job);
 }
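For the multi-scan overload, each Scan must carry the name of the table it targets. A hypothetical setup follows; the table names and column family are placeholders, and IdentityTableMapper stands in for a real mapper:

 import java.util.ArrayList;
 import java.util.List;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 import org.apache.hadoop.hbase.mapreduce.IdentityTableMapper;
 import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.mapreduce.Job;

 // Sketch: one Scan per source table, routed via the
 // SCAN_ATTRIBUTES_TABLE_NAME attribute.
 public class MultiScanSetup {
   public static void main(String[] args) throws Exception {
     Job job = Job.getInstance(HBaseConfiguration.create(), "multi-scan example");
     List<Scan> scans = new ArrayList<Scan>();
     for (String tableName : new String[] { "table1", "table2" }) {
       Scan scan = new Scan();
       scan.addFamily(Bytes.toBytes("cf"));
       // Tell the multi-table input format which table this scan reads.
       scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(tableName));
       scans.add(scan);
     }
     TableMapReduceUtil.initTableMapperJob(
         scans, IdentityTableMapper.class, ImmutableBytesWritable.class, Result.class, job);
   }
 }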
Code Example #8
 /**
  * Sets up the actual job.
  *
  * @param conf The current configuration.
  * @param args The command line parameters.
  * @return The newly created job.
  * @throws IOException When setting up the job fails.
  */
 public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
   String tableName = args[0];
   Path outputDir = new Path(args[1]);
   String reportSeparatorString = (args.length > 2) ? args[2] : ":";
   conf.set("ReportSeparator", reportSeparatorString);
   Job job = new Job(conf, NAME + "_" + tableName);
   job.setJarByClass(CellCounter.class);
   Scan scan = getConfiguredScanForJob(conf, args);
   TableMapReduceUtil.initTableMapperJob(
       tableName, scan, CellCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
   job.setNumReduceTasks(1);
   job.setMapOutputKeyClass(Text.class);
   job.setMapOutputValueClass(IntWritable.class);
   job.setOutputFormatClass(TextOutputFormat.class);
   job.setOutputKeyClass(Text.class);
   job.setOutputValueClass(IntWritable.class);
   FileOutputFormat.setOutputPath(job, outputDir);
   job.setReducerClass(IntSumReducer.class);
   return job;
 }
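A minimal sketch of a main() driving such a factory method; the argument values below are placeholders:

 public static void main(String[] args) throws Exception {
   // Placeholder arguments: table name and output directory.
   Configuration conf = HBaseConfiguration.create();
   Job job = createSubmittableJob(conf, new String[] { "mytable", "/tmp/cellcount" });
   System.exit(job.waitForCompletion(true) ? 0 : 1);
 }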