@Override
  public void searchDB(String keyword) {
    long t0 = System.nanoTime();

    try {
      // First mapreduce phase setup
      HBaseConfiguration conf = config;
      Job job = new Job(conf, "MapReducePhase1");
      job.setJarByClass(MapReduceHbaseDB.class);
      Scan scan = new Scan();
      scan.addFamily(Bytes.toBytes("myColumnFamily")); // scan the whole family
      scan.setCaching(10000);

      // Second mapreduce phase setup
      Configuration conf2 = HBaseConfiguration.create();
      Job job2 = new Job(conf2, "MapReducePhase2");
      job2.setJarByClass(MapReduceHbaseDB.class);
      Scan scan2 = new Scan();
      scan2.addFamily(Bytes.toBytes("resultF")); // family written by the first phase
      scan2.setCaching(10000);

      // Execution of the first mapreduce phase
      TableMapReduceUtil.initTableMapperJob(
          "myTable", scan, Mapper1.class, Text.class, Text.class, job);
      TableMapReduceUtil.initTableReducerJob("result", Reducer1.class, job);

      job.waitForCompletion(true);

      long t2 = System.nanoTime();

      // Execution of the second mapreduce phase
      TableMapReduceUtil.initTableMapperJob(
          "result", scan2, Mapper2.class, Text.class, IntWritable.class, job2);
      TableMapReduceUtil.initTableReducerJob("result2", Reducer2.class, job2);

      job2.waitForCompletion(true);

      long t1 = System.nanoTime();
      double totalTime = (t1 - t0) / 1000000000.0;
      System.out.println("Total time for the search : " + totalTime + " seconds");

      double firstPhaseTime = (t2 - t0) / 1000000000.0;
      System.out.println("Time for the first mapreduce phase : " + firstPhaseTime + " seconds");

      double secondPhaseTime = (t1 - t2) / 1000000000.0;
      System.out.println("Time for the first mapreduce phase : " + secondPhaseTime + " seconds");

    } catch (IOException | InterruptedException | ClassNotFoundException e) {
      e.printStackTrace();
    }
  }
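
Mapper1 and Reducer1 are referenced above but not shown. A minimal sketch of what such a TableMapper/TableReducer pair could look like; the qualifier names "q" and "hits" are assumptions for illustration, not the project's actual schema:

  public static class Mapper1 extends TableMapper<Text, Text> {
    @Override
    protected void map(ImmutableBytesWritable row, Result value, Context context)
        throws IOException, InterruptedException {
      // Emit (extracted term, row key); the real mapper would match the searched keyword.
      byte[] cell = value.getValue(Bytes.toBytes("myColumnFamily"), Bytes.toBytes("q"));
      if (cell != null) {
        context.write(new Text(Bytes.toString(cell)), new Text(Bytes.toString(row.get())));
      }
    }
  }

  public static class Reducer1 extends TableReducer<Text, Text, ImmutableBytesWritable> {
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
      int hits = 0;
      for (Text ignored : values) {
        hits++;
      }
      // One row per term in the intermediate "result" table, family "resultF".
      Put put = new Put(Bytes.toBytes(key.toString()));
      put.addColumn(Bytes.toBytes("resultF"), Bytes.toBytes("hits"), Bytes.toBytes(hits));
      context.write(new ImmutableBytesWritable(put.getRow()), put);
    }
  }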
Example 2
  /** Job configuration. */
  public static Job configureJob(Configuration conf, String[] args) throws IOException {
    Scan scan = new Scan();
    scan.addFamily(Cw09Constants.CF_FREQUENCIES_BYTES);
    scan.setBatch(Cw09Constants.CW09_INDEX_SCAN_BATCH);

    conf.set("mapred.map.tasks.speculative.execution", "false");
    conf.set("mapred.reduce.tasks.speculative.execution", "false");
    Job job = Job.getInstance(conf, "Count the total frequency of each term in the index table");
    job.setJarByClass(TermHitsCounter.class);
    // TableMapReduceUtil.initTableMapperJob(Constants.CLUEWEB09_INDEX_TABLE_NAME, scan,
    //		ThcMapper.class, Text.class, LongWritable.class, job);
    TableMapReduceUtil.initTableMapperJob(
        Cw09Constants.CLUEWEB09_INDEX_TABLE_NAME,
        scan,
        ThcMapper.class,
        Text.class,
        LongWritable.class,
        job,
        true,
        CustomizedSplitTableInputFormat.class);
    job.setCombinerClass(ThcCombiner.class);
    TableMapReduceUtil.initTableReducerJob(
        Cw09Constants.CLUEWEB09_TERM_COUNT_TABLE_NAME, ThcReducer.class, job);
    job.setNumReduceTasks(40);
    return job;
  }
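
ThcCombiner, set via job.setCombinerClass above, must consume and produce the mapper's (Text, LongWritable) pairs so Hadoop can run it zero or more times without changing the result. A sketch of the usual sum-combiner shape (not the project's actual class):

  public static class ThcCombiner extends Reducer<Text, LongWritable, Text, LongWritable> {
    @Override
    protected void reduce(Text term, Iterable<LongWritable> counts, Context context)
        throws IOException, InterruptedException {
      long sum = 0;
      for (LongWritable c : counts) {
        sum += c.get(); // partial per-term frequency total
      }
      context.write(term, new LongWritable(sum));
    }
  }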
Example 3
  @Test
  public void shouldJoinTables() throws Exception {
    // given

    Job job = new Job(configuration, "Joins");
    job.setJarByClass(AverageRatingMapper.class);

    List<Scan> scans = new ArrayList<>();

    Scan scan1 = new Scan();
    scan1.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(LoadMovieData.TABLE_NAME));
    scans.add(scan1);

    Scan scan2 = new Scan();
    scan2.setAttribute(
        Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(LoadMovieRatingData.TABLE_NAME));
    scans.add(scan2);

    TableMapReduceUtil.initTableMapperJob(scans, FilterMapper.class, null, null, job);
    // FileOutputFormat.setOutputPath(job, new Path("/tmp/sages/movies_with_ratings_" + System.currentTimeMillis()));
    TableMapReduceUtil.initTableReducerJob(TABLE_NAME, null, job);
    job.setNumReduceTasks(0);

    // when
    boolean succeeded = job.waitForCompletion(true);

    // then
    assertThat(succeeded).isTrue();
  }
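
FilterMapper is not shown. Because the output key/value classes are null and the job runs with zero reduce tasks, the mapper itself must emit mutations for the TableOutputFormat that initTableReducerJob configured. A sketch of the shape such a mapper could take (it simply copies every cell through; the actual join/filter logic is project-specific):

  public static class FilterMapper extends TableMapper<ImmutableBytesWritable, Put> {
    @Override
    protected void map(ImmutableBytesWritable row, Result value, Context context)
        throws IOException, InterruptedException {
      // Rows from both source tables arrive here; forward their cells into
      // one Put per row so both end up in the joined output table.
      Put put = new Put(row.get());
      for (Cell cell : value.rawCells()) {
        put.add(cell);
      }
      context.write(row, put);
    }
  }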
Example 4
  public static void summaryHtable() throws Exception {
    Job job = new Job(conf, "ExampleSummary");
    job.setJarByClass(HbaseMR.class); // class that contains mapper

    Scan scan = new Scan();
    scan.setCaching(500); // the default of 1 is bad for MapReduce jobs
    scan.setCacheBlocks(false); // don't set to true for MR jobs
    // set other scan attrs

    TableMapReduceUtil.initTableMapperJob(
        "sourceTable", // input table
        scan, // Scan instance to control CF and attribute selection
        MyMapper3.class, // mapper class
        Text.class, // mapper output key
        IntWritable.class, // mapper output value
        job);
    TableMapReduceUtil.initTableReducerJob(
        "targetTable", // output table
        MyTableReducer3.class, // reducer class
        job);
    job.setNumReduceTasks(1); // at least one, adjust as required

    boolean b = job.waitForCompletion(true);
    if (!b) {
      throw new IOException("error with job!");
    }
  }
Example 5
 public static void main(String[] args) throws Exception {
   Configuration conf = HBaseConfiguration.create();
   Job job = new Job(conf, SensorMR.class.getName() + "--<your name>"); // TODO
   job.setJarByClass(SensorMR.class);
   Scan scan = new Scan();
   // FirstKeyOnlyFilter returns just the first cell of every row, which is all
   // a row-level summary job needs and cuts the data shipped to the mappers.
   scan.setFilter(new FirstKeyOnlyFilter());
   TableMapReduceUtil.initTableMapperJob(
       tableRawData, scan, Mapper1.class, ImmutableBytesWritable.class, FloatWritable.class, job);
   TableMapReduceUtil.initTableReducerJob(tableSummaryData, Reducer1.class, job);
   TableMapReduceUtil.addDependencyJars(job);
   System.exit(job.waitForCompletion(true) ? 0 : 1);
 }
Example 6
  /**
   * @param args the command line arguments
   * @throws java.io.IOException
   */
  public static void main(String[] args) throws Exception {
    Conf conf = new Conf();
    Job job = new Job(conf, "TweetsLanguage");

    Scan scan = new Scan();

    TableMapReduceUtil.initTableMapperJob(
        "hhscyber:tweets", scan, LanguageMapper.class, null, null, job);
    job.setNumReduceTasks(0);

    TableMapReduceUtil.initTableReducerJob("hhscyber:tweets_lang", null, job);

    job.waitForCompletion(true);
  }
Example 7
 /** Job configuration. */
 public static Job configureJob(Configuration conf, String[] args) throws IOException {
   Path inputPath = new Path(args[0]);
   String tableName = args[1];
   Job job = new Job(conf, NAME + "_" + tableName);
   job.setJarByClass(Uploader.class);
   FileInputFormat.setInputPaths(job, inputPath);
   job.setInputFormatClass(SequenceFileInputFormat.class);
   job.setMapperClass(Uploader.class);
   // No reducers.  Just write straight to table.  Call initTableReducerJob
   // because it sets up the TableOutputFormat.
   TableMapReduceUtil.initTableReducerJob(tableName, null, job);
   job.setNumReduceTasks(0);
   return job;
 }
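
Since the job above is map-only, Uploader itself must emit (ImmutableBytesWritable, Put) pairs for the TableOutputFormat that initTableReducerJob set up. A sketch under the assumption that the sequence file holds (LongWritable, Text) records of the form "rowkey<TAB>value"; the record layout and the family/qualifier names "f"/"v" are made up for illustration:

 public static class Uploader extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
   @Override
   protected void map(LongWritable key, Text line, Context context)
       throws IOException, InterruptedException {
     String[] fields = line.toString().split("\t", 2);
     Put put = new Put(Bytes.toBytes(fields[0]));
     put.addColumn(Bytes.toBytes("f"), Bytes.toBytes("v"), Bytes.toBytes(fields[1]));
     context.write(new ImmutableBytesWritable(put.getRow()), put);
   }
 }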
Example 8
  public static Job configureJob(Configuration conf, String[] args)
      throws IOException, ClassNotFoundException, InterruptedException {
    Path inputPath = new Path("hdfs://Shrini:9000/stackexchange/stackexchange/PostQuestions.txt");

    createTable(tableName, colFamilyNames, conf);
    Job job = new Job(conf, "myjob");
    job.setJarByClass(MyHBaseMainClass.class);
    FileInputFormat.setInputPaths(job, inputPath);
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(MyMapper.class);
    TableMapReduceUtil.initTableReducerJob(tableName, null, job);
    job.setNumReduceTasks(0);
    job.waitForCompletion(true);
    return job;
  }
Example 9
  private static void createMapReduceJob(
      String tableNameToIndex, Configuration conf, int caching, int versions)
      throws IOException, InterruptedException, ClassNotFoundException {
    // Set the details to TableInputFormat
    Scan s = new Scan();
    s.setCaching(caching);
    s.setMaxVersions(versions);
    conf.set(TableInputFormat.INPUT_TABLE, tableNameToIndex);

    Set<Entry<String, List<String>>> entrySet = cfs.entrySet();
    for (Entry<String, List<String>> entry : entrySet) {
      List<String> quals = entry.getValue();
      addColumn(quals, Bytes.toBytes(entry.getKey()), s);
    }
    Job job = new Job(conf, "CreateIndex");
    String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);

    TableMapReduceUtil.initTableMapperJob(
        tableNameToIndex, // input table
        s, // Scan instance to control CF and attribute selection
        IndexCreationMapper.class, // mapper class
        ImmutableBytesWritable.class, // mapper output key
        Put.class, // mapper output value
        job);

    TableMapReduceUtil.initTableReducerJob(
        IndexUtils.getIndexTableName(tableNameToIndex), // output table
        null, // reducer class
        job);

    if (hfileOutPath != null) {
      HTable table = new HTable(conf, tableNameToIndex);
      job.setReducerClass(KeyValueSortReducer.class);
      Path outputDir = new Path(hfileOutPath);
      FileOutputFormat.setOutputPath(job, outputDir);
      HFileOutputFormat.configureIncrementalLoad(job, table);
    } else {
      job.setNumReduceTasks(0);
    }

    TableMapReduceUtil.addDependencyJars(
        job.getConfiguration(), com.google.common.base.Preconditions.class);
    job.waitForCompletion(true);
    assert job.isComplete();
  }
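
When BULK_OUTPUT_CONF_KEY is set, the job above only writes HFiles under hfileOutPath; they still have to be handed to the region servers afterwards. With the same HBase-era API the usual follow-up is LoadIncrementalHFiles, roughly (a sketch; error handling omitted):

    // After job.waitForCompletion(true) succeeds with hfileOutPath set:
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
    loader.doBulkLoad(new Path(hfileOutPath), new HTable(conf, IndexUtils.getIndexTableName(tableNameToIndex)));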
Example 10
  public static void main(String[] args)
      throws IOException, InterruptedException, ClassNotFoundException {
    Configuration hbaseConf = HBaseConfiguration.create();
    hbaseConf.set(
        "hbase.zookeeper.quorum", "DamHadoop1,DamHadoop2,DamHadoop3"); // ZooKeeper quorum; replace with your own server addresses
    // Job jobConf=new Job(hbaseConf, "FreqCounter");
    Job jobConf = Job.getInstance(hbaseConf, "FreqCounter");
    jobConf.setJobName("Hbase_FreqCounter");
    jobConf.setJarByClass(FreqCounter.class);

    Scan scan = new Scan();
    String columns = "details";
    scan.addFamily(Bytes.toBytes(columns));
    scan.setFilter(new FirstKeyOnlyFilter());
    TableMapReduceUtil.initTableMapperJob(
        "import_tests",
        scan,
        CountMapper.class,
        ImmutableBytesWritable.class,
        IntWritable.class,
        jobConf);
    TableMapReduceUtil.initTableReducerJob("summary_user", CountReducer.class, jobConf);
    System.exit(jobConf.waitForCompletion(true) ? 0 : 1);
  }
Example 11
  public boolean doTest()
      throws InstantiationException, IllegalAccessException, ClassNotFoundException, SQLException,
          IOException, InterruptedException {
    /** create config */
    conf_h = HBaseConfiguration.create();
    conf_h.set("hbase.zookeeper.quorum", "localhost");
    conf_h.set("hbase.zookeeper.property.clientPort", "2181");
    // The method already declares IOException, so let a failed connection
    // propagate instead of pressing on with a null Connection (which would
    // throw a NullPointerException at getAdmin()).
    Connection con_h = ConnectionFactory.createConnection(conf_h);
    Admin admin = con_h.getAdmin();
    HTableDescriptor tableDesc = new HTableDescriptor(tableName_chi);
    HColumnDescriptor colFamDesc = new HColumnDescriptor("count");
    colFamDesc.setMaxVersions(1);
    tableDesc.addFamily(colFamDesc);
    admin.createTable(tableDesc);

    /** counting and insert in chiTable */
    Scan scan = new Scan();
    scan.addColumn(Bytes.toBytes("products"), Bytes.toBytes("product_category_id"));
    scan.addColumn(Bytes.toBytes("orders"), Bytes.toBytes("order_date"));
    // Creates a new Job with no particular Cluster
    Job job =
        Job.getInstance(conf_h, "Count"); // Job.getInstance(Configuration conf, String JobName)
    job.setJarByClass(
        ChiSquaredTest2_abc.class); // Set the Jar by finding where a given class came from
    // initTableMapperJob(String table, Scan scan, Class<? extends TableMapper> mapper, Class<?>
    // outputKeyClass, Class<?> outputValueClass, org.apache.hadoop.mapreduce.Job job)
    TableMapReduceUtil.initTableMapperJob(
        "retail_order", scan, Map1.class, Text.class, IntWritable.class, job);
    // initTableReducerJob(String table, Class<? extends TableReducer> reducer,
    // org.apache.hadoop.mapreduce.Job job)
    TableMapReduceUtil.initTableReducerJob("chiTable", Reduce1.class, job);

    // boolean waitForCompletion(boolean verbose), verbose - print the progress to the user
    job.waitForCompletion(true); // Submit the job to the cluster and wait for it to finish

    /** extract value from chiTable */
    int totalY = 0;
    int totalN = 0;
    ArrayList<CellOfHTable> chiTable = new ArrayList<CellOfHTable>();
    Table table_h = con_h.getTable(tableName_chi);
    Scan s = new Scan();
    s.addFamily(Bytes.toBytes("count"));
    ResultScanner results = table_h.getScanner(s);
    for (Result r : results) {
      CellOfHTable c =
          new CellOfHTable(
              r.getRow(),
              r.getValue(Bytes.toBytes("count"), Bytes.toBytes("Y")) == null
                  ? Bytes.toBytes(0)
                  : r.getValue(Bytes.toBytes("count"), Bytes.toBytes("Y")),
              r.getValue(Bytes.toBytes("count"), Bytes.toBytes("N")) == null
                  ? Bytes.toBytes(0)
                  : r.getValue(
                      Bytes.toBytes("count"), Bytes.toBytes("N"))); // (id, count_Y, count_N)
      chiTable.add(c);
      totalY = totalY + c.countY;
      totalN = totalN + c.countN;
    }

    results.close();
    table_h.close();
    admin.disableTable(tableName_chi);
    admin.deleteTable(tableName_chi);

    double chisquare = 0.0;
    for (int i = 0; i < chiTable.size(); i++) {
      CellOfHTable c = chiTable.get(i);
      double expectY =
          (double) (c.countY + c.countN) * (double) totalY / (double) (totalY + totalN);
      chisquare =
          chisquare + (((double) c.countY - expectY) * ((double) c.countY - expectY) / expectY);
      double expectN =
          (double) (c.countY + c.countN) * (double) totalN / (double) (totalY + totalN);
      chisquare =
          chisquare + (((double) c.countN - expectN) * ((double) c.countN - expectN) / expectN);
    }

    System.out.println(chisquare);
    ChiSquareDist csd = new ChiSquareDist((chiTable.size() - 1));
    return chisquare > csd.inverseF(1.0 - alpha);
  }
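
For reference, the loop above computes Pearson's chi-square statistic over a 2 x k contingency table (k = chiTable.size()): each category's expected Y count is E_Y = (countY + countN) * totalY / (totalY + totalN), likewise for N, and chi^2 sums (observed - expected)^2 / expected over both rows of every category. The result is then compared against the (1 - alpha) quantile of the chi-square distribution with k - 1 degrees of freedom, which is exactly what new ChiSquareDist(chiTable.size() - 1) together with inverseF(1.0 - alpha) provides.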