@Override
  public void searchDB(String keyword) {
    long t0 = System.nanoTime();

    try {
      // First mapreduce phase setup
      HBaseConfiguration conf = config;
      Job job;
      job = new Job(conf, "MapReducePhase1");
      job.setJarByClass(MapReduceHbaseDB.class);
      Scan scan = new Scan();
      scan.addFamily(Bytes.toBytes("myColumnFamily"));
      scan.setCaching(10000);

      // Second mapreduce phase setup
      Configuration conf2 = HBaseConfiguration.create();
      Job job2 = new Job(conf2, "MapReducePhase2");
      job2.setJarByClass(MapReduceHbaseDB.class);
      Scan scan2 = new Scan();
      scan2.addFamily(Bytes.toBytes("resultF"));
      scan2.setCaching(10000);

      // Execution of the first mapreduce phase
      TableMapReduceUtil.initTableMapperJob(
          "myTable", scan, Mapper1.class, Text.class, Text.class, job);
      TableMapReduceUtil.initTableReducerJob("result", Reducer1.class, job);

      job.waitForCompletion(true);

      long t2 = System.nanoTime();

      // Execution of the second mapreduce phase
      TableMapReduceUtil.initTableMapperJob(
          "result", scan2, Mapper2.class, Text.class, IntWritable.class, job2);
      TableMapReduceUtil.initTableReducerJob("result2", Reducer2.class, job2);

      job2.waitForCompletion(true);

      long t1 = System.nanoTime();
      double totalTime = (t1 - t0) / 1000000000.0;
      System.out.println("Total time for the search : " + totalTime + " seconds");

      double firstPhaseTime = (t2 - t0) / 1000000000.0;
      System.out.println("Time for the first mapreduce phase : " + firstPhaseTime + " seconds");

      double secondPhaseTime = (t1 - t2) / 1000000000.0;
      System.out.println("Time for the first mapreduce phase : " + secondPhaseTime + " seconds");

    } catch (IOException | InterruptedException | ClassNotFoundException e) {
      e.printStackTrace();
    }
  }
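The Job constructor used above is deprecated on Hadoop 2.x. A minimal sketch of the same phase-1 setup via Job.getInstance, assuming the mapper, reducer, and table names from the example above:

  Configuration conf = HBaseConfiguration.create();
  Job job = Job.getInstance(conf, "MapReducePhase1");
  job.setJarByClass(MapReduceHbaseDB.class);

  Scan scan = new Scan();
  scan.addFamily(Bytes.toBytes("myColumnFamily"));
  scan.setCaching(10000);

  TableMapReduceUtil.initTableMapperJob(
      "myTable", scan, Mapper1.class, Text.class, Text.class, job);
  TableMapReduceUtil.initTableReducerJob("result", Reducer1.class, job);
  job.waitForCompletion(true);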
  @Test
  public void shouldRunMapReduce() throws Exception {
    // given
    Job job = new Job(configuration, "Average Rating");
    job.setJarByClass(AverageRatingMapper.class);

    Scan scan = new Scan();
    scan.setCaching(500);
    scan.setCacheBlocks(false);
    scan.addFamily(Bytes.toBytes(LoadMovieRatingData.FAMILY_NAME));

    TableMapReduceUtil.initTableMapperJob(
        LoadMovieRatingData.TABLE_NAME,
        scan,
        AverageRatingMapper.class,
        Text.class,
        DoubleWritable.class,
        job);
    job.setReducerClass(RatingExportReducer.class);
    job.setNumReduceTasks(1);
    FileOutputFormat.setOutputPath(
        job, new Path("/tmp/mr/mySummaryFile_" + System.currentTimeMillis()));

    // when
    boolean succeeded = job.waitForCompletion(true);

    // then
    assertThat(succeeded).isTrue();
  }
Example #3
  public Scan generateScan(
      String[] rowRange, FilterList filterList, String[] family, String[] columns, int maxVersion)
      throws Exception {
    if (table == null) throw new Exception("No table handler");
    if (cacheSize < 0) throw new Exception("should set cache size before scanning");

    Scan scan = null;

    try {
      scan = new Scan();
      scan.setCaching(this.cacheSize);
      scan.setCacheBlocks(this.blockCached);
      scan.setFilter(filterList);
      if (maxVersion > 0) scan.setMaxVersions(maxVersion);
      if (rowRange != null) {
        scan.setStartRow(rowRange[0].getBytes());
        if (rowRange.length == 2) scan.setStopRow(rowRange[1].getBytes());
      }

      if (columns != null) {
        for (int i = 0; i < columns.length; i++) {
          scan.addColumn(family[0].getBytes(), columns[i].getBytes());
          // System.out.println(family[i]+";"+columns[i]);
        }
      } else {
        scan.addFamily(family[0].getBytes());
      }

    } catch (Exception e) {
      e.printStackTrace();
    }

    return scan;
  }
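A hedged usage example, assuming an instance handler of the enclosing class whose table and cacheSize have already been set; as elsewhere in HBase, the stop row is exclusive:

  Scan scan = handler.generateScan(
      new String[] {"row-000", "row-999"}, // start inclusive, stop exclusive
      null, // no filters
      new String[] {"cf"},
      null, // null columns selects the whole family
      1);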
  /**
   * Sets up the actual job.
   *
   * @param conf The current configuration.
   * @param args The command line parameters.
   * @return The newly created job.
   * @throws IOException When setting up the job fails.
   */
  public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(CachingRowCounter.class);
    // Columns are space delimited
    StringBuilder sb = new StringBuilder();
    final int columnoffset = 1;
    for (int i = columnoffset; i < args.length; i++) {
      if (i > columnoffset) {
        sb.append(" ");
      }
      sb.append(args[i]);
    }

    Scan scan = new Scan();
    scan.setFilter(new FirstKeyOnlyFilter());
    if (sb.length() > 0) {
      for (String columnName : sb.toString().split(" ")) {
        String[] fields = columnName.split(":");
        if (fields.length == 1) {
          scan.addFamily(Bytes.toBytes(fields[0]));
        } else {
          scan.addColumn(Bytes.toBytes(fields[0]), Bytes.toBytes(fields[1]));
        }
      }
    }
    scan.setCaching(100);

    // Nothing is written out; the mapper only increments counters.
    job.setOutputFormatClass(NullOutputFormat.class);
    TableMapReduceUtil.initTableMapperJob(
        tableName, scan, RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
    job.setNumReduceTasks(0);
    return job;
  }
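A driver sketch for the factory above, mirroring how HBase's bundled RowCounter submits its job; only the names defined in this example are used:

  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Job job = createSubmittableJob(conf, args);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }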
Example #5
  public static void main(String[] args) throws Exception {
    new JobConf().setSpeculativeExecution(false);
    Configuration conf = new Configuration();
    conf.set("es.nodes", ES_NODES);
    conf.set("es.resource", ES_RESOURCE);
    conf.set("es.mapping.id", HBaseTableMapper.ID_FIELD.toString());
    conf.set("es.batch.size.bytes", "10mb");
    conf.set("es.batch.size.entries", "10000");
    conf.set("es.batch.write.refresh", "false");

    Job job = new Job(conf);
    job.setJarByClass(BulkIndex.class);
    job.setMapperClass(HBaseTableMapper.class);
    job.setNumReduceTasks(0);
    job.setSpeculativeExecution(false);
    job.setOutputFormatClass(BulkProcessorOutputFormat.class);
    job.setMapOutputValueClass(Text.class);

    Scan scan = new Scan();
    scan.setCaching(1000);
    scan.setCacheBlocks(false);

    TableMapReduceUtil.initTableMapperJob(
        BulkLoad.HBASE_TABLE_NAME,
        scan,
        HBaseTableMapper.class,
        NullWritable.class,
        MapWritable.class,
        job);

    job.waitForCompletion(true);
  }
Example #6
 /**
  * Performs a full scan of a catalog table.
  *
  * @param catalogTracker tracker supplying the connection to the catalog tables.
  * @param visitor Visitor invoked against each row.
  * @param startrow Where to start the scan. Pass null if want to begin scan at first row.
  * @param scanRoot True if we are to scan <code>-ROOT-</code> rather than <code>.META.</code>, the
  *     default (pass false to scan .META.)
  * @throws IOException
  */
 static void fullScan(
     CatalogTracker catalogTracker,
     final Visitor visitor,
     final byte[] startrow,
     final boolean scanRoot)
     throws IOException {
   Scan scan = new Scan();
   if (startrow != null) scan.setStartRow(startrow);
   if (startrow == null && !scanRoot) {
     int caching =
         catalogTracker
             .getConnection()
             .getConfiguration()
             .getInt(HConstants.HBASE_META_SCANNER_CACHING, 100);
     scan.setCaching(caching);
   }
   scan.addFamily(HConstants.CATALOG_FAMILY);
   HTable metaTable = scanRoot ? getRootHTable(catalogTracker) : getMetaHTable(catalogTracker);
   ResultScanner scanner = metaTable.getScanner(scan);
   try {
     Result data;
     while ((data = scanner.next()) != null) {
       if (data.isEmpty()) continue;
       // Break if visit returns false.
       if (!visitor.visit(data)) break;
     }
   } finally {
     scanner.close();
     metaTable.close();
   }
 }
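A minimal sketch of driving fullScan with a counting Visitor, assuming a CatalogTracker instance ct and the 0.9x-era Visitor interface whose visit(Result) may throw IOException:

  final AtomicInteger rows = new AtomicInteger();
  fullScan(ct, new Visitor() {
    @Override
    public boolean visit(Result r) throws IOException {
      rows.incrementAndGet();
      return true; // returning false stops the scan early
    }
  }, null, false);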
 public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    conf = HBaseConfiguration.create(conf);
    Job job = new Job(conf, "AverageCalc");
   job.setJarByClass(AverageCalculator.class);
   Scan scan = new Scan();
   scan.setCaching(500);
   scan.setCacheBlocks(false);
   scan.addFamily(Bytes.toBytes("Post"));
   FilterList li = new FilterList(FilterList.Operator.MUST_PASS_ALL);
   SingleColumnValueFilter filter =
       new SingleColumnValueFilter(
           Bytes.toBytes("Post"),
           Bytes.toBytes("PostTypeId"),
           CompareOp.EQUAL,
           Bytes.toBytes("1"));
   li.addFilter(filter);
   scan.setFilter(li);
   FileOutputFormat.setOutputPath(job, new Path(otherArgs[0]));
   job.setOutputKeyClass(Text.class);
   TableMapReduceUtil.initTableMapperJob(
       "bigd24-hbase-sample", scan, Mapper1.class, Text.class, IntWritable.class, job);
   job.setReducerClass(Reducer1.class);
   job.setOutputValueClass(FloatWritable.class);
   System.exit(job.waitForCompletion(true) ? 0 : 1);
 }
  private Scan createScan(String applicationName, Range range, boolean scanBackward) {
    Scan scan = new Scan();
    scan.setCaching(this.scanCacheSize);

    byte[] bApplicationName = Bytes.toBytes(applicationName);
    byte[] traceIndexStartKey = SpanUtils.getTraceIndexRowKey(bApplicationName, range.getFrom());
    byte[] traceIndexEndKey = SpanUtils.getTraceIndexRowKey(bApplicationName, range.getTo());

    if (scanBackward) {
      // start key is replaced by end key because key has been reversed
      scan.setStartRow(traceIndexEndKey);
      scan.setStopRow(traceIndexStartKey);
    } else {
      scan.setReversed(true);
      scan.setStartRow(traceIndexStartKey);
      scan.setStopRow(traceIndexEndKey);
    }

    scan.addFamily(HBaseTables.APPLICATION_TRACE_INDEX_CF_TRACE);
    scan.setId("ApplicationTraceIndexScan");

    // Scan.toString() serializes the scan to JSON, so the first call is relatively slow.
    logger.trace("create scan:{}", scan);
    return scan;
  }
Example #9
  public static void htableFile() throws Exception {
    Job job = new Job(conf, "ExampleSummaryToFile");
    job.setJarByClass(HbaseMR.class); // class that contains mapper

    Scan scan = new Scan();
    scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
    scan.setCacheBlocks(false); // don't set to true for MR jobs
    // set other scan attrs

    TableMapReduceUtil.initTableMapperJob(
        "sourceTable", // input table
        scan, // Scan instance to control CF and attribute selection
        MyMapper.class, // mapper class
        Text.class, // mapper output key
        IntWritable.class, // mapper output value
        job);
    job.setReducerClass(MyReducer4.class); // reducer class
    job.setNumReduceTasks(1); // at least one, adjust as required
    FileOutputFormat.setOutputPath(job, new Path("/tmp/mr/mySummaryFile")); // adjust directories as required

    boolean b = job.waitForCompletion(true);
    if (!b) {
      throw new IOException("error with job!");
    }
  }
  public static void main(String[] args) {
    if (args.length < 2) {
      System.out.println("JavaHBaseDistributedScan {master} {tableName}");
      return;
    }

    String master = args[0];
    String tableName = args[1];

    JavaSparkContext jsc = new JavaSparkContext(master, "JavaHBaseDistributedScan");
    jsc.addJar("SparkHBase.jar");

    Configuration conf = HBaseConfiguration.create();
    conf.addResource(new Path("/etc/hbase/conf/core-site.xml"));
    conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

    JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);

    Scan scan = new Scan();
    scan.setCaching(100);

    JavaRDD<Tuple2<byte[], List<Tuple3<byte[], byte[], byte[]>>>> javaRdd =
        hbaseContext.hbaseRDD(tableName, scan);

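    // collect() materializes every scanned row on the driver; fine for a demo,
    // but memory-prohibitive for large tables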
    List<Tuple2<byte[], List<Tuple3<byte[], byte[], byte[]>>>> results = javaRdd.collect();

    System.out.println("Result count: " + results.size());
  }
Example #11
  public static void readWriteHtable() throws Exception {
    Job job = new Job(conf, "ExampleReadWrite");
    job.setJarByClass(HbaseMR.class); // class that contains mapper

    Scan scan = new Scan();
    scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
    scan.setCacheBlocks(false); // don't set to true for MR jobs
    // set other scan attrs

    TableMapReduceUtil.initTableMapperJob(
        "sourceTable", // input table
        scan, // Scan instance to control CF and attribute selection
        MyMapper2.class, // mapper class
        null, // mapper output key
        null, // mapper output value
        job);
    TableMapReduceUtil.initTableReducerJob(
        "targetTable", // output table
        null, // reducer class
        job);
    job.setNumReduceTasks(0);

    boolean b = job.waitForCompletion(true);
    if (!b) {
      throw new IOException("error with job!");
    }
  }
Example #12
  /**
   * Get a collection of Scans, one per region, that cover the range of the table having the given
   * key prefix. These will be used as the map task input splits.
   */
  public static List<Scan> scansThisCubeOnly(byte[] keyPrefix, byte[][] splitKeys)
      throws IOException {
    Scan copyScan = new Scan();
    copyScan.setCaching(5000);
    copyScan.setCacheBlocks(false);

    // Hack: generate a key that probably comes after all this cube's keys but doesn't include any
    // keys not belonging to this cube.
    byte[] keyAfterCube = ArrayUtils.addAll(keyPrefix, fiftyBytesFF);

    List<Scan> scans = new ArrayList<Scan>();
    Scan scanUnderConstruction = new Scan(copyScan);

    for (byte[] splitKey : splitKeys) {
      scanUnderConstruction.setStopRow(splitKey);

      // Coerce scan to only touch keys belonging to this cube
      Scan truncated = truncateScan(scanUnderConstruction, keyPrefix, keyAfterCube);
      if (truncated != null) {
        scans.add(truncated);
      }

      scanUnderConstruction = new Scan(copyScan);
      scanUnderConstruction.setStartRow(splitKey);
    }

    // There's another region from last split key to the end of the table.
    Scan truncated = truncateScan(scanUnderConstruction, keyPrefix, keyAfterCube);
    if (truncated != null) {
      scans.add(truncated);
    }

    return scans;
  }
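fiftyBytesFF is not shown in this excerpt; presumably it is fifty 0xFF bytes, so that keyPrefix + fiftyBytesFF sorts after any realistic key sharing the prefix. A sketch under that assumption:

  private static final byte[] fiftyBytesFF = new byte[50];
  static {
    Arrays.fill(fiftyBytesFF, (byte) 0xFF);
  }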
    public static List<Delete> GetDeleteEventsBetween(
        Table VTEvent_Table, String imo_str, long first_timestamp, long last_timestamp)
        throws IOException {
      // HBase shell equivalent: scan 'cdb_vessel:vessel_event', {FILTER => "PrefixFilter('0000003162')"}
      Scan GetEventsBetween = new Scan();
      GetEventsBetween.setStartRow(
              Bytes.toBytes(imo_str + LpadNum(Long.MAX_VALUE - last_timestamp, 19) + "0000000000"))
          .setStopRow(
              Bytes.toBytes(
                  imo_str + LpadNum(Long.MAX_VALUE - first_timestamp + 1, 19) + "9999999999"))
          .addColumn(details, exittime);
      GetEventsBetween.setCaching(100);

      Filter ExistTimeValuefilter =
          new ValueFilter(
              CompareFilter.CompareOp.LESS_OR_EQUAL,
              new BinaryComparator(
                  Bytes.toBytes(new DateTime(last_timestamp).toString(rawformatter))));
      GetEventsBetween.setFilter(ExistTimeValuefilter);

      ResultScanner Result_ExistingEvents = VTEvent_Table.getScanner(GetEventsBetween);
      List<Delete> deletes = new ArrayList<Delete>();

      for (Result res : Result_ExistingEvents) {
        deletes.add(new Delete(res.getRow()));
      }

      Result_ExistingEvents.close();
      return deletes;
    }
    // Get all events whose exit time is at or after the last location
    public static Map<Integer, VesselEvent> getAllEventsStartBeforeEndAfterBeforeLocation(
        Table VTEvent_Table, String IMO_str, VesselLocation location) throws IOException {
      Scan getAllEventsWithExistAtLastLocation = new Scan();
      getAllEventsWithExistAtLastLocation
          .setStartRow(
              Bytes.toBytes(
                  IMO_str + LpadNum(Long.MAX_VALUE - location.recordtime, 19) + "0000000000"))
          .setStopRow(Bytes.toBytes(IMO_str + LpadNum(Long.MAX_VALUE, 19) + "9999999999"))
          .addColumn(details, exittime);
      getAllEventsWithExistAtLastLocation.setCaching(100);

      Filter ExistTimeValuefilter =
          new ValueFilter(
              CompareFilter.CompareOp.GREATER_OR_EQUAL,
              new BinaryComparator(
                  Bytes.toBytes(new DateTime(location.recordtime).toString(rawformatter))));
      getAllEventsWithExistAtLastLocation.setFilter(ExistTimeValuefilter);

      ResultScanner Result_event = VTEvent_Table.getScanner(getAllEventsWithExistAtLastLocation);

      Map<Integer, VesselEvent> events = new HashMap<Integer, VesselEvent>();

      for (Result res : Result_event) {

        Get get = new Get(res.getRow());
        get.addColumn(details, entrytime);
        get.addColumn(details, entrycoordinates);

        Result result = VTEvent_Table.get(get);
        String rowkey = Bytes.toString(result.getRow());
        String polygonid = rowkey.substring(26);

        VesselEvent VE = new VesselEvent();
        VE.exittime = location.recordtime;
        VE.exitcoordinates = location.coordinates;
        VE.destination = location.destination;
        VE.polygonid = Integer.parseInt(polygonid);

        for (Cell cell : result.rawCells()) {
          String Qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
          String Value = Bytes.toString(CellUtil.cloneValue(cell));

          if (Qualifier.equals("entertime")) {
            VE.entrytime = DateTime.parse(Value, rawformatter).getMillis();
          } else if (Qualifier.equals("entercoordinates")) {
            VE.entrycoordinates = Value;
          }
        }

        events.put(VE.polygonid, VE);
      }

      Result_event.close();
      return events;
    }
  public synchronized ResultScanner getScanner(
      final TransactionState transactionState, final Scan scan) throws IOException {
    if (LOG.isTraceEnabled()) LOG.trace("Enter TransactionalTable.getScanner");
    if (scan.getCaching() <= 0) {
      scan.setCaching(getScannerCaching());
    }

    Long value = 0L;
    TransactionalScanner scanner = new TransactionalScanner(this, transactionState, scan, value);

    return scanner;
  }
  public static Job startJob(String[] args) throws IOException {

    // args[0] = hbase table name
    // args[1] = zookeeper

    Configuration hConf = HBaseConfiguration.create(new Configuration());
    hConf.set("hbase.zookeeper.quorum", args[1]);
    hConf.set("scan.table", args[0]);
    hConf.set("hbase.zookeeper.property.clientPort", "2181");

    Scan scan = new Scan();
    // scan.setFilter(rowColBloomFilter());

    Job job = new Job(hConf);
    job.setJobName("BSBM-Q11-RepartitionJoin");
    job.setJarByClass(RepartitionJoinQ11.class);
    // Change caching to speed up the scan
    scan.setCaching(500);
    scan.setMaxVersions(200);
    scan.setCacheBlocks(false);

    // Mapper settings
    TableMapReduceUtil.initTableMapperJob(
        args[0], // input HBase table name
        scan, // Scan instance to control CF and attribute selection
        RepartitionMapper.class, // mapper
        CompositeKeyWritable.class, // mapper output key
        KeyValueArrayWritable.class, // mapper output value
        job);

    // Repartition settings
    job.setPartitionerClass(CompositePartitioner.class);
    job.setSortComparatorClass(CompositeSortComparator.class);
    job.setGroupingComparatorClass(CompositeGroupingComparator.class);

    // Reducer settings
    job.setReducerClass(SharedServices.RepartitionJoin_Reducer.class); // reducer class
    job.setNumReduceTasks(1); // at least one, adjust as required

    FileOutputFormat.setOutputPath(job, new Path("output/BSBMQ11"));

    try {
      job.waitForCompletion(true);
    } catch (ClassNotFoundException | InterruptedException e) {
      e.printStackTrace();
    }

    return job;
  }
  public static void main(String[] args) throws IOException, SolrServerException {

    final Configuration conf;
    HttpSolrServer solrServer = new HttpSolrServer("http://c1master:8983/solr");
    conf = HBaseConfiguration.create();

    // Define Hbase Table Name
    HTable table = new HTable(conf, "test_global_shop");
    Scan scan = new Scan();

    // Define Hbase Column Family
    scan.addFamily(Bytes.toBytes("shop"));
    scan.setCaching(1000);
    scan.setCacheBlocks(false);
    ResultScanner ss = table.getScanner(scan);

    System.out.println("start Storing...");
    int i = 0;
    try {
      for (Result r : ss) {
        SolrInputDocument solrDoc = new SolrInputDocument();
        solrDoc.addField("key", new String(r.getRow()));
        for (KeyValue kv : r.raw()) {
          String fieldName = new String(kv.getQualifier());
          String fieldValue = new String(kv.getValue());
          if (fieldName.equalsIgnoreCase("address")
              || fieldName.equalsIgnoreCase("category")
              || fieldName.equalsIgnoreCase("name")
              || fieldName.equalsIgnoreCase("province")
              || fieldName.equalsIgnoreCase("tel")) {
            solrDoc.addField(fieldName, fieldValue);
          }
        }
        solrServer.add(solrDoc);
        solrServer.commit(true, true, true);
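        // Committing after every document is expensive; a single commit after the loop is the usual pattern.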
        i = i + 1;
        System.out.println("Already Succcess " + i + " number data");
      }
      ss.close();
      table.close();
      System.out.println("done !");
    } catch (IOException e) {
    } finally {
      ss.close();
      table.close();
      System.out.println("error !");
    }
  }
    public static void updateExistingEventsToEndAtLastLocation(
        Table VTEvent_Table, long imo, VesselLocation lastlocation) throws IOException {
      // Update existing events that started before the first new location and end after it,
      // so that they end at the last location.

      // Find existing events that started BEFORE the first new location and end after the first
      Scan getEventStartedBeforeAndEndAfter = new Scan();
      getEventStartedBeforeAndEndAfter
          .setStartRow(
              Bytes.toBytes(
                  LpadNum(imo, 7)
                      + LpadNum(Long.MAX_VALUE - lastlocation.recordtime, 19)
                      + "0000000000"))
          .setStopRow(Bytes.toBytes(LpadNum(imo, 7) + LpadNum(Long.MAX_VALUE, 19) + "9999999999"))
          .addColumn(details, exittime);
      getEventStartedBeforeAndEndAfter.setCaching(100);

      Filter ExistTimeValuefilter =
          new ValueFilter(
              CompareFilter.CompareOp.GREATER,
              new BinaryComparator(
                  Bytes.toBytes(new DateTime(lastlocation.recordtime).toString(rawformatter))));
      getEventStartedBeforeAndEndAfter.setFilter(ExistTimeValuefilter);

      ResultScanner Result_eventcross = VTEvent_Table.getScanner(getEventStartedBeforeAndEndAfter);

      List<Put> puts = new ArrayList<Put>();
      for (Result res : Result_eventcross) {

        // vessel event table
        // rowkey: imo(7)+timestamp(19 desc)+polygonid(8)
        // qualifier:entrytime,entrycoordinates,exittime,exitcoordinates,destination
        byte[] rowkey = res.getRow();
        Put updateevent = new Put(rowkey);
        updateevent.addColumn(
            details,
            exittime,
            Bytes.toBytes(new DateTime(lastlocation.recordtime).toString(rawformatter)));
        updateevent.addColumn(details, coordinates, Bytes.toBytes(lastlocation.coordinates));
        updateevent.addColumn(details, destination, Bytes.toBytes(lastlocation.destination));
        puts.add(updateevent);
      }

      Result_eventcross.close();
      VTEvent_Table.put(puts);
    }
Example #19
  private static void createMapReduceJob(
      String tableNameToIndex, Configuration conf, int caching, int versions)
      throws IOException, InterruptedException, ClassNotFoundException {
    // Set the details to TableInputFormat
    Scan s = new Scan();
    s.setCaching(caching);
    s.setMaxVersions(versions);
    conf.set(TableInputFormat.INPUT_TABLE, tableNameToIndex);

    Set<Entry<String, List<String>>> entrySet = cfs.entrySet();
    for (Entry<String, List<String>> entry : entrySet) {
      List<String> quals = entry.getValue();
      addColumn(quals, Bytes.toBytes(entry.getKey()), s);
    }
    Job job = new Job(conf, "CreateIndex");
    String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);

    TableMapReduceUtil.initTableMapperJob(
        tableNameToIndex, // input table
        s, // Scan instance to control CF and attribute selection
        IndexCreationMapper.class, // mapper class
        ImmutableBytesWritable.class, // mapper output key
        Put.class, // mapper output value
        job);

    TableMapReduceUtil.initTableReducerJob(
        IndexUtils.getIndexTableName(tableNameToIndex), // output
        // table
        null, // reducer class
        job);

    if (hfileOutPath != null) {
      HTable table = new HTable(conf, tableNameToIndex);
      job.setReducerClass(KeyValueSortReducer.class);
      Path outputDir = new Path(hfileOutPath);
      FileOutputFormat.setOutputPath(job, outputDir);
      HFileOutputFormat.configureIncrementalLoad(job, table);
    } else {
      job.setNumReduceTasks(0);
    }

    TableMapReduceUtil.addDependencyJars(
        job.getConfiguration(), com.google.common.base.Preconditions.class);
    job.waitForCompletion(true);
    assert job.isComplete();
  }
Example #20
  @SuppressWarnings({"rawtypes", "unchecked"})
  public void Read() throws IOException {
    List list = new ArrayList();
    Scan scan = new Scan();
    scan.setBatch(0);
    scan.setCaching(10000);
    scan.setMaxVersions();
    scan.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("total"));
    ResultScanner rsScanner = table.getScanner(scan);
    for (Result rs : rsScanner) {
      String date = Bytes.toString(rs.getRow());
      String total = Bytes.toString(rs.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("total")));
      list.add(date + "\t" + total);
    }

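    // Assumes at least 14 rows were read: prints row i alongside row i + 7.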
    for (int i = 0; i < 7; i++)
      System.out.println((String) list.get(i) + "\t" + (String) list.get(i + 7));
  }
Example #21
    private void loadIPs() {
      dns = new HashMap(100000000);
      unknownHosts = new HashMap(1000000);
      querying = new HashMap(100000);

      try {
        int statsCommit = 500000;

        HConnection connection = HConnectionManager.createConnection(HBaseConfiguration.create());
        HTableInterface fetchFailTable = connection.getTable("fetchFail");
        Scan scan = new Scan();
        scan.setCaching(statsCommit);

        List<Filter> filters = new ArrayList<Filter>();
        Filter filter = new ColumnPrefixFilter(Bytes.toBytes("ip"));
        filters.add(filter);
        FilterList filterList = new FilterList(filters);
        scan.setFilter(filterList);

        ResultScanner rs = fetchFailTable.getScanner(scan);
        long cnt = 0;
        for (Result r : rs) {
          NavigableMap<byte[], byte[]> map = r.getFamilyMap(Bytes.toBytes("cf"));
          String ip = Bytes.toString(map.get(Bytes.toBytes("ip")));
          String host = Bytes.toString(r.getRow()).split("��")[0];
          if (host != null && ip != null) {
            dns.put(host, ip);
          }

          if (++cnt % statsCommit == 0) {
            LOG.info("loadIPs url=" + Bytes.toString(r.getRow()) + " cnt=" + cnt);
          }
        }
        rs.close();
        fetchFailTable.close();
        LOG.info("load hostip cache=" + dns.size());

        connection.close();
      } catch (Exception e) {
        e.printStackTrace();
      } finally {
        //
      }
    }
  // MapReduce Stage-1 Job
  public static Job startJob_Stage1(String[] args, Configuration hConf) throws IOException {

    // args[0] = hbase table name
    // args[1] = zookeeper

    /*
     * MapReduce Stage-1 Job
     * Retrieve a list of subjects and their attributes
     */
    Scan scan1 = new Scan();
    Job job1 = new Job(hConf);
    job1.setJobName("BSBM-Q8-RepartitionJoin");
    job1.setJarByClass(RepartitionJoinQ8.class);
    // Change caching and number of time stamps to speed up the scan
    scan1.setCaching(500);
    scan1.setMaxVersions(200);
    scan1.setCacheBlocks(false);

    // Mapper settings
    TableMapReduceUtil.initTableMapperJob(
        args[0], // input HBase table name
        scan1, // Scan instance to control CF and attribute selection
        RepartitionMapper.class, // mapper class
        CompositeKeyWritable.class, // mapper output key
        KeyValueArrayWritable.class, // mapper output value
        job1);

    // Reducer settings
    job1.setReducerClass(RepartitionReducer.class);

    job1.setOutputFormatClass(TextOutputFormat.class);
    // job1.setNumReduceTasks(1);  // Uncomment this if running into problems on 2+ node cluster
    FileOutputFormat.setOutputPath(job1, new Path("output/BSBMQ8"));

    try {
      job1.waitForCompletion(true);
    } catch (ClassNotFoundException | InterruptedException e) {
      e.printStackTrace();
    }

    return job1;
  }
    public static List<VesselLocation> getLocationsBetween(
        Table VTLocation_Table, String imo_str, long first_timestamp, long last_timestamp)
        throws IOException {

      // HBase shell equivalent: scan 'cdb_vessel:vessel_location', {FILTER => "PrefixFilter('0000003162')"}
      Scan GetExistingLocations = new Scan();
      GetExistingLocations.setStartRow(
              Bytes.toBytes(imo_str + LpadNum(Long.MAX_VALUE - last_timestamp, 19)))
          .setStopRow(Bytes.toBytes(imo_str + LpadNum(Long.MAX_VALUE - first_timestamp + 1, 19)));
      GetExistingLocations.setCaching(1000);

      ResultScanner Result_ExistingLocations = VTLocation_Table.getScanner(GetExistingLocations);
      List<VesselLocation> result = new ArrayList<VesselLocation>();

      for (Result res : Result_ExistingLocations) {
        VesselLocation VL = new VesselLocation();

        for (Cell cell : res.rawCells()) {
          String Qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
          String Value = Bytes.toString(CellUtil.cloneValue(cell));

          if (Qualifier.equals("coordinates")) {
            VL.coordinates = Value;
          } else if (Qualifier.equals("speed")) {
            VL.speed = Value;
          } else if (Qualifier.equals("destination")) {
            VL.destination = Value;
          } else if (Qualifier.equals("timestamp")) {
            VL.recordtime = DateTime.parse(Value, rawformatter).getMillis();
          } else if (Qualifier.equals("previouslocation")) {
            VL.previouslocation = Value;
          } else if (Qualifier.equals("nextlocation")) {
            VL.nextlocation = Value;
          }
        }
        result.add(VL);
      }

      Result_ExistingLocations.close();

      return result;
    }
Example #24
  public static void scanNum(String tableStr) {
    try {
      Configuration conf = HBaseConfiguration.create();
      Scan scan = new Scan();
      scan.setCaching(5000);
      Filter f = new FirstKeyOnlyFilter();
      scan.setFilter(f);

      HTable htable = new HTable(conf, Bytes.toBytes(tableStr));
      ResultScanner scanner = htable.getScanner(scan);
      int count = 0;
      for (Result scannerRst : scanner) {
        count++;
      }
      System.out.println(tableStr + ":" + count);
      scanner.close();
      htable.close();
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
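For pure counting, FirstKeyOnlyFilter is often combined with KeyOnlyFilter so that not even the first cell's value is shipped back. A hedged variant of the filter setup above:

  Filter f = new FilterList(new FirstKeyOnlyFilter(), new KeyOnlyFilter());
  scan.setFilter(f);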
Example #25
  /**
   * Deletes the specified table with all its columns. ATTENTION: Invoking this method will delete
   * the table if it exists and therefore causes data loss.
   */
  @Override
  public void clearStorage() throws StorageException {
    HBaseAdmin adm = getAdminInterface();

    try { // first of all, check if table exists, if not - we are done
      if (!adm.tableExists(tableName)) {
        logger.debug("clearStorage() called before table {} was created, skipping.", tableName);
        return;
      }
    } catch (IOException e) {
      throw new TemporaryStorageException(e);
    }

    HTable table = null;

    try {
      table = new HTable(hconf, tableName);

      Scan scan = new Scan();
      scan.setBatch(100);
      scan.setCacheBlocks(false);
      scan.setCaching(2000);

      ResultScanner scanner = null;

      try {
        scanner = table.getScanner(scan);

        for (Result res : scanner) {
          table.delete(new Delete(res.getRow()));
        }
      } finally {
        IOUtils.closeQuietly(scanner);
      }
    } catch (IOException e) {
      throw new TemporaryStorageException(e);
    } finally {
      IOUtils.closeQuietly(table);
    }
  }
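The loop above issues one RPC per deleted row. A hedged variant that batches deletes through HTable's list-based delete, reusing the table and scanner from the example:

  List<Delete> batch = new ArrayList<Delete>();
  for (Result res : scanner) {
    batch.add(new Delete(res.getRow()));
    if (batch.size() >= 1000) {
      table.delete(batch); // successfully applied deletes are removed from the list
      batch.clear();
    }
  }
  if (!batch.isEmpty()) {
    table.delete(batch);
  }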
Example #26
 public static void readHtable() throws Exception {
   Job job = new Job(conf, "ExampleRead");
   job.setJarByClass(HbaseMR.class);
   Scan scan = new Scan();
   scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
   scan.setCacheBlocks(false); // don't set to true for MR jobs
   TableMapReduceUtil.initTableMapperJob(
       "", // input HBase table name
       scan, // Scan instance to control CF and attribute selection
       MyMapper.class, // mapper
       null, // mapper output key
       null, // mapper output value
       job);
   job.setOutputFormatClass(NullOutputFormat.class); // because we aren't emitting anything from mapper
   boolean b = job.waitForCompletion(true);
   if (!b) {
     throw new IOException("error with job!");
   }
 }
Example #27
  @SuppressWarnings({"resource", "rawtypes", "unchecked"})
  public static void main(String[] args) throws IOException {
    Configuration config = HBaseConfiguration.create();
    config.set("hbase.master", "192.168.32.128:60000");
    config.set("hbase.zookeeper.property.clientPort", "2181");
    config.set("hbase.zookeeper.quorum", "192.168.32.128");
    HTable table = new HTable(config, Bytes.toBytes("weibo"));
    List list = new ArrayList();
    Scan scan = new Scan();
    scan.setBatch(0);
    scan.setCaching(10000);
    scan.setMaxVersions();
    scan.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("total"));
    ResultScanner rsScanner = table.getScanner(scan);
    for (Result rs : rsScanner) {
      String date = Bytes.toString(rs.getRow());
      String total = Bytes.toString(rs.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("total")));
      list.add(date + "\t" + total);
    }

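    // Assumes at least 14 rows were read: prints row i alongside row i + 7.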
    for (int i = 0; i < 7; i++)
      System.out.println((String) list.get(i) + "\t" + (String) list.get(i + 7));
  }
  @Test
  public void shouldRunMapReduce() throws Exception {
    // given
    Configuration configuration = HBaseConfiguration.create();

    TableFactory.recreateTable(
        configuration, Bytes.toString(UsersDao.TABLE_NAME), Bytes.toString(UsersDao.FAMILY_NAME));
    UserDataFactory.insertTestData();

    // map reduce
    Job job = new Job(configuration, "Count Users");
    job.setJarByClass(CountUsersMapper.class);

    Scan scan = new Scan();
    scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
    scan.setCacheBlocks(false); // don't set to true for MR jobs
    scan.addColumn(UsersDao.FAMILY_NAME, UsersDao.FORENAME_COL);

    // mapper
    TableMapReduceUtil.initTableMapperJob(
        Bytes.toString(UsersDao.TABLE_NAME),
        scan,
        CountUsersMapper.class,
        ImmutableBytesWritable.class,
        Result.class,
        job);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setNumReduceTasks(0);

    // when
    boolean succeeded = job.waitForCompletion(true);

    // then
    assertThat(succeeded).isTrue();
    assertThat(job.getCounters().findCounter(CountUsersMapper.Counters.USER_COUNT).getValue())
        .isGreaterThan(99);
  }
Example #29
  /**
   * Note that the stop row of a scan is excluded by default.
   *
   * @param rowRange start row and, optionally, an exclusive stop row
   * @param filterList filters to apply to the scan
   * @param family column families to read (only family[0] is used for column selection)
   * @param columns qualifiers within family[0]; if null, no explicit column restriction is applied
   * @param maxVersion maximum number of versions to return, if positive
   * @return a scanner over the matching rows, or null if the scan could not be created
   * @throws Exception if the table handle or cache size has not been set
   */
  public ResultScanner getResultSet(
      String[] rowRange, FilterList filterList, String[] family, String[] columns, int maxVersion)
      throws Exception {
    if (table == null) throw new Exception("No table handler");
    if (cacheSize < 0) throw new Exception("should set cache size before scanning");

    Scan scan = null;
    ResultScanner rscanner = null;

    try {
      scan = new Scan();

      scan.setCaching(this.cacheSize);
      scan.setCacheBlocks(blockCached);
      scan.setFilter(filterList);
      if (maxVersion > 0) scan.setMaxVersions(maxVersion);

      // the scan excludes the stop row by default, so widen the stop row if the last row must be included
      if (rowRange != null) {
        scan.setStartRow(rowRange[0].getBytes());
        if (rowRange.length == 2 && rowRange[1] != null) scan.setStopRow((rowRange[1]).getBytes());
      }

      if (columns != null) {
        for (int i = 0; i < columns.length; i++) {
          scan.addColumn(family[0].getBytes(), columns[i].getBytes());
        }
      }

      rscanner = this.table.getScanner(scan);

    } catch (Exception e) {
      e.printStackTrace();
    }

    return rscanner;
  }
Example #30
  @Override
  protected StateScanner scanStates(byte[] startRow, byte[] stopRow) throws IOException {
    Scan scan = new Scan(startRow, stopRow);
    scan.setMaxVersions(1);
    scan.addColumn(QueueEntryRow.COLUMN_FAMILY, stateColumnName);
    scan.setCaching(MAX_SCAN_ROWS);
    // TODO: Add filter for getting committed processed rows only. Need to refactor
    // HBaseQueue2Consumer to extract that.
    final ResultScanner scanner =
        DistributedScanner.create(hTable, scan, keyDistributor, scanExecutor);
    return new StateScanner() {

      private Result result;

      @Override
      public boolean nextStateRow() throws IOException {
        result = scanner.next();
        return result != null;
      }

      @Override
      public byte[] getRow() {
        return keyDistributor.getOriginalKey(result.getRow());
      }

      @Override
      public byte[] getState() {
        return result.value();
      }

      @Override
      public void close() throws IOException {
        scanner.close();
      }
    };
  }
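A hedged usage loop for the returned StateScanner; startRow, stopRow, and handleState are placeholders, not names from the source:

  StateScanner states = scanStates(startRow, stopRow);
  try {
    while (states.nextStateRow()) {
      handleState(states.getRow(), states.getState());
    }
  } finally {
    states.close();
  }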