@Override
public void searchDB(String keyword) {
  long t0 = System.nanoTime();
  try {
    // First mapreduce phase setup
    HBaseConfiguration conf = config;
    Job job;
    job = new Job(conf, "MapReducePhase1");
    job.setJarByClass(MapReduceHbaseDB.class);
    Scan scan = new Scan();
    String columns = "myColumnFamily";
    scan.addColumns(columns);
    scan.setCaching(10000);

    // Second mapreduce phase setup
    HBaseConfiguration conf2 = new HBaseConfiguration();
    Job job2 = new Job(conf2, "MapReducePhase2");
    job2.setJarByClass(MapReduceHbaseDB.class);
    Scan scan2 = new Scan();
    String columns2 = "resultF";
    scan2.addColumns(columns2);
    scan2.setCaching(10000);

    // Execution of the first mapreduce phase
    TableMapReduceUtil.initTableMapperJob(
        "myTable", scan, Mapper1.class, Text.class, Text.class, job);
    TableMapReduceUtil.initTableReducerJob("result", Reducer1.class, job);
    job.waitForCompletion(true);
    long t2 = System.nanoTime();

    // Execution of the second mapreduce phase
    TableMapReduceUtil.initTableMapperJob(
        "result", scan2, Mapper2.class, Text.class, IntWritable.class, job2);
    TableMapReduceUtil.initTableReducerJob("result2", Reducer2.class, job2);
    job2.waitForCompletion(true);
    long t1 = System.nanoTime();

    double totalTime = (t1 - t0) / 1000000000.0;
    System.out.println("Total time for the search : " + totalTime + " seconds");
    double firstPhaseTime = (t2 - t0) / 1000000000.0;
    System.out.println("Time for the first mapreduce phase : " + firstPhaseTime + " seconds");
    double secondPhaseTime = (t1 - t2) / 1000000000.0;
    System.out.println("Time for the second mapreduce phase : " + secondPhaseTime + " seconds");
  } catch (IOException e) {
    e.printStackTrace();
  } catch (InterruptedException e) {
    e.printStackTrace();
  } catch (ClassNotFoundException e) {
    e.printStackTrace();
  }
}
@Test
public void shouldRunMapReduce() throws Exception {
  // given
  Job job = new Job(configuration, "Average Rating");
  job.setJarByClass(AverageRatingMapper.class);

  Scan scan = new Scan();
  scan.setCaching(500);
  scan.setCacheBlocks(false);
  scan.addFamily(Bytes.toBytes(LoadMovieRatingData.FAMILY_NAME));

  TableMapReduceUtil.initTableMapperJob(
      LoadMovieRatingData.TABLE_NAME,
      scan,
      AverageRatingMapper.class,
      Text.class,
      DoubleWritable.class,
      job);

  job.setReducerClass(RatingExportReducer.class);
  job.setNumReduceTasks(1);
  FileOutputFormat.setOutputPath(
      job, new Path("/tmp/mr/mySummaryFile_" + System.currentTimeMillis()));

  // when
  boolean succeeded = job.waitForCompletion(true);

  // then
  assertThat(succeeded).isTrue();
}
public Scan generateScan(
    String[] rowRange, FilterList filterList, String[] family, String[] columns, int maxVersion)
    throws Exception {
  if (table == null) throw new Exception("No table handler");
  if (cacheSize < 0) throw new Exception("should set cache size before scanning");

  Scan scan = null;
  try {
    scan = new Scan();
    scan.setCaching(this.cacheSize);
    scan.setCacheBlocks(this.blockCached);
    scan.setFilter(filterList);
    if (maxVersion > 0) scan.setMaxVersions(maxVersion);

    if (rowRange != null) {
      scan.setStartRow(rowRange[0].getBytes());
      if (rowRange.length == 2) scan.setStopRow(rowRange[1].getBytes());
    }

    if (columns != null) {
      for (int i = 0; i < columns.length; i++) {
        scan.addColumn(family[0].getBytes(), columns[i].getBytes());
        // System.out.println(family[i]+";"+columns[i]);
      }
    } else {
      scan.addFamily(family[0].getBytes());
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
  return scan;
}
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
  String tableName = args[0];
  Job job = new Job(conf, NAME + "_" + tableName);
  job.setJarByClass(CachingRowCounter.class);

  // Columns are space delimited
  StringBuilder sb = new StringBuilder();
  final int columnoffset = 1;
  for (int i = columnoffset; i < args.length; i++) {
    if (i > columnoffset) {
      sb.append(" ");
    }
    sb.append(args[i]);
  }

  Scan scan = new Scan();
  scan.setFilter(new FirstKeyOnlyFilter());
  if (sb.length() > 0) {
    for (String columnName : sb.toString().split(" ")) {
      String[] fields = columnName.split(":");
      if (fields.length == 1) {
        scan.addFamily(Bytes.toBytes(fields[0]));
      } else {
        scan.addColumn(Bytes.toBytes(fields[0]), Bytes.toBytes(fields[1]));
      }
    }
  }
  scan.setCaching(100);

  // Second argument is the table name.
  job.setOutputFormatClass(NullOutputFormat.class);
  TableMapReduceUtil.initTableMapperJob(
      tableName, scan, RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
  job.setNumReduceTasks(0);
  return job;
}
public static void main(String[] args) throws Exception {
  new JobConf().setSpeculativeExecution(false);
  Configuration conf = new Configuration();
  conf.set("es.nodes", ES_NODES);
  conf.set("es.resource", ES_RESOURCE);
  conf.set("es.mapping.id", HBaseTableMapper.ID_FIELD.toString());
  conf.set("es.batch.size.bytes", "10mb");
  conf.set("es.batch.size.entries", "10000");
  conf.set("es.batch.write.refresh", "false");

  Job job = new Job(conf);
  job.setJarByClass(BulkIndex.class);
  job.setMapperClass(HBaseTableMapper.class);
  job.setNumReduceTasks(0);
  job.setSpeculativeExecution(false);
  job.setOutputFormatClass(BulkProcessorOutputFormat.class);
  job.setMapOutputValueClass(Text.class);

  Scan scan = new Scan();
  scan.setCaching(1000);
  scan.setCacheBlocks(false);

  TableMapReduceUtil.initTableMapperJob(
      BulkLoad.HBASE_TABLE_NAME,
      scan,
      HBaseTableMapper.class,
      NullWritable.class,
      MapWritable.class,
      job);

  job.waitForCompletion(true);
}
/**
 * Performs a full scan of a catalog table.
 *
 * @param catalogTracker
 * @param visitor Visitor invoked against each row.
 * @param startrow Where to start the scan. Pass null if want to begin scan at first row.
 * @param scanRoot True if we are to scan <code>-ROOT-</code> rather than <code>.META.</code>, the
 *     default (pass false to scan .META.)
 * @throws IOException
 */
static void fullScan(
    CatalogTracker catalogTracker,
    final Visitor visitor,
    final byte[] startrow,
    final boolean scanRoot)
    throws IOException {
  Scan scan = new Scan();
  if (startrow != null) scan.setStartRow(startrow);
  if (startrow == null && !scanRoot) {
    int caching =
        catalogTracker
            .getConnection()
            .getConfiguration()
            .getInt(HConstants.HBASE_META_SCANNER_CACHING, 100);
    scan.setCaching(caching);
  }
  scan.addFamily(HConstants.CATALOG_FAMILY);
  HTable metaTable = scanRoot ? getRootHTable(catalogTracker) : getMetaHTable(catalogTracker);
  ResultScanner scanner = metaTable.getScanner(scan);
  try {
    Result data;
    while ((data = scanner.next()) != null) {
      if (data.isEmpty()) continue;
      // Break if visit returns false.
      if (!visitor.visit(data)) break;
    }
  } finally {
    scanner.close();
    metaTable.close();
  }
}
public static void main(String[] args) throws Exception {
  Configuration con = new Configuration();
  String[] otherArgs = new GenericOptionsParser(con, args).getRemainingArgs();
  HBaseConfiguration conf = new HBaseConfiguration();
  Job job = new Job(conf, "AverageCalc");
  job.setJarByClass(AverageCalculator.class);

  Scan scan = new Scan();
  scan.setCaching(500);
  scan.setCacheBlocks(false);
  scan.addFamily(Bytes.toBytes("Post"));

  FilterList li = new FilterList(FilterList.Operator.MUST_PASS_ALL);
  SingleColumnValueFilter filter =
      new SingleColumnValueFilter(
          Bytes.toBytes("Post"), Bytes.toBytes("PostTypeId"), CompareOp.EQUAL, Bytes.toBytes("1"));
  li.addFilter(filter);
  scan.setFilter(li);

  FileOutputFormat.setOutputPath(job, new Path(otherArgs[0]));
  job.setOutputKeyClass(Text.class);
  TableMapReduceUtil.initTableMapperJob(
      "bigd24-hbase-sample", scan, Mapper1.class, Text.class, IntWritable.class, job);
  job.setReducerClass(Reducer1.class);
  job.setOutputValueClass(FloatWritable.class);
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
private Scan createScan(String applicationName, Range range, boolean scanBackward) {
  Scan scan = new Scan();
  scan.setCaching(this.scanCacheSize);

  byte[] bApplicationName = Bytes.toBytes(applicationName);
  byte[] traceIndexStartKey = SpanUtils.getTraceIndexRowKey(bApplicationName, range.getFrom());
  byte[] traceIndexEndKey = SpanUtils.getTraceIndexRowKey(bApplicationName, range.getTo());

  if (scanBackward) {
    // start key is replaced by end key because key has been reversed
    scan.setStartRow(traceIndexEndKey);
    scan.setStopRow(traceIndexStartKey);
  } else {
    scan.setReversed(true);
    scan.setStartRow(traceIndexStartKey);
    scan.setStopRow(traceIndexEndKey);
  }

  scan.addFamily(HBaseTables.APPLICATION_TRACE_INDEX_CF_TRACE);
  scan.setId("ApplicationTraceIndexScan");

  // toString() method of Scan converts a message to json format so it is slow for the first time.
  logger.trace("create scan:{}", scan);
  return scan;
}
public static void htableFile() throws Exception {
  Job job = new Job(conf, "ExampleSummaryToFile");
  job.setJarByClass(HbaseMR.class); // class that contains mapper

  Scan scan = new Scan();
  scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
  scan.setCacheBlocks(false); // don't set to true for MR jobs
  // set other scan attrs

  TableMapReduceUtil.initTableMapperJob(
      "sourceTable", // input table
      scan, // Scan instance to control CF and attribute selection
      MyMapper.class, // mapper class
      Text.class, // mapper output key
      IntWritable.class, // mapper output value
      job);
  job.setReducerClass(MyReducer4.class); // reducer class
  job.setNumReduceTasks(1); // at least one, adjust as required

  // Set the output path on the job itself (not on a throwaway JobConf), otherwise the output
  // directory never reaches the submitted job. Adjust directories as required.
  FileOutputFormat.setOutputPath(job, new Path("/tmp/mr/mySummaryFile"));

  boolean b = job.waitForCompletion(true);
  if (!b) {
    throw new IOException("error with job!");
  }
}
public static void main(String args[]) {
  if (args.length == 0) {
    System.out.println("JavaHBaseDistributedScan {master} {tableName}");
    return; // avoid an ArrayIndexOutOfBoundsException when no arguments are given
  }

  String master = args[0];
  String tableName = args[1];

  JavaSparkContext jsc = new JavaSparkContext(master, "JavaHBaseDistributedScan");
  jsc.addJar("SparkHBase.jar");

  Configuration conf = HBaseConfiguration.create();
  conf.addResource(new Path("/etc/hbase/conf/core-site.xml"));
  conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

  JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);

  Scan scan = new Scan();
  scan.setCaching(100);

  JavaRDD<Tuple2<byte[], List<Tuple3<byte[], byte[], byte[]>>>> javaRdd =
      hbaseContext.hbaseRDD(tableName, scan);

  List<Tuple2<byte[], List<Tuple3<byte[], byte[], byte[]>>>> results = javaRdd.collect();
  results.size();
}
public static void readWriteHtable() throws Exception {
  Job job = new Job(conf, "ExampleReadWrite");
  job.setJarByClass(HbaseMR.class); // class that contains mapper

  Scan scan = new Scan();
  scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
  scan.setCacheBlocks(false); // don't set to true for MR jobs
  // set other scan attrs

  TableMapReduceUtil.initTableMapperJob(
      "sourceTable", // input table
      scan, // Scan instance to control CF and attribute selection
      MyMapper2.class, // mapper class
      null, // mapper output key
      null, // mapper output value
      job);
  TableMapReduceUtil.initTableReducerJob(
      "targetTable", // output table
      null, // reducer class
      job);
  job.setNumReduceTasks(0);

  boolean b = job.waitForCompletion(true);
  if (!b) {
    throw new IOException("error with job!");
  }
}
/**
 * Get a collection of Scans, one per region, that cover the range of the table having the given
 * key prefix. These will be used as the map task input splits.
 */
public static List<Scan> scansThisCubeOnly(byte[] keyPrefix, byte[][] splitKeys)
    throws IOException {
  Scan copyScan = new Scan();
  copyScan.setCaching(5000);
  copyScan.setCacheBlocks(false);

  // Hack: generate a key that probably comes after all this cube's keys but doesn't include any
  // keys not belonging to this cube.
  byte[] keyAfterCube = ArrayUtils.addAll(keyPrefix, fiftyBytesFF);

  List<Scan> scans = new ArrayList<Scan>();
  Scan scanUnderConstruction = new Scan(copyScan);

  for (byte[] splitKey : splitKeys) {
    scanUnderConstruction.setStopRow(splitKey);

    // Coerce scan to only touch keys belonging to this cube
    Scan truncated = truncateScan(scanUnderConstruction, keyPrefix, keyAfterCube);
    if (truncated != null) {
      scans.add(truncated);
    }

    scanUnderConstruction = new Scan(copyScan);
    scanUnderConstruction.setStartRow(splitKey);
  }

  // There's another region from last split key to the end of the table.
  Scan truncated = truncateScan(scanUnderConstruction, keyPrefix, keyAfterCube);
  if (truncated != null) {
    scans.add(truncated);
  }
  return scans;
}
public static List<Delete> GetDeleteEventsBetween(
    Table VTEvent_Table, String imo_str, long first_timestamp, long last_timestamp)
    throws IOException {
  // scan 'cdb_vessel:vessel_event',{FILTER=>"(PrefixFilter('0000003162')"}
  Scan GetEventsBetween = new Scan();
  GetEventsBetween
      .setStartRow(
          Bytes.toBytes(imo_str + LpadNum(Long.MAX_VALUE - last_timestamp, 19) + "0000000000"))
      .setStopRow(
          Bytes.toBytes(
              imo_str + LpadNum(Long.MAX_VALUE - first_timestamp + 1, 19) + "9999999999"))
      .addColumn(details, exittime);
  GetEventsBetween.setCaching(100);

  Filter ExistTimeValuefilter =
      new ValueFilter(
          CompareFilter.CompareOp.LESS_OR_EQUAL,
          new BinaryComparator(
              Bytes.toBytes(new DateTime(last_timestamp).toString(rawformatter))));
  GetEventsBetween.setFilter(ExistTimeValuefilter);

  ResultScanner Result_ExistingEvents = VTEvent_Table.getScanner(GetEventsBetween);
  List<Delete> deletes = new ArrayList<Delete>();
  for (Result res : Result_ExistingEvents) {
    deletes.add(new Delete(res.getRow()));
  }
  Result_ExistingEvents.close();
  return deletes;
}
// Get all events with exit at last location
public static Map<Integer, VesselEvent> getAllEventsStartBeforeEndAfterBeforeLocation(
    Table VTEvent_Table, String IMO_str, VesselLocation location) throws IOException {
  Scan getAllEventsWithExistAtLastLocation = new Scan();
  getAllEventsWithExistAtLastLocation
      .setStartRow(
          Bytes.toBytes(
              IMO_str + LpadNum(Long.MAX_VALUE - location.recordtime, 19) + "0000000000"))
      .setStopRow(Bytes.toBytes(IMO_str + LpadNum(Long.MAX_VALUE, 19) + "9999999999"))
      .addColumn(details, exittime);
  getAllEventsWithExistAtLastLocation.setCaching(100);

  Filter ExistTimeValuefilter =
      new ValueFilter(
          CompareFilter.CompareOp.GREATER_OR_EQUAL,
          new BinaryComparator(
              Bytes.toBytes(new DateTime(location.recordtime).toString(rawformatter))));
  getAllEventsWithExistAtLastLocation.setFilter(ExistTimeValuefilter);

  ResultScanner Result_event = VTEvent_Table.getScanner(getAllEventsWithExistAtLastLocation);
  Map<Integer, VesselEvent> events = new HashMap<Integer, VesselEvent>();

  for (Result res : Result_event) {
    Get get = new Get(res.getRow());
    get.addColumn(details, entrytime);
    get.addColumn(details, entrycoordinates);
    Result result = VTEvent_Table.get(get);

    String rowkey = Bytes.toString(result.getRow());
    String polygonid = rowkey.substring(26);

    VesselEvent VE = new VesselEvent();
    VE.exittime = location.recordtime;
    VE.exitcoordinates = location.coordinates;
    VE.destination = location.destination;
    VE.polygonid = Integer.parseInt(polygonid);

    for (Cell cell : result.rawCells()) {
      String Qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
      String Value = Bytes.toString(CellUtil.cloneValue(cell));
      if (Qualifier.equals("entertime")) {
        VE.entrytime = DateTime.parse(Value, rawformatter).getMillis();
      } else if (Qualifier.equals("entercoordinates")) {
        VE.entrycoordinates = Value;
      }
    }
    events.put(VE.polygonid, VE);
  }
  Result_event.close();
  return events;
}
public synchronized ResultScanner getScanner(
    final TransactionState transactionState, final Scan scan) throws IOException {
  if (LOG.isTraceEnabled()) LOG.trace("Enter TransactionalTable.getScanner");
  if (scan.getCaching() <= 0) {
    scan.setCaching(getScannerCaching());
  }
  Long value = (long) 0;
  TransactionalScanner scanner = new TransactionalScanner(this, transactionState, scan, value);
  return scanner;
}
public static Job startJob(String[] args) throws IOException {
  // args[0] = hbase table name
  // args[1] = zookeeper

  Configuration hConf = HBaseConfiguration.create(new Configuration());
  hConf.set("hbase.zookeeper.quorum", args[1]);
  hConf.set("scan.table", args[0]);
  hConf.set("hbase.zookeeper.property.clientPort", "2181");

  Scan scan = new Scan();
  // scan.setFilter(rowColBloomFilter());

  Job job = new Job(hConf);
  job.setJobName("BSBM-Q11-RepartitionJoin");
  job.setJarByClass(RepartitionJoinQ11.class);
  // Change caching to speed up the scan
  scan.setCaching(500);
  scan.setMaxVersions(200);
  scan.setCacheBlocks(false);

  // Mapper settings
  TableMapReduceUtil.initTableMapperJob(
      args[0], // input HBase table name
      scan, // Scan instance to control CF and attribute selection
      RepartitionMapper.class, // mapper
      CompositeKeyWritable.class, // mapper output key
      KeyValueArrayWritable.class, // mapper output value
      job);

  // Repartition settings
  job.setPartitionerClass(CompositePartitioner.class);
  job.setSortComparatorClass(CompositeSortComparator.class);
  job.setGroupingComparatorClass(CompositeGroupingComparator.class);

  // Reducer settings
  job.setReducerClass(SharedServices.RepartitionJoin_Reducer.class); // reducer class
  job.setNumReduceTasks(1); // at least one, adjust as required

  FileOutputFormat.setOutputPath(job, new Path("output/BSBMQ11"));

  try {
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  } catch (ClassNotFoundException e) {
    e.printStackTrace();
  } catch (InterruptedException e) {
    e.printStackTrace();
  }
  return job;
}
public static void main(String[] args) throws IOException, SolrServerException {
  final Configuration conf;
  HttpSolrServer solrServer = new HttpSolrServer("http://c1master:8983/solr");
  conf = HBaseConfiguration.create();

  // Define Hbase Table Name
  HTable table = new HTable(conf, "test_global_shop");
  Scan scan = new Scan();
  // Define Hbase Column Family
  scan.addFamily(Bytes.toBytes("shop"));
  scan.setCaching(1000);
  scan.setCacheBlocks(false);
  ResultScanner ss = table.getScanner(scan);

  System.out.println("start Storing...");
  int i = 0;
  try {
    for (Result r : ss) {
      SolrInputDocument solrDoc = new SolrInputDocument();
      solrDoc.addField("key", new String(r.getRow()));
      for (KeyValue kv : r.raw()) {
        String fieldName = new String(kv.getQualifier());
        String fieldValue = new String(kv.getValue());
        if (fieldName.equalsIgnoreCase("address")
            || fieldName.equalsIgnoreCase("category")
            || fieldName.equalsIgnoreCase("name")
            || fieldName.equalsIgnoreCase("province")
            || fieldName.equalsIgnoreCase("tel")) {
          solrDoc.addField(fieldName, fieldValue);
        }
      }
      solrServer.add(solrDoc);
      solrServer.commit(true, true, true);
      i = i + 1;
      System.out.println("Already Success " + i + " number data");
    }
    System.out.println("done !");
  } catch (IOException e) {
    System.out.println("error !");
    e.printStackTrace();
  } finally {
    // Close the scanner and table exactly once, whether or not indexing succeeded.
    ss.close();
    table.close();
  }
}
public static void updateExistingEventsToEndAtLastLocation(
    Table VTEvent_Table, long imo, VesselLocation lastlocation) throws IOException {
  // Update existing events that started BEFORE the first new location and end after the first
  // so that they end at the last location.

  // Find existing events that started BEFORE the first new location and end after the first
  Scan getEventStartedBeforeAndEndAfter = new Scan();
  getEventStartedBeforeAndEndAfter
      .setStartRow(
          Bytes.toBytes(
              LpadNum(imo, 7)
                  + LpadNum(Long.MAX_VALUE - lastlocation.recordtime, 19)
                  + "0000000000"))
      .setStopRow(Bytes.toBytes(LpadNum(imo, 7) + LpadNum(Long.MAX_VALUE, 19) + "9999999999"))
      .addColumn(details, exittime);
  getEventStartedBeforeAndEndAfter.setCaching(100);

  Filter ExistTimeValuefilter =
      new ValueFilter(
          CompareFilter.CompareOp.GREATER,
          new BinaryComparator(
              Bytes.toBytes(new DateTime(lastlocation.recordtime).toString(rawformatter))));
  getEventStartedBeforeAndEndAfter.setFilter(ExistTimeValuefilter);

  ResultScanner Result_eventcross = VTEvent_Table.getScanner(getEventStartedBeforeAndEndAfter);
  List<Put> puts = new ArrayList<Put>();
  for (Result res : Result_eventcross) {
    // vessel event table
    // rowkey: imo(7)+timestamp(19 desc)+polygonid(8)
    // qualifier: entrytime, entrycoordinates, exittime, exitcoordinates, destination
    byte[] rowkey = res.getRow();
    Put updateevent = new Put(rowkey);
    updateevent.addColumn(
        details,
        exittime,
        Bytes.toBytes(new DateTime(lastlocation.recordtime).toString(rawformatter)));
    updateevent.addColumn(details, coordinates, Bytes.toBytes(lastlocation.coordinates));
    updateevent.addColumn(details, destination, Bytes.toBytes(lastlocation.destination));
    puts.add(updateevent);
  }
  Result_eventcross.close();
  VTEvent_Table.put(puts);
}
private static void createMapReduceJob(
    String tableNameToIndex, Configuration conf, int caching, int versions)
    throws IOException, InterruptedException, ClassNotFoundException {
  // Set the details to TableInputFormat
  Scan s = new Scan();
  s.setCaching(caching);
  s.setMaxVersions(versions);
  conf.set(TableInputFormat.INPUT_TABLE, tableNameToIndex);

  Set<Entry<String, List<String>>> entrySet = cfs.entrySet();
  for (Entry<String, List<String>> entry : entrySet) {
    List<String> quals = entry.getValue();
    addColumn(quals, Bytes.toBytes(entry.getKey()), s);
  }

  Job job = new Job(conf, "CreateIndex");
  String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);

  TableMapReduceUtil.initTableMapperJob(
      tableNameToIndex, // input table
      s, // Scan instance to control CF and attribute selection
      IndexCreationMapper.class, // mapper class
      ImmutableBytesWritable.class, // mapper output key
      Put.class, // mapper output value
      job);

  TableMapReduceUtil.initTableReducerJob(
      IndexUtils.getIndexTableName(tableNameToIndex), // output table
      null, // reducer class
      job);

  if (hfileOutPath != null) {
    HTable table = new HTable(conf, tableNameToIndex);
    job.setReducerClass(KeyValueSortReducer.class);
    Path outputDir = new Path(hfileOutPath);
    FileOutputFormat.setOutputPath(job, outputDir);
    HFileOutputFormat.configureIncrementalLoad(job, table);
  } else {
    job.setNumReduceTasks(0);
  }

  TableMapReduceUtil.addDependencyJars(
      job.getConfiguration(), com.google.common.base.Preconditions.class);
  job.waitForCompletion(true);
  assert job.isComplete() == true;
}
@SuppressWarnings({"rawtypes", "unchecked"})
public void Read() throws IOException {
  List list = new ArrayList();

  Scan scan = new Scan();
  scan.setBatch(0);
  scan.setCaching(10000);
  scan.setMaxVersions();
  scan.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("total"));

  ResultScanner rsScanner = table.getScanner(scan);
  for (Result rs : rsScanner) {
    String date = Bytes.toString(rs.getRow());
    String total = Bytes.toString(rs.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("total")));
    list.add(date + "\t" + total);
  }

  for (int i = 0; i < 7; i++)
    System.out.println((String) list.get(i) + "\t" + (String) list.get(i + 7));
}
private void loadIPs() {
  dns = new HashMap(100000000);
  unknownHosts = new HashMap(1000000);
  querying = new HashMap(100000);
  try {
    int statsCommit = 500000;
    HConnection connection = HConnectionManager.createConnection(HBaseConfiguration.create());
    HTableInterface fetchFailTable = connection.getTable("fetchFail");

    Scan scan = new Scan();
    scan.setCaching(statsCommit);
    List<Filter> filters = new ArrayList<Filter>();
    Filter filter = new ColumnPrefixFilter(Bytes.toBytes("ip"));
    filters.add(filter);
    FilterList filterList = new FilterList(filters);
    scan.setFilter(filterList);

    ResultScanner rs = fetchFailTable.getScanner(scan);
    long cnt = 0;
    for (Result r : rs) {
      NavigableMap<byte[], byte[]> map = r.getFamilyMap(Bytes.toBytes("cf"));
      String ip = Bytes.toString(map.get(Bytes.toBytes("ip")));
      String host = Bytes.toString(r.getRow()).split("��")[0];
      if (host != null && ip != null) {
        dns.put(host, ip);
      }
      if (++cnt % statsCommit == 0) {
        LOG.info("loadIPs url=" + Bytes.toString(r.getRow()) + " cnt=" + cnt);
      }
    }
    rs.close();
    fetchFailTable.close();
    LOG.info("load hostip cache=" + dns.size());
    connection.close();
  } catch (Exception e) {
    e.printStackTrace();
  } finally {
    //
  }
}
// MapReduce Stage-1 Job
public static Job startJob_Stage1(String[] args, Configuration hConf) throws IOException {
  // args[0] = hbase table name
  // args[1] = zookeeper

  /*
   * MapReduce Stage-1 Job
   * Retrieve a list of subjects and their attributes
   */
  Scan scan1 = new Scan();
  Job job1 = new Job(hConf);
  job1.setJobName("BSBM-Q8-RepartitionJoin");
  job1.setJarByClass(RepartitionJoinQ8.class);
  // Change caching and number of time stamps to speed up the scan
  scan1.setCaching(500);
  scan1.setMaxVersions(200);
  scan1.setCacheBlocks(false);

  // Mapper settings
  TableMapReduceUtil.initTableMapperJob(
      args[0], // input HBase table name
      scan1, // Scan instance to control CF and attribute selection
      RepartitionMapper.class, // mapper class
      CompositeKeyWritable.class, // mapper output key
      KeyValueArrayWritable.class, // mapper output value
      job1);

  // Reducer settings
  job1.setReducerClass(RepartitionReducer.class);

  job1.setOutputFormatClass(TextOutputFormat.class);
  // job1.setNumReduceTasks(1); // Uncomment this if running into problems on 2+ node cluster

  FileOutputFormat.setOutputPath(job1, new Path("output/BSBMQ8"));

  try {
    job1.waitForCompletion(true);
  } catch (ClassNotFoundException e) {
    e.printStackTrace();
  } catch (InterruptedException e) {
    e.printStackTrace();
  }
  return job1;
}
public static List<VesselLocation> getLocationsBetween(
    Table VTLocation_Table, String imo_str, long first_timestamp, long last_timestamp)
    throws IOException {
  // scan 'cdb_vessel:vessel_location',{FILTER=>"(PrefixFilter('0000003162')"}
  Scan GetExistingLocations = new Scan();
  GetExistingLocations
      .setStartRow(Bytes.toBytes(imo_str + LpadNum(Long.MAX_VALUE - last_timestamp, 19)))
      .setStopRow(Bytes.toBytes(imo_str + LpadNum(Long.MAX_VALUE - first_timestamp + 1, 19)));
  GetExistingLocations.setCaching(1000);

  ResultScanner Result_ExistingLocations = VTLocation_Table.getScanner(GetExistingLocations);
  List<VesselLocation> result = new ArrayList<VesselLocation>();

  for (Result res : Result_ExistingLocations) {
    VesselLocation VL = new VesselLocation();
    for (Cell cell : res.rawCells()) {
      String Qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
      String Value = Bytes.toString(CellUtil.cloneValue(cell));
      if (Qualifier.equals("coordinates")) {
        VL.coordinates = Value;
      } else if (Qualifier.equals("speed")) {
        VL.speed = Value;
      } else if (Qualifier.equals("destination")) {
        VL.destination = Value;
      } else if (Qualifier.equals("timestamp")) {
        VL.recordtime = DateTime.parse(Value, rawformatter).getMillis();
      } else if (Qualifier.equals("previouslocation")) {
        VL.previouslocation = Value;
      } else if (Qualifier.equals("nextlocation")) {
        VL.nextlocation = Value;
      }
    }
    result.add(VL);
  }
  Result_ExistingLocations.close();
  return result;
}
public static void scanNum(String tableStr) {
  try {
    Configuration conf = HBaseConfiguration.create();
    Scan scan = new Scan();
    scan.setCaching(5000);
    Filter f = new FirstKeyOnlyFilter();
    scan.setFilter(f);

    HTable htable = new HTable(conf, Bytes.toBytes(tableStr));
    ResultScanner scanner = htable.getScanner(scan);
    int count = 0;
    for (Result scannerRst : scanner) {
      count++;
    }
    System.out.println(tableStr + ":" + count);
    scanner.close();
  } catch (IOException e) {
    e.printStackTrace();
  }
}
/**
 * Deletes the specified table with all its columns. ATTENTION: Invoking this method will delete
 * the table if it exists and therefore causes data loss.
 */
@Override
public void clearStorage() throws StorageException {
  HBaseAdmin adm = getAdminInterface();

  try {
    // first of all, check if table exists, if not - we are done
    if (!adm.tableExists(tableName)) {
      logger.debug("clearStorage() called before table {} was created, skipping.", tableName);
      return;
    }
  } catch (IOException e) {
    throw new TemporaryStorageException(e);
  }

  HTable table = null;
  try {
    table = new HTable(hconf, tableName);

    Scan scan = new Scan();
    scan.setBatch(100);
    scan.setCacheBlocks(false);
    scan.setCaching(2000);

    ResultScanner scanner = null;
    try {
      scanner = table.getScanner(scan);
      for (Result res : scanner) {
        table.delete(new Delete(res.getRow()));
      }
    } finally {
      IOUtils.closeQuietly(scanner);
    }
  } catch (IOException e) {
    throw new TemporaryStorageException(e);
  } finally {
    IOUtils.closeQuietly(table);
  }
}
public static void readHtable() throws Exception {
  Job job = new Job(conf, "ExampleRead");
  job.setJarByClass(HbaseMR.class);

  Scan scan = new Scan();
  scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
  scan.setCacheBlocks(false); // don't set to true for MR jobs

  TableMapReduceUtil.initTableMapperJob(
      "", // input HBase table name
      scan, // Scan instance to control CF and attribute selection
      MyMapper.class, // mapper
      null, // mapper output key
      null, // mapper output value
      job);
  // because we aren't emitting anything from mapper
  job.setOutputFormatClass(NullOutputFormat.class);

  boolean b = job.waitForCompletion(true);
  if (!b) {
    throw new IOException("error with job!");
  }
}
@SuppressWarnings({"resource", "rawtypes", "unchecked"})
public static void main(String[] args) throws IOException {
  Configuration config = HBaseConfiguration.create();
  config.set("hbase.master", "192.168.32.128:60000");
  config.set("hbase.zookeeper.property.clientPort", "2181");
  config.set("hbase.zookeeper.quorum", "192.168.32.128");

  HTable table = new HTable(config, Bytes.toBytes("weibo"));
  List list = new ArrayList();

  Scan scan = new Scan();
  scan.setBatch(0);
  scan.setCaching(10000);
  scan.setMaxVersions();
  scan.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("total"));

  ResultScanner rsScanner = table.getScanner(scan);
  for (Result rs : rsScanner) {
    String date = Bytes.toString(rs.getRow());
    String total = Bytes.toString(rs.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("total")));
    list.add(date + "\t" + total);
  }

  for (int i = 0; i < 7; i++)
    System.out.println((String) list.get(i) + "\t" + (String) list.get(i + 7));
}
@Test
public void shouldRunMapReduce() throws Exception {
  // given
  Configuration configuration = HBaseConfiguration.create();
  TableFactory.recreateTable(
      configuration, Bytes.toString(UsersDao.TABLE_NAME), Bytes.toString(UsersDao.FAMILY_NAME));
  UserDataFactory.insertTestData();

  // map reduce
  Job job = new Job(configuration, "Count Users");
  job.setJarByClass(CountUsersMapper.class);

  Scan scan = new Scan();
  scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
  scan.setCacheBlocks(false); // don't set to true for MR jobs
  scan.addColumn(UsersDao.FAMILY_NAME, UsersDao.FORENAME_COL);

  // mapper
  TableMapReduceUtil.initTableMapperJob(
      Bytes.toString(UsersDao.TABLE_NAME),
      scan,
      CountUsersMapper.class,
      ImmutableBytesWritable.class,
      Result.class,
      job);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setNumReduceTasks(0);

  // when
  boolean succeeded = job.waitForCompletion(true);

  // then
  assertThat(succeeded).isTrue();
  assertThat(job.getCounters().findCounter(CountUsersMapper.Counters.USER_COUNT).getValue())
      .isGreaterThan(99);
}
/**
 * It should be noticed that the stop row in scan is not included by default.
 *
 * @param rowRange
 * @param filterList
 * @param family
 * @param columns
 * @param maxVersion
 * @return
 * @throws Exception
 */
public ResultScanner getResultSet(
    String[] rowRange, FilterList filterList, String[] family, String[] columns, int maxVersion)
    throws Exception {
  if (table == null) throw new Exception("No table handler");
  if (cacheSize < 0) throw new Exception("should set cache size before scanning");

  Scan scan = null;
  ResultScanner rscanner = null;

  try {
    scan = new Scan();
    scan.setCaching(this.cacheSize);
    scan.setCacheBlocks(blockCached);
    scan.setFilter(filterList);
    if (maxVersion > 0) scan.setMaxVersions(maxVersion);

    // the scan excludes the stop row, so the caller has to adjust the stop row slightly
    if (rowRange != null) {
      scan.setStartRow(rowRange[0].getBytes());
      if (rowRange.length == 2 && rowRange[1] != null) scan.setStopRow((rowRange[1]).getBytes());
    }

    if (columns != null) {
      for (int i = 0; i < columns.length; i++) {
        scan.addColumn(family[0].getBytes(), columns[i].getBytes());
      }
    }
    rscanner = this.table.getScanner(scan);
  } catch (Exception e) {
    e.printStackTrace();
  }
  return rscanner;
}
@Override
protected StateScanner scanStates(byte[] startRow, byte[] stopRow) throws IOException {
  Scan scan = new Scan(startRow, stopRow);
  scan.setMaxVersions(1);
  scan.addColumn(QueueEntryRow.COLUMN_FAMILY, stateColumnName);
  scan.setCaching(MAX_SCAN_ROWS);
  // TODO: Add filter for getting committed processed rows only. Need to refactor
  // HBaseQueue2Consumer to extract that.
  final ResultScanner scanner =
      DistributedScanner.create(hTable, scan, keyDistributor, scanExecutor);
  return new StateScanner() {

    private Result result;

    @Override
    public boolean nextStateRow() throws IOException {
      result = scanner.next();
      return result != null;
    }

    @Override
    public byte[] getRow() {
      return keyDistributor.getOriginalKey(result.getRow());
    }

    @Override
    public byte[] getState() {
      return result.value();
    }

    @Override
    public void close() throws IOException {
      scanner.close();
    }
  };
}