public Map<String, Long> getRegionSizes(String tableName) {
  Map<String, Long> regions = new HashMap<>();
  // The Table handle was previously opened only to read back its name and was never
  // closed; resolving the name directly and using try-with-resources avoids leaking
  // the RegionLocator and Admin handles.
  try (RegionLocator regionLocator =
          connection.getRegionLocator(TableName.valueOf(tableName));
      Admin admin = connection.getAdmin()) {
    List<HRegionLocation> tableRegionInfos = regionLocator.getAllRegionLocations();
    Set<byte[]> tableRegions = new TreeSet<>(Bytes.BYTES_COMPARATOR);
    for (HRegionLocation regionInfo : tableRegionInfos) {
      tableRegions.add(regionInfo.getRegionInfo().getRegionName());
    }
    ClusterStatus clusterStatus = admin.getClusterStatus();
    Collection<ServerName> servers = clusterStatus.getServers();
    final long megaByte = 1024L * 1024L;
    for (ServerName serverName : servers) {
      ServerLoad serverLoad = clusterStatus.getLoad(serverName);
      for (RegionLoad regionLoad : serverLoad.getRegionsLoad().values()) {
        byte[] regionId = regionLoad.getName();
        if (tableRegions.contains(regionId)) {
          // Store file sizes are reported in megabytes; convert to bytes.
          long regionSizeBytes = regionLoad.getStorefileSizeMB() * megaByte;
          regions.put(regionLoad.getNameAsString(), regionSizeBytes);
        }
      }
    }
  } catch (IOException e) {
    e.printStackTrace();
  }
  return regions;
}
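A minimal usage sketch for the helper above, assuming the enclosing class already holds an open connection; the table name below is hypothetical.

Map<String, Long> sizes = getRegionSizes("my_table"); // hypothetical table name
for (Map.Entry<String, Long> entry : sizes.entrySet()) {
  System.out.println(entry.getKey() + " = " + entry.getValue() + " bytes");
}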
static void configureIncrementalLoad(
    Job job,
    HTableDescriptor tableDescriptor,
    RegionLocator regionLocator,
    Class<? extends OutputFormat<?, ?>> cls)
    throws IOException, UnsupportedEncodingException {
  Configuration conf = job.getConfiguration();
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(KeyValue.class);
  job.setOutputFormatClass(cls);

  // Based on the configured map output class, set the correct reducer to properly
  // sort the incoming values.
  // TODO it would be nice to pick one or the other of these formats.
  if (KeyValue.class.equals(job.getMapOutputValueClass())) {
    job.setReducerClass(KeyValueSortReducer.class);
  } else if (Put.class.equals(job.getMapOutputValueClass())) {
    job.setReducerClass(PutSortReducer.class);
  } else if (Text.class.equals(job.getMapOutputValueClass())) {
    job.setReducerClass(TextSortReducer.class);
  } else {
    LOG.warn("Unknown map output value type: " + job.getMapOutputValueClass());
  }

  conf.setStrings(
      "io.serializations",
      conf.get("io.serializations"),
      MutationSerialization.class.getName(),
      ResultSerialization.class.getName(),
      KeyValueSerialization.class.getName());

  if (conf.getBoolean(LOCALITY_SENSITIVE_CONF_KEY, DEFAULT_LOCALITY_SENSITIVE)) {
    // Record this table name so writers can be created on favored nodes.
    LOG.info("bulkload locality sensitive enabled");
    conf.set(OUTPUT_TABLE_NAME_CONF_KEY, regionLocator.getName().getNameAsString());
  }

  // Use the table's region boundaries as the total-order-partitioner split points.
  LOG.info("Looking up current regions for table " + regionLocator.getName());
  List<ImmutableBytesWritable> startKeys = getRegionStartKeys(regionLocator);
  LOG.info(
      "Configuring " + startKeys.size() + " reduce partitions "
          + "to match current region count");
  job.setNumReduceTasks(startKeys.size());

  configurePartitioner(job, startKeys);
  // Set compression algorithms based on column families.
  configureCompression(conf, tableDescriptor);
  configureBloomType(tableDescriptor, conf);
  configureBlockSize(tableDescriptor, conf);
  configureDataBlockEncoding(tableDescriptor, conf);

  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.initCredentials(job);
  LOG.info("Incremental table " + regionLocator.getName() + " output configured.");
}
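A hedged sketch of a typical caller for the configuration step above, going through HFileOutputFormat2's public three-argument overload (which fills in the output format class); the table name and output path here are illustrative, not from the original.

Job job = Job.getInstance(conf, "prepare-bulkload");
try (Connection conn = ConnectionFactory.createConnection(conf);
    Table table = conn.getTable(TableName.valueOf("my_table"));
    RegionLocator locator = conn.getRegionLocator(table.getName())) {
  // Configures the partitioner, sort reducer, and per-family compression settings
  // from the live table layout.
  HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), locator);
  FileOutputFormat.setOutputPath(job, new Path("/tmp/bulkload-out"));
}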
/**
 * Get the split row keys of a table.
 *
 * @param tableName table name
 * @return array of split row keys, or null if the table has a single region
 * @throws IOException if the region locations cannot be fetched
 */
private byte[][] getTableSplitRowKeys(TableName tableName) throws IOException {
  try (RegionLocator locator = connection.getRegionLocator(tableName)) {
    byte[][] startKeys = locator.getStartKeys();
    if (startKeys.length == 1) {
      return null;
    }
    // The first start key is always the empty byte array; the remaining start
    // keys are exactly the table's split points.
    byte[][] splits = new byte[startKeys.length - 1][];
    for (int i = 1; i < startKeys.length; i++) {
      splits[i - 1] = startKeys[i];
    }
    return splits;
  }
}
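One plausible use of the split-key helper above: recreating a table with the same region boundaries. The admin handle, source table name, and new descriptor are assumed from the surrounding class.

byte[][] splits = getTableSplitRowKeys(sourceTableName);
if (splits == null) {
  admin.createTable(newTableDescriptor); // source had a single region
} else {
  admin.createTable(newTableDescriptor, splits); // preserve the region boundaries
}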
private void runIncrementalPELoad(
    Configuration conf, HTableDescriptor tableDescriptor, RegionLocator regionLocator, Path outDir)
    throws IOException, UnsupportedEncodingException, InterruptedException,
        ClassNotFoundException {
  Job job = new Job(conf, "testLocalMRIncrementalLoad");
  job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
  job.getConfiguration()
      .setStrings(
          "io.serializations",
          conf.get("io.serializations"),
          MutationSerialization.class.getName(),
          ResultSerialization.class.getName(),
          KeyValueSerialization.class.getName());
  setupRandomGeneratorMapper(job);
  HFileOutputFormat2.configureIncrementalLoad(job, tableDescriptor, regionLocator);
  FileOutputFormat.setOutputPath(job, outDir);

  assertFalse(util.getTestFileSystem().exists(outDir));
  assertEquals(regionLocator.getAllRegionLocations().size(), job.getNumReduceTasks());
  assertTrue(job.waitForCompletion(true));
}
public int getPartition(ImmutableBytesWritable key, V2 value, int numPartitions) {
  byte[] region = null;
  // If the table has only one region, everything goes to partition 0.
  if (this.startKeys.length == 1) {
    return 0;
  }
  try {
    // Not sure if this is cached after a split, so we could have problems
    // here if a region splits while mapping.
    region = table.getRegionLocation(key.get()).getRegionInfo().getStartKey();
  } catch (IOException e) {
    LOG.error(e);
  }
  if (region == null) {
    // The lookup above failed; fall back to partition 0 instead of hitting an
    // NPE in the comparison below.
    return 0;
  }
  for (int i = 0; i < this.startKeys.length; i++) {
    if (Bytes.compareTo(region, this.startKeys[i]) == 0) {
      if (i >= numPartitions - 1) {
        // Covers the case where we have fewer reducers than regions.
        return (Integer.toString(i).hashCode() & Integer.MAX_VALUE) % numPartitions;
      }
      return i;
    }
  }
  // If the above fails to find a matching start key, we still need to return something.
  return 0;
}
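A hedged sketch of how a region-aware partitioner like the one above is typically wired into a job; HRegionPartitioner stands in for whatever class declares getPartition, and regionCount is an assumed variable (e.g. from a RegionLocator's start-key count). One reducer per region keeps each reducer's output inside a single region.

job.setPartitionerClass(HRegionPartitioner.class); // the class declaring getPartition above
job.setNumReduceTasks(regionCount); // regionCount assumed, e.g. locator.getStartKeys().length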
/**
 * Return the start keys of all of the regions in this table,
 * as a list of ImmutableBytesWritable.
 */
private static List<ImmutableBytesWritable> getRegionStartKeys(RegionLocator table)
    throws IOException {
  byte[][] byteKeys = table.getStartKeys();
  ArrayList<ImmutableBytesWritable> ret = new ArrayList<ImmutableBytesWritable>(byteKeys.length);
  for (byte[] byteKey : byteKeys) {
    ret.add(new ImmutableBytesWritable(byteKey));
  }
  return ret;
}
protected void deleteRegion(Configuration conf, final Table tbl, byte[] startKey, byte[] endKey)
    throws IOException {
  LOG.info("Before delete:");
  HTableDescriptor htd = tbl.getTableDescriptor();
  dumpMeta(htd);

  List<HRegionLocation> regions;
  try (RegionLocator rl = connection.getRegionLocator(tbl.getName())) {
    regions = rl.getAllRegionLocations();
  }

  for (HRegionLocation e : regions) {
    HRegionInfo hri = e.getRegionInfo();
    ServerName hsa = e.getServerName();
    if (Bytes.compareTo(hri.getStartKey(), startKey) == 0
        && Bytes.compareTo(hri.getEndKey(), endKey) == 0) {
      LOG.info("RegionName: " + hri.getRegionNameAsString());
      byte[] deleteRow = hri.getRegionName();
      TEST_UTIL.getHBaseAdmin().unassign(deleteRow, true);

      LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
      Path rootDir = FSUtils.getRootDir(conf);
      FileSystem fs = rootDir.getFileSystem(conf);
      Path p = new Path(FSUtils.getTableDir(rootDir, htd.getTableName()), hri.getEncodedName());
      fs.delete(p, true);

      try (Table meta = this.connection.getTable(TableName.META_TABLE_NAME)) {
        Delete delete = new Delete(deleteRow);
        meta.delete(delete);
      }
    }
    LOG.info(hri.toString() + hsa.toString());
  }

  TEST_UTIL.getMetaTableRows(htd.getTableName());
  LOG.info("After delete:");
  dumpMeta(htd);
}
@Test(timeout = 180000)
public void testCloneSnapshot() throws Exception {
  String nsp = prefix + "_testCloneSnapshot";
  NamespaceDescriptor nspDesc =
      NamespaceDescriptor.create(nsp)
          .addConfiguration(TableNamespaceManager.KEY_MAX_TABLES, "2")
          .addConfiguration(TableNamespaceManager.KEY_MAX_REGIONS, "20")
          .build();
  ADMIN.createNamespace(nspDesc);
  assertNotNull("Namespace descriptor found null.", ADMIN.getNamespaceDescriptor(nsp));

  TableName tableName = TableName.valueOf(nsp + TableName.NAMESPACE_DELIM + "table1");
  TableName cloneTableName = TableName.valueOf(nsp + TableName.NAMESPACE_DELIM + "table2");
  HTableDescriptor tableDescOne = new HTableDescriptor(tableName);
  ADMIN.createTable(tableDescOne, Bytes.toBytes("AAA"), Bytes.toBytes("ZZZ"), 4);

  String snapshot = "snapshot_testCloneSnapshot";
  ADMIN.snapshot(snapshot, tableName);
  ADMIN.cloneSnapshot(snapshot, cloneTableName);

  int tableLength;
  try (RegionLocator locator = ADMIN.getConnection().getRegionLocator(tableName)) {
    tableLength = locator.getStartKeys().length;
  }
  assertEquals(tableName.getNameAsString() + " should have four regions.", 4, tableLength);

  try (RegionLocator locator = ADMIN.getConnection().getRegionLocator(cloneTableName)) {
    tableLength = locator.getStartKeys().length;
  }
  assertEquals(cloneTableName.getNameAsString() + " should have four regions.", 4, tableLength);

  NamespaceTableAndRegionInfo nstate = getNamespaceState(nsp);
  assertEquals("Total tables count should be 2.", 2, nstate.getTables().size());
  assertEquals("Total regions count should be 8.", 8, nstate.getRegionCount());

  ADMIN.deleteSnapshot(snapshot);
}
private static List<HBaseMRRowRange> binRanges(
    final List<HBaseMRRowRange> inputRanges,
    final Map<HRegionLocation, Map<HRegionInfo, List<HBaseMRRowRange>>> binnedRanges,
    final RegionLocator regionLocator)
    throws IOException {
  // Loop through ranges, getting the RegionLocation and RegionInfo for each
  // start key, clipping the range to that region's extent, and leaving the
  // remainder in the list to be binned on a later pass.
  final ListIterator<HBaseMRRowRange> i = inputRanges.listIterator();
  while (i.hasNext()) {
    final HBaseMRRowRange range = i.next();
    final HRegionLocation location =
        regionLocator.getRegionLocation(range.getStart().getBytes());

    Map<HRegionInfo, List<HBaseMRRowRange>> regionInfoMap = binnedRanges.get(location);
    if (regionInfoMap == null) {
      regionInfoMap = new HashMap<HRegionInfo, List<HBaseMRRowRange>>();
      binnedRanges.put(location, regionInfoMap);
    }

    final HRegionInfo regionInfo = location.getRegionInfo();
    List<HBaseMRRowRange> rangeList = regionInfoMap.get(regionInfo);
    if (rangeList == null) {
      rangeList = new ArrayList<HBaseMRRowRange>();
      regionInfoMap.put(regionInfo, rangeList);
    }

    if (regionInfo.containsRange(range.getStart().getBytes(), range.getEnd().getBytes())) {
      // The region fully covers the range; bin it and drop it from the input.
      rangeList.add(range);
      i.remove();
    } else {
      // Clip the range at the region boundary, bin the covered head, and
      // requeue the uncovered tail for another pass.
      final ByteArrayRange overlappingRange =
          range.intersection(
              new ByteArrayRange(
                  new ByteArrayId(regionInfo.getStartKey()),
                  new ByteArrayId(regionInfo.getEndKey())));
      rangeList.add(new HBaseMRRowRange(overlappingRange));
      // Without removing the original here, the unclipped range would remain in
      // inputRanges alongside its tail and its head would be binned twice.
      i.remove();

      final HBaseMRRowRange uncoveredRange =
          new HBaseMRRowRange(
              new ByteArrayId(HBaseUtils.getNextPrefix(regionInfo.getEndKey())),
              range.getEnd());
      i.add(uncoveredRange);
    }
  }
  return inputRanges;
}
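A hedged sketch of how a caller might drive binRanges until every range is assigned to a region, following the repeated-pass contract suggested by the comment above; the initial "ranges" list and the regionLocator are assumed from context.

final Map<HRegionLocation, Map<HRegionInfo, List<HBaseMRRowRange>>> binnedRanges =
    new HashMap<HRegionLocation, Map<HRegionInfo, List<HBaseMRRowRange>>>();
List<HBaseMRRowRange> remaining = new ArrayList<HBaseMRRowRange>(ranges);
// Each pass bins the covered head of every range and requeues the uncovered
// tails, so repeat until nothing is left.
while (!remaining.isEmpty()) {
  remaining = binRanges(remaining, binnedRanges, regionLocator);
}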
private void doIncrementalLoadTest(boolean shouldChangeRegions, boolean shouldKeepLocality)
    throws Exception {
  util = new HBaseTestingUtility();
  Configuration conf = util.getConfiguration();
  conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, shouldKeepLocality);
  int hostCount = 1;
  int regionNum = 5;
  if (shouldKeepLocality) {
    // We should raise the host count above the HDFS replica count once MiniHBaseCluster
    // supports an explicit hostnames parameter just like MiniDFSCluster does.
    hostCount = 3;
    regionNum = 20;
  }

  byte[][] splitKeys = generateRandomSplitKeys(regionNum - 1);
  String[] hostnames = new String[hostCount];
  for (int i = 0; i < hostCount; ++i) {
    hostnames[i] = "datanode_" + i;
  }
  util.startMiniCluster(1, hostCount, hostnames);

  Table table = util.createTable(TABLE_NAME, FAMILIES, splitKeys);
  Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
  try (RegionLocator r = util.getConnection().getRegionLocator(TABLE_NAME);
      Admin admin = util.getConnection().getAdmin()) {
    assertEquals("Should start with empty table", 0, util.countRows(table));
    int numRegions = r.getStartKeys().length;
    assertEquals("Should make " + regionNum + " regions", numRegions, regionNum);

    // Generate the bulk load files
    runIncrementalPELoad(conf, table.getTableDescriptor(), r, testDir);
    // This doesn't write into the table, just makes files
    assertEquals("HFOF should not touch actual table", 0, util.countRows(table));

    // Make sure that a directory was created for every CF
    int dir = 0;
    for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) {
      for (byte[] family : FAMILIES) {
        if (Bytes.toString(family).equals(f.getPath().getName())) {
          ++dir;
        }
      }
    }
    assertEquals("Column family not found in FS.", FAMILIES.length, dir);

    // handle the split case
    if (shouldChangeRegions) {
      LOG.info("Changing regions in table");
      admin.disableTable(table.getName());
      while (util.getMiniHBaseCluster()
          .getMaster()
          .getAssignmentManager()
          .getRegionStates()
          .isRegionsInTransition()) {
        Threads.sleep(200);
        LOG.info("Waiting on table to finish disabling");
      }
      util.deleteTable(table.getName());
      byte[][] newSplitKeys = generateRandomSplitKeys(14);
      table = util.createTable(TABLE_NAME, FAMILIES, newSplitKeys);

      while (util.getConnection().getRegionLocator(TABLE_NAME).getAllRegionLocations().size() != 15
          || !admin.isTableAvailable(table.getName())) {
        Thread.sleep(200);
        LOG.info("Waiting for new region assignment to happen");
      }
    }

    // Perform the actual load
    new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, r);

    // Ensure data shows up
    int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
    assertEquals(
        "LoadIncrementalHFiles should put expected data in table",
        expectedRows,
        util.countRows(table));
    Scan scan = new Scan();
    ResultScanner results = table.getScanner(scan);
    for (Result res : results) {
      assertEquals(FAMILIES.length, res.rawCells().length);
      Cell first = res.rawCells()[0];
      for (Cell kv : res.rawCells()) {
        assertTrue(CellUtil.matchingRow(first, kv));
        assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
      }
    }
    results.close();
    String tableDigestBefore = util.checksumRows(table);

    // Check region locality
    HDFSBlocksDistribution hbd = new HDFSBlocksDistribution();
    for (HRegion region : util.getHBaseCluster().getRegions(TABLE_NAME)) {
      hbd.add(region.getHDFSBlocksDistribution());
    }
    for (String hostname : hostnames) {
      float locality = hbd.getBlockLocalityIndex(hostname);
      LOG.info("locality of [" + hostname + "]: " + locality);
      assertEquals(100, (int) (locality * 100));
    }

    // Cause regions to reopen
    admin.disableTable(TABLE_NAME);
    while (!admin.isTableDisabled(TABLE_NAME)) {
      Thread.sleep(200);
      LOG.info("Waiting for table to disable");
    }
    admin.enableTable(TABLE_NAME);
    util.waitTableAvailable(TABLE_NAME);
    assertEquals(
        "Data should remain after reopening of regions",
        tableDigestBefore,
        util.checksumRows(table));
  } finally {
    testDir.getFileSystem(conf).delete(testDir, true);
    util.deleteTable(TABLE_NAME);
    util.shutdownMiniCluster();
  }
}
private void doIncrementalLoadTest(boolean shouldChangeRegions) throws Exception {
  util = new HBaseTestingUtility();
  Configuration conf = util.getConfiguration();
  byte[][] splitKeys = generateRandomSplitKeys(4);
  util.startMiniCluster();
  try {
    HTable table = util.createTable(TABLE_NAME, FAMILIES, splitKeys);
    Admin admin = table.getConnection().getAdmin();
    Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
    assertEquals("Should start with empty table", 0, util.countRows(table));
    int numRegions = -1;
    try (RegionLocator r = table.getRegionLocator()) {
      numRegions = r.getStartKeys().length;
    }
    assertEquals("Should make 5 regions", numRegions, 5);

    // Generate the bulk load files
    util.startMiniMapReduceCluster();
    runIncrementalPELoad(conf, table.getTableDescriptor(), table.getRegionLocator(), testDir);
    // This doesn't write into the table, just makes files
    assertEquals("HFOF should not touch actual table", 0, util.countRows(table));

    // Make sure that a directory was created for every CF
    int dir = 0;
    for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) {
      for (byte[] family : FAMILIES) {
        if (Bytes.toString(family).equals(f.getPath().getName())) {
          ++dir;
        }
      }
    }
    assertEquals("Column family not found in FS.", FAMILIES.length, dir);

    // handle the split case
    if (shouldChangeRegions) {
      LOG.info("Changing regions in table");
      admin.disableTable(table.getName());
      while (util.getMiniHBaseCluster()
          .getMaster()
          .getAssignmentManager()
          .getRegionStates()
          .isRegionsInTransition()) {
        Threads.sleep(200);
        LOG.info("Waiting on table to finish disabling");
      }
      util.deleteTable(table.getName());
      byte[][] newSplitKeys = generateRandomSplitKeys(14);
      table = util.createTable(TABLE_NAME, FAMILIES, newSplitKeys);

      while (table.getRegionLocator().getAllRegionLocations().size() != 15
          || !admin.isTableAvailable(table.getName())) {
        Thread.sleep(200);
        LOG.info("Waiting for new region assignment to happen");
      }
    }

    // Perform the actual load
    new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);

    // Ensure data shows up
    int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
    assertEquals(
        "LoadIncrementalHFiles should put expected data in table",
        expectedRows,
        util.countRows(table));
    Scan scan = new Scan();
    ResultScanner results = table.getScanner(scan);
    for (Result res : results) {
      assertEquals(FAMILIES.length, res.rawCells().length);
      Cell first = res.rawCells()[0];
      for (Cell kv : res.rawCells()) {
        assertTrue(CellUtil.matchingRow(first, kv));
        assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
      }
    }
    results.close();
    String tableDigestBefore = util.checksumRows(table);

    // Cause regions to reopen
    admin.disableTable(TABLE_NAME);
    while (!admin.isTableDisabled(TABLE_NAME)) {
      Thread.sleep(200);
      LOG.info("Waiting for table to disable");
    }
    admin.enableTable(TABLE_NAME);
    util.waitTableAvailable(TABLE_NAME);
    assertEquals(
        "Data should remain after reopening of regions",
        tableDigestBefore,
        util.checksumRows(table));
  } finally {
    util.shutdownMiniMapReduceCluster();
    util.shutdownMiniCluster();
  }
}
/**
 * Test that the global memstore size on each region server equals the sum of
 * its regions' memstore sizes.
 *
 * @throws Exception
 */
@Test
public void testGlobalMemStore() throws Exception {
  // Start the cluster
  LOG.info("Starting cluster");
  Configuration conf = HBaseConfiguration.create();
  TEST_UTIL = new HBaseTestingUtility(conf);
  TEST_UTIL.startMiniCluster(1, regionServerNum);
  cluster = TEST_UTIL.getHBaseCluster();
  LOG.info("Waiting for active/ready master");
  cluster.waitForActiveAndReadyMaster();

  // Create a table with regions
  TableName table = TableName.valueOf("TestGlobalMemStoreSize");
  byte[] family = Bytes.toBytes("family");
  LOG.info("Creating table with " + regionNum + " regions");
  Table ht = TEST_UTIL.createMultiRegionTable(table, family, regionNum);
  int numRegions = -1;
  try (RegionLocator r = TEST_UTIL.getConnection().getRegionLocator(table)) {
    numRegions = r.getStartKeys().length;
  }
  assertEquals(regionNum, numRegions);
  waitForAllRegionsAssigned();

  for (HRegionServer server : getOnlineRegionServers()) {
    long globalMemStoreSize = 0;
    for (HRegionInfo regionInfo : ProtobufUtil.getOnlineRegions(server.getRSRpcServices())) {
      globalMemStoreSize +=
          server.getFromOnlineRegions(regionInfo.getEncodedName()).getMemstoreSize();
    }
    assertEquals(server.getRegionServerAccounting().getGlobalMemstoreSize(), globalMemStoreSize);
  }

  // Check the global memstore size after flush
  int i = 0;
  for (HRegionServer server : getOnlineRegionServers()) {
    LOG.info(
        "Starting flushes on "
            + server.getServerName()
            + ", size="
            + server.getRegionServerAccounting().getGlobalMemstoreSize());

    for (HRegionInfo regionInfo : ProtobufUtil.getOnlineRegions(server.getRSRpcServices())) {
      Region r = server.getFromOnlineRegions(regionInfo.getEncodedName());
      flush(r, server);
    }
    LOG.info("Post flush on " + server.getServerName());
    long now = System.currentTimeMillis();
    long timeout = now + 1000;
    // Wait up to one second for the global memstore size to drain to zero.
    while (server.getRegionServerAccounting().getGlobalMemstoreSize() != 0
        && timeout > System.currentTimeMillis()) {
      Threads.sleep(10);
    }
    long size = server.getRegionServerAccounting().getGlobalMemstoreSize();
    if (size > 0) {
      // If size > 0, see if it's because the meta region got edits while
      // our test was running....
      for (HRegionInfo regionInfo : ProtobufUtil.getOnlineRegions(server.getRSRpcServices())) {
        Region r = server.getFromOnlineRegions(regionInfo.getEncodedName());
        long l = r.getMemstoreSize();
        if (l > 0) {
          // Only meta could have edits at this stage. Give it another flush
          // to clear them.
          assertTrue(regionInfo.isMetaRegion());
          LOG.info(r.toString() + " " + l + ", reflushing");
          r.flush(true);
        }
      }
    }
    size = server.getRegionServerAccounting().getGlobalMemstoreSize();
    assertEquals("Server=" + server.getServerName() + ", i=" + i++, 0, size);
  }

  ht.close();
  TEST_UTIL.shutdownMiniCluster();
}