/**
 * A region is offline, won't be in transition any more. Its state should be the specified
 * expected state, which can only be Split/Merged/Offline/null(=Offline)/SplittingNew/MergingNew.
 */
public void regionOffline(final HRegionInfo hri, final State expectedState) {
  Preconditions.checkArgument(
      expectedState == null || RegionState.isUnassignable(expectedState),
      "Offlined region should not be " + expectedState);
  if (isRegionInState(hri, State.SPLITTING_NEW, State.MERGING_NEW)) {
    // Remove it from all region maps
    deleteRegion(hri);
    return;
  }
  State newState = expectedState == null ? State.OFFLINE : expectedState;
  updateRegionState(hri, newState);
  String encodedName = hri.getEncodedName();
  synchronized (this) {
    regionsInTransition.remove(encodedName);
    ServerName oldServerName = regionAssignments.remove(hri);
    if (oldServerName != null && serverHoldings.containsKey(oldServerName)) {
      if (newState == State.MERGED || newState == State.SPLIT
          || hri.isMetaRegion()
          || tableStateManager.isTableState(
              hri.getTable(), TableState.State.DISABLED, TableState.State.DISABLING)) {
        // Offline the region only if it's merged/split, or the table is disabled/disabling.
        // Otherwise, offline it from this server only when it is online on a different server.
        LOG.info("Offlined " + hri.getShortNameToLog() + " from " + oldServerName);
        removeFromServerHoldings(oldServerName, hri);
        removeFromReplicaMapping(hri);
      } else {
        // Need to remember it so that we can offline it from this
        // server when it is online on a different server.
        oldAssignments.put(encodedName, oldServerName);
      }
    }
  }
}
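For context, a minimal sketch of the contract the precondition above enforces. The names `regionStates` and `hri` are hypothetical stand-ins, and this assumes `RegionState.isUnassignable` accepts exactly the states listed in the Javadoc:

// Hypothetical illustration of the expectedState contract (not HBase source).
regionStates.regionOffline(hri, State.SPLIT);  // OK: region went away via a split
regionStates.regionOffline(hri, null);         // OK: null is treated as State.OFFLINE
try {
  regionStates.regionOffline(hri, State.OPEN); // OPEN is assignable, so this is rejected
} catch (IllegalArgumentException expected) {
  // "Offlined region should not be OPEN"
}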
/**
 * This is an EXPENSIVE clone. Cloning though is the safest thing to do. Can't let out original
 * since it can change and at least the load balancer wants to iterate this exported list. We need
 * to synchronize on regions since all access to this.servers is under a lock on this.regions.
 *
 * @return A clone of current assignments by table.
 */
protected Map<TableName, Map<ServerName, List<HRegionInfo>>> getAssignmentsByTable() {
  Map<TableName, Map<ServerName, List<HRegionInfo>>> result =
      new HashMap<TableName, Map<ServerName, List<HRegionInfo>>>();
  synchronized (this) {
    if (!server.getConfiguration()
        .getBoolean(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE, false)) {
      // Per-cluster balancing: report all regions under a single ensemble pseudo-table.
      Map<ServerName, List<HRegionInfo>> svrToRegions =
          new HashMap<ServerName, List<HRegionInfo>>(serverHoldings.size());
      for (Map.Entry<ServerName, Set<HRegionInfo>> e : serverHoldings.entrySet()) {
        svrToRegions.put(e.getKey(), new ArrayList<HRegionInfo>(e.getValue()));
      }
      result.put(TableName.valueOf(HConstants.ENSEMBLE_TABLE_NAME), svrToRegions);
    } else {
      // Per-table balancing: group each server's regions by the table they belong to.
      for (Map.Entry<ServerName, Set<HRegionInfo>> e : serverHoldings.entrySet()) {
        for (HRegionInfo hri : e.getValue()) {
          if (hri.isMetaRegion()) continue;
          TableName tablename = hri.getTable();
          Map<ServerName, List<HRegionInfo>> svrToRegions = result.get(tablename);
          if (svrToRegions == null) {
            svrToRegions = new HashMap<ServerName, List<HRegionInfo>>(serverHoldings.size());
            result.put(tablename, svrToRegions);
          }
          List<HRegionInfo> regions = svrToRegions.get(e.getKey());
          if (regions == null) {
            regions = new ArrayList<HRegionInfo>();
            svrToRegions.put(e.getKey(), regions);
          }
          regions.add(hri);
        }
      }
    }
  }
  Map<ServerName, ServerLoad> onlineSvrs = serverManager.getOnlineServers();
  // Take care of servers w/o assignments, and remove servers in draining mode
  List<ServerName> drainingServers = this.serverManager.getDrainingServersList();
  for (Map<ServerName, List<HRegionInfo>> map : result.values()) {
    for (ServerName svr : onlineSvrs.keySet()) {
      if (!map.containsKey(svr)) {
        map.put(svr, new ArrayList<HRegionInfo>());
      }
    }
    map.keySet().removeAll(drainingServers);
  }
  return result;
}
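A minimal sketch of how the by-table switch changes the shape of the returned map. This is illustrative only (the method is protected, so imagine it invoked from within the class or a test subclass), and the assertions are assumptions about the two branches above rather than tested HBase behavior:

// Sketch: with by-table balancing off (the default), every region is grouped
// under the single ensemble pseudo-table, so the balancer runs once globally.
Configuration conf = server.getConfiguration();
conf.setBoolean(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE, false);
Map<TableName, Map<ServerName, List<HRegionInfo>>> byEnsemble = getAssignmentsByTable();
assert byEnsemble.containsKey(TableName.valueOf(HConstants.ENSEMBLE_TABLE_NAME));

// With it on, each user table gets its own server-to-regions map, so the
// balancer can be invoked once per table.
conf.setBoolean(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE, true);
Map<TableName, Map<ServerName, List<HRegionInfo>>> byTable = getAssignmentsByTable();
assert !byTable.containsKey(TableName.valueOf(HConstants.ENSEMBLE_TABLE_NAME));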
@Test
public void testCloseRegionWhenServerNameIsEmpty() throws Exception {
  String tbName = "TestHBACloseRegionWhenServerNameIsEmpty";
  byte[] TABLENAME = Bytes.toBytes(tbName);
  HBaseAdmin admin = createTable(TABLENAME);
  HRegionServer rs = TEST_UTIL.getRSForFirstRegionInTable(TABLENAME);
  try {
    List<HRegionInfo> onlineRegions = rs.getOnlineRegions();
    for (HRegionInfo regionInfo : onlineRegions) {
      if (!regionInfo.isMetaRegion() && !regionInfo.isRootRegion()) {
        if (regionInfo.getRegionNameAsString().contains(tbName)) {
          admin.closeRegionWithEncodedRegionName(regionInfo.getEncodedName(), " ");
        }
      }
    }
    fail("The test should throw an exception if the server name passed is empty.");
  } catch (IllegalArgumentException e) {
    // Expected: a blank server name must be rejected.
  }
}
@Test
public void testCloseRegionIfInvalidRegionNameIsPassed() throws Exception {
  String tbName = "TestCloseRegionIfInvalidRegionName";
  byte[] TABLENAME = Bytes.toBytes(tbName);
  HBaseAdmin admin = createTable(TABLENAME);
  HRegionInfo info = null;
  HRegionServer rs = TEST_UTIL.getRSForFirstRegionInTable(TABLENAME);
  List<HRegionInfo> onlineRegions = rs.getOnlineRegions();
  for (HRegionInfo regionInfo : onlineRegions) {
    if (!regionInfo.isMetaRegion() && !regionInfo.isRootRegion()) {
      if (regionInfo.getRegionNameAsString().contains(tbName)) {
        info = regionInfo;
        admin.closeRegionWithEncodedRegionName("sample", rs.getServerInfo().getHostnamePort());
      }
    }
  }
  onlineRegions = rs.getOnlineRegions();
  assertTrue(
      "The region should be present in online regions list.", onlineRegions.contains(info));
}
@Test
public void testShouldCloseTheRegionBasedOnTheEncodedRegionName() throws Exception {
  String tbName = "TestHBACloseRegion";
  byte[] TABLENAME = Bytes.toBytes(tbName);
  HBaseAdmin admin = createTable(TABLENAME);
  HRegionInfo info = null;
  HRegionServer rs = TEST_UTIL.getRSForFirstRegionInTable(TABLENAME);
  List<HRegionInfo> onlineRegions = rs.getOnlineRegions();
  for (HRegionInfo regionInfo : onlineRegions) {
    if (!regionInfo.isMetaRegion() && !regionInfo.isRootRegion()) {
      info = regionInfo;
      admin.closeRegionWithEncodedRegionName(
          regionInfo.getEncodedName(), rs.getServerInfo().getHostnamePort());
    }
  }
  Thread.sleep(1000);
  onlineRegions = rs.getOnlineRegions();
  assertFalse(
      "The region should not be present in online regions list.", onlineRegions.contains(info));
}
/**
 * Generate a global load balancing plan according to the specified map of server information to
 * the most loaded regions of each server.
 *
 * <p>The load balancing invariant is that all servers are within 1 region of the average number
 * of regions per server. If the average is an integer number, all servers will be balanced to
 * the average. Otherwise, all servers will have either floor(average) or ceiling(average)
 * regions.
 *
 * <p>HBASE-3609 Modeled regionsToMove using Guava's MinMaxPriorityQueue so that we can fetch
 * from both ends of the queue. At the beginning, we check whether there was an empty region
 * server just discovered by the Master. If so, we alternately choose new / old regions from the
 * head / tail of regionsToMove, respectively. This alternation avoids clustering young regions
 * on the newly discovered region server. Otherwise, we choose new regions from the head of
 * regionsToMove.
 *
 * <p>Another improvement from HBASE-3609 is that we assign regions from regionsToMove to
 * underloaded servers in round-robin fashion. Previously one underloaded server would be filled
 * before we moved on to the next underloaded server, leading to clustering of young regions.
 *
 * <p>Finally, we randomly shuffle underloaded servers so that they receive offloaded regions
 * relatively evenly across calls to balanceCluster().
 *
 * <p>The algorithm is currently implemented as such:
 *
 * <ol>
 *   <li>Determine the two valid numbers of regions each server should have,
 *       <b>MIN</b>=floor(average) and <b>MAX</b>=ceiling(average).
 *   <li>Iterate down the most loaded servers, shedding regions from each so each server hosts
 *       exactly <b>MAX</b> regions. Stop once you reach a server that already has &lt;=
 *       <b>MAX</b> regions.
 *       <p>Order the regions to move from most recent to least.
 *   <li>Iterate down the least loaded servers, assigning regions so each server has exactly
 *       <b>MIN</b> regions. Stop once you reach a server that already has &gt;= <b>MIN</b>
 *       regions.
 *       <p>Regions being assigned to underloaded servers are those that were shed in the
 *       previous step. It is possible that there were not enough regions shed to fill each
 *       underloaded server to <b>MIN</b>. If so we end up with a number of regions required to
 *       do so, <b>neededRegions</b>.
 *       <p>It is also possible that we were able to fill each underloaded server but ended up
 *       with regions that were unassigned from overloaded servers but that still do not have
 *       assignment.
 *       <p>If neither of these conditions hold (no regions needed to fill the underloaded
 *       servers, no regions leftover from overloaded servers), we are done and return.
 *       Otherwise we handle these cases below.
 *   <li>If <b>neededRegions</b> is non-zero (still have underloaded servers), we iterate the
 *       most loaded servers again, shedding a single region from each (this brings them from
 *       having <b>MAX</b> regions to having <b>MIN</b> regions).
 *   <li>We now definitely have more regions that need assignment, either from the previous step
 *       or from the original shedding from overloaded servers. Iterate the least loaded servers
 *       filling each to <b>MIN</b>.
 *   <li>If we still have more regions that need assignment, again iterate the least loaded
 *       servers, this time giving each one (filling them to <b>MAX</b>) until we run out.
 *   <li>All servers will now either host <b>MIN</b> or <b>MAX</b> regions.
 *       <p>In addition, any server hosting &gt;= <b>MAX</b> regions is guaranteed to end up
 *       with <b>MAX</b> regions at the end of the balancing. This ensures the minimal number of
 *       regions possible are moved.
 * </ol>
 *
 * TODO: We can at most reassign the number of regions away from a particular server to be how
 * many they report as most loaded. Should we just keep all assignment in memory? Any objections?
 * Does this mean we need HeapSize on HMaster? Or just careful monitoring? (current thinking is
 * we will hold all assignments in memory)
 *
 * @param clusterMap Map of regionservers and their load/region information to a list of their
 *     most loaded regions
 * @return a list of regions to be moved, including source and destination, or null if the
 *     cluster is already balanced
 */
@Override
public List<RegionPlan> balanceCluster(Map<ServerName, List<HRegionInfo>> clusterMap) {
  List<RegionPlan> regionsToReturn = balanceMasterRegions(clusterMap);
  if (regionsToReturn != null) {
    return regionsToReturn;
  }
  filterExcludedServers(clusterMap);
  boolean emptyRegionServerPresent = false;
  long startTime = System.currentTimeMillis();

  Collection<ServerName> backupMasters = getBackupMasters();
  ClusterLoadState cs =
      new ClusterLoadState(masterServerName, backupMasters, backupMasterWeight, clusterMap);
  // Construct a Cluster object with clusterMap and the rest of the
  // arguments as defaults
  Cluster c =
      new Cluster(
          masterServerName,
          clusterMap,
          null,
          this.regionFinder,
          getBackupMasters(),
          tablesOnMaster,
          this.rackManager);
  if (!this.needsBalance(c)) return null;

  int numServers = cs.getNumServers();
  NavigableMap<ServerAndLoad, List<HRegionInfo>> serversByLoad = cs.getServersByLoad();
  int numRegions = cs.getNumRegions();
  float average = cs.getLoadAverage();
  int max = (int) Math.ceil(average);
  int min = (int) average;

  // Log the balance parameters so the balance result can be checked.
  StringBuilder strBalanceParam = new StringBuilder();
  strBalanceParam
      .append("Balance parameter: numRegions=")
      .append(numRegions)
      .append(", numServers=")
      .append(numServers)
      .append(", numBackupMasters=")
      .append(cs.getNumBackupMasters())
      .append(", backupMasterWeight=")
      .append(backupMasterWeight)
      .append(", max=")
      .append(max)
      .append(", min=")
      .append(min);
  LOG.debug(strBalanceParam.toString());

  // Balance the cluster
  // TODO: Look at data block locality or a more complex load to do this
  MinMaxPriorityQueue<RegionPlan> regionsToMove =
      MinMaxPriorityQueue.orderedBy(rpComparator).create();
  regionsToReturn = new ArrayList<RegionPlan>();

  // Walk down most loaded, pruning each to the max
  int serversOverloaded = 0;
  // flag used to fetch regions from head and tail of list, alternately
  boolean fetchFromTail = false;
  Map<ServerName, BalanceInfo> serverBalanceInfo = new TreeMap<ServerName, BalanceInfo>();
  for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server :
      serversByLoad.descendingMap().entrySet()) {
    ServerAndLoad sal = server.getKey();
    int load = sal.getLoad();
    if (load <= max) {
      serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(0, 0));
      break;
    }
    serversOverloaded++;
    List<HRegionInfo> regions = server.getValue();
    int w = 1; // Normal region server has weight 1
    if (backupMasters != null && backupMasters.contains(sal.getServerName())) {
      w = backupMasterWeight; // Backup master has heavier weight
    }
    int numToOffload = Math.min((load - max) / w, regions.size());
    // account for the out-of-band regions which were assigned to this server
    // after some other region server crashed
    Collections.sort(regions, riComparator);
    int numTaken = 0;
    for (int i = 0; i <= numToOffload; ) {
      HRegionInfo hri = regions.get(i); // fetch from head
      if (fetchFromTail) {
        hri = regions.get(regions.size() - 1 - i);
      }
      i++;
      // Don't rebalance special regions.
      if (shouldBeOnMaster(hri) && masterServerName.equals(sal.getServerName())) continue;
      regionsToMove.add(new RegionPlan(hri, sal.getServerName(), null));
      numTaken++;
      if (numTaken >= numToOffload) break;
      // fetch in alternate order if there is new region server
      if (emptyRegionServerPresent) {
        fetchFromTail = !fetchFromTail;
      }
    }
    serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(numToOffload, (-1) * numTaken));
  }
  int totalNumMoved = regionsToMove.size();

  // Walk down least loaded, filling each to the min
  int neededRegions = 0; // number of regions needed to bring all up to min
  fetchFromTail = false;

  Map<ServerName, Integer> underloadedServers = new HashMap<ServerName, Integer>();
  int maxToTake = numRegions - min;
  for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.entrySet()) {
    if (maxToTake == 0) break; // no more to take
    int load = server.getKey().getLoad();
    if (load >= min && load > 0) {
      continue; // look for other servers which haven't reached min
    }
    int w = 1; // Normal region server has weight 1
    if (backupMasters != null && backupMasters.contains(server.getKey().getServerName())) {
      w = backupMasterWeight; // Backup master has heavier weight
    }
    int regionsToPut = (min - load) / w;
    if (regionsToPut == 0) {
      regionsToPut = 1;
    }
    maxToTake -= regionsToPut;
    underloadedServers.put(server.getKey().getServerName(), regionsToPut);
  }
  // number of servers that get new regions
  int serversUnderloaded = underloadedServers.size();
  int incr = 1;
  List<ServerName> sns =
      Arrays.asList(underloadedServers.keySet().toArray(new ServerName[serversUnderloaded]));
  Collections.shuffle(sns, RANDOM);
  while (regionsToMove.size() > 0) {
    int cnt = 0;
    int i = incr > 0 ? 0 : underloadedServers.size() - 1;
    for (; i >= 0 && i < underloadedServers.size(); i += incr) {
      if (regionsToMove.isEmpty()) break;
      ServerName si = sns.get(i);
      int numToTake = underloadedServers.get(si);
      if (numToTake == 0) continue;

      addRegionPlan(regionsToMove, fetchFromTail, si, regionsToReturn);
      if (emptyRegionServerPresent) {
        fetchFromTail = !fetchFromTail;
      }

      underloadedServers.put(si, numToTake - 1);
      cnt++;
      BalanceInfo bi = serverBalanceInfo.get(si);
      if (bi == null) {
        bi = new BalanceInfo(0, 0);
        serverBalanceInfo.put(si, bi);
      }
      bi.setNumRegionsAdded(bi.getNumRegionsAdded() + 1);
    }
    if (cnt == 0) break;
    // iterates underloadedServers in the other direction
    incr = -incr;
  }
  for (Integer i : underloadedServers.values()) {
    // If we still want to take some, increment needed
    neededRegions += i;
  }

  // If none needed to fill all to min and none left to drain all to max,
  // we are done
  if (neededRegions == 0 && regionsToMove.isEmpty()) {
    long endTime = System.currentTimeMillis();
    LOG.info(
        "Calculated a load balance in " + (endTime - startTime) + "ms. "
            + "Moving " + totalNumMoved + " regions off of " + serversOverloaded
            + " overloaded servers onto " + serversUnderloaded + " less loaded servers");
    return regionsToReturn;
  }

  // Need to do a second pass.
  // Either more regions to assign out or servers that are still underloaded

  // If we need more to fill min, grab one from each most loaded until enough
  if (neededRegions != 0) {
    // Walk down most loaded, grabbing one from each until we get enough
    for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server :
        serversByLoad.descendingMap().entrySet()) {
      BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName());
      int idx = balanceInfo == null ? 0 : balanceInfo.getNextRegionForUnload();
      if (idx >= server.getValue().size()) break;
      HRegionInfo region = server.getValue().get(idx);
      if (region.isMetaRegion()) continue; // Don't move meta regions.
      regionsToMove.add(new RegionPlan(region, server.getKey().getServerName(), null));
      totalNumMoved++;
      if (--neededRegions == 0) {
        // No more regions needed, done shedding
        break;
      }
    }
  }

  // Now we have a set of regions that must be all assigned out
  // Assign each underloaded up to the min, then if leftovers, assign to max

  // Walk down least loaded, assigning to each to fill up to min
  for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.entrySet()) {
    int regionCount = server.getKey().getLoad();
    if (regionCount >= min) break;
    BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName());
    if (balanceInfo != null) {
      regionCount += balanceInfo.getNumRegionsAdded();
    }
    if (regionCount >= min) {
      continue;
    }
    int numToTake = min - regionCount;
    int numTaken = 0;
    while (numTaken < numToTake && 0 < regionsToMove.size()) {
      addRegionPlan(
          regionsToMove, fetchFromTail, server.getKey().getServerName(), regionsToReturn);
      numTaken++;
      if (emptyRegionServerPresent) {
        fetchFromTail = !fetchFromTail;
      }
    }
  }

  // If we still have regions to dish out, assign underloaded to max
  if (0 < regionsToMove.size()) {
    for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.entrySet()) {
      int regionCount = server.getKey().getLoad();
      BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName());
      if (balanceInfo != null) {
        regionCount += balanceInfo.getNumRegionsAdded();
      }
      if (regionCount >= max) {
        break;
      }
      addRegionPlan(
          regionsToMove, fetchFromTail, server.getKey().getServerName(), regionsToReturn);
      if (emptyRegionServerPresent) {
        fetchFromTail = !fetchFromTail;
      }
      if (regionsToMove.isEmpty()) {
        break;
      }
    }
  }

  long endTime = System.currentTimeMillis();

  if (!regionsToMove.isEmpty() || neededRegions != 0) {
    // Emit data so we can diagnose how the balancer went astray.
    LOG.warn(
        "regionsToMove=" + totalNumMoved + ", numServers=" + numServers
            + ", serversOverloaded=" + serversOverloaded
            + ", serversUnderloaded=" + serversUnderloaded);
    StringBuilder sb = new StringBuilder();
    for (Map.Entry<ServerName, List<HRegionInfo>> e : clusterMap.entrySet()) {
      if (sb.length() > 0) sb.append(", ");
      sb.append(e.getKey().toString());
      sb.append(" ");
      sb.append(e.getValue().size());
    }
    LOG.warn("Input " + sb.toString());
  }

  // All done!
  LOG.info(
      "Done. Calculated a load balance in " + (endTime - startTime) + "ms. "
          + "Moving " + totalNumMoved + " regions off of " + serversOverloaded
          + " overloaded servers onto " + serversUnderloaded + " less loaded servers");

  return regionsToReturn;
}
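To make the floor/ceiling invariant from the Javadoc concrete, here is a small hypothetical walk-through with made-up numbers (plain Java arithmetic, not part of the balancer):

// Hypothetical example: 26 regions across 5 servers.
// average = 26 / 5 = 5.2, so MIN = floor(5.2) = 5 and MAX = ceil(5.2) = 6.
float average = 26f / 5f;
int max = (int) Math.ceil(average); // 6
int min = (int) average;            // 5 (cast truncates, i.e. floor for positive values)
// A server holding 9 regions is overloaded and sheds 9 - MAX = 3 regions;
// a server holding 2 regions is underloaded and receives MIN - 2 = 3 regions.
// After balancing, every server hosts either 5 or 6 regions, which is the
// "within 1 region of the average" invariant.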
/**
 * Test that the global memstore size in the region server is equal to the sum of each region's
 * memstore size.
 *
 * @throws Exception
 */
@Test
public void testGlobalMemStore() throws Exception {
  // Start the cluster
  LOG.info("Starting cluster");
  Configuration conf = HBaseConfiguration.create();
  TEST_UTIL = new HBaseTestingUtility(conf);
  TEST_UTIL.startMiniCluster(1, regionServerNum);
  cluster = TEST_UTIL.getHBaseCluster();
  LOG.info("Waiting for active/ready master");
  cluster.waitForActiveAndReadyMaster();

  // Create a table with regions
  TableName table = TableName.valueOf("TestGlobalMemStoreSize");
  byte[] family = Bytes.toBytes("family");
  LOG.info("Creating table with " + regionNum + " regions");
  Table ht = TEST_UTIL.createMultiRegionTable(table, family, regionNum);
  int numRegions = -1;
  try (RegionLocator r = TEST_UTIL.getConnection().getRegionLocator(table)) {
    numRegions = r.getStartKeys().length;
  }
  assertEquals(regionNum, numRegions);
  waitForAllRegionsAssigned();

  for (HRegionServer server : getOnlineRegionServers()) {
    long globalMemStoreSize = 0;
    for (HRegionInfo regionInfo : ProtobufUtil.getOnlineRegions(server.getRSRpcServices())) {
      globalMemStoreSize +=
          server.getFromOnlineRegions(regionInfo.getEncodedName()).getMemstoreSize();
    }
    assertEquals(server.getRegionServerAccounting().getGlobalMemstoreSize(), globalMemStoreSize);
  }

  // check the global memstore size after flush
  int i = 0;
  for (HRegionServer server : getOnlineRegionServers()) {
    LOG.info(
        "Starting flushes on " + server.getServerName()
            + ", size=" + server.getRegionServerAccounting().getGlobalMemstoreSize());

    for (HRegionInfo regionInfo : ProtobufUtil.getOnlineRegions(server.getRSRpcServices())) {
      Region r = server.getFromOnlineRegions(regionInfo.getEncodedName());
      flush(r, server);
    }
    LOG.info("Post flush on " + server.getServerName());
    long now = System.currentTimeMillis();
    long timeout = now + 1000;
    // Wait until the global memstore size drains to zero, or until the deadline passes.
    while (server.getRegionServerAccounting().getGlobalMemstoreSize() != 0
        && System.currentTimeMillis() < timeout) {
      Threads.sleep(10);
    }
    long size = server.getRegionServerAccounting().getGlobalMemstoreSize();
    if (size > 0) {
      // If size > 0, see if it's because the meta region got edits while
      // our test was running....
      for (HRegionInfo regionInfo : ProtobufUtil.getOnlineRegions(server.getRSRpcServices())) {
        Region r = server.getFromOnlineRegions(regionInfo.getEncodedName());
        long l = r.getMemstoreSize();
        if (l > 0) {
          // Only meta could have edits at this stage. Give it another flush to
          // clear them.
          assertTrue(regionInfo.isMetaRegion());
          LOG.info(r.toString() + " " + l + ", reflushing");
          r.flush(true);
        }
      }
    }
    size = server.getRegionServerAccounting().getGlobalMemstoreSize();
    assertEquals("Server=" + server.getServerName() + ", i=" + i++, 0, size);
  }

  ht.close();
  TEST_UTIL.shutdownMiniCluster();
}
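As a side note, the bounded wait above can also be expressed with the testing utility's Waiter support, which handles the deadline and sleep interval itself. A minimal sketch, assuming the same `server` handle is in scope and org.apache.hadoop.hbase.Waiter is imported:

// Sketch: equivalent bounded wait via HBaseTestingUtility.waitFor, which polls
// the predicate until it returns true or the 1000ms timeout elapses.
TEST_UTIL.waitFor(1000, new Waiter.Predicate<Exception>() {
  @Override
  public boolean evaluate() throws Exception {
    return server.getRegionServerAccounting().getGlobalMemstoreSize() == 0;
  }
});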