/**
 * Flags the given region server as draining by creating a znode named after the server
 * beneath the watcher's draining parent znode.
 *
 * @param hrs the region server to register as draining
 * @return the same {@code hrs} instance that was passed in
 * @throws KeeperException propagated from the znode creation call
 */
private static HRegionServer setDrainingServer(final HRegionServer hrs) throws KeeperException {
  LOG.info(
      "Making "
          + hrs.getServerName()
          + " the draining server; it has "
          + hrs.getNumberOfOnlineRegions()
          + " online regions");

  final ZooKeeperWatcher watcher = hrs.getZooKeeper();
  // The child znode is keyed by the server name under the draining parent.
  final String drainingPath =
      ZKUtil.joinZNode(watcher.drainingZNode, hrs.getServerName().toString());
  ZKUtil.createWithParents(watcher, drainingPath);
  return hrs;
}
/** * Test adding server to draining servers and then move regions off it. Make sure that no regions * are moved back to the draining server. * * @throws IOException * @throws KeeperException */ @Test // (timeout=30000) public void testDrainingServerOffloading() throws Exception { // I need master in the below. HMaster master = TEST_UTIL.getMiniHBaseCluster().getMaster(); HRegionInfo hriToMoveBack = null; // Set first server as draining server. HRegionServer drainingServer = setDrainingServer(TEST_UTIL.getMiniHBaseCluster().getRegionServer(0)); try { final int regionsOnDrainingServer = drainingServer.getNumberOfOnlineRegions(); Assert.assertTrue(regionsOnDrainingServer > 0); List<HRegionInfo> hris = ProtobufUtil.getOnlineRegions(drainingServer); for (HRegionInfo hri : hris) { // Pass null and AssignmentManager will chose a random server BUT it // should exclude draining servers. master.moveRegion( null, RequestConverter.buildMoveRegionRequest(hri.getEncodedNameAsBytes(), null)); // Save off region to move back. hriToMoveBack = hri; } // Wait for regions to come back on line again. waitForAllRegionsOnline(); Assert.assertEquals(0, drainingServer.getNumberOfOnlineRegions()); } finally { unsetDrainingServer(drainingServer); } // Now we've unset the draining server, we should be able to move a region // to what was the draining server. master.moveRegion( null, RequestConverter.buildMoveRegionRequest( hriToMoveBack.getEncodedNameAsBytes(), Bytes.toBytes(drainingServer.getServerName().toString()))); // Wait for regions to come back on line again. waitForAllRegionsOnline(); Assert.assertEquals(1, drainingServer.getNumberOfOnlineRegions()); }
/** * Test that draining servers are ignored even after killing regionserver(s). Verify that the * draining server is not given any of the dead servers regions. * * @throws KeeperException * @throws IOException */ @Test(timeout = 30000) public void testDrainingServerWithAbort() throws KeeperException, Exception { HMaster master = TEST_UTIL.getHBaseCluster().getMaster(); waitForAllRegionsOnline(); final long regionCount = TEST_UTIL.getMiniHBaseCluster().countServedRegions(); // Let's get a copy of the regions today. Collection<HRegion> regions = new ArrayList<HRegion>(); for (int i = 0; i < NB_SLAVES; i++) { HRegionServer hrs = TEST_UTIL.getMiniHBaseCluster().getRegionServer(i); regions.addAll(hrs.getCopyOfOnlineRegionsSortedBySize().values()); } // Choose the draining server HRegionServer drainingServer = TEST_UTIL.getMiniHBaseCluster().getRegionServer(0); final int regionsOnDrainingServer = drainingServer.getNumberOfOnlineRegions(); Assert.assertTrue(regionsOnDrainingServer > 0); ServerManager sm = master.getServerManager(); Collection<HRegion> regionsBefore = drainingServer.getCopyOfOnlineRegionsSortedBySize().values(); LOG.info("Regions of drained server are: " + regionsBefore); try { // Add first server to draining servers up in zk. setDrainingServer(drainingServer); // wait for the master to receive and manage the event while (sm.createDestinationServersList().contains(drainingServer.getServerName())) { Thread.sleep(1); } LOG.info("The available servers are: " + sm.createDestinationServersList()); Assert.assertEquals( "Nothing should have happened here.", regionsOnDrainingServer, drainingServer.getNumberOfOnlineRegions()); Assert.assertFalse( "We should not have regions in transition here. List is: " + master.getAssignmentManager().getRegionStates().getRegionsInTransition(), master.getAssignmentManager().getRegionStates().isRegionsInTransition()); // Kill a few regionservers. 
for (int aborted = 0; aborted <= 2; aborted++) { HRegionServer hrs = TEST_UTIL.getMiniHBaseCluster().getRegionServer(aborted + 1); hrs.abort("Aborting"); } // Wait for regions to come back online again. waitForAllRegionsOnline(); Collection<HRegion> regionsAfter = drainingServer.getCopyOfOnlineRegionsSortedBySize().values(); LOG.info("Regions of drained server are: " + regionsAfter); Assert.assertEquals( "Test conditions are not met: regions were" + " created/deleted during the test. ", regionCount, TEST_UTIL.getMiniHBaseCluster().countServedRegions()); // Assert the draining server still has the same regions. StringBuilder result = new StringBuilder(); for (HRegion r : regionsAfter) { if (!regionsBefore.contains(r)) { result.append(r).append(" was added after the drain"); if (regions.contains(r)) { result.append("(existing region"); } else { result.append("(new region)"); } result.append("; "); } } for (HRegion r : regionsBefore) { if (!regionsAfter.contains(r)) { result.append(r).append(" was removed after the drain; "); } } Assert.assertTrue("Errors are: " + result.toString(), result.length() == 0); } finally { unsetDrainingServer(drainingServer); } }