/**
 * Test interruptible while blocking wait on root and meta.
 * @throws IOException
 * @throws InterruptedException
 */
@Test
public void testInterruptWaitOnMetaAndRoot() throws IOException, InterruptedException {
  HRegionInterface implementation = Mockito.mock(HRegionInterface.class);
  HConnection connection = mockConnection(implementation);
  final CatalogTracker ct = constructAndStartCatalogTracker(connection);
  ServerName hsa = ct.getRootLocation();
  Assert.assertNull(hsa);
  ServerName meta = ct.getMetaLocation();
  Assert.assertNull(meta);
  Thread t = new Thread() {
    @Override
    public void run() {
      try {
        ct.waitForMeta();
      } catch (InterruptedException e) {
        throw new RuntimeException("Interrupted", e);
      }
    }
  };
  t.start();
  while (!t.isAlive()) Threads.sleep(1);
  Threads.sleep(1);
  assertTrue(t.isAlive());
  ct.stop();
  // Join the thread... should exit shortly.
  t.join();
}
private void startWaitAliveThenWaitItLives(final Thread t, final int ms) {
  t.start();
  while (!t.isAlive()) {
    // Wait
  }
  // Wait the passed number of milliseconds.
  Threads.sleep(ms);
  Assert.assertTrue("Assert " + t.getName() + " still waiting", t.isAlive());
}
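// A minimal usage sketch for the helper above (the waiter body and names are illustrative,
// not from the source): start a thread that blocks, assert it is still alive after the
// grace period, then interrupt it so the sketch can finish promptly. Threads.sleep handles
// the interrupt exception, so the waiter returns instead of dying with a stack trace.
Thread waiter = new Thread("waiter") {
  @Override
  public void run() {
    Threads.sleep(60000); // stand-in for a real blocking wait
  }
};
startWaitAliveThenWaitItLives(waiter, 1000); // asserts waiter is still alive after 1s
waiter.interrupt(); // unblock the sleep
waiter.join();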
private int runTest() throws Exception {
  LOG.info("Starting the test");

  String runtimeKey = String.format(RUN_TIME_KEY, this.getClass().getSimpleName());
  long runtime = util.getConfiguration().getLong(runtimeKey, DEFAULT_RUN_TIME);

  String numThreadKey = String.format(NUM_THREADS_KEY, this.getClass().getSimpleName());
  numThreads = util.getConfiguration().getInt(numThreadKey, DEFAULT_NUM_THREADS);

  ArrayList<Worker> workers = new ArrayList<>();
  for (int i = 0; i < numThreads; i++) {
    checkException(workers);
    Worker worker = new Worker();
    LOG.info("Launching worker thread " + worker.getName());
    workers.add(worker);
    worker.start();
  }

  Threads.sleep(runtime / 2);
  LOG.info("Stopping creation of new tables");
  create_table.set(false);
  Threads.sleep(runtime / 2);
  LOG.info("Runtime is up");
  running.set(false);

  checkException(workers);

  for (Worker worker : workers) {
    worker.join();
  }
  LOG.info("All Worker threads stopped");

  // Verify
  LOG.info("Verify actions of all threads succeeded");
  checkException(workers);
  LOG.info("Verify namespaces");
  verifyNamespaces();
  LOG.info("Verify states of all tables");
  verifyTables();

  // Run HBCK
  HBaseFsck hbck = null;
  try {
    LOG.info("Running hbck");
    hbck = HbckTestingUtil.doFsck(util.getConfiguration(), false);
    if (HbckTestingUtil.inconsistencyFound(hbck)) {
      // Found an inconsistency during HBCK. Leave the table and namespace undropped so that
      // we can check outside the test.
      keepObjectsAtTheEnd = true;
    }
    HbckTestingUtil.assertNoErrors(hbck);
    LOG.info("Finished hbck");
  } finally {
    if (hbck != null) {
      hbck.close();
    }
  }
  return 0;
}
@Override
public void run() {
  while (!isStopped()) {
    try {
      NavigableMap<HRegionInfo, ServerName> regions =
          MetaScanner.allTableRegions(connection, TABLENAME);

      LOG.info("-------");
      byte[] lastEndKey = HConstants.EMPTY_START_ROW;
      for (HRegionInfo hri : regions.navigableKeySet()) {
        long startKey = 0, endKey = Long.MAX_VALUE;
        if (!Bytes.equals(HConstants.EMPTY_START_ROW, hri.getStartKey())) {
          startKey = Bytes.toLong(hri.getStartKey());
        }
        if (!Bytes.equals(HConstants.EMPTY_END_ROW, hri.getEndKey())) {
          endKey = Bytes.toLong(hri.getEndKey());
        }
        LOG.info("start:" + startKey + " end:" + endKey + " hri:" + hri);
        Assert.assertTrue("lastEndKey=" + Bytes.toString(lastEndKey) + ", startKey="
            + Bytes.toString(hri.getStartKey()),
            Bytes.equals(lastEndKey, hri.getStartKey()));
        lastEndKey = hri.getEndKey();
      }
      Assert.assertTrue(Bytes.equals(lastEndKey, HConstants.EMPTY_END_ROW));
      LOG.info("-------");
      Threads.sleep(10 + random.nextInt(50));
    } catch (Throwable e) {
      ex = e;
      Assert.fail(StringUtils.stringifyException(e));
    }
  }
}
@Test
public void testPreWALRestoreSkip() throws Exception {
  LOG.info(TestRegionObserverInterface.class.getName() + ".testPreWALRestoreSkip");
  TableName tableName = TableName.valueOf(SimpleRegionObserver.TABLE_SKIPPED);
  HTable table = util.createTable(tableName, new byte[][] { A, B, C });

  JVMClusterUtil.RegionServerThread rs1 = cluster.startRegionServer();
  ServerName sn2 = rs1.getRegionServer().getServerName();
  String regEN = table.getRegionLocations().firstEntry().getKey().getEncodedName();

  util.getHBaseAdmin().move(regEN.getBytes(), sn2.getServerName().getBytes());
  while (!sn2.equals(table.getRegionLocations().firstEntry().getValue())) {
    Thread.sleep(100);
  }

  Put put = new Put(ROW);
  put.add(A, A, A);
  put.add(B, B, B);
  put.add(C, C, C);
  table.put(put);
  table.flushCommits();

  cluster.killRegionServer(rs1.getRegionServer().getServerName());
  Threads.sleep(20000); // Just to be sure that the kill has fully started.
  util.waitUntilAllRegionsAssigned(tableName);

  verifyMethodResult(SimpleRegionObserver.class,
      new String[] { "getCtPreWALRestore", "getCtPostWALRestore" }, tableName,
      new Integer[] { 0, 0 });

  util.deleteTable(tableName);
  table.close();
}
@Test(timeout = 30000)
public void testInfo() {
  HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
  MetricsMasterWrapperImpl info = new MetricsMasterWrapperImpl(master);
  assertEquals(master.getSplitPlanCount(), info.getSplitPlanCount(), 0);
  assertEquals(master.getMergePlanCount(), info.getMergePlanCount(), 0);
  assertEquals(master.getAverageLoad(), info.getAverageLoad(), 0);
  assertEquals(master.getClusterId(), info.getClusterId());
  assertEquals(master.getMasterActiveTime(), info.getActiveTime());
  assertEquals(master.getMasterStartTime(), info.getStartTime());
  assertEquals(master.getMasterCoprocessors().length, info.getCoprocessors().length);
  assertEquals(master.getServerManager().getOnlineServersList().size(),
      info.getNumRegionServers());
  assertEquals(5, info.getNumRegionServers());
  String zkServers = info.getZookeeperQuorum();
  assertEquals(zkServers.split(",").length, TEST_UTIL.getZkCluster().getZooKeeperServerNum());

  final int index = 3;
  LOG.info("Stopping " + TEST_UTIL.getMiniHBaseCluster().getRegionServer(index));
  TEST_UTIL.getMiniHBaseCluster().stopRegionServer(index, false);
  TEST_UTIL.getMiniHBaseCluster().waitOnRegionServer(index);
  // We stopped the regionserver, but it could take a while for the master to notice it,
  // so hang here until it does... then move forward to see if the metrics wrapper notices.
  while (TEST_UTIL.getHBaseCluster().getMaster().getServerManager().getOnlineServers().size()
      != 4) {
    Threads.sleep(10);
  }
  assertEquals(4, info.getNumRegionServers());
  assertEquals(1, info.getNumDeadRegionServers());
  assertEquals(1, info.getNumWALFiles());
}
@Override
public void run() {
  while (running.get()) {
    // Use nextInt(2) rather than nextInt() % 2: the latter can return -1 and skip
    // both cases, so servers would be stopped half as often as they are started.
    switch (random.nextInt(2)) {
      case 0: // start a server
        try {
          cluster.startServer();
        } catch (Exception e) {
          LOG.warn(e);
          exception.compareAndSet(null, e);
        }
        break;
      case 1: // stop a server
        try {
          cluster.stopRandomServer();
        } catch (Exception e) {
          LOG.warn(e);
          exception.compareAndSet(null, e);
        }
        break;
      default:
    }
    Threads.sleep(100);
  }
}
@Override
public void preGetOp(final ObserverContext<RegionCoprocessorEnvironment> e, final Get get,
    final List<Cell> results) throws IOException {
  Threads.sleep(2500);
}
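// A sketch of how a delaying observer like the one above might be wired into a test.
// SleepingObserver and the table/family names are hypothetical stand-ins, not from the
// source: register the coprocessor on the table descriptor before creating the table, so
// every Get against the table stalls for 2.5 seconds.
HTableDescriptor htd = new HTableDescriptor(TableName.valueOf("slowTable"));
htd.addFamily(new HColumnDescriptor("fam"));
htd.addCoprocessor(SleepingObserver.class.getName()); // the class declaring preGetOp above
util.getHBaseAdmin().createTable(htd);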
/**
 * Sleeping logic for static methods; handles the interrupt exception. A static version is
 * kept to avoid re-looking-up the integer values.
 */
protected static void sleepBeforeRetry(String msg, int sleepMultiplier) {
  if (sleepMultiplier > hdfsClientRetriesNumber) {
    LOG.warn(msg + ", retries exhausted");
    return;
  }
  LOG.info(msg + ", sleeping " + baseSleepBeforeRetries + " times " + sleepMultiplier);
  Threads.sleep((long) baseSleepBeforeRetries * sleepMultiplier); // long math avoids overflow
}
/**
 * Sleeping logic for static methods; handles the interrupt exception. A static version is
 * kept to avoid re-looking-up the integer values.
 */
private static void sleepBeforeRetry(String msg, int sleepMultiplier, int baseSleepBeforeRetries,
    int hdfsClientRetriesNumber) {
  if (sleepMultiplier > hdfsClientRetriesNumber) {
    LOG.debug(msg + ", retries exhausted");
    return;
  }
  LOG.debug(msg + ", sleeping " + baseSleepBeforeRetries + " times " + sleepMultiplier);
  Threads.sleep((long) baseSleepBeforeRetries * sleepMultiplier);
}
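// A sketch of the kind of retry loop these helpers serve, modeled on HBase's
// HRegionFileSystem.createDir (createDirWithRetries is an illustrative name, not from the
// source): retry the HDFS call with a growing sleep multiplier until the retry budget is
// exhausted, then surface the last IOException.
private static boolean createDirWithRetries(FileSystem fs, Path dir,
    int baseSleepBeforeRetries, int hdfsClientRetriesNumber) throws IOException {
  IOException lastIOE = null;
  int i = 0;
  do {
    try {
      return fs.mkdirs(dir);
    } catch (IOException ioe) {
      lastIOE = ioe;
      if (fs.exists(dir)) return true; // directory is present now; treat as success
      // Sleep longer on each attempt; gives transient HDFS trouble time to clear.
      sleepBeforeRetry("Create Directory", i + 1, baseSleepBeforeRetries,
          hdfsClientRetriesNumber);
    }
  } while (++i <= hdfsClientRetriesNumber);
  throw new IOException("Exception in createDirWithRetries", lastIOE);
}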
@Test
public void testRecovery() throws Exception {
  LOG.info(TestRegionObserverInterface.class.getName() + ".testRecovery");
  TableName tableName = TableName.valueOf(TEST_TABLE.getNameAsString() + ".testRecovery");
  HTable table = util.createTable(tableName, new byte[][] { A, B, C });
  try {
    JVMClusterUtil.RegionServerThread rs1 = cluster.startRegionServer();
    ServerName sn2 = rs1.getRegionServer().getServerName();
    String regEN = table.getRegionLocations().firstEntry().getKey().getEncodedName();

    util.getHBaseAdmin().move(regEN.getBytes(), sn2.getServerName().getBytes());
    while (!sn2.equals(table.getRegionLocations().firstEntry().getValue())) {
      Thread.sleep(100);
    }

    Put put = new Put(ROW);
    put.add(A, A, A);
    put.add(B, B, B);
    put.add(C, C, C);
    table.put(put);

    verifyMethodResult(SimpleRegionObserver.class,
        new String[] { "hadPreGet", "hadPostGet", "hadPrePut", "hadPostPut",
            "hadPreBatchMutate", "hadPostBatchMutate", "hadDelete" },
        tableName, new Boolean[] { false, false, true, true, true, true, false });

    verifyMethodResult(SimpleRegionObserver.class,
        new String[] { "getCtPreWALRestore", "getCtPostWALRestore", "getCtPrePut",
            "getCtPostPut" },
        tableName, new Integer[] { 0, 0, 1, 1 });

    cluster.killRegionServer(rs1.getRegionServer().getServerName());
    Threads.sleep(1000); // Let the kill soak in.
    util.waitUntilAllRegionsAssigned(tableName);
    LOG.info("All regions assigned");

    verifyMethodResult(SimpleRegionObserver.class,
        new String[] { "getCtPrePut", "getCtPostPut" }, tableName, new Integer[] { 0, 0 });
  } finally {
    util.deleteTable(tableName);
    table.close();
  }
}
@Override
public Result postAppend(final ObserverContext<RegionCoprocessorEnvironment> e,
    final Append append, final Result result) throws IOException {
  if (ct.incrementAndGet() == 1) {
    Threads.sleep(sleepTime.get());
  }
  return result;
}
@Override
protected void beforeWaitOnSafePoint() {
  if (throwException) {
    LOG.info("COUNTDOWN");
    // Don't countdown latch until someone is waiting on it; otherwise, the above
    // afterCreatingZigZagLatch will get to the latch and no one will ever free it and we'll
    // be stuck; test won't go down
    while (this.latch.getCount() <= 0) Threads.sleep(1);
    this.latch.countDown();
  }
}
@Override
public MultiResponse multi(RpcController controller, MultiRequest request)
    throws ServiceException {
  int concurrentInvocations = this.multiInvocationsCount.incrementAndGet();
  try {
    if (concurrentInvocations >= tooManyMultiRequests) {
      throw new ServiceException(
          new RegionTooBusyException("concurrentInvocations=" + concurrentInvocations));
    }
    Threads.sleep(multiPause);
    return doMultiResponse(meta, sequenceids, request);
  } finally {
    this.multiInvocationsCount.decrementAndGet();
  }
}
public void testRpcWithChaosMonkey(boolean isSyncClient) throws Throwable {
  LOG.info("Starting test");
  Cluster cluster = new Cluster(10, 100);
  for (int i = 0; i < 10; i++) {
    cluster.startServer();
  }

  ArrayList<SimpleClient> clients = new ArrayList<>();

  // All threads should share the same rpc client.
  AbstractRpcClient<?> rpcClient = createRpcClient(conf, isSyncClient);

  for (int i = 0; i < 30; i++) {
    String clientId = "client_" + i + "_";
    LOG.info("Starting client: " + clientId);
    SimpleClient client = new SimpleClient(cluster, rpcClient, clientId);
    client.start();
    clients.add(client);
  }

  LOG.info("Starting MiniChaosMonkey");
  MiniChaosMonkey cm = new MiniChaosMonkey(cluster);
  cm.start();
  Threads.sleep(30000);
  LOG.info("Stopping MiniChaosMonkey");
  cm.stopRunning();
  cm.join();
  cm.rethrowException();

  LOG.info("Stopping clients");
  for (SimpleClient client : clients) {
    LOG.info("Stopping client: " + client.id);
    LOG.info(client.id + " numCalls:" + client.numCalls);
    client.stopRunning();
    client.join();
    client.rethrowException();
    assertTrue(client.numCalls > 10);
  }

  LOG.info("Stopping RpcClient");
  rpcClient.close();

  LOG.info("Stopping Cluster");
  cluster.stopRunning();
}
@Override
public void run() {
  while (!isStopped()) {
    try {
      List<HRegionInfo> regions =
          MetaScanner.listAllRegions(TEST_UTIL.getConfiguration(), connection, false);

      // Select a random region.
      HRegionInfo parent = regions.get(random.nextInt(regions.size()));
      if (parent == null || !TABLENAME.equals(parent.getTable())) {
        continue;
      }

      long startKey = 0, endKey = Long.MAX_VALUE;
      byte[] start = parent.getStartKey();
      byte[] end = parent.getEndKey();
      if (!Bytes.equals(HConstants.EMPTY_START_ROW, parent.getStartKey())) {
        startKey = Bytes.toLong(parent.getStartKey());
      }
      if (!Bytes.equals(HConstants.EMPTY_END_ROW, parent.getEndKey())) {
        endKey = Bytes.toLong(parent.getEndKey());
      }
      if (startKey == endKey) {
        continue;
      }

      long midKey = BigDecimal.valueOf(startKey).add(BigDecimal.valueOf(endKey))
          .divideToIntegralValue(BigDecimal.valueOf(2)).longValue();

      HRegionInfo splita = new HRegionInfo(TABLENAME, start, Bytes.toBytes(midKey));
      HRegionInfo splitb = new HRegionInfo(TABLENAME, Bytes.toBytes(midKey), end);

      MetaTableAccessor.splitRegion(connection, parent, splita, splitb,
          ServerName.valueOf("fooserver", 1, 0));

      Threads.sleep(random.nextInt(200));
    } catch (Throwable e) {
      ex = e;
      Assert.fail(StringUtils.stringifyException(e));
    }
  }
}
@Test(timeout = 300000)
public void testClusterRestart() throws Exception {
  UTIL.startMiniCluster(3);
  while (!UTIL.getMiniHBaseCluster().getMaster().isInitialized()) {
    Threads.sleep(1);
  }
  LOG.info("\n\nCreating tables");
  for (byte[] TABLE : TABLES) {
    UTIL.createTable(TABLE, FAMILY);
  }
  for (byte[] TABLE : TABLES) {
    UTIL.waitTableEnabled(TABLE);
  }

  List<HRegionInfo> allRegions = MetaScanner.listAllRegions(UTIL.getConfiguration(), true);
  assertEquals(4, allRegions.size());

  LOG.info("\n\nShutting down cluster");
  UTIL.shutdownMiniHBaseCluster();

  LOG.info("\n\nSleeping a bit");
  Thread.sleep(2000);

  LOG.info("\n\nStarting cluster the second time");
  UTIL.restartHBaseCluster(3);

  // Need to use a new 'Configuration' so we make a new HConnection.
  // Otherwise we're reusing an HConnection that has gone stale because
  // the shutdown of the cluster also shut down the connection.
  allRegions = MetaScanner.listAllRegions(new Configuration(UTIL.getConfiguration()), true);
  assertEquals(4, allRegions.size());

  LOG.info("\n\nWaiting for tables to be available");
  for (byte[] TABLE : TABLES) {
    try {
      UTIL.createTable(TABLE, FAMILY);
      assertTrue("Able to create table that should already exist", false);
    } catch (TableExistsException tee) {
      LOG.info("Table already exists as expected");
    }
    UTIL.waitTableAvailable(TABLE);
  }
}
public static void stopMasterAndAssignMeta(HBaseTestingUtility HTU)
    throws IOException, InterruptedException {
  // Stop master
  HMaster master = HTU.getHBaseCluster().getMaster();
  ServerName masterAddr = master.getServerName();
  master.stopMaster();

  LOG.info("Waiting until master thread exits");
  while (HTU.getHBaseCluster().getMasterThread() != null
      && HTU.getHBaseCluster().getMasterThread().isAlive()) {
    Threads.sleep(100);
  }

  HRegionServer.TEST_SKIP_REPORTING_TRANSITION = true;
  // Master is down, so is the meta. We need to assign it somewhere
  // so that regions can be assigned during the mocking phase.
  HRegionServer hrs =
      HTU.getHBaseCluster().getLiveRegionServerThreads().get(0).getRegionServer();
  ZooKeeperWatcher zkw = hrs.getZooKeeper();
  MetaTableLocator mtl = new MetaTableLocator();
  ServerName sn = mtl.getMetaRegionLocation(zkw);
  if (sn != null && !masterAddr.equals(sn)) {
    return;
  }

  ProtobufUtil.openRegion(hrs.getRSRpcServices(), hrs.getServerName(),
      HRegionInfo.FIRST_META_REGIONINFO);
  while (true) {
    sn = mtl.getMetaRegionLocation(zkw);
    if (sn != null && sn.equals(hrs.getServerName())
        && hrs.onlineRegions.containsKey(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName())) {
      break;
    }
    Thread.sleep(100);
  }
}
@Test
public void testInterrupt50Percent() throws IOException, InterruptedException {
  final AtomicInteger noEx = new AtomicInteger(0);
  final AtomicInteger badEx = new AtomicInteger(0);
  final AtomicInteger noInt = new AtomicInteger(0);
  final AtomicInteger done = new AtomicInteger(0);
  List<Thread> threads = new ArrayList<Thread>();

  final int nbThread = 100;

  for (int i = 0; i < nbThread; i++) {
    Thread t = new Thread() {
      @Override
      public void run() {
        try {
          Table ht = util.getConnection().getTable(tableName);
          Result r = ht.get(new Get(row1));
          noEx.incrementAndGet();
        } catch (IOException e) {
          LOG.info("exception", e);
          if (!(e instanceof InterruptedIOException)
              || (e instanceof SocketTimeoutException)) {
            badEx.incrementAndGet();
          } else {
            if (Thread.currentThread().isInterrupted()) {
              noInt.incrementAndGet();
              LOG.info("The thread should NOT have the 'interrupted' status.");
            }
          }
        } finally {
          done.incrementAndGet();
        }
      }
    };
    t.setName("TestClientOperationInterrupt #" + i);
    threads.add(t);
    t.start();
  }

  for (int i = 0; i < nbThread / 2; i++) {
    threads.get(i).interrupt();
  }

  boolean stillAlive = true;
  while (stillAlive) {
    stillAlive = false;
    for (Thread t : threads) {
      if (t.isAlive()) {
        stillAlive = true;
      }
    }
    Threads.sleep(10);
  }

  Assert.assertFalse(Thread.currentThread().isInterrupted());
  Assert.assertTrue(" noEx: " + noEx.get() + ", badEx=" + badEx.get() + ", noInt=" + noInt.get(),
      noEx.get() == nbThread / 2 && badEx.get() == 0);

  // The problem here is that we need the server to free its handlers to handle all operations.
  while (done.get() != nbThread) {
    Thread.sleep(1);
  }

  Table ht = util.getConnection().getTable(tableName);
  Result r = ht.get(new Get(row1));
  Assert.assertFalse(r.isEmpty());
}
/**
 * Test that the global memstore size in the region server equals the sum of each region's
 * memstore size.
 * @throws Exception
 */
@Test
public void testGlobalMemStore() throws Exception {
  // Start the cluster
  LOG.info("Starting cluster");
  Configuration conf = HBaseConfiguration.create();
  TEST_UTIL = new HBaseTestingUtility(conf);
  TEST_UTIL.startMiniCluster(1, regionServerNum);
  cluster = TEST_UTIL.getHBaseCluster();
  LOG.info("Waiting for active/ready master");
  cluster.waitForActiveAndReadyMaster();

  // Create a table with regions
  TableName table = TableName.valueOf("TestGlobalMemStoreSize");
  byte[] family = Bytes.toBytes("family");
  LOG.info("Creating table with " + regionNum + " regions");
  Table ht = TEST_UTIL.createMultiRegionTable(table, family, regionNum);
  int numRegions = -1;
  try (RegionLocator r = TEST_UTIL.getConnection().getRegionLocator(table)) {
    numRegions = r.getStartKeys().length;
  }
  assertEquals(regionNum, numRegions);
  waitForAllRegionsAssigned();

  for (HRegionServer server : getOnlineRegionServers()) {
    long globalMemStoreSize = 0;
    for (HRegionInfo regionInfo : ProtobufUtil.getOnlineRegions(server.getRSRpcServices())) {
      globalMemStoreSize +=
          server.getFromOnlineRegions(regionInfo.getEncodedName()).getMemstoreSize();
    }
    assertEquals(server.getRegionServerAccounting().getGlobalMemstoreSize(),
        globalMemStoreSize);
  }

  // Check the global memstore size after flush.
  int i = 0;
  for (HRegionServer server : getOnlineRegionServers()) {
    LOG.info("Starting flushes on " + server.getServerName() + ", size="
        + server.getRegionServerAccounting().getGlobalMemstoreSize());

    for (HRegionInfo regionInfo : ProtobufUtil.getOnlineRegions(server.getRSRpcServices())) {
      Region r = server.getFromOnlineRegions(regionInfo.getEncodedName());
      flush(r, server);
    }
    LOG.info("Post flush on " + server.getServerName());
    long now = System.currentTimeMillis();
    long timeout = now + 1000;
    // Wait up to a second for the flushes to drain the memstore.
    while (server.getRegionServerAccounting().getGlobalMemstoreSize() != 0
        && System.currentTimeMillis() < timeout) {
      Threads.sleep(10);
    }
    long size = server.getRegionServerAccounting().getGlobalMemstoreSize();
    if (size > 0) {
      // If size > 0, see if it's because the meta region got edits while
      // our test was running....
      for (HRegionInfo regionInfo : ProtobufUtil.getOnlineRegions(server.getRSRpcServices())) {
        Region r = server.getFromOnlineRegions(regionInfo.getEncodedName());
        long l = r.getMemstoreSize();
        if (l > 0) {
          // Only meta could have edits at this stage. Give it another flush to
          // clear them.
          assertTrue(regionInfo.isMetaRegion());
          LOG.info(r.toString() + " " + l + ", reflushing");
          r.flush(true);
        }
      }
    }
    size = server.getRegionServerAccounting().getGlobalMemstoreSize();
    assertEquals("Server=" + server.getServerName() + ", i=" + i++, 0, size);
  }

  ht.close();
  TEST_UTIL.shutdownMiniCluster();
}
private void doIncrementalLoadTest(boolean shouldChangeRegions, boolean shouldKeepLocality)
    throws Exception {
  util = new HBaseTestingUtility();
  Configuration conf = util.getConfiguration();
  conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, shouldKeepLocality);
  int hostCount = 1;
  int regionNum = 5;
  if (shouldKeepLocality) {
    // We should raise the host count above the HDFS replica count when MiniHBaseCluster
    // supports an explicit hostnames parameter, just like MiniDFSCluster does.
    hostCount = 3;
    regionNum = 20;
  }

  byte[][] splitKeys = generateRandomSplitKeys(regionNum - 1);
  String[] hostnames = new String[hostCount];
  for (int i = 0; i < hostCount; ++i) {
    hostnames[i] = "datanode_" + i;
  }
  util.startMiniCluster(1, hostCount, hostnames);

  Table table = util.createTable(TABLE_NAME, FAMILIES, splitKeys);
  Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
  try (RegionLocator r = util.getConnection().getRegionLocator(TABLE_NAME);
      Admin admin = util.getConnection().getAdmin()) {
    assertEquals("Should start with empty table", 0, util.countRows(table));
    int numRegions = r.getStartKeys().length;
    assertEquals("Should make " + regionNum + " regions", regionNum, numRegions);

    // Generate the bulk load files
    runIncrementalPELoad(conf, table.getTableDescriptor(), r, testDir);
    // This doesn't write into the table, just makes files
    assertEquals("HFOF should not touch actual table", 0, util.countRows(table));

    // Make sure that a directory was created for every CF
    int dir = 0;
    for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) {
      for (byte[] family : FAMILIES) {
        if (Bytes.toString(family).equals(f.getPath().getName())) {
          ++dir;
        }
      }
    }
    assertEquals("Column family not found in FS.", FAMILIES.length, dir);

    // Handle the split case
    if (shouldChangeRegions) {
      LOG.info("Changing regions in table");
      admin.disableTable(table.getName());
      while (util.getMiniHBaseCluster().getMaster().getAssignmentManager().getRegionStates()
          .isRegionsInTransition()) {
        Threads.sleep(200);
        LOG.info("Waiting on table to finish disabling");
      }
      util.deleteTable(table.getName());
      byte[][] newSplitKeys = generateRandomSplitKeys(14);
      table = util.createTable(TABLE_NAME, FAMILIES, newSplitKeys);

      while (util.getConnection().getRegionLocator(TABLE_NAME).getAllRegionLocations().size()
          != 15 || !admin.isTableAvailable(table.getName())) {
        Thread.sleep(200);
        LOG.info("Waiting for new region assignment to happen");
      }
    }

    // Perform the actual load
    new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, r);

    // Ensure data shows up
    int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
    assertEquals("LoadIncrementalHFiles should put expected data in table", expectedRows,
        util.countRows(table));
    Scan scan = new Scan();
    ResultScanner results = table.getScanner(scan);
    for (Result res : results) {
      assertEquals(FAMILIES.length, res.rawCells().length);
      Cell first = res.rawCells()[0];
      for (Cell kv : res.rawCells()) {
        assertTrue(CellUtil.matchingRow(first, kv));
        assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
      }
    }
    results.close();
    String tableDigestBefore = util.checksumRows(table);

    // Check region locality
    HDFSBlocksDistribution hbd = new HDFSBlocksDistribution();
    for (HRegion region : util.getHBaseCluster().getRegions(TABLE_NAME)) {
      hbd.add(region.getHDFSBlocksDistribution());
    }
    for (String hostname : hostnames) {
      float locality = hbd.getBlockLocalityIndex(hostname);
      LOG.info("locality of [" + hostname + "]: " + locality);
      assertEquals(100, (int) (locality * 100));
    }

    // Cause regions to reopen
    admin.disableTable(TABLE_NAME);
    while (!admin.isTableDisabled(TABLE_NAME)) {
      Thread.sleep(200);
      LOG.info("Waiting for table to disable");
    }
    admin.enableTable(TABLE_NAME);
    util.waitTableAvailable(TABLE_NAME);
    assertEquals("Data should remain after reopening of regions", tableDigestBefore,
        util.checksumRows(table));
  } finally {
    testDir.getFileSystem(conf).delete(testDir, true);
    util.deleteTable(TABLE_NAME);
    util.shutdownMiniCluster();
  }
}
/**
 * Reproduce the lockup that happens when we get an inopportune sync during setup for a
 * zigzaglatch wait. See HBASE-14317. If the code below is broken, this test will time out
 * because it is locked up.
 *
 * <p>First I need to set up some mocks for Server and RegionServerServices. I also need to
 * set up a dodgy WAL that will throw an exception when we go to append to it.
 */
@Test(timeout = 20000)
public void testLockupWhenSyncInMiddleOfZigZagSetup() throws IOException {
  // A WAL that we can have throw exceptions when a flag is set.
  class DodgyFSLog extends FSHLog {
    // Set this when want the WAL to start throwing exceptions.
    volatile boolean throwException = false;

    // Latch to hold up processing until after another operation has had time to run.
    CountDownLatch latch = new CountDownLatch(1);

    public DodgyFSLog(FileSystem fs, Path root, String logDir, Configuration conf)
        throws IOException {
      super(fs, root, logDir, conf);
    }

    @Override
    protected void afterCreatingZigZagLatch() {
      // If throwException set, then append will throw an exception causing the WAL to be
      // rolled. We'll come in here. Hold up processing until a sync can get in before
      // the zigzag has time to complete its setup and get its own sync in. This is what
      // causes the lock up we've seen in production.
      if (throwException) {
        try {
          LOG.info("LATCHED");
          // So, timing can have it that the test can run and the bad flush below happens
          // before we get here. In this case, we'll be stuck waiting on this latch but there
          // is nothing in the WAL pipeline to get us to the below beforeWaitOnSafePoint...
          // because all WALs have rolled. In this case, just give up on test.
          if (!this.latch.await(5, TimeUnit.SECONDS)) {
            LOG.warn("GIVE UP! Failed waiting on latch...Test is ABORTED!");
          }
        } catch (InterruptedException e) {
          e.printStackTrace();
        }
      }
    }

    @Override
    protected void beforeWaitOnSafePoint() {
      if (throwException) {
        LOG.info("COUNTDOWN");
        // Don't countdown latch until someone is waiting on it; otherwise, the above
        // afterCreatingZigZagLatch will get to the latch and no one will ever free it and
        // we'll be stuck; test won't go down
        while (this.latch.getCount() <= 0) Threads.sleep(1);
        this.latch.countDown();
      }
    }

    @Override
    protected Writer createWriterInstance(Path path) throws IOException {
      final Writer w = super.createWriterInstance(path);
      return new Writer() {
        @Override
        public void close() throws IOException {
          w.close();
        }

        @Override
        public void sync() throws IOException {
          if (throwException) {
            throw new IOException("FAKE! Failed to replace a bad datanode...SYNC");
          }
          w.sync();
        }

        @Override
        public void append(Entry entry) throws IOException {
          if (throwException) {
            throw new IOException("FAKE! Failed to replace a bad datanode...APPEND");
          }
          w.append(entry);
        }

        @Override
        public long getLength() {
          return w.getLength();
        }
      };
    }
  }

  // Mocked up server and regionserver services. Needed below.
  Server server = Mockito.mock(Server.class);
  Mockito.when(server.getConfiguration()).thenReturn(CONF);
  Mockito.when(server.isStopped()).thenReturn(false);
  Mockito.when(server.isAborted()).thenReturn(false);
  RegionServerServices services = Mockito.mock(RegionServerServices.class);

  // OK. Now I have my mocked up Server & RegionServerServices and dodgy WAL, go ahead
  // with the test.
  FileSystem fs = FileSystem.get(CONF);
  Path rootDir = new Path(dir + getName());
  DodgyFSLog dodgyWAL = new DodgyFSLog(fs, rootDir, getName(), CONF);
  Path originalWAL = dodgyWAL.getCurrentFileName();
  // I need a log roller running.
  LogRoller logRoller = new LogRoller(server, services);
  logRoller.addWAL(dodgyWAL);
  // There is no 'stop' once a logRoller is running.. it just dies.
  logRoller.start();
  // Now get a region and start adding in edits.
  HTableDescriptor htd = new HTableDescriptor(TableName.META_TABLE_NAME);
  final HRegion region = initHRegion(tableName, null, null, dodgyWAL);
  byte[] bytes = Bytes.toBytes(getName());
  NavigableMap<byte[], Integer> scopes = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
  scopes.put(COLUMN_FAMILY_BYTES, 0);
  MultiVersionConcurrencyControl mvcc = new MultiVersionConcurrencyControl();
  try {
    // First get something into memstore. Make a Put and then pull the Cell out of it. Will
    // manage append and sync carefully in below to manufacture hang. We keep adding same
    // edit. WAL subsystem doesn't care.
    Put put = new Put(bytes);
    put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), bytes);
    WALKey key = new WALKey(region.getRegionInfo().getEncodedNameAsBytes(), htd.getTableName(),
        System.currentTimeMillis(), mvcc, scopes);
    WALEdit edit = new WALEdit();
    CellScanner cellScanner = put.cellScanner();
    assertTrue(cellScanner.advance());
    edit.add(cellScanner.current());
    // Put something in memstore and out in the WAL. Do a big number of appends so we push
    // out the other side of the ringbuffer. If small numbers, stuff doesn't make it to WAL.
    for (int i = 0; i < 1000; i++) {
      region.put(put);
    }
    // Set it so we start throwing exceptions.
    LOG.info("SET throwing of exception on append");
    dodgyWAL.throwException = true;
    // This append provokes a WAL roll request
    dodgyWAL.append(region.getRegionInfo(), key, edit, true);
    boolean exception = false;
    try {
      dodgyWAL.sync();
    } catch (Exception e) {
      exception = true;
    }
    assertTrue("Did not get sync exception", exception);

    // Get a memstore flush going too so we have same hung profile as up in the issue over
    // in HBASE-14317. Flush hangs trying to get sequenceid because the ringbuffer is held up
    // by the zigzaglatch waiting on syncs to come home.
    Thread t = new Thread("Flusher") {
      @Override
      public void run() {
        try {
          if (region.getMemstoreSize() <= 0) {
            throw new IOException("memstore size=" + region.getMemstoreSize());
          }
          region.flush(false);
        } catch (IOException e) {
          // Can fail trying to flush in middle of a roll. Not a failure. Will succeed later
          // when roll completes.
          LOG.info("In flush", e);
        }
        LOG.info("Exiting");
      }
    };
    t.setDaemon(true);
    t.start();
    // Wait until the latch is released down in beforeWaitOnSafePoint.
    while (dodgyWAL.latch.getCount() > 0) Threads.sleep(1);
    // Now assert I got a new WAL file put in place even though loads of errors above.
    assertTrue(!originalWAL.equals(dodgyWAL.getCurrentFileName()));
    // Can I append to it?
    dodgyWAL.throwException = false;
    try {
      region.put(put);
    } catch (Exception e) {
      LOG.info("In the put", e);
    }
  } finally {
    // To stop logRoller, its server has to say it is stopped.
    Mockito.when(server.isStopped()).thenReturn(true);
    if (logRoller != null) logRoller.close();
    try {
      if (region != null) region.close();
      if (dodgyWAL != null) dodgyWAL.close();
    } catch (Exception e) {
      LOG.info("On way out", e);
    }
  }
}
/**
 * Reproduce the lockup that happens when there are no further syncs after an append fails,
 * causing an isolated sync and then an infinite wait. See HBASE-16960. If the code below is
 * broken, this test will time out because it is locked up.
 *
 * <p>Steps to reproduce:<br>
 * 1. Trigger server abort through dodgyWAL1<br>
 * 2. Add a {@link DummyWALActionsListener} to dodgyWAL2 to make the ringbuffer event handler
 * thread sleep for a while, thus keeping {@code endOfBatch} false<br>
 * 3. Publish a sync, then an append which will throw an exception; check whether the sync
 * can return
 */
@Test(timeout = 20000)
public void testLockup16960() throws IOException {
  // A WAL that we can have throw exceptions when a flag is set.
  class DodgyFSLog extends FSHLog {
    // Set this when want the WAL to start throwing exceptions.
    volatile boolean throwException = false;

    public DodgyFSLog(FileSystem fs, Path root, String logDir, Configuration conf)
        throws IOException {
      super(fs, root, logDir, conf);
    }

    @Override
    protected Writer createWriterInstance(Path path) throws IOException {
      final Writer w = super.createWriterInstance(path);
      return new Writer() {
        @Override
        public void close() throws IOException {
          w.close();
        }

        @Override
        public void sync() throws IOException {
          if (throwException) {
            throw new IOException("FAKE! Failed to replace a bad datanode...SYNC");
          }
          w.sync();
        }

        @Override
        public void append(Entry entry) throws IOException {
          if (throwException) {
            throw new IOException("FAKE! Failed to replace a bad datanode...APPEND");
          }
          w.append(entry);
        }

        @Override
        public long getLength() {
          return w.getLength();
        }
      };
    }

    @Override
    protected long doReplaceWriter(Path oldPath, Path newPath, Writer nextWriter)
        throws IOException {
      if (throwException) {
        throw new FailedLogCloseException("oldPath=" + oldPath + ", newPath=" + newPath);
      }
      return super.doReplaceWriter(oldPath, newPath, nextWriter);
    }
  }

  // Mocked up server and regionserver services. Needed below.
  Server server =
      new DummyServer(CONF, ServerName.valueOf("hostname1.example.org", 1234, 1L).toString());
  RegionServerServices services = Mockito.mock(RegionServerServices.class);

  CONF.setLong("hbase.regionserver.hlog.sync.timeout", 10000);

  // OK. Now I have my mocked up Server & RegionServerServices and dodgy WAL,
  // go ahead with the test.
  FileSystem fs = FileSystem.get(CONF);
  Path rootDir = new Path(dir + getName());
  DodgyFSLog dodgyWAL1 = new DodgyFSLog(fs, rootDir, getName(), CONF);

  Path rootDir2 = new Path(dir + getName() + "2");
  final DodgyFSLog dodgyWAL2 = new DodgyFSLog(fs, rootDir2, getName() + "2", CONF);
  // Add a listener to force the ringbuffer event handler to sleep for a while.
  dodgyWAL2.registerWALActionsListener(new DummyWALActionsListener());

  // I need a log roller running.
  LogRoller logRoller = new LogRoller(server, services);
  logRoller.addWAL(dodgyWAL1);
  logRoller.addWAL(dodgyWAL2);
  // There is no 'stop' once a logRoller is running.. it just dies.
  logRoller.start();
  // Now get a region and start adding in edits.
  HTableDescriptor htd = new HTableDescriptor(TableName.META_TABLE_NAME);
  final HRegion region = initHRegion(tableName, null, null, dodgyWAL1);
  byte[] bytes = Bytes.toBytes(getName());
  NavigableMap<byte[], Integer> scopes = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
  scopes.put(COLUMN_FAMILY_BYTES, 0);
  MultiVersionConcurrencyControl mvcc = new MultiVersionConcurrencyControl();
  try {
    Put put = new Put(bytes);
    put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), bytes);
    WALKey key = new WALKey(region.getRegionInfo().getEncodedNameAsBytes(), htd.getTableName(),
        System.currentTimeMillis(), mvcc, scopes);
    WALEdit edit = new WALEdit();
    CellScanner cellScanner = put.cellScanner();
    assertTrue(cellScanner.advance());
    edit.add(cellScanner.current());

    LOG.info("SET throwing of exception on append");
    dodgyWAL1.throwException = true;
    // This append provokes a WAL roll request
    dodgyWAL1.append(region.getRegionInfo(), key, edit, true);
    boolean exception = false;
    try {
      dodgyWAL1.sync();
    } catch (Exception e) {
      exception = true;
    }
    assertTrue("Did not get sync exception", exception);

    // The LogRoller call to dodgyWAL1.rollWriter gets a FailedLogCloseException and
    // causes a server abort.
    try {
      // Wait for the LogRoller to exit.
      Thread.sleep(50);
    } catch (InterruptedException e) {
      e.printStackTrace();
    }

    final CountDownLatch latch = new CountDownLatch(1);

    // Make the RingBufferEventHandler sleep 1s, so the following sync is handled with
    // endOfBatch=false.
    key = new WALKey(region.getRegionInfo().getEncodedNameAsBytes(),
        TableName.valueOf("sleep"), System.currentTimeMillis(), mvcc, scopes);
    dodgyWAL2.append(region.getRegionInfo(), key, edit, true);

    Thread t = new Thread("Sync") {
      @Override
      public void run() {
        try {
          dodgyWAL2.sync();
        } catch (IOException e) {
          LOG.info("In sync", e);
        }
        latch.countDown();
        LOG.info("Sync exiting");
      }
    };
    t.setDaemon(true);
    t.start();
    try {
      // Make sure the sync has been published.
      Thread.sleep(100);
    } catch (InterruptedException e1) {
      e1.printStackTrace();
    }
    // Make the append throw a DamagedWALException.
    key = new WALKey(region.getRegionInfo().getEncodedNameAsBytes(),
        TableName.valueOf("DamagedWALException"), System.currentTimeMillis(), mvcc, scopes);
    dodgyWAL2.append(region.getRegionInfo(), key, edit, true);

    while (latch.getCount() > 0) {
      Threads.sleep(100);
    }
    assertTrue(server.isAborted());
  } finally {
    if (logRoller != null) {
      logRoller.close();
    }
    try {
      if (region != null) {
        region.close();
      }
      if (dodgyWAL1 != null) {
        dodgyWAL1.close();
      }
      if (dodgyWAL2 != null) {
        dodgyWAL2.close();
      }
    } catch (Exception e) {
      LOG.info("On way out", e);
    }
  }
}
private void doIncrementalLoadTest(boolean shouldChangeRegions) throws Exception {
  util = new HBaseTestingUtility();
  Configuration conf = util.getConfiguration();
  byte[][] splitKeys = generateRandomSplitKeys(4);
  util.startMiniCluster();
  try {
    HTable table = util.createTable(TABLE_NAME, FAMILIES, splitKeys);
    Admin admin = table.getConnection().getAdmin();
    Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
    assertEquals("Should start with empty table", 0, util.countRows(table));
    int numRegions = -1;
    try (RegionLocator r = table.getRegionLocator()) {
      numRegions = r.getStartKeys().length;
    }
    assertEquals("Should make 5 regions", 5, numRegions);

    // Generate the bulk load files
    util.startMiniMapReduceCluster();
    runIncrementalPELoad(conf, table.getTableDescriptor(), table.getRegionLocator(), testDir);
    // This doesn't write into the table, just makes files
    assertEquals("HFOF should not touch actual table", 0, util.countRows(table));

    // Make sure that a directory was created for every CF
    int dir = 0;
    for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) {
      for (byte[] family : FAMILIES) {
        if (Bytes.toString(family).equals(f.getPath().getName())) {
          ++dir;
        }
      }
    }
    assertEquals("Column family not found in FS.", FAMILIES.length, dir);

    // Handle the split case
    if (shouldChangeRegions) {
      LOG.info("Changing regions in table");
      admin.disableTable(table.getName());
      while (util.getMiniHBaseCluster().getMaster().getAssignmentManager().getRegionStates()
          .isRegionsInTransition()) {
        Threads.sleep(200);
        LOG.info("Waiting on table to finish disabling");
      }
      util.deleteTable(table.getName());
      byte[][] newSplitKeys = generateRandomSplitKeys(14);
      table = util.createTable(TABLE_NAME, FAMILIES, newSplitKeys);
      while (table.getRegionLocator().getAllRegionLocations().size() != 15
          || !admin.isTableAvailable(table.getName())) {
        Thread.sleep(200);
        LOG.info("Waiting for new region assignment to happen");
      }
    }

    // Perform the actual load
    new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);

    // Ensure data shows up
    int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
    assertEquals("LoadIncrementalHFiles should put expected data in table", expectedRows,
        util.countRows(table));
    Scan scan = new Scan();
    ResultScanner results = table.getScanner(scan);
    for (Result res : results) {
      assertEquals(FAMILIES.length, res.rawCells().length);
      Cell first = res.rawCells()[0];
      for (Cell kv : res.rawCells()) {
        assertTrue(CellUtil.matchingRow(first, kv));
        assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
      }
    }
    results.close();
    String tableDigestBefore = util.checksumRows(table);

    // Cause regions to reopen
    admin.disableTable(TABLE_NAME);
    while (!admin.isTableDisabled(TABLE_NAME)) {
      Thread.sleep(200);
      LOG.info("Waiting for table to disable");
    }
    admin.enableTable(TABLE_NAME);
    util.waitTableAvailable(TABLE_NAME);
    assertEquals("Data should remain after reopening of regions", tableDigestBefore,
        util.checksumRows(table));
  } finally {
    util.shutdownMiniMapReduceCluster();
    util.shutdownMiniCluster();
  }
}
/**
 * Test for HBASE-4288. Throw an IOE when trying to verify meta region and prove it doesn't
 * cause master shutdown.
 * @see <a href="https://issues.apache.org/jira/browse/HBASE-4288">HBASE-4288</a>
 * @throws IOException
 * @throws InterruptedException
 * @throws KeeperException
 */
@Test
public void testServerNotRunningIOException()
    throws IOException, InterruptedException, KeeperException {
  // Mock an HRegionInterface.
  final HRegionInterface implementation = Mockito.mock(HRegionInterface.class);
  HConnection connection = mockConnection(implementation);

  // If a 'getRegionInfo' is called on mocked HRegionInterface, throw IOE the first time.
  // 'Succeed' the second time we are called.
  Mockito.when(implementation.getRegionInfo((byte[]) Mockito.any()))
      .thenThrow(new IOException("Server not running, aborting"))
      .thenReturn(new HRegionInfo());

  // After we encounter the above 'Server not running', we should catch the
  // IOE and go into retrying-for-the-meta mode. We'll do gets on -ROOT- to
  // get the new meta location. Return something so this 'get' succeeds
  // (here we mock up getRegionServerWithRetries, the wrapper around
  // the actual get).

  // TODO: Refactor. This method has been moved out of HConnection.
  // It works for now but has been deprecated.
  Mockito.when(connection.getRegionServerWithRetries((ServerCallable<Result>) Mockito.any()))
      .thenReturn(getMetaTableRowResult());

  // Now start up the catalogtracker with our doctored Connection.
  final CatalogTracker ct = constructAndStartCatalogTracker(connection);
  try {
    // Set a location for root and meta.
    RootLocationEditor.setRootLocation(this.watcher, SN);
    ct.setMetaLocation(SN);
    // Call the method that HBASE-4288 calls. It will try and verify the
    // meta location and will fail on first attempt then go into a long wait.
    // So, do this in a thread and then reset meta location to break it out
    // of its wait after a bit of time.
    final AtomicBoolean metaSet = new AtomicBoolean(false);
    final CountDownLatch latch = new CountDownLatch(1);
    Thread t = new Thread() {
      @Override
      public void run() {
        try {
          latch.countDown();
          metaSet.set(ct.waitForMeta(100000) != null);
        } catch (Exception e) {
          throw new RuntimeException(e);
        }
      }
    };
    t.start();
    latch.await();
    Threads.sleep(1);
    // Now reset the meta as though it were redeployed.
    ct.setMetaLocation(SN);
    t.join();
    Assert.assertTrue(metaSet.get());
  } finally {
    // Clean out root and meta locations or later tests will be confused...
    // they presume a fresh start in zk.
    ct.resetMetaLocation();
  }
}
@Test
public void testConcurrentMetaScannerAndCatalogJanitor() throws Throwable {
  /* TEST PLAN: start with only one region in a table. Have a splitter
   * thread and metascanner threads that continuously scan the meta table for regions.
   * CatalogJanitor from master will run frequently to clean things up.
   */
  TEST_UTIL.getConfiguration().setLong("hbase.catalogjanitor.interval", 500);
  setUp();

  final long runtime = 30 * 1000; // 30 sec
  LOG.info("Starting testConcurrentMetaScannerAndCatalogJanitor");
  final TableName TABLENAME = TableName.valueOf("testConcurrentMetaScannerAndCatalogJanitor");
  final byte[] FAMILY = Bytes.toBytes("family");
  TEST_UTIL.createTable(TABLENAME, FAMILY);

  class RegionMetaSplitter extends StoppableImplementation implements Runnable {
    Random random = new Random();
    Throwable ex = null;

    @Override
    public void run() {
      while (!isStopped()) {
        try {
          List<HRegionInfo> regions =
              MetaScanner.listAllRegions(TEST_UTIL.getConfiguration(), connection, false);

          // Select a random region.
          HRegionInfo parent = regions.get(random.nextInt(regions.size()));
          if (parent == null || !TABLENAME.equals(parent.getTable())) {
            continue;
          }

          long startKey = 0, endKey = Long.MAX_VALUE;
          byte[] start = parent.getStartKey();
          byte[] end = parent.getEndKey();
          if (!Bytes.equals(HConstants.EMPTY_START_ROW, parent.getStartKey())) {
            startKey = Bytes.toLong(parent.getStartKey());
          }
          if (!Bytes.equals(HConstants.EMPTY_END_ROW, parent.getEndKey())) {
            endKey = Bytes.toLong(parent.getEndKey());
          }
          if (startKey == endKey) {
            continue;
          }

          long midKey = BigDecimal.valueOf(startKey).add(BigDecimal.valueOf(endKey))
              .divideToIntegralValue(BigDecimal.valueOf(2)).longValue();

          HRegionInfo splita = new HRegionInfo(TABLENAME, start, Bytes.toBytes(midKey));
          HRegionInfo splitb = new HRegionInfo(TABLENAME, Bytes.toBytes(midKey), end);

          MetaTableAccessor.splitRegion(connection, parent, splita, splitb,
              ServerName.valueOf("fooserver", 1, 0));

          Threads.sleep(random.nextInt(200));
        } catch (Throwable e) {
          ex = e;
          Assert.fail(StringUtils.stringifyException(e));
        }
      }
    }

    void rethrowExceptionIfAny() throws Throwable {
      if (ex != null) {
        throw ex;
      }
    }
  }

  class MetaScannerVerifier extends StoppableImplementation implements Runnable {
    Random random = new Random();
    Throwable ex = null;

    @Override
    public void run() {
      while (!isStopped()) {
        try {
          NavigableMap<HRegionInfo, ServerName> regions =
              MetaScanner.allTableRegions(connection, TABLENAME);

          LOG.info("-------");
          byte[] lastEndKey = HConstants.EMPTY_START_ROW;
          for (HRegionInfo hri : regions.navigableKeySet()) {
            long startKey = 0, endKey = Long.MAX_VALUE;
            if (!Bytes.equals(HConstants.EMPTY_START_ROW, hri.getStartKey())) {
              startKey = Bytes.toLong(hri.getStartKey());
            }
            if (!Bytes.equals(HConstants.EMPTY_END_ROW, hri.getEndKey())) {
              endKey = Bytes.toLong(hri.getEndKey());
            }
            LOG.info("start:" + startKey + " end:" + endKey + " hri:" + hri);
            Assert.assertTrue("lastEndKey=" + Bytes.toString(lastEndKey) + ", startKey="
                + Bytes.toString(hri.getStartKey()),
                Bytes.equals(lastEndKey, hri.getStartKey()));
            lastEndKey = hri.getEndKey();
          }
          Assert.assertTrue(Bytes.equals(lastEndKey, HConstants.EMPTY_END_ROW));
          LOG.info("-------");
          Threads.sleep(10 + random.nextInt(50));
        } catch (Throwable e) {
          ex = e;
          Assert.fail(StringUtils.stringifyException(e));
        }
      }
    }

    void rethrowExceptionIfAny() throws Throwable {
      if (ex != null) {
        throw ex;
      }
    }
  }

  RegionMetaSplitter regionMetaSplitter = new RegionMetaSplitter();
  MetaScannerVerifier metaScannerVerifier = new MetaScannerVerifier();

  Thread regionMetaSplitterThread = new Thread(regionMetaSplitter);
  Thread metaScannerVerifierThread = new Thread(metaScannerVerifier);

  regionMetaSplitterThread.start();
  metaScannerVerifierThread.start();

  Threads.sleep(runtime);

  regionMetaSplitter.stop("test finished");
  metaScannerVerifier.stop("test finished");

  regionMetaSplitterThread.join();
  metaScannerVerifierThread.join();

  regionMetaSplitter.rethrowExceptionIfAny();
  metaScannerVerifier.rethrowExceptionIfAny();
}
/** This tests retaining assignments on a cluster restart */
@Test(timeout = 300000)
public void testRetainAssignmentOnRestart() throws Exception {
  UTIL.startMiniCluster(2);
  while (!UTIL.getMiniHBaseCluster().getMaster().isInitialized()) {
    Threads.sleep(1);
  }
  // Turn off balancer
  UTIL.getMiniHBaseCluster().getMaster().getMasterRpcServices().synchronousBalanceSwitch(false);
  LOG.info("\n\nCreating tables");
  for (byte[] TABLE : TABLES) {
    UTIL.createTable(TABLE, FAMILY);
  }
  for (byte[] TABLE : TABLES) {
    UTIL.waitTableEnabled(TABLE);
  }

  HMaster master = UTIL.getMiniHBaseCluster().getMaster();
  UTIL.waitUntilNoRegionsInTransition(120000);

  // We don't have to use SnapshotOfRegionAssignmentFromMeta.
  // We use it here because AM used to use it to load all user region placements.
  SnapshotOfRegionAssignmentFromMeta snapshot =
      new SnapshotOfRegionAssignmentFromMeta(master.getShortCircuitConnection());
  snapshot.initialize();
  Map<HRegionInfo, ServerName> regionToRegionServerMap = snapshot.getRegionToRegionServerMap();

  MiniHBaseCluster cluster = UTIL.getHBaseCluster();
  List<JVMClusterUtil.RegionServerThread> threads = cluster.getLiveRegionServerThreads();
  assertEquals(2, threads.size());
  int[] rsPorts = new int[3];
  for (int i = 0; i < 2; i++) {
    rsPorts[i] = threads.get(i).getRegionServer().getServerName().getPort();
  }
  rsPorts[2] = cluster.getMaster().getServerName().getPort();
  for (ServerName serverName : regionToRegionServerMap.values()) {
    boolean found = false; // Test only, no need to optimize
    for (int k = 0; k < 3 && !found; k++) {
      found = serverName.getPort() == rsPorts[k];
    }
    assertTrue(found);
  }

  LOG.info("\n\nShutting down HBase cluster");
  cluster.shutdown();
  cluster.waitUntilShutDown();

  LOG.info("\n\nSleeping a bit");
  Thread.sleep(2000);

  LOG.info("\n\nStarting cluster the second time with the same ports");
  try {
    cluster.getConf().setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 4);
    master = cluster.startMaster().getMaster();
    for (int i = 0; i < 3; i++) {
      cluster.getConf().setInt(HConstants.REGIONSERVER_PORT, rsPorts[i]);
      cluster.startRegionServer();
    }
  } finally {
    // Reset region server port so as not to conflict with other tests.
    cluster.getConf().setInt(HConstants.REGIONSERVER_PORT, 0);
    cluster.getConf().setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 2);
  }

  // Make sure live regionservers are on the same host/port.
  List<ServerName> localServers = master.getServerManager().getOnlineServersList();
  assertEquals(4, localServers.size());
  for (int i = 0; i < 3; i++) {
    boolean found = false;
    for (ServerName serverName : localServers) {
      if (serverName.getPort() == rsPorts[i]) {
        found = true;
        break;
      }
    }
    assertTrue(found);
  }

  // Wait till master is initialized and all regions are assigned.
  RegionStates regionStates = master.getAssignmentManager().getRegionStates();
  int expectedRegions = regionToRegionServerMap.size() + 1;
  while (!master.isInitialized()
      || regionStates.getRegionAssignments().size() != expectedRegions) {
    Threads.sleep(100);
  }

  snapshot = new SnapshotOfRegionAssignmentFromMeta(master.getShortCircuitConnection());
  snapshot.initialize();
  Map<HRegionInfo, ServerName> newRegionToRegionServerMap =
      snapshot.getRegionToRegionServerMap();
  assertEquals(regionToRegionServerMap.size(), newRegionToRegionServerMap.size());
  for (Map.Entry<HRegionInfo, ServerName> entry : newRegionToRegionServerMap.entrySet()) {
    if (TableName.NAMESPACE_TABLE_NAME.equals(entry.getKey().getTable())) continue;
    ServerName oldServer = regionToRegionServerMap.get(entry.getKey());
    ServerName currentServer = entry.getValue();
    assertEquals(oldServer.getHostAndPort(), currentServer.getHostAndPort());
    assertNotEquals(oldServer.getStartcode(), currentServer.getStartcode());
  }
}