@Test
public void testCountReferencesFailsSplit() throws IOException {
  final int rowcount = TEST_UTIL.loadRegion(this.parent, CF);
  assertTrue(rowcount > 0);
  int parentRowCount = TEST_UTIL.countRows(this.parent);
  assertEquals(rowcount, parentRowCount);

  // Start transaction.
  HRegion spiedRegion = spy(this.parent);
  SplitTransactionImpl st = prepareGOOD_SPLIT_ROW(spiedRegion);
  SplitTransactionImpl spiedUponSt = spy(st);
  doThrow(new IOException("Failing split. Expected reference file count isn't equal."))
      .when(spiedUponSt)
      .assertReferenceFileCount(
          anyInt(),
          eq(new Path(
              this.parent.getRegionFileSystem().getTableDir(),
              st.getSecondDaughter().getEncodedName())));

  // Run the execute. Look at what it returns.
  boolean expectedException = false;
  Server mockServer = Mockito.mock(Server.class);
  when(mockServer.getConfiguration()).thenReturn(TEST_UTIL.getConfiguration());
  try {
    spiedUponSt.execute(mockServer, null);
  } catch (IOException e) {
    expectedException = true;
  }
  assertTrue(expectedException);
}
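/*
 * The failure-injection pattern used above (and again in testRollback below), in a minimal
 * self-contained form: wrap the object under test in a Mockito spy, stub one internal step to
 * throw, and assert the outer operation surfaces the failure. FlakyStore and its methods are
 * hypothetical stand-ins for illustration; only the Mockito calls (spy, doThrow, when) and the
 * JUnit assert are real API.
 */
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.spy;

import java.io.IOException;

class FlakyStoreSketch {
  static class FlakyStore {
    void verifyFileCount() throws IOException { /* normally a no-op */ }

    void commit() throws IOException {
      verifyFileCount(); // the internal step we will force to fail
    }
  }

  static void demo() throws IOException {
    FlakyStore store = spy(new FlakyStore());
    // Stub the internal step so the real commit() path hits an injected failure.
    doThrow(new IOException("injected")).when(store).verifyFileCount();
    boolean failed = false;
    try {
      store.commit();
    } catch (IOException expected) {
      failed = true;
    }
    assertTrue(failed); // the injected failure propagated out of commit()
  }
}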
/**
 * Test if the region can be closed properly.
 *
 * @throws IOException
 * @throws NodeExistsException
 * @throws KeeperException
 * @throws org.apache.hadoop.hbase.exceptions.DeserializationException
 */
@Test
public void testCloseRegion()
    throws IOException, NodeExistsException, KeeperException, DeserializationException {
  final Server server = new MockServer(HTU);
  final RegionServerServices rss = HTU.createMockRegionServerService();
  HTableDescriptor htd = TEST_HTD;
  HRegionInfo hri = TEST_HRI;

  // Open a region first so that it can be closed later.
  OpenRegion(server, rss, htd, hri);

  // Close the region.
  // Create the znode as CLOSING, which is what the Master sets before sending the CLOSE RPC.
  int versionOfClosingNode =
      ZKAssign.createNodeClosing(server.getZooKeeper(), hri, server.getServerName());

  // The CloseRegionHandler will validate the expected version. Given it is set to the
  // correct versionOfClosingNode, the handler should transition the node to
  // RS_ZK_REGION_CLOSED.
  CloseRegionHandler handler =
      new CloseRegionHandler(server, rss, hri, false, true, versionOfClosingNode);
  handler.process();

  // Handler should have transitioned it to RS_ZK_REGION_CLOSED.
  RegionTransition rt =
      RegionTransition.parseFrom(ZKAssign.getData(server.getZooKeeper(), hri.getEncodedName()));
  assertTrue(rt.getEventType().equals(EventType.RS_ZK_REGION_CLOSED));
}
/**
 * Test that if we fail a flush, abort gets set on close.
 *
 * @see <a href="https://issues.apache.org/jira/browse/HBASE-4270">HBASE-4270</a>
 * @throws IOException
 * @throws NodeExistsException
 * @throws KeeperException
 */
@Test
public void testFailedFlushAborts() throws IOException, NodeExistsException, KeeperException {
  final Server server = new MockServer(HTU, false);
  final RegionServerServices rss = HTU.createMockRegionServerService();
  HTableDescriptor htd = TEST_HTD;
  final HRegionInfo hri =
      new HRegionInfo(htd.getTableName(), HConstants.EMPTY_END_ROW, HConstants.EMPTY_END_ROW);
  HRegion region = HTU.createLocalHRegion(hri, htd);
  try {
    assertNotNull(region);
    // Spy on the region so we can throw an exception when close is called.
    HRegion spy = Mockito.spy(region);
    final boolean abort = false;
    Mockito.when(spy.close(abort)).thenThrow(new RuntimeException("Mocked failed close!"));
    // The CloseRegionHandler will try to get an HRegion that corresponds
    // to the passed hri -- so insert the region into the online region Set.
    rss.addToOnlineRegions(spy);
    // Assert the Server is NOT stopped before we call close region.
    assertFalse(server.isStopped());
    CloseRegionHandler handler = new CloseRegionHandler(server, rss, hri, false, false, -1);
    boolean throwable = false;
    try {
      handler.process();
    } catch (Throwable t) {
      throwable = true;
    } finally {
      assertTrue(throwable);
      // Abort calls stop, so the stopped flag should be set.
      assertTrue(server.isStopped());
    }
  } finally {
    HRegion.closeHRegion(region);
  }
}
@Test
public void testRollback() throws IOException {
  final int rowcount = TEST_UTIL.loadRegion(this.parent, CF);
  assertTrue(rowcount > 0);
  int parentRowCount = TEST_UTIL.countRows(this.parent);
  assertEquals(rowcount, parentRowCount);

  // Start transaction.
  HRegion spiedRegion = spy(this.parent);
  SplitTransactionImpl st = prepareGOOD_SPLIT_ROW(spiedRegion);
  SplitTransactionImpl spiedUponSt = spy(st);
  doNothing()
      .when(spiedUponSt)
      .assertReferenceFileCount(
          anyInt(), eq(parent.getRegionFileSystem().getSplitsDir(st.getFirstDaughter())));
  when(spiedRegion.createDaughterRegionFromSplits(spiedUponSt.getSecondDaughter()))
      .thenThrow(new MockedFailedDaughterCreation());

  // Run the execute. Look at what it returns.
  boolean expectedException = false;
  Server mockServer = Mockito.mock(Server.class);
  when(mockServer.getConfiguration()).thenReturn(TEST_UTIL.getConfiguration());
  try {
    spiedUponSt.execute(mockServer, null);
  } catch (MockedFailedDaughterCreation e) {
    expectedException = true;
  }
  assertTrue(expectedException);

  // Run rollback.
  assertTrue(spiedUponSt.rollback(null, null));

  // Assert I can scan parent.
  int parentRowCount2 = TEST_UTIL.countRows(this.parent);
  assertEquals(parentRowCount, parentRowCount2);

  // Assert rollback cleaned up the daughter dirs in the filesystem.
  assertTrue(!this.fs.exists(HRegion.getRegionDir(this.testdir, st.getFirstDaughter())));
  assertTrue(!this.fs.exists(HRegion.getRegionDir(this.testdir, st.getSecondDaughter())));
  assertTrue(!this.parent.lock.writeLock().isHeldByCurrentThread());

  // Now retry the split but do not throw an exception this time.
  assertTrue(st.prepare());
  PairOfSameType<Region> daughters = st.execute(mockServer, null);
  // Count rows. Daughters are already open.
  int daughtersRowCount = 0;
  for (Region openRegion : daughters) {
    try {
      int count = TEST_UTIL.countRows(openRegion);
      assertTrue(count > 0 && count != rowcount);
      daughtersRowCount += count;
    } finally {
      HBaseTestingUtility.closeRegionAndWAL(openRegion);
    }
  }
  assertEquals(rowcount, daughtersRowCount);
  // Assert the write lock is no longer held on parent.
  assertTrue(!this.parent.lock.writeLock().isHeldByCurrentThread());
  assertTrue("Rollback hooks should be called.", wasRollBackHookCalled());
}
@Test
public void testWholesomeSplit() throws IOException {
  final int rowcount = TEST_UTIL.loadRegion(this.parent, CF, true);
  assertTrue(rowcount > 0);
  int parentRowCount = TEST_UTIL.countRows(this.parent);
  assertEquals(rowcount, parentRowCount);

  // Pretend the region's blocks are not in the cache; used for
  // testWholesomeSplitWithHFileV1.
  CacheConfig cacheConf = new CacheConfig(TEST_UTIL.getConfiguration());
  ((LruBlockCache) cacheConf.getBlockCache()).clearCache();

  // Start transaction.
  SplitTransactionImpl st = prepareGOOD_SPLIT_ROW();

  // Run the execute. Look at what it returns.
  Server mockServer = Mockito.mock(Server.class);
  when(mockServer.getConfiguration()).thenReturn(TEST_UTIL.getConfiguration());
  PairOfSameType<Region> daughters = st.execute(mockServer, null);
  // Do some assertions about execution.
  assertTrue(this.fs.exists(this.parent.getRegionFileSystem().getSplitsDir()));
  // Assert the parent region is closed.
  assertTrue(this.parent.isClosed());

  // Assert splitdir is empty -- because its content will have been moved out
  // to be under the daughter region dirs.
  assertEquals(0, this.fs.listStatus(this.parent.getRegionFileSystem().getSplitsDir()).length);
  // Check daughters have correct key span.
  assertTrue(
      Bytes.equals(
          parent.getRegionInfo().getStartKey(),
          daughters.getFirst().getRegionInfo().getStartKey()));
  assertTrue(Bytes.equals(GOOD_SPLIT_ROW, daughters.getFirst().getRegionInfo().getEndKey()));
  assertTrue(Bytes.equals(daughters.getSecond().getRegionInfo().getStartKey(), GOOD_SPLIT_ROW));
  assertTrue(
      Bytes.equals(
          parent.getRegionInfo().getEndKey(),
          daughters.getSecond().getRegionInfo().getEndKey()));
  // Count rows. Daughters are already open.
  int daughtersRowCount = 0;
  for (Region openRegion : daughters) {
    try {
      int count = TEST_UTIL.countRows(openRegion);
      assertTrue(count > 0 && count != rowcount);
      daughtersRowCount += count;
    } finally {
      HBaseTestingUtility.closeRegionAndWAL(openRegion);
    }
  }
  assertEquals(rowcount, daughtersRowCount);
  // Assert the write lock is no longer held on parent.
  assertTrue(!this.parent.lock.writeLock().isHeldByCurrentThread());
}
/**
 * Inspect the log directory to recover any log file that does not belong to an active
 * region server.
 */
void splitLogAfterStartup() {
  boolean retrySplitting =
      !conf.getBoolean("hbase.hlog.split.skip.errors", HLog.SPLIT_SKIP_ERRORS_DEFAULT);
  Path logsDirPath = new Path(this.rootdir, HConstants.HREGION_LOGDIR_NAME);
  do {
    if (master.isStopped()) {
      LOG.warn("Master stopped while splitting logs");
      break;
    }
    List<ServerName> serverNames = new ArrayList<ServerName>();
    try {
      if (!this.fs.exists(logsDirPath)) return;
      FileStatus[] logFolders = FSUtils.listStatus(this.fs, logsDirPath, null);
      // Get online servers after getting the log folders to avoid deleting the log folders
      // of newly checked-in region servers. See HBASE-5916.
      Set<ServerName> onlineServers =
          ((HMaster) master).getServerManager().getOnlineServers().keySet();

      if (logFolders == null || logFolders.length == 0) {
        LOG.debug("No log files to split, proceeding...");
        return;
      }
      for (FileStatus status : logFolders) {
        String sn = status.getPath().getName();
        // Truncate the splitting suffix if present (for ServerName parsing).
        if (sn.endsWith(HLog.SPLITTING_EXT)) {
          sn = sn.substring(0, sn.length() - HLog.SPLITTING_EXT.length());
        }
        ServerName serverName = ServerName.parseServerName(sn);
        if (!onlineServers.contains(serverName)) {
          LOG.info("Log folder " + status.getPath() + " doesn't belong "
              + "to a known region server, splitting");
          serverNames.add(serverName);
        } else {
          LOG.info("Log folder " + status.getPath() + " belongs to an existing region server");
        }
      }
      splitLog(serverNames);
      retrySplitting = false;
    } catch (IOException ioe) {
      LOG.warn("Failed splitting of " + serverNames, ioe);
      if (!checkFileSystem()) {
        LOG.warn("Bad Filesystem, exiting");
        Runtime.getRuntime().halt(1);
      }
      try {
        if (retrySplitting) {
          Thread.sleep(conf.getInt("hbase.hlog.split.failure.retry.interval", 30 * 1000));
        }
      } catch (InterruptedException e) {
        LOG.warn("Interrupted, aborting since cannot return w/o splitting");
        Thread.currentThread().interrupt();
        retrySplitting = false;
        Runtime.getRuntime().halt(1);
      }
    }
  } while (retrySplitting);
}
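/*
 * The retry shape above in isolation: attempt an idempotent operation, and on IOException
 * sleep for a configurable interval before retrying, bailing out cleanly if interrupted.
 * A minimal sketch with java.util.Properties standing in for the Hadoop Configuration;
 * the property name and attemptSplit() are illustrative, not HBase API.
 */
import java.io.IOException;
import java.util.Properties;

class RetryLoopSketch {
  static void runWithRetry(Properties conf) {
    boolean retry = true;
    do {
      try {
        attemptSplit(); // idempotent work; safe to re-run after a partial failure
        retry = false;  // success: leave the loop
      } catch (IOException ioe) {
        long intervalMs = Long.parseLong(conf.getProperty("split.retry.interval.ms", "30000"));
        try {
          Thread.sleep(intervalMs);
        } catch (InterruptedException e) {
          Thread.currentThread().interrupt(); // restore the flag and give up
          retry = false;
        }
      }
    } while (retry);
  }

  static void attemptSplit() throws IOException { /* placeholder for the real work */ }
}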
private void OpenRegion(
    Server server, RegionServerServices rss, HTableDescriptor htd, HRegionInfo hri)
    throws IOException, NodeExistsException, KeeperException, DeserializationException {
  // Create the OFFLINE znode, which is what the Master sets before sending the OPEN RPC.
  ZKAssign.createNodeOffline(server.getZooKeeper(), hri, server.getServerName());
  OpenRegionHandler openHandler = new OpenRegionHandler(server, rss, hri, htd);
  rss.getRegionsInTransitionInRS().put(hri.getEncodedNameAsBytes(), Boolean.TRUE);
  openHandler.process();
  // This parse is not used?
  RegionTransition.parseFrom(ZKAssign.getData(server.getZooKeeper(), hri.getEncodedName()));
  // Delete the node, which is what the Master does after the region is opened.
  ZKAssign.deleteNode(
      server.getZooKeeper(),
      hri.getEncodedName(),
      EventType.RS_ZK_REGION_OPENED,
      server.getServerName());
}
DaughterOpener(final Server s, final Region r) {
  super(
      (s == null ? "null-services" : s.getServerName())
          + "-daughterOpener="
          + r.getRegionInfo().getEncodedName());
  setDaemon(true);
  this.server = s;
  this.r = r;
}
/**
 * Perform time consuming opening of the merged region.
 *
 * @param server Hosting server instance. Can be null when testing
 * @param services Used to online/offline regions.
 * @param merged the merged region
 * @throws IOException If thrown, transaction failed. Call {@link #rollback(Server,
 *     RegionServerServices)}
 */
void openMergedRegion(final Server server, final RegionServerServices services, HRegion merged)
    throws IOException {
  boolean stopped = server != null && server.isStopped();
  boolean stopping = services != null && services.isStopping();
  if (stopped || stopping) {
    LOG.info(
        "Not opening merged region " + merged.getRegionNameAsString()
            + " because stopping=" + stopping + ", stopped=" + stopped);
    return;
  }
  HRegionInfo hri = merged.getRegionInfo();
  LoggingProgressable reporter =
      server == null
          ? null
          : new LoggingProgressable(
              hri,
              server
                  .getConfiguration()
                  .getLong("hbase.regionserver.regionmerge.open.log.interval", 10000));
  merged.openHRegion(reporter);

  if (services != null) {
    try {
      if (useCoordinationForAssignment) {
        services.postOpenDeployTasks(merged);
      } else if (!services.reportRegionStateTransition(
          TransitionCode.MERGED,
          mergedRegionInfo,
          region_a.getRegionInfo(),
          region_b.getRegionInfo())) {
        throw new IOException(
            "Failed to report merged region to master: "
                + mergedRegionInfo.getShortNameToLog());
      }
      services.addToOnlineRegions(merged);
    } catch (KeeperException ke) {
      throw new IOException(ke);
    }
  }
}
public HLogSplitterHandler(
    final Server server,
    String curTask,
    final MutableInt curTaskZKVersion,
    CancelableProgressable reporter,
    AtomicInteger inProgressTasks,
    TaskExecutor splitTaskExecutor,
    RecoveryMode mode) {
  super(server, EventType.RS_LOG_REPLAY);
  this.curTask = curTask;
  this.wal = ZKSplitLog.getFileName(curTask);
  this.reporter = reporter;
  this.inProgressTasks = inProgressTasks;
  this.inProgressTasks.incrementAndGet();
  this.serverName = server.getServerName();
  this.zkw = server.getZooKeeper();
  this.curTaskZKVersion = curTaskZKVersion;
  this.splitTaskExecutor = splitTaskExecutor;
  this.mode = mode;
}
/**
 * Checks to see if the file system is still accessible. If not, aborts the master and
 * marks the file system as unavailable.
 *
 * @return false if the file system is not available
 */
public boolean checkFileSystem() {
  if (this.fsOk) {
    try {
      FSUtils.checkFileSystemAvailable(this.fs);
      FSUtils.checkDfsSafeMode(this.conf);
    } catch (IOException e) {
      master.abort("Shutting down HBase cluster: file system not available", e);
      this.fsOk = false;
    }
  }
  return this.fsOk;
}
/**
 * Perform time consuming opening of the daughter regions.
 *
 * @param server Hosting server instance. Can be null when testing
 * @param services Used to online/offline regions.
 * @param a first daughter region
 * @param b second daughter region
 * @throws IOException If thrown, transaction failed. Call {@link #rollback(Server,
 *     RegionServerServices)}
 */
@VisibleForTesting
void openDaughters(final Server server, final RegionServerServices services, Region a, Region b)
    throws IOException {
  boolean stopped = server != null && server.isStopped();
  boolean stopping = services != null && services.isStopping();
  // TODO: Is this check needed here?
  if (stopped || stopping) {
    LOG.info(
        "Not opening daughters " + b.getRegionInfo().getRegionNameAsString()
            + " and " + a.getRegionInfo().getRegionNameAsString()
            + " because stopping=" + stopping + ", stopped=" + stopped);
  } else {
    // Open daughters in parallel.
    DaughterOpener aOpener = new DaughterOpener(server, a);
    DaughterOpener bOpener = new DaughterOpener(server, b);
    aOpener.start();
    bOpener.start();
    try {
      aOpener.join();
      if (aOpener.getException() == null) {
        transition(SplitTransactionPhase.OPENED_REGION_A);
      }
      bOpener.join();
      if (bOpener.getException() == null) {
        transition(SplitTransactionPhase.OPENED_REGION_B);
      }
    } catch (InterruptedException e) {
      throw (InterruptedIOException) new InterruptedIOException().initCause(e);
    }
    if (aOpener.getException() != null) {
      throw new IOException("Failed " + aOpener.getName(), aOpener.getException());
    }
    if (bOpener.getException() != null) {
      throw new IOException("Failed " + bOpener.getName(), bOpener.getException());
    }
    if (services != null) {
      if (!services.reportRegionStateTransition(
          TransitionCode.SPLIT, parent.getRegionInfo(), hri_a, hri_b)) {
        throw new IOException(
            "Failed to report split region to master: "
                + parent.getRegionInfo().getShortNameToLog());
      }
      // Should add it to OnlineRegions
      services.addToOnlineRegions(b);
      services.addToOnlineRegions(a);
    }
  }
}
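/*
 * The parallel-open pattern above, reduced to its essentials: start two worker threads, join
 * both, and only then rethrow the first captured failure, so neither open is silently
 * abandoned. Opener and openRegion() are illustrative names, not HBase API.
 */
import java.io.IOException;

class ParallelOpenSketch {
  static class Opener extends Thread {
    volatile Throwable error;
    private final String name;

    Opener(String name) {
      super("opener-" + name);
      this.name = name;
      setDaemon(true);
    }

    @Override
    public void run() {
      try {
        openRegion(name); // the time-consuming open
      } catch (Throwable t) {
        error = t;        // capture; join()+check propagates it later
      }
    }
  }

  static void openBoth() throws IOException, InterruptedException {
    Opener a = new Opener("a");
    Opener b = new Opener("b");
    a.start();
    b.start();
    a.join(); // always join both before failing,
    b.join(); // so no opener is left running
    if (a.error != null) throw new IOException("Failed " + a.getName(), a.error);
    if (b.error != null) throw new IOException("Failed " + b.getName(), b.error);
  }

  static void openRegion(String name) throws IOException { /* placeholder */ }
}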
/** Test SplitTransactionListener. */
@Test
public void testSplitTransactionListener() throws IOException {
  SplitTransactionImpl st = new SplitTransactionImpl(this.parent, GOOD_SPLIT_ROW);
  SplitTransaction.TransactionListener listener =
      Mockito.mock(SplitTransaction.TransactionListener.class);
  st.registerTransactionListener(listener);
  st.prepare();
  Server mockServer = Mockito.mock(Server.class);
  when(mockServer.getConfiguration()).thenReturn(TEST_UTIL.getConfiguration());
  PairOfSameType<Region> daughters = st.execute(mockServer, null);
  verify(listener)
      .transition(
          st,
          SplitTransaction.SplitTransactionPhase.STARTED,
          SplitTransaction.SplitTransactionPhase.PREPARED);
  verify(listener, times(15))
      .transition(
          any(SplitTransaction.class),
          any(SplitTransaction.SplitTransactionPhase.class),
          any(SplitTransaction.SplitTransactionPhase.class));
  verifyNoMoreInteractions(listener);
}
/**
 * Run the transaction.
 *
 * @param server Hosting server instance. Can be null when testing
 * @param services Used to online/offline regions.
 * @return merged region
 * @throws IOException If thrown, transaction failed. Call {@link #rollback(Server,
 *     RegionServerServices)}
 * @see #rollback(Server, RegionServerServices)
 */
public HRegion execute(final Server server, final RegionServerServices services)
    throws IOException {
  useCoordinationForAssignment =
      server == null ? true : ConfigUtil.useZKForAssignment(server.getConfiguration());
  if (rmd == null) {
    rmd =
        server != null && server.getCoordinatedStateManager() != null
            ? ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
                .getRegionMergeCoordination()
                .getDefaultDetails()
            : null;
  }
  if (rsCoprocessorHost == null) {
    rsCoprocessorHost =
        server != null ? ((HRegionServer) server).getRegionServerCoprocessorHost() : null;
  }
  HRegion mergedRegion = createMergedRegion(server, services);
  if (rsCoprocessorHost != null) {
    rsCoprocessorHost.postMergeCommit(this.region_a, this.region_b, mergedRegion);
  }
  return stepsAfterPONR(server, services, mergedRegion);
}
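/*
 * The execute/rollback contract spelled out by the javadoc above ("If thrown, transaction
 * failed. Call rollback"), as a self-contained sketch. Transaction, runTransaction and abort
 * are illustrative stand-ins, not HBase API: the point is that a false return from rollback
 * means the point of no return was passed and the hosting server must go down.
 */
import java.io.IOException;

class TransactionContractSketch {
  interface Transaction {
    void execute() throws IOException;

    /** @return false if past the point of no return; the caller must abort. */
    boolean rollback() throws IOException;
  }

  static void runTransaction(Transaction txn) throws IOException {
    try {
      txn.execute();
    } catch (IOException ioe) {
      if (!txn.rollback()) {
        abort("rollback failed past the point of no return", ioe);
      }
    }
  }

  static void abort(String why, Throwable cause) {
    // Stand-in for Server#abort: crash out so master-side recovery can fix up state.
    throw new RuntimeException(why, cause);
  }
}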
/**
 * Open the daughter region.
 *
 * @param server
 * @param daughter
 * @throws IOException
 * @throws KeeperException
 */
@VisibleForTesting
void openDaughterRegion(final Server server, final Region daughter)
    throws IOException, KeeperException {
  HRegionInfo hri = daughter.getRegionInfo();
  LoggingProgressable reporter =
      server == null
          ? null
          : new LoggingProgressable(
              hri,
              server
                  .getConfiguration()
                  .getLong("hbase.regionserver.split.daughter.open.log.interval", 10000));
  ((HRegion) daughter).openHRegion(reporter);
}
@Test
public void testFailAfterPONR() throws IOException, KeeperException {
  final int rowcount = TEST_UTIL.loadRegion(this.parent, CF);
  assertTrue(rowcount > 0);
  int parentRowCount = TEST_UTIL.countRows(this.parent);
  assertEquals(rowcount, parentRowCount);

  // Start transaction.
  SplitTransactionImpl st = prepareGOOD_SPLIT_ROW();
  SplitTransactionImpl spiedUponSt = spy(st);
  Mockito.doThrow(new MockedFailedDaughterOpen())
      .when(spiedUponSt)
      .openDaughterRegion((Server) Mockito.anyObject(), (HRegion) Mockito.anyObject());

  // Run the execute. Look at what it returns.
  boolean expectedException = false;
  Server mockServer = Mockito.mock(Server.class);
  when(mockServer.getConfiguration()).thenReturn(TEST_UTIL.getConfiguration());
  try {
    spiedUponSt.execute(mockServer, null);
  } catch (IOException e) {
    if (e.getCause() != null && e.getCause() instanceof MockedFailedDaughterOpen) {
      expectedException = true;
    }
  }
  assertTrue(expectedException);
  // Rollback should return false: past the PONR, the caller is expected to restart
  // the server rather than roll back.
  assertFalse(spiedUponSt.rollback(null, null));
  // Make sure that region a and region b are still in the filesystem, that
  // they have not been removed; this is supposed to be the case if we go
  // past the point of no return.
  Path tableDir = this.parent.getRegionFileSystem().getTableDir();
  Path daughterADir = new Path(tableDir, spiedUponSt.getFirstDaughter().getEncodedName());
  Path daughterBDir = new Path(tableDir, spiedUponSt.getSecondDaughter().getEncodedName());
  assertTrue(TEST_UTIL.getTestFileSystem().exists(daughterADir));
  assertTrue(TEST_UTIL.getTestFileSystem().exists(daughterBDir));
}
public MasterFileSystem(Server master, MasterServices services, MasterMetrics metrics,
    boolean masterRecovery) throws IOException {
  this.conf = master.getConfiguration();
  this.master = master;
  this.services = services;
  this.metrics = metrics;
  // Set filesystem to be that of this.rootdir else we get complaints about
  // mismatched filesystems if hbase.rootdir is hdfs and fs.defaultFS is
  // default localfs. Presumption is that rootdir is fully-qualified before
  // we get to here with appropriate fs scheme.
  // Set the HBase root directory.
  this.rootdir = FSUtils.getRootDir(conf);
  // Temporary directory for HBase table creation and deletion: /${hbase.rootdir}/.tmp
  this.tempdir = new Path(this.rootdir, HConstants.HBASE_TEMP_DIRECTORY);
  // Cover both bases, the old way of setting default fs and the new.
  // We're supposed to run on 0.20 and 0.21 anyways.
  this.fs = this.rootdir.getFileSystem(conf);
  String fsUri = this.fs.getUri().toString();
  conf.set("fs.default.name", fsUri);
  conf.set("fs.defaultFS", fsUri);
  // Make sure the fs has the same conf.
  fs.setConf(conf);
  this.distributedLogSplitting = conf.getBoolean("hbase.master.distributed.log.splitting", true);
  if (this.distributedLogSplitting) {
    this.splitLogManager = new SplitLogManager(master.getZooKeeper(),
        master.getConfiguration(), master, master.getServerName().toString());
    this.splitLogManager.finishInitialization(masterRecovery);
  } else {
    this.splitLogManager = null;
  }
  // Set up the filesystem layout and the archived logs path.
  this.oldLogDir = createInitialFileSystemLayout();
}
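/*
 * The filesystem/config alignment trick used above, in isolation: derive the FileSystem from
 * the fully-qualified root path, then write that filesystem's URI back into the configuration
 * under both the old and new default-FS keys, so later FileSystem.get(conf) calls resolve to
 * the same filesystem as the root directory. A minimal sketch against the Hadoop Configuration
 * and Path APIs; the hdfs URI default is an example value.
 */
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

class DefaultFsAlignmentSketch {
  static FileSystem alignDefaultFs(Configuration conf) throws IOException {
    Path rootdir = new Path(conf.get("hbase.rootdir", "hdfs://namenode:8020/hbase"));
    FileSystem fs = rootdir.getFileSystem(conf); // fs of the root dir, not the default fs
    String fsUri = fs.getUri().toString();
    conf.set("fs.default.name", fsUri); // old key, kept for 0.20-era code
    conf.set("fs.defaultFS", fsUri);    // new key
    return fs;
  }
}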
public HRegion stepsAfterPONR(
    final Server server, final RegionServerServices services, HRegion mergedRegion)
    throws IOException {
  openMergedRegion(server, services, mergedRegion);
  if (useCoordination(server)) {
    ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
        .getRegionMergeCoordination()
        .completeRegionMergeTransaction(
            services, mergedRegionInfo, region_a, region_b, rmd, mergedRegion);
  }
  if (rsCoprocessorHost != null) {
    rsCoprocessorHost.postMerge(this.region_a, this.region_b, mergedRegion);
  }
  return mergedRegion;
}
public WALSplitterHandler(
    final Server server,
    SplitLogWorkerCoordination coordination,
    SplitLogWorkerCoordination.SplitTaskDetails splitDetails,
    CancelableProgressable reporter,
    AtomicInteger inProgressTasks,
    TaskExecutor splitTaskExecutor,
    RecoveryMode mode) {
  super(server, EventType.RS_LOG_REPLAY);
  this.splitTaskDetails = splitDetails;
  this.coordination = coordination;
  this.reporter = reporter;
  this.inProgressTasks = inProgressTasks;
  this.inProgressTasks.incrementAndGet();
  this.serverName = server.getServerName();
  this.splitTaskExecutor = splitTaskExecutor;
  this.mode = mode;
}
/**
 * Reproduce the lockup that happens when we get an inopportune sync during setup for the
 * zigzaglatch wait. See HBASE-14317. If the below is broken, we will see this test time out
 * because it is locked up.
 *
 * <p>First I need to set up some mocks for Server and RegionServerServices. I also need to
 * set up a dodgy WAL that will throw an exception when we go to append to it.
 */
@Test(timeout = 20000)
public void testLockupWhenSyncInMiddleOfZigZagSetup() throws IOException {
  // A WAL that we can have throw exceptions when a flag is set.
  class DodgyFSLog extends FSHLog {
    // Set this when we want the WAL to start throwing exceptions.
    volatile boolean throwException = false;

    // Latch to hold up processing until after another operation has had time to run.
    CountDownLatch latch = new CountDownLatch(1);

    public DodgyFSLog(FileSystem fs, Path root, String logDir, Configuration conf)
        throws IOException {
      super(fs, root, logDir, conf);
    }

    @Override
    protected void afterCreatingZigZagLatch() {
      // If throwException is set, then append will throw an exception causing the WAL to be
      // rolled. We'll come in here. Hold up processing until a sync can get in before
      // the zigzag has time to complete its setup and get its own sync in. This is what
      // causes the lockup we've seen in production.
      if (throwException) {
        try {
          LOG.info("LATCHED");
          // So, timing can have it that the test can run and the bad flush below happens
          // before we get here. In this case, we'll be stuck waiting on this latch but there
          // is nothing in the WAL pipeline to get us to the below beforeWaitOnSafePoint...
          // because all WALs have rolled. In this case, just give up on the test.
          if (!this.latch.await(5, TimeUnit.SECONDS)) {
            LOG.warn("GIVE UP! Failed waiting on latch...Test is ABORTED!");
          }
        } catch (InterruptedException e) {
          // Not expected during the test; just log it.
          e.printStackTrace();
        }
      }
    }

    @Override
    protected void beforeWaitOnSafePoint() {
      if (throwException) {
        LOG.info("COUNTDOWN");
        // Don't count down the latch until someone is waiting on it; otherwise the above
        // afterCreatingZigZagLatch will get to the latch and no one will ever free it and
        // we'll be stuck; the test won't go down.
        while (this.latch.getCount() <= 0) Threads.sleep(1);
        this.latch.countDown();
      }
    }

    @Override
    protected Writer createWriterInstance(Path path) throws IOException {
      final Writer w = super.createWriterInstance(path);
      return new Writer() {
        @Override
        public void close() throws IOException {
          w.close();
        }

        @Override
        public void sync() throws IOException {
          if (throwException) {
            throw new IOException("FAKE! Failed to replace a bad datanode...SYNC");
          }
          w.sync();
        }

        @Override
        public void append(Entry entry) throws IOException {
          if (throwException) {
            throw new IOException("FAKE! Failed to replace a bad datanode...APPEND");
          }
          w.append(entry);
        }

        @Override
        public long getLength() {
          return w.getLength();
        }
      };
    }
  }

  // Mocked up server and regionserver services. Needed below.
  Server server = Mockito.mock(Server.class);
  Mockito.when(server.getConfiguration()).thenReturn(CONF);
  Mockito.when(server.isStopped()).thenReturn(false);
  Mockito.when(server.isAborted()).thenReturn(false);
  RegionServerServices services = Mockito.mock(RegionServerServices.class);

  // OK. Now I have my mocked up Server & RegionServerServices and dodgy WAL, go ahead
  // with the test.
  FileSystem fs = FileSystem.get(CONF);
  Path rootDir = new Path(dir + getName());
  DodgyFSLog dodgyWAL = new DodgyFSLog(fs, rootDir, getName(), CONF);
  Path originalWAL = dodgyWAL.getCurrentFileName();
  // I need a log roller running.
  LogRoller logRoller = new LogRoller(server, services);
  logRoller.addWAL(dodgyWAL);
  // There is no 'stop' once a logRoller is running... it just dies.
  logRoller.start();
  // Now get a region and start adding in edits.
  HTableDescriptor htd = new HTableDescriptor(TableName.META_TABLE_NAME);
  final HRegion region = initHRegion(tableName, null, null, dodgyWAL);
  byte[] bytes = Bytes.toBytes(getName());
  NavigableMap<byte[], Integer> scopes = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
  scopes.put(COLUMN_FAMILY_BYTES, 0);
  MultiVersionConcurrencyControl mvcc = new MultiVersionConcurrencyControl();
  try {
    // First get something into memstore. Make a Put and then pull the Cell out of it. Will
    // manage append and sync carefully below to manufacture the hang. We keep adding the
    // same edit. The WAL subsystem doesn't care.
    Put put = new Put(bytes);
    put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), bytes);
    WALKey key =
        new WALKey(
            region.getRegionInfo().getEncodedNameAsBytes(),
            htd.getTableName(),
            System.currentTimeMillis(),
            mvcc,
            scopes);
    WALEdit edit = new WALEdit();
    CellScanner cellScanner = put.cellScanner();
    assertTrue(cellScanner.advance());
    edit.add(cellScanner.current());
    // Put something in memstore and out in the WAL. Do a big number of appends so we push
    // out the other side of the ring buffer. With small numbers, stuff doesn't make it to
    // the WAL.
    for (int i = 0; i < 1000; i++) {
      region.put(put);
    }
    // Set it so we start throwing exceptions.
    LOG.info("SET throwing of exception on append");
    dodgyWAL.throwException = true;
    // This append provokes a WAL roll request.
    dodgyWAL.append(region.getRegionInfo(), key, edit, true);
    boolean exception = false;
    try {
      dodgyWAL.sync();
    } catch (Exception e) {
      exception = true;
    }
    assertTrue("Did not get sync exception", exception);

    // Get a memstore flush going too so we have the same hung profile as up in the issue
    // over in HBASE-14317. Flush hangs trying to get a sequenceid because the ring buffer
    // is held up by the zigzaglatch waiting on syncs to come home.
    Thread t = new Thread("Flusher") {
      @Override
      public void run() {
        try {
          if (region.getMemstoreSize() <= 0) {
            throw new IOException("memstore size=" + region.getMemstoreSize());
          }
          region.flush(false);
        } catch (IOException e) {
          // Can fail trying to flush in the middle of a roll. Not a failure. Will succeed
          // later when the roll completes.
          LOG.info("In flush", e);
        }
        LOG.info("Exiting");
      }
    };
    t.setDaemon(true);
    t.start();
    // Wait until the latch is released.
    while (dodgyWAL.latch.getCount() > 0) Threads.sleep(1);
    // Now assert I got a new WAL file put in place even though loads of errors above.
    assertTrue(originalWAL != dodgyWAL.getCurrentFileName());
    // Can I append to it?
    dodgyWAL.throwException = false;
    try {
      region.put(put);
    } catch (Exception e) {
      LOG.info("In the put", e);
    }
  } finally {
    // To stop logRoller, its server has to say it is stopped.
    Mockito.when(server.isStopped()).thenReturn(true);
    if (logRoller != null) logRoller.close();
    try {
      if (region != null) region.close();
      if (dodgyWAL != null) dodgyWAL.close();
    } catch (Exception e) {
      LOG.info("On way out", e);
    }
  }
}
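/*
 * The DodgyFSLog trick above, reduced to its core: a decorator that forwards to a real
 * implementation until a volatile flag flips, after which every call fails. Useful for
 * deterministically provoking error paths (here, WAL roll/sync handling). Sink and its
 * methods are illustrative names, not HBase API.
 */
import java.io.IOException;

class FaultInjectingDecoratorSketch {
  interface Sink {
    void append(byte[] entry) throws IOException;

    void sync() throws IOException;
  }

  static class DodgySink implements Sink {
    private final Sink delegate;
    volatile boolean throwException = false; // flip from the test thread when ready

    DodgySink(Sink delegate) {
      this.delegate = delegate;
    }

    @Override
    public void append(byte[] entry) throws IOException {
      if (throwException) throw new IOException("FAKE! injected append failure");
      delegate.append(entry);
    }

    @Override
    public void sync() throws IOException {
      if (throwException) throw new IOException("FAKE! injected sync failure");
      delegate.sync();
    }
  }
}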
@Test
public void testLogCleaning() throws Exception {
  Configuration conf = TEST_UTIL.getConfiguration();
  // Set the TTL.
  long ttl = 10000;
  conf.setLong("hbase.master.logcleaner.ttl", ttl);
  conf.setBoolean(HConstants.REPLICATION_ENABLE_KEY, HConstants.REPLICATION_ENABLE_DEFAULT);
  Replication.decorateMasterConfiguration(conf);
  Server server = new DummyServer();
  ReplicationQueues repQueues =
      ReplicationFactory.getReplicationQueues(server.getZooKeeper(), conf, server);
  repQueues.init(server.getServerName().toString());
  final Path oldLogDir = new Path(TEST_UTIL.getDataTestDir(), HConstants.HREGION_OLDLOGDIR_NAME);
  String fakeMachineName = URLEncoder.encode(server.getServerName().toString(), "UTF8");

  final FileSystem fs = FileSystem.get(conf);

  // Create 2 invalid files, 1 "recent" file, 1 very new file and 30 old files.
  long now = System.currentTimeMillis();
  fs.delete(oldLogDir, true);
  fs.mkdirs(oldLogDir);
  // Case 1: 2 invalid files, which would be deleted directly.
  fs.createNewFile(new Path(oldLogDir, "a"));
  fs.createNewFile(new Path(oldLogDir, fakeMachineName + "." + "a"));
  // Case 2: 1 "recent" file, not even deletable for the first log cleaner
  // (TimeToLiveLogCleaner), so we are not going down the chain.
  System.out.println("Now is: " + now);
  for (int i = 1; i < 31; i++) {
    // Case 3: old files which would be deletable for the first log cleaner
    // (TimeToLiveLogCleaner), and also for the second (ReplicationLogCleaner).
    Path fileName = new Path(oldLogDir, fakeMachineName + "." + (now - i));
    fs.createNewFile(fileName);
    // Case 4: put 3 old log files in ZK indicating that they are scheduled
    // for replication, so these files would pass the first log cleaner
    // (TimeToLiveLogCleaner) but would be rejected by the second
    // (ReplicationLogCleaner).
    if (i % (30 / 3) == 1) {
      repQueues.addLog(fakeMachineName, fileName.getName());
      System.out.println("Replication log file: " + fileName);
    }
  }

  // Sleep for some time to get a newer modification time.
  Thread.sleep(ttl);
  fs.createNewFile(new Path(oldLogDir, fakeMachineName + "." + now));

  // Case 2: 1 newer file, not even deletable for the first log cleaner
  // (TimeToLiveLogCleaner), so we are not going down the chain.
  fs.createNewFile(new Path(oldLogDir, fakeMachineName + "." + (now + 10000)));

  for (FileStatus stat : fs.listStatus(oldLogDir)) {
    System.out.println(stat.getPath().toString());
  }

  assertEquals(34, fs.listStatus(oldLogDir).length);

  LogCleaner cleaner = new LogCleaner(1000, server, conf, fs, oldLogDir);
  cleaner.chore();

  // We end up with the current log file, a newer one and the 3 old log
  // files which are scheduled for replication.
  TEST_UTIL.waitFor(
      1000,
      new Waiter.Predicate<Exception>() {
        @Override
        public boolean evaluate() throws Exception {
          return 5 == fs.listStatus(oldLogDir).length;
        }
      });

  for (FileStatus file : fs.listStatus(oldLogDir)) {
    System.out.println("Kept log files: " + file.getPath().getName());
  }
}
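/*
 * The waitFor/Predicate idiom used above, stripped of the HBase test utility: poll a
 * condition at a fixed interval until it holds or a deadline passes. A minimal sketch;
 * Condition and this waitFor are local helpers, not the HBaseTestingUtility API.
 */
class PollingWaitSketch {
  interface Condition {
    boolean evaluate() throws Exception;
  }

  static void waitFor(long timeoutMs, Condition condition) throws Exception {
    long deadline = System.currentTimeMillis() + timeoutMs;
    while (!condition.evaluate()) {
      if (System.currentTimeMillis() > deadline) {
        throw new RuntimeException("Condition not met within " + timeoutMs + "ms");
      }
      Thread.sleep(100); // poll interval
    }
  }
}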
/**
 * Prepare the merged region and region files.
 *
 * @param server Hosting server instance. Can be null when testing
 * @param services Used to online/offline regions.
 * @return merged region
 * @throws IOException If thrown, transaction failed. Call {@link #rollback(Server,
 *     RegionServerServices)}
 */
HRegion createMergedRegion(final Server server, final RegionServerServices services)
    throws IOException {
  LOG.info(
      "Starting merge of " + region_a + " and " + region_b.getRegionNameAsString()
          + ", forcible=" + forcible);
  if ((server != null && server.isStopped()) || (services != null && services.isStopping())) {
    throw new IOException("Server is stopped or stopping");
  }

  if (rsCoprocessorHost != null) {
    if (rsCoprocessorHost.preMerge(this.region_a, this.region_b)) {
      throw new IOException(
          "Coprocessor bypassing regions " + this.region_a + " " + this.region_b + " merge.");
    }
  }

  // If true, no cluster to write meta edits to or to use coordination.
  boolean testing =
      server == null
          ? true
          : server.getConfiguration().getBoolean("hbase.testing.nocluster", false);

  HRegion mergedRegion = stepsBeforePONR(server, services, testing);

  @MetaMutationAnnotation
  List<Mutation> metaEntries = new ArrayList<Mutation>();
  if (rsCoprocessorHost != null) {
    if (rsCoprocessorHost.preMergeCommit(this.region_a, this.region_b, metaEntries)) {
      throw new IOException(
          "Coprocessor bypassing regions " + this.region_a + " " + this.region_b + " merge.");
    }
    try {
      for (Mutation p : metaEntries) {
        HRegionInfo.parseRegionName(p.getRow());
      }
    } catch (IOException e) {
      LOG.error(
          "Row key of mutation from coprocessor is not parsable as region name. "
              + "Mutations from coprocessor should only be for the hbase:meta table.",
          e);
      throw e;
    }
  }

  // This is the point of no return. Similar to SplitTransaction.
  // If we reach the PONR then subsequent failures need to crash out this
  // regionserver.
  this.journal.add(JournalEntry.PONR);

  // Add the merged region and delete region_a and region_b
  // as an atomic update. See HBASE-7721. This update to hbase:meta is what
  // determines whether the region is merged or not in case of failures.
  // If it is successful, the master will roll forward; if not, the master
  // will roll back.
  if (!testing && useCoordinationForAssignment) {
    if (metaEntries.isEmpty()) {
      MetaTableAccessor.mergeRegions(
          server.getConnection(),
          mergedRegion.getRegionInfo(),
          region_a.getRegionInfo(),
          region_b.getRegionInfo(),
          server.getServerName(),
          region_a.getTableDesc().getRegionReplication());
    } else {
      mergeRegionsAndPutMetaEntries(
          server.getConnection(),
          mergedRegion.getRegionInfo(),
          region_a.getRegionInfo(),
          region_b.getRegionInfo(),
          server.getServerName(),
          metaEntries,
          region_a.getTableDesc().getRegionReplication());
    }
  } else if (services != null && !useCoordinationForAssignment) {
    if (!services.reportRegionStateTransition(
        TransitionCode.MERGE_PONR,
        mergedRegionInfo,
        region_a.getRegionInfo(),
        region_b.getRegionInfo())) {
      // Passed PONR, let SSH clean it up.
      throw new IOException(
          "Failed to notify master that merge passed PONR: "
              + region_a.getRegionInfo().getRegionNameAsString()
              + " and "
              + region_b.getRegionInfo().getRegionNameAsString());
    }
  }
  return mergedRegion;
}
private boolean useCoordination(final Server server) {
  return server != null
      && useCoordinationForAssignment
      && server.getCoordinatedStateManager() != null;
}
/**
 * Reproduce the lockup that happens when there are no further syncs after an append fails,
 * causing an isolated sync and then an infinite wait. See HBASE-16960. If the below is
 * broken, we will see this test time out because it is locked up.
 *
 * <p>Steps to reproduce:<br>
 * 1. Trigger a server abort through dodgyWAL1<br>
 * 2. Add a {@link DummyWALActionsListener} to dodgyWAL2 to make the ringbuffer event handler
 * thread sleep for a while, thus keeping {@code endOfBatch} false<br>
 * 3. Publish a sync then an append which will throw an exception; check whether the sync
 * returns
 */
@Test(timeout = 20000)
public void testLockup16960() throws IOException {
  // A WAL that we can have throw exceptions when a flag is set.
  class DodgyFSLog extends FSHLog {
    // Set this when we want the WAL to start throwing exceptions.
    volatile boolean throwException = false;

    public DodgyFSLog(FileSystem fs, Path root, String logDir, Configuration conf)
        throws IOException {
      super(fs, root, logDir, conf);
    }

    @Override
    protected Writer createWriterInstance(Path path) throws IOException {
      final Writer w = super.createWriterInstance(path);
      return new Writer() {
        @Override
        public void close() throws IOException {
          w.close();
        }

        @Override
        public void sync() throws IOException {
          if (throwException) {
            throw new IOException("FAKE! Failed to replace a bad datanode...SYNC");
          }
          w.sync();
        }

        @Override
        public void append(Entry entry) throws IOException {
          if (throwException) {
            throw new IOException("FAKE! Failed to replace a bad datanode...APPEND");
          }
          w.append(entry);
        }

        @Override
        public long getLength() {
          return w.getLength();
        }
      };
    }

    @Override
    protected long doReplaceWriter(Path oldPath, Path newPath, Writer nextWriter)
        throws IOException {
      if (throwException) {
        throw new FailedLogCloseException("oldPath=" + oldPath + ", newPath=" + newPath);
      }
      long oldFileLen = 0L;
      oldFileLen = super.doReplaceWriter(oldPath, newPath, nextWriter);
      return oldFileLen;
    }
  }

  // Mocked up server and regionserver services. Needed below.
  Server server =
      new DummyServer(CONF, ServerName.valueOf("hostname1.example.org", 1234, 1L).toString());
  RegionServerServices services = Mockito.mock(RegionServerServices.class);

  CONF.setLong("hbase.regionserver.hlog.sync.timeout", 10000);

  // OK. Now I have my mocked up Server & RegionServerServices and dodgy WAL,
  // go ahead with the test.
  FileSystem fs = FileSystem.get(CONF);
  Path rootDir = new Path(dir + getName());
  DodgyFSLog dodgyWAL1 = new DodgyFSLog(fs, rootDir, getName(), CONF);
  Path rootDir2 = new Path(dir + getName() + "2");
  final DodgyFSLog dodgyWAL2 = new DodgyFSLog(fs, rootDir2, getName() + "2", CONF);
  // Add a listener to force the ringbuffer event handler to sleep for a while.
  dodgyWAL2.registerWALActionsListener(new DummyWALActionsListener());

  // I need a log roller running.
  LogRoller logRoller = new LogRoller(server, services);
  logRoller.addWAL(dodgyWAL1);
  logRoller.addWAL(dodgyWAL2);
  // There is no 'stop' once a logRoller is running... it just dies.
  logRoller.start();
  // Now get a region and start adding in edits.
  HTableDescriptor htd = new HTableDescriptor(TableName.META_TABLE_NAME);
  final HRegion region = initHRegion(tableName, null, null, dodgyWAL1);
  byte[] bytes = Bytes.toBytes(getName());
  NavigableMap<byte[], Integer> scopes = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
  scopes.put(COLUMN_FAMILY_BYTES, 0);
  MultiVersionConcurrencyControl mvcc = new MultiVersionConcurrencyControl();
  try {
    Put put = new Put(bytes);
    put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), bytes);
    WALKey key =
        new WALKey(
            region.getRegionInfo().getEncodedNameAsBytes(),
            htd.getTableName(),
            System.currentTimeMillis(),
            mvcc,
            scopes);
    WALEdit edit = new WALEdit();
    CellScanner cellScanner = put.cellScanner();
    assertTrue(cellScanner.advance());
    edit.add(cellScanner.current());

    LOG.info("SET throwing of exception on append");
    dodgyWAL1.throwException = true;
    // This append provokes a WAL roll request.
    dodgyWAL1.append(region.getRegionInfo(), key, edit, true);
    boolean exception = false;
    try {
      dodgyWAL1.sync();
    } catch (Exception e) {
      exception = true;
    }
    assertTrue("Did not get sync exception", exception);

    // The LogRoller calls dodgyWAL1.rollWriter, gets a FailedLogCloseException,
    // and causes the server to abort.
    try {
      // Wait for the LogRoller to exit.
      Thread.sleep(50);
    } catch (InterruptedException e) {
      e.printStackTrace();
    }

    final CountDownLatch latch = new CountDownLatch(1);

    // Make the RingBufferEventHandler sleep for 1s, so for the following sync
    // endOfBatch stays false.
    key =
        new WALKey(
            region.getRegionInfo().getEncodedNameAsBytes(),
            TableName.valueOf("sleep"),
            System.currentTimeMillis(),
            mvcc,
            scopes);
    dodgyWAL2.append(region.getRegionInfo(), key, edit, true);

    Thread t = new Thread("Sync") {
      @Override
      public void run() {
        try {
          dodgyWAL2.sync();
        } catch (IOException e) {
          LOG.info("In sync", e);
        }
        latch.countDown();
        LOG.info("Sync exiting");
      }
    };
    t.setDaemon(true);
    t.start();
    try {
      // Make sure the sync has been published.
      Thread.sleep(100);
    } catch (InterruptedException e1) {
      e1.printStackTrace();
    }
    // Make the append throw a DamagedWALException.
    key =
        new WALKey(
            region.getRegionInfo().getEncodedNameAsBytes(),
            TableName.valueOf("DamagedWALException"),
            System.currentTimeMillis(),
            mvcc,
            scopes);
    dodgyWAL2.append(region.getRegionInfo(), key, edit, true);

    while (latch.getCount() > 0) {
      Threads.sleep(100);
    }
    assertTrue(server.isAborted());
  } finally {
    if (logRoller != null) {
      logRoller.close();
    }
    try {
      if (region != null) {
        region.close();
      }
      if (dodgyWAL1 != null) {
        dodgyWAL1.close();
      }
      if (dodgyWAL2 != null) {
        dodgyWAL2.close();
      }
    } catch (Exception e) {
      LOG.info("On way out", e);
    }
  }
}
/**
 * Prepare the regions and region files.
 *
 * @param server Hosting server instance. Can be null when testing (won't try to update zk
 *     if a null server)
 * @param services Used to online/offline regions.
 * @param user
 * @throws IOException If thrown, transaction failed. Call {@link #rollback(Server,
 *     RegionServerServices)}
 * @return Regions created
 */
@VisibleForTesting
PairOfSameType<Region> createDaughters(
    final Server server, final RegionServerServices services, User user) throws IOException {
  LOG.info("Starting split of region " + this.parent);
  if ((server != null && server.isStopped()) || (services != null && services.isStopping())) {
    throw new IOException("Server is stopped or stopping");
  }
  assert !this.parent.lock.writeLock().isHeldByCurrentThread()
      : "Unsafe to hold write lock while performing RPCs";

  transition(SplitTransactionPhase.BEFORE_PRE_SPLIT_HOOK);

  // Coprocessor callback
  if (this.parent.getCoprocessorHost() != null) {
    // TODO: Remove one of these
    parent.getCoprocessorHost().preSplit(user);
    parent.getCoprocessorHost().preSplit(splitrow, user);
  }

  transition(SplitTransactionPhase.AFTER_PRE_SPLIT_HOOK);

  // If true, no cluster to write meta edits to or to update znodes in.
  boolean testing =
      server == null
          ? true
          : server.getConfiguration().getBoolean("hbase.testing.nocluster", false);
  this.fileSplitTimeout =
      testing
          ? this.fileSplitTimeout
          : server
              .getConfiguration()
              .getLong("hbase.regionserver.fileSplitTimeout", this.fileSplitTimeout);

  PairOfSameType<Region> daughterRegions = stepsBeforePONR(server, services, testing);

  final List<Mutation> metaEntries = new ArrayList<Mutation>();
  boolean ret = false;
  if (this.parent.getCoprocessorHost() != null) {
    ret = parent.getCoprocessorHost().preSplitBeforePONR(splitrow, metaEntries, user);
    if (ret) {
      throw new IOException(
          "Coprocessor bypassing region "
              + parent.getRegionInfo().getRegionNameAsString()
              + " split.");
    }
    try {
      for (Mutation p : metaEntries) {
        HRegionInfo.parseRegionName(p.getRow());
      }
    } catch (IOException e) {
      LOG.error(
          "Row key of mutation from coprocessor is not parsable as region name. "
              + "Mutations from coprocessor should only be for the hbase:meta table.");
      throw e;
    }
  }

  // This is the point of no return. Adding subsequent edits to .META. as we
  // do below when we do the daughter opens adding each to .META. can fail in
  // various interesting ways, the most interesting of which is a timeout
  // BUT the edits all go through (See HBASE-3872). If we reach the PONR,
  // then subsequent failures need to crash out this regionserver; the
  // server shutdown processing should be able to fix up the incomplete split.
  // The offlined parent will have the daughters as extra columns. If
  // we leave the daughter regions in place and do not remove them when we
  // crash out, then they will still have their references to the parent in
  // place and the server shutdown fixup of .META. will point to these
  // regions.
  // We should add the PONR JournalEntry before offlineParentInMeta, so even if
  // offlineParentInMeta times out, this will cause the regionserver to exit, and
  // then the master's ServerShutdownHandler will fix up the daughters and avoid
  // data loss. (See HBASE-4562.)
  transition(SplitTransactionPhase.PONR);

  // Edit the parent in meta. Offlines the parent region and adds splita and splitb
  // as an atomic update. See HBASE-7721. This update to META is what determines
  // whether the region is split or not in case of failures.
  // If it is successful, the master will roll forward; if not, the master will
  // roll back and assign the parent region.
  if (services != null
      && !services.reportRegionStateTransition(
          TransitionCode.SPLIT_PONR, parent.getRegionInfo(), hri_a, hri_b)) {
    // Passed PONR, let SSH clean it up.
    throw new IOException(
        "Failed to notify master that split passed PONR: "
            + parent.getRegionInfo().getRegionNameAsString());
  }
  return daughterRegions;
}
public HRegion stepsBeforePONR(
    final Server server, final RegionServerServices services, boolean testing)
    throws IOException {
  if (rmd == null) {
    rmd =
        server != null && server.getCoordinatedStateManager() != null
            ? ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
                .getRegionMergeCoordination()
                .getDefaultDetails()
            : null;
  }

  // If the server doesn't have a coordination state manager, don't do coordination actions.
  if (useCoordination(server)) {
    try {
      ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
          .getRegionMergeCoordination()
          .startRegionMergeTransaction(
              mergedRegionInfo,
              server.getServerName(),
              region_a.getRegionInfo(),
              region_b.getRegionInfo());
    } catch (IOException e) {
      throw new IOException(
          "Failed to start region merge transaction for "
              + this.mergedRegionInfo.getRegionNameAsString(),
          e);
    }
  } else if (services != null && !useCoordinationForAssignment) {
    if (!services.reportRegionStateTransition(
        TransitionCode.READY_TO_MERGE,
        mergedRegionInfo,
        region_a.getRegionInfo(),
        region_b.getRegionInfo())) {
      throw new IOException(
          "Failed to get ok from master to merge "
              + region_a.getRegionInfo().getRegionNameAsString()
              + " and "
              + region_b.getRegionInfo().getRegionNameAsString());
    }
  }
  this.journal.add(JournalEntry.SET_MERGING);
  if (useCoordination(server)) {
    // After creating the merge node, wait for the master to transition it
    // from PENDING_MERGE to MERGING so that we can move on. We want the master
    // to know about the merge so it won't transition any region which is merging.
    ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
        .getRegionMergeCoordination()
        .waitForRegionMergeTransaction(services, mergedRegionInfo, region_a, region_b, rmd);
  }

  this.region_a.getRegionFileSystem().createMergesDir();
  this.journal.add(JournalEntry.CREATED_MERGE_DIR);

  Map<byte[], List<StoreFile>> hstoreFilesOfRegionA =
      closeAndOfflineRegion(services, this.region_a, true, testing);
  Map<byte[], List<StoreFile>> hstoreFilesOfRegionB =
      closeAndOfflineRegion(services, this.region_b, false, testing);

  assert hstoreFilesOfRegionA != null && hstoreFilesOfRegionB != null;

  // mergeStoreFiles creates merged region dirs under the region_a merges dir.
  // Nothing to unroll here on failure -- cleanup of CREATE_MERGE_DIR will
  // clean this up.
  mergeStoreFiles(hstoreFilesOfRegionA, hstoreFilesOfRegionB);

  if (useCoordination(server)) {
    try {
      // Do the final check in case any merging region is moved somehow. If so, the
      // transition will fail.
      ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
          .getRegionMergeCoordination()
          .confirmRegionMergeTransaction(
              this.mergedRegionInfo,
              region_a.getRegionInfo(),
              region_b.getRegionInfo(),
              server.getServerName(),
              rmd);
    } catch (IOException e) {
      throw new IOException(
          "Failed setting MERGING on " + this.mergedRegionInfo.getRegionNameAsString(), e);
    }
  }

  // Log to the journal that we are creating the merged region. We could fail
  // halfway through, and if we do, we could have left stuff in the fs that
  // needs cleanup -- a storefile or two. That's why we add the entry to the
  // journal BEFORE rather than AFTER the change.
  this.journal.add(JournalEntry.STARTED_MERGED_REGION_CREATION);
  HRegion mergedRegion =
      createMergedRegionFromMerges(this.region_a, this.region_b, this.mergedRegionInfo);
  return mergedRegion;
}
/**
 * @param server Hosting server instance (May be null when testing).
 * @param services Services of regionserver, used to online regions.
 * @throws IOException If thrown, rollback failed. Take drastic action.
 * @return True if we successfully rolled back; false if we got to the point of no return
 *     and so now need to abort the server to minimize damage.
 */
@SuppressWarnings("deprecation")
public boolean rollback(final Server server, final RegionServerServices services)
    throws IOException {
  assert this.mergedRegionInfo != null;
  // Coprocessor callback
  if (rsCoprocessorHost != null) {
    rsCoprocessorHost.preRollBackMerge(this.region_a, this.region_b);
  }

  boolean result = true;
  ListIterator<JournalEntry> iterator = this.journal.listIterator(this.journal.size());
  // Iterate in reverse.
  while (iterator.hasPrevious()) {
    JournalEntry je = iterator.previous();
    switch (je) {
      case SET_MERGING:
        if (useCoordination(server)) {
          ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
              .getRegionMergeCoordination()
              .clean(this.mergedRegionInfo);
        } else if (services != null
            && !useCoordinationForAssignment
            && !services.reportRegionStateTransition(
                TransitionCode.MERGE_REVERTED,
                mergedRegionInfo,
                region_a.getRegionInfo(),
                region_b.getRegionInfo())) {
          return false;
        }
        break;

      case CREATED_MERGE_DIR:
        this.region_a.writestate.writesEnabled = true;
        this.region_b.writestate.writesEnabled = true;
        this.region_a.getRegionFileSystem().cleanupMergesDir();
        break;

      case CLOSED_REGION_A:
        try {
          // So, this returns a seqid, but if we just closed and then reopened,
          // we should be ok. On close, we flushed using a sequenceid obtained
          // from the hosting regionserver, so no need to propagate the sequenceid
          // returned out of initialize below up into the regionserver as we
          // normally do.
          this.region_a.initialize();
        } catch (IOException e) {
          LOG.error(
              "Failed rolling back CLOSED_REGION_A of region "
                  + this.region_a.getRegionNameAsString(),
              e);
          throw new RuntimeException(e);
        }
        break;

      case OFFLINED_REGION_A:
        if (services != null) services.addToOnlineRegions(this.region_a);
        break;

      case CLOSED_REGION_B:
        try {
          this.region_b.initialize();
        } catch (IOException e) {
          LOG.error(
              "Failed rolling back CLOSED_REGION_B of region "
                  + this.region_b.getRegionNameAsString(),
              e);
          throw new RuntimeException(e);
        }
        break;

      case OFFLINED_REGION_B:
        if (services != null) services.addToOnlineRegions(this.region_b);
        break;

      case STARTED_MERGED_REGION_CREATION:
        this.region_a.getRegionFileSystem().cleanupMergedRegion(this.mergedRegionInfo);
        break;

      case PONR:
        // We got to the point of no return, so we need to just abort. Return
        // immediately. Do not clean up the created merged regions.
        return false;

      default:
        throw new RuntimeException("Unhandled journal entry: " + je);
    }
  }
  // Coprocessor callback
  if (rsCoprocessorHost != null) {
    rsCoprocessorHost.postRollBackMerge(this.region_a, this.region_b);
  }
  return result;
}
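/*
 * The journal-based rollback pattern used above, in a self-contained sketch: record a journal
 * entry before each side effect, and on failure walk the journal in reverse, undoing each
 * step, stopping (and reporting unrecoverable) once the point of no return has been logged.
 * The Step names and undo actions are illustrative, not the HBase JournalEntry enum.
 */
import java.util.ArrayList;
import java.util.List;
import java.util.ListIterator;

class JournalRollbackSketch {
  enum Step { CREATED_DIR, CLOSED_REGION, PONR }

  private final List<Step> journal = new ArrayList<Step>();

  void record(Step step) {
    // Journal BEFORE the change, so a crash mid-step still leaves an undo record.
    journal.add(step);
  }

  /** @return false if past the point of no return; the caller must abort. */
  boolean rollback() {
    ListIterator<Step> it = journal.listIterator(journal.size());
    while (it.hasPrevious()) {
      switch (it.previous()) {
        case CREATED_DIR:
          // Undo: remove the temporary dir.
          break;
        case CLOSED_REGION:
          // Undo: reopen the region.
          break;
        case PONR:
          // Point of no return already passed: nothing safe to undo.
          return false;
        default:
          throw new RuntimeException("Unhandled journal entry");
      }
    }
    return true;
  }
}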