Example #1
  @Test
  public void testCountReferencesFailsSplit() throws IOException {
    final int rowcount = TEST_UTIL.loadRegion(this.parent, CF);
    assertTrue(rowcount > 0);
    int parentRowCount = TEST_UTIL.countRows(this.parent);
    assertEquals(rowcount, parentRowCount);

    // Start transaction.
    HRegion spiedRegion = spy(this.parent);
    SplitTransactionImpl st = prepareGOOD_SPLIT_ROW(spiedRegion);
    SplitTransactionImpl spiedUponSt = spy(st);
    doThrow(new IOException("Failing split. Expected reference file count isn't equal."))
        .when(spiedUponSt)
        .assertReferenceFileCount(
            anyInt(),
            eq(
                new Path(
                    this.parent.getRegionFileSystem().getTableDir(),
                    st.getSecondDaughter().getEncodedName())));

    // Run the execute.  Look at what it returns.
    boolean expectedException = false;
    Server mockServer = Mockito.mock(Server.class);
    when(mockServer.getConfiguration()).thenReturn(TEST_UTIL.getConfiguration());
    try {
      spiedUponSt.execute(mockServer, null);
    } catch (IOException e) {
      expectedException = true;
    }
    assertTrue(expectedException);
  }
  /**
   * Test if the region can be closed properly
   *
   * @throws IOException
   * @throws NodeExistsException
   * @throws KeeperException
   * @throws org.apache.hadoop.hbase.exceptions.DeserializationException
   */
  @Test
  public void testCloseRegion()
      throws IOException, NodeExistsException, KeeperException, DeserializationException {
    final Server server = new MockServer(HTU);
    final RegionServerServices rss = HTU.createMockRegionServerService();

    HTableDescriptor htd = TEST_HTD;
    HRegionInfo hri = TEST_HRI;

    // open a region first so that it can be closed later
    OpenRegion(server, rss, htd, hri);

    // close the region
    // Create it in CLOSING state, which is what the Master does before sending the CLOSE RPC
    int versionOfClosingNode =
        ZKAssign.createNodeClosing(server.getZooKeeper(), hri, server.getServerName());

    // The CloseRegionHandler will validate the expected version.
    // Given it is set to the correct versionOfClosingNode, the handler
    // should transition the znode to RS_ZK_REGION_CLOSED.
    CloseRegionHandler handler =
        new CloseRegionHandler(server, rss, hri, false, true, versionOfClosingNode);
    handler.process();
    // Handler should have transitioned it to RS_ZK_REGION_CLOSED
    RegionTransition rt =
        RegionTransition.parseFrom(ZKAssign.getData(server.getZooKeeper(), hri.getEncodedName()));
    assertTrue(rt.getEventType().equals(EventType.RS_ZK_REGION_CLOSED));
  }
 /**
  * Test that if we fail a flush, abort gets set on close.
  *
  * @see <a href="https://issues.apache.org/jira/browse/HBASE-4270">HBASE-4270</a>
  * @throws IOException
  * @throws NodeExistsException
  * @throws KeeperException
  */
 @Test
 public void testFailedFlushAborts() throws IOException, NodeExistsException, KeeperException {
   final Server server = new MockServer(HTU, false);
   final RegionServerServices rss = HTU.createMockRegionServerService();
   HTableDescriptor htd = TEST_HTD;
   final HRegionInfo hri =
       new HRegionInfo(htd.getTableName(), HConstants.EMPTY_END_ROW, HConstants.EMPTY_END_ROW);
   HRegion region = HTU.createLocalHRegion(hri, htd);
   try {
     assertNotNull(region);
     // Spy on the region so can throw exception when close is called.
     HRegion spy = Mockito.spy(region);
     final boolean abort = false;
     Mockito.when(spy.close(abort)).thenThrow(new RuntimeException("Mocked failed close!"));
     // The CloseRegionHandler will try to get an HRegion that corresponds
     // to the passed hri -- so insert the region into the online region Set.
     rss.addToOnlineRegions(spy);
     // Assert the Server is NOT stopped before we call close region.
     assertFalse(server.isStopped());
     CloseRegionHandler handler = new CloseRegionHandler(server, rss, hri, false, false, -1);
     boolean throwable = false;
     try {
       handler.process();
     } catch (Throwable t) {
       throwable = true;
     } finally {
       assertTrue(throwable);
       // Abort calls stop so stopped flag should be set.
       assertTrue(server.isStopped());
     }
   } finally {
     HRegion.closeHRegion(region);
   }
 }
Example #4
  @Test
  public void testRollback() throws IOException {
    final int rowcount = TEST_UTIL.loadRegion(this.parent, CF);
    assertTrue(rowcount > 0);
    int parentRowCount = TEST_UTIL.countRows(this.parent);
    assertEquals(rowcount, parentRowCount);

    // Start transaction.
    HRegion spiedRegion = spy(this.parent);
    SplitTransactionImpl st = prepareGOOD_SPLIT_ROW(spiedRegion);
    SplitTransactionImpl spiedUponSt = spy(st);
    doNothing()
        .when(spiedUponSt)
        .assertReferenceFileCount(
            anyInt(), eq(parent.getRegionFileSystem().getSplitsDir(st.getFirstDaughter())));
    when(spiedRegion.createDaughterRegionFromSplits(spiedUponSt.getSecondDaughter()))
        .thenThrow(new MockedFailedDaughterCreation());
    // Run the execute.  Look at what it returns.
    boolean expectedException = false;
    Server mockServer = Mockito.mock(Server.class);
    when(mockServer.getConfiguration()).thenReturn(TEST_UTIL.getConfiguration());
    try {
      spiedUponSt.execute(mockServer, null);
    } catch (MockedFailedDaughterCreation e) {
      expectedException = true;
    }
    assertTrue(expectedException);
    // Run rollback
    assertTrue(spiedUponSt.rollback(null, null));

    // Assert I can scan parent.
    int parentRowCount2 = TEST_UTIL.countRows(this.parent);
    assertEquals(parentRowCount, parentRowCount2);

    // Assert rollback cleaned up stuff in fs
    assertTrue(!this.fs.exists(HRegion.getRegionDir(this.testdir, st.getFirstDaughter())));
    assertTrue(!this.fs.exists(HRegion.getRegionDir(this.testdir, st.getSecondDaughter())));
    assertTrue(!this.parent.lock.writeLock().isHeldByCurrentThread());

    // Now retry the split but do not throw an exception this time.
    assertTrue(st.prepare());
    PairOfSameType<Region> daughters = st.execute(mockServer, null);
    // Count rows. daughters are already open
    int daughtersRowCount = 0;
    for (Region openRegion : daughters) {
      try {
        int count = TEST_UTIL.countRows(openRegion);
        assertTrue(count > 0 && count != rowcount);
        daughtersRowCount += count;
      } finally {
        HBaseTestingUtility.closeRegionAndWAL(openRegion);
      }
    }
    assertEquals(rowcount, daughtersRowCount);
    // Assert the write lock is no longer held on parent
    assertTrue(!this.parent.lock.writeLock().isHeldByCurrentThread());
    assertTrue("Rollback hooks should be called.", wasRollBackHookCalled());
  }
Example #5
  @Test
  public void testWholesomeSplit() throws IOException {
    final int rowcount = TEST_UTIL.loadRegion(this.parent, CF, true);
    assertTrue(rowcount > 0);
    int parentRowCount = TEST_UTIL.countRows(this.parent);
    assertEquals(rowcount, parentRowCount);

    // Pretend region's blocks are not in the cache, used for
    // testWholesomeSplitWithHFileV1
    CacheConfig cacheConf = new CacheConfig(TEST_UTIL.getConfiguration());
    ((LruBlockCache) cacheConf.getBlockCache()).clearCache();

    // Start transaction.
    SplitTransactionImpl st = prepareGOOD_SPLIT_ROW();

    // Run the execute.  Look at what it returns.
    Server mockServer = Mockito.mock(Server.class);
    when(mockServer.getConfiguration()).thenReturn(TEST_UTIL.getConfiguration());
    PairOfSameType<Region> daughters = st.execute(mockServer, null);
    // Do some assertions about execution.
    assertTrue(this.fs.exists(this.parent.getRegionFileSystem().getSplitsDir()));
    // Assert the parent region is closed.
    assertTrue(this.parent.isClosed());

    // Assert splitdir is empty -- because its content will have been moved out
    // to be under the daughter region dirs.
    assertEquals(0, this.fs.listStatus(this.parent.getRegionFileSystem().getSplitsDir()).length);
    // Check daughters have correct key span.
    assertTrue(
        Bytes.equals(
            parent.getRegionInfo().getStartKey(),
            daughters.getFirst().getRegionInfo().getStartKey()));
    assertTrue(Bytes.equals(GOOD_SPLIT_ROW, daughters.getFirst().getRegionInfo().getEndKey()));
    assertTrue(Bytes.equals(daughters.getSecond().getRegionInfo().getStartKey(), GOOD_SPLIT_ROW));
    assertTrue(
        Bytes.equals(
            parent.getRegionInfo().getEndKey(), daughters.getSecond().getRegionInfo().getEndKey()));
    // Count rows. daughters are already open
    int daughtersRowCount = 0;
    for (Region openRegion : daughters) {
      try {
        int count = TEST_UTIL.countRows(openRegion);
        assertTrue(count > 0 && count != rowcount);
        daughtersRowCount += count;
      } finally {
        HBaseTestingUtility.closeRegionAndWAL(openRegion);
      }
    }
    assertEquals(rowcount, daughtersRowCount);
    // Assert the write lock is no longer held on parent
    assertTrue(!this.parent.lock.writeLock().isHeldByCurrentThread());
  }
  /**
   * Inspect the log directory to recover any log file that does not belong
   * to an active region server.
   */
  void splitLogAfterStartup() {
    boolean retrySplitting = !conf.getBoolean("hbase.hlog.split.skip.errors",
        HLog.SPLIT_SKIP_ERRORS_DEFAULT);
    Path logsDirPath = new Path(this.rootdir, HConstants.HREGION_LOGDIR_NAME);
    do {
      if (master.isStopped()) {
        LOG.warn("Master stopped while splitting logs");
        break;
      }
      List<ServerName> serverNames = new ArrayList<ServerName>();
      try {
        if (!this.fs.exists(logsDirPath)) return;
        FileStatus[] logFolders = FSUtils.listStatus(this.fs, logsDirPath, null);
        // Get online servers after getting log folders to avoid deleting the log
        // folders of newly checked-in region servers. See HBASE-5916.
        Set<ServerName> onlineServers = ((HMaster) master).getServerManager().getOnlineServers()
            .keySet();

        if (logFolders == null || logFolders.length == 0) {
          LOG.debug("No log files to split, proceeding...");
          return;
        }
        for (FileStatus status : logFolders) {
          String sn = status.getPath().getName();
          // truncate splitting suffix if present (for ServerName parsing)
          if (sn.endsWith(HLog.SPLITTING_EXT)) {
            sn = sn.substring(0, sn.length() - HLog.SPLITTING_EXT.length());
          }
          ServerName serverName = ServerName.parseServerName(sn);
          if (!onlineServers.contains(serverName)) {
            LOG.info("Log folder " + status.getPath() + " doesn't belong "
                + "to a known region server, splitting");
            serverNames.add(serverName);
          } else {
            LOG.info("Log folder " + status.getPath()
                + " belongs to an existing region server");
          }
        }
        splitLog(serverNames);
        retrySplitting = false;
      } catch (IOException ioe) {
        LOG.warn("Failed splitting of " + serverNames, ioe);
        if (!checkFileSystem()) {
          LOG.warn("Bad Filesystem, exiting");
          Runtime.getRuntime().halt(1);
        }
        try {
          if (retrySplitting) {
            Thread.sleep(conf.getInt(
              "hbase.hlog.split.failure.retry.interval", 30 * 1000));
          }
        } catch (InterruptedException e) {
          LOG.warn("Interrupted, aborting since cannot return w/o splitting");
          Thread.currentThread().interrupt();
          retrySplitting = false;
          Runtime.getRuntime().halt(1);
        }
      }
    } while (retrySplitting);
  }
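The subtle step above is the suffix handling: while a dead server's WAL directory is being split, it is renamed with a "-splitting" extension, and that suffix must be stripped before the name parses as a ServerName. A minimal sketch of the round trip, assuming HLog.SPLITTING_EXT is the "-splitting" literal and the usual host,port,startcode directory naming:

  // Hypothetical directory name for a server whose logs are mid-split.
  String dirName = "host1.example.org,60020,1400000000000-splitting";
  String sn = dirName;
  if (sn.endsWith(HLog.SPLITTING_EXT)) {
    // Strip the suffix so the remainder is a plain ServerName string.
    sn = sn.substring(0, sn.length() - HLog.SPLITTING_EXT.length());
  }
  // Parses back to host1.example.org,60020,1400000000000
  ServerName serverName = ServerName.parseServerName(sn);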
 private void OpenRegion(
     Server server, RegionServerServices rss, HTableDescriptor htd, HRegionInfo hri)
     throws IOException, NodeExistsException, KeeperException, DeserializationException {
    // Create the OFFLINE znode, which is what the Master does before sending the OPEN RPC
   ZKAssign.createNodeOffline(server.getZooKeeper(), hri, server.getServerName());
   OpenRegionHandler openHandler = new OpenRegionHandler(server, rss, hri, htd);
   rss.getRegionsInTransitionInRS().put(hri.getEncodedNameAsBytes(), Boolean.TRUE);
   openHandler.process();
    // The parse result is unused; this just verifies the znode data is parseable.
   RegionTransition.parseFrom(ZKAssign.getData(server.getZooKeeper(), hri.getEncodedName()));
    // Delete the node, which is what the Master does after the region is opened
   ZKAssign.deleteNode(
       server.getZooKeeper(),
       hri.getEncodedName(),
       EventType.RS_ZK_REGION_OPENED,
       server.getServerName());
 }
Example #8
 DaughterOpener(final Server s, final Region r) {
   super(
       (s == null ? "null-services" : s.getServerName())
           + "-daughterOpener="
           + r.getRegionInfo().getEncodedName());
   setDaemon(true);
   this.server = s;
   this.r = r;
 }
  /**
   * Perform time consuming opening of the merged region.
   *
   * @param server Hosting server instance. Can be null when testing
   * @param services Used to online/offline regions.
   * @param merged the merged region
   * @throws IOException If thrown, transaction failed. Call {@link #rollback(Server,
   *     RegionServerServices)}
   */
  void openMergedRegion(final Server server, final RegionServerServices services, HRegion merged)
      throws IOException {
    boolean stopped = server != null && server.isStopped();
    boolean stopping = services != null && services.isStopping();
    if (stopped || stopping) {
      LOG.info(
          "Not opening merged region  "
              + merged.getRegionNameAsString()
              + " because stopping="
              + stopping
              + ", stopped="
              + stopped);
      return;
    }
    HRegionInfo hri = merged.getRegionInfo();
    LoggingProgressable reporter =
        server == null
            ? null
            : new LoggingProgressable(
                hri,
                server
                    .getConfiguration()
                    .getLong("hbase.regionserver.regionmerge.open.log.interval", 10000));
    merged.openHRegion(reporter);

    if (services != null) {
      try {
        if (useCoordinationForAssignment) {
          services.postOpenDeployTasks(merged);
        } else if (!services.reportRegionStateTransition(
            TransitionCode.MERGED,
            mergedRegionInfo,
            region_a.getRegionInfo(),
            region_b.getRegionInfo())) {
          throw new IOException(
              "Failed to report merged region to master: " + mergedRegionInfo.getShortNameToLog());
        }
        services.addToOnlineRegions(merged);
      } catch (KeeperException ke) {
        throw new IOException(ke);
      }
    }
  }
Example #10
 public HLogSplitterHandler(
     final Server server,
     String curTask,
     final MutableInt curTaskZKVersion,
     CancelableProgressable reporter,
     AtomicInteger inProgressTasks,
     TaskExecutor splitTaskExecutor,
     RecoveryMode mode) {
   super(server, EventType.RS_LOG_REPLAY);
   this.curTask = curTask;
   this.wal = ZKSplitLog.getFileName(curTask);
   this.reporter = reporter;
   this.inProgressTasks = inProgressTasks;
   this.inProgressTasks.incrementAndGet();
   this.serverName = server.getServerName();
   this.zkw = server.getZooKeeper();
   this.curTaskZKVersion = curTaskZKVersion;
   this.splitTaskExecutor = splitTaskExecutor;
   this.mode = mode;
 }
Example #11
 /**
  * Checks to see if the file system is still accessible.
  * If not, aborts the master and marks the filesystem as unavailable.
  * @return false if file system is not available
  */
 public boolean checkFileSystem() {
   if (this.fsOk) {
     try {
       FSUtils.checkFileSystemAvailable(this.fs);
       FSUtils.checkDfsSafeMode(this.conf);
     } catch (IOException e) {
       master.abort("Shutting down HBase cluster: file system not available", e);
       this.fsOk = false;
     }
   }
   return this.fsOk;
 }
Example #12
 /**
  * Perform time consuming opening of the daughter regions.
  *
  * @param server Hosting server instance. Can be null when testing
  * @param services Used to online/offline regions.
  * @param a first daughter region
  * @param b second daughter region
  * @throws IOException If thrown, transaction failed. Call {@link #rollback(Server,
  *     RegionServerServices)}
  */
 @VisibleForTesting
 void openDaughters(final Server server, final RegionServerServices services, Region a, Region b)
     throws IOException {
   boolean stopped = server != null && server.isStopped();
   boolean stopping = services != null && services.isStopping();
   // TODO: Is this check needed here?
   if (stopped || stopping) {
     LOG.info(
         "Not opening daughters "
             + b.getRegionInfo().getRegionNameAsString()
             + " and "
             + a.getRegionInfo().getRegionNameAsString()
             + " because stopping="
             + stopping
             + ", stopped="
             + stopped);
   } else {
     // Open daughters in parallel.
     DaughterOpener aOpener = new DaughterOpener(server, a);
     DaughterOpener bOpener = new DaughterOpener(server, b);
     aOpener.start();
     bOpener.start();
     try {
       aOpener.join();
       if (aOpener.getException() == null) {
         transition(SplitTransactionPhase.OPENED_REGION_A);
       }
       bOpener.join();
       if (bOpener.getException() == null) {
         transition(SplitTransactionPhase.OPENED_REGION_B);
       }
     } catch (InterruptedException e) {
       throw (InterruptedIOException) new InterruptedIOException().initCause(e);
     }
     if (aOpener.getException() != null) {
       throw new IOException("Failed " + aOpener.getName(), aOpener.getException());
     }
     if (bOpener.getException() != null) {
       throw new IOException("Failed " + bOpener.getName(), bOpener.getException());
     }
     if (services != null) {
       if (!services.reportRegionStateTransition(
           TransitionCode.SPLIT, parent.getRegionInfo(), hri_a, hri_b)) {
         throw new IOException(
             "Failed to report split region to master: "
                 + parent.getRegionInfo().getShortNameToLog());
       }
       // Should add it to OnlineRegions
       services.addToOnlineRegions(b);
       services.addToOnlineRegions(a);
     }
   }
 }
Example #13
 /** Test SplitTransactionListener */
 @Test
 public void testSplitTransactionListener() throws IOException {
   SplitTransactionImpl st = new SplitTransactionImpl(this.parent, GOOD_SPLIT_ROW);
   SplitTransaction.TransactionListener listener =
       Mockito.mock(SplitTransaction.TransactionListener.class);
   st.registerTransactionListener(listener);
   st.prepare();
   Server mockServer = Mockito.mock(Server.class);
   when(mockServer.getConfiguration()).thenReturn(TEST_UTIL.getConfiguration());
   PairOfSameType<Region> daughters = st.execute(mockServer, null);
   verify(listener)
       .transition(
           st,
           SplitTransaction.SplitTransactionPhase.STARTED,
           SplitTransaction.SplitTransactionPhase.PREPARED);
   verify(listener, times(15))
       .transition(
           any(SplitTransaction.class),
           any(SplitTransaction.SplitTransactionPhase.class),
           any(SplitTransaction.SplitTransactionPhase.class));
   verifyNoMoreInteractions(listener);
 }
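For reference, the listener does not have to be a Mockito mock. Below is a minimal hand-rolled sketch that just logs each phase change, assuming the transition and rollback callbacks on SplitTransaction.TransactionListener carry the signatures verified above (LOG and st are borrowed from the surrounding test context):

  SplitTransaction.TransactionListener loggingListener =
      new SplitTransaction.TransactionListener() {
        @Override
        public void transition(
            SplitTransaction transaction,
            SplitTransaction.SplitTransactionPhase from,
            SplitTransaction.SplitTransactionPhase to) throws IOException {
          // Invoked on every forward phase change, e.g. STARTED -> PREPARED.
          LOG.info("Split phase " + from + " -> " + to);
        }

        @Override
        public void rollback(
            SplitTransaction transaction,
            SplitTransaction.SplitTransactionPhase from,
            SplitTransaction.SplitTransactionPhase to) {
          // Invoked for each phase undone while rolling back.
          LOG.info("Split rollback " + from + " -> " + to);
        }
      };
  st.registerTransactionListener(loggingListener);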
 /**
  * Run the transaction.
  *
  * @param server Hosting server instance. Can be null when testing
  * @param services Used to online/offline regions.
  * @return merged region
  * @throws IOException If thrown, transaction failed. Call {@link #rollback(Server,
  *     RegionServerServices)}
  * @see #rollback(Server, RegionServerServices)
  */
 public HRegion execute(final Server server, final RegionServerServices services)
     throws IOException {
   useCoordinationForAssignment =
       server == null ? true : ConfigUtil.useZKForAssignment(server.getConfiguration());
   if (rmd == null) {
     rmd =
         server != null && server.getCoordinatedStateManager() != null
             ? ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
                 .getRegionMergeCoordination()
                 .getDefaultDetails()
             : null;
   }
   if (rsCoprocessorHost == null) {
     rsCoprocessorHost =
         server != null ? ((HRegionServer) server).getRegionServerCoprocessorHost() : null;
   }
   HRegion mergedRegion = createMergedRegion(server, services);
   if (rsCoprocessorHost != null) {
     rsCoprocessorHost.postMergeCommit(this.region_a, this.region_b, mergedRegion);
   }
   return stepsAfterPONR(server, services, mergedRegion);
 }
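A hedged sketch of driving this merge transaction end to end, mirroring how the split tests above drive SplitTransactionImpl; the mocked Server wiring is an assumption carried over from those tests, and the constructor and prepare signatures should be checked against your version:

  // regionA/regionB are hypothetical adjacent regions; false = not a forced merge.
  RegionMergeTransactionImpl mt = new RegionMergeTransactionImpl(regionA, regionB, false);
  if (!mt.prepare(null)) {
    throw new IOException("Regions are not mergeable");
  }
  Server mockServer = Mockito.mock(Server.class);
  when(mockServer.getConfiguration()).thenReturn(TEST_UTIL.getConfiguration());
  // Per the javadoc above, rollback(server, services) must be called on failure.
  HRegion merged = mt.execute(mockServer, null);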
Example #15
 /**
  * Open the daughter region.
  *
  * @param server
  * @param daughter
  * @throws IOException
  * @throws KeeperException
  */
 @VisibleForTesting
 void openDaughterRegion(final Server server, final Region daughter)
     throws IOException, KeeperException {
   HRegionInfo hri = daughter.getRegionInfo();
   LoggingProgressable reporter =
       server == null
           ? null
           : new LoggingProgressable(
               hri,
               server
                   .getConfiguration()
                   .getLong("hbase.regionserver.split.daughter.open.log.interval", 10000));
   ((HRegion) daughter).openHRegion(reporter);
 }
Example #16
  @Test
  public void testFailAfterPONR() throws IOException, KeeperException {
    final int rowcount = TEST_UTIL.loadRegion(this.parent, CF);
    assertTrue(rowcount > 0);
    int parentRowCount = TEST_UTIL.countRows(this.parent);
    assertEquals(rowcount, parentRowCount);

    // Start transaction.
    SplitTransactionImpl st = prepareGOOD_SPLIT_ROW();
    SplitTransactionImpl spiedUponSt = spy(st);
    Mockito.doThrow(new MockedFailedDaughterOpen())
        .when(spiedUponSt)
        .openDaughterRegion((Server) Mockito.anyObject(), (HRegion) Mockito.anyObject());

    // Run the execute.  Look at what it returns.
    boolean expectedException = false;
    Server mockServer = Mockito.mock(Server.class);
    when(mockServer.getConfiguration()).thenReturn(TEST_UTIL.getConfiguration());
    try {
      spiedUponSt.execute(mockServer, null);
    } catch (IOException e) {
      if (e.getCause() != null && e.getCause() instanceof MockedFailedDaughterOpen) {
        expectedException = true;
      }
    }
    assertTrue(expectedException);
    // Run rollback returns that we should restart.
    assertFalse(spiedUponSt.rollback(null, null));
    // Make sure that region a and region b are still in the filesystem, that
    // they have not been removed; this is supposed to be the case if we go
    // past point of no return.
    Path tableDir = this.parent.getRegionFileSystem().getTableDir();
    Path daughterADir = new Path(tableDir, spiedUponSt.getFirstDaughter().getEncodedName());
    Path daughterBDir = new Path(tableDir, spiedUponSt.getSecondDaughter().getEncodedName());
    assertTrue(TEST_UTIL.getTestFileSystem().exists(daughterADir));
    assertTrue(TEST_UTIL.getTestFileSystem().exists(daughterBDir));
  }
Example #17
 public MasterFileSystem(Server master, MasterServices services,
     MasterMetrics metrics, boolean masterRecovery)
 throws IOException {
   this.conf = master.getConfiguration();
   this.master = master;
   this.services = services;
   this.metrics = metrics;
   // Set filesystem to be that of this.rootdir else we get complaints about
   // mismatched filesystems if hbase.rootdir is hdfs and fs.defaultFS is
   // default localfs.  Presumption is that rootdir is fully-qualified before
   // we get to here with appropriate fs scheme.
    // Set the HBase root directory
   this.rootdir = FSUtils.getRootDir(conf);
    // Temp directory for HBase table creation and deletion: /${hbase.rootdir}/.tmp
   this.tempdir = new Path(this.rootdir, HConstants.HBASE_TEMP_DIRECTORY);
   // Cover both bases, the old way of setting default fs and the new.
   // We're supposed to run on 0.20 and 0.21 anyways.
   this.fs = this.rootdir.getFileSystem(conf);
   String fsUri = this.fs.getUri().toString();
   conf.set("fs.default.name", fsUri);
   conf.set("fs.defaultFS", fsUri);
   // make sure the fs has the same conf
   fs.setConf(conf);
   this.distributedLogSplitting =
     conf.getBoolean("hbase.master.distributed.log.splitting", true);
   if (this.distributedLogSplitting) {
     this.splitLogManager = new SplitLogManager(master.getZooKeeper(),
         master.getConfiguration(), master, master.getServerName().toString());
     this.splitLogManager.finishInitialization(masterRecovery);
   } else {
     this.splitLogManager = null;
   }
   // setup the filesystem variable
   // set up the archived logs path
   this.oldLogDir = createInitialFileSystemLayout();
 }
 public HRegion stepsAfterPONR(
     final Server server, final RegionServerServices services, HRegion mergedRegion)
     throws IOException {
   openMergedRegion(server, services, mergedRegion);
   if (useCoordination(server)) {
     ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
         .getRegionMergeCoordination()
         .completeRegionMergeTransaction(
             services, mergedRegionInfo, region_a, region_b, rmd, mergedRegion);
   }
   if (rsCoprocessorHost != null) {
     rsCoprocessorHost.postMerge(this.region_a, this.region_b, mergedRegion);
   }
   return mergedRegion;
 }
Example #19
 public WALSplitterHandler(
     final Server server,
     SplitLogWorkerCoordination coordination,
     SplitLogWorkerCoordination.SplitTaskDetails splitDetails,
     CancelableProgressable reporter,
     AtomicInteger inProgressTasks,
     TaskExecutor splitTaskExecutor,
     RecoveryMode mode) {
   super(server, EventType.RS_LOG_REPLAY);
   this.splitTaskDetails = splitDetails;
   this.coordination = coordination;
   this.reporter = reporter;
   this.inProgressTasks = inProgressTasks;
   this.inProgressTasks.incrementAndGet();
   this.serverName = server.getServerName();
   this.splitTaskExecutor = splitTaskExecutor;
   this.mode = mode;
 }
Example #20
  /**
   * Reproduce the lockup that happens when we get an inopportune sync during setup of the
   * zigzag-latch wait. See HBASE-14317. If the code below is broken, this test will time out
   * because it is locked up.
   *
   * <p>First I need to set up some mocks for Server and RegionServerServices. I also need to set up
   * a dodgy WAL that will throw an exception when we go to append to it.
   */
  @Test(timeout = 20000)
  public void testLockupWhenSyncInMiddleOfZigZagSetup() throws IOException {
    // A WAL that we can have throw exceptions when a flag is set.
    class DodgyFSLog extends FSHLog {
      // Set this when want the WAL to start throwing exceptions.
      volatile boolean throwException = false;

      // Latch to hold up processing until after another operation has had time to run.
      CountDownLatch latch = new CountDownLatch(1);

      public DodgyFSLog(FileSystem fs, Path root, String logDir, Configuration conf)
          throws IOException {
        super(fs, root, logDir, conf);
      }

      @Override
      protected void afterCreatingZigZagLatch() {
        // If throwException set, then append will throw an exception causing the WAL to be
        // rolled. We'll come in here. Hold up processing until a sync can get in before
        // the zigzag has time to complete its setup and get its own sync in. This is what causes
        // the lock up we've seen in production.
        if (throwException) {
          try {
            LOG.info("LATCHED");
            // So, timing can have it that the test can run and the bad flush below happens
            // before we get here. In this case, we'll be stuck waiting on this latch but there
            // is nothing in the WAL pipeline to get us to the below beforeWaitOnSafePoint...
            // because all WALs have rolled. In this case, just give up on test.
            if (!this.latch.await(5, TimeUnit.SECONDS)) {
              LOG.warn("GIVE UP! Failed waiting on latch...Test is ABORTED!");
            }
          } catch (InterruptedException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
          }
        }
      }

      @Override
      protected void beforeWaitOnSafePoint() {
        if (throwException) {
          LOG.info("COUNTDOWN");
          // Don't countdown latch until someone waiting on it otherwise, the above
          // afterCreatingZigZagLatch will get to the latch and no one will ever free it and we'll
          // be stuck; test won't go down
          while (this.latch.getCount() <= 0) Threads.sleep(1);
          this.latch.countDown();
        }
      }

      @Override
      protected Writer createWriterInstance(Path path) throws IOException {
        final Writer w = super.createWriterInstance(path);
        return new Writer() {
          @Override
          public void close() throws IOException {
            w.close();
          }

          @Override
          public void sync() throws IOException {
            if (throwException) {
              throw new IOException("FAKE! Failed to replace a bad datanode...SYNC");
            }
            w.sync();
          }

          @Override
          public void append(Entry entry) throws IOException {
            if (throwException) {
              throw new IOException("FAKE! Failed to replace a bad datanode...APPEND");
            }
            w.append(entry);
          }

          @Override
          public long getLength() {
            return w.getLength();
          }
        };
      }
    }

    // Mocked up server and regionserver services. Needed below.
    Server server = Mockito.mock(Server.class);
    Mockito.when(server.getConfiguration()).thenReturn(CONF);
    Mockito.when(server.isStopped()).thenReturn(false);
    Mockito.when(server.isAborted()).thenReturn(false);
    RegionServerServices services = Mockito.mock(RegionServerServices.class);

    // OK. Now I have my mocked up Server & RegionServerServices and dodgy WAL, go ahead with test.
    FileSystem fs = FileSystem.get(CONF);
    Path rootDir = new Path(dir + getName());
    DodgyFSLog dodgyWAL = new DodgyFSLog(fs, rootDir, getName(), CONF);
    Path originalWAL = dodgyWAL.getCurrentFileName();
    // I need a log roller running.
    LogRoller logRoller = new LogRoller(server, services);
    logRoller.addWAL(dodgyWAL);
    // There is no 'stop' once a logRoller is running... it just dies.
    logRoller.start();
    // Now get a region and start adding in edits.
    HTableDescriptor htd = new HTableDescriptor(TableName.META_TABLE_NAME);
    final HRegion region = initHRegion(tableName, null, null, dodgyWAL);
    byte[] bytes = Bytes.toBytes(getName());
    NavigableMap<byte[], Integer> scopes = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
    scopes.put(COLUMN_FAMILY_BYTES, 0);
    MultiVersionConcurrencyControl mvcc = new MultiVersionConcurrencyControl();
    try {
      // First get something into memstore. Make a Put and then pull the Cell out of it. Will
      // manage append and sync carefully in below to manufacture hang. We keep adding same
      // edit. WAL subsystem doesn't care.
      Put put = new Put(bytes);
      put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), bytes);
      WALKey key =
          new WALKey(
              region.getRegionInfo().getEncodedNameAsBytes(),
              htd.getTableName(),
              System.currentTimeMillis(),
              mvcc,
              scopes);
      WALEdit edit = new WALEdit();
      CellScanner cellScanner = put.cellScanner();
      assertTrue(cellScanner.advance());
      edit.add(cellScanner.current());
      // Put something in memstore and out in the WAL. Do a big number of appends so we push
      // out other side of the ringbuffer. If small numbers, stuff doesn't make it to WAL
      for (int i = 0; i < 1000; i++) {
        region.put(put);
      }
      // Set it so we start throwing exceptions.
      LOG.info("SET throwing of exception on append");
      dodgyWAL.throwException = true;
      // This append provokes a WAL roll request
      dodgyWAL.append(region.getRegionInfo(), key, edit, true);
      boolean exception = false;
      try {
        dodgyWAL.sync();
      } catch (Exception e) {
        exception = true;
      }
      assertTrue("Did not get sync exception", exception);

      // Get a memstore flush going too so we have same hung profile as up in the issue over
      // in HBASE-14317. Flush hangs trying to get sequenceid because the ringbuffer is held up
      // by the zigzaglatch waiting on syncs to come home.
      Thread t =
          new Thread("Flusher") {
            public void run() {
              try {
                if (region.getMemstoreSize() <= 0) {
                  throw new IOException("memstore size=" + region.getMemstoreSize());
                }
                region.flush(false);
              } catch (IOException e) {
                // Can fail trying to flush in middle of a roll. Not a failure. Will succeed later
                // when roll completes.
                LOG.info("In flush", e);
              }
              LOG.info("Exiting");
            };
          };
      t.setDaemon(true);
      t.start();
      // Wait until the dodgy WAL's latch has been tripped.
      while (dodgyWAL.latch.getCount() > 0) Threads.sleep(1);
      // Now assert I got a new WAL file put in place even though loads of errors above.
      assertTrue(originalWAL != dodgyWAL.getCurrentFileName());
      // Can I append to it?
      dodgyWAL.throwException = false;
      try {
        region.put(put);
      } catch (Exception e) {
        LOG.info("In the put", e);
      }
    } finally {
      // To stop logRoller, its server has to say it is stopped.
      Mockito.when(server.isStopped()).thenReturn(true);
      if (logRoller != null) logRoller.close();
      try {
        if (region != null) region.close();
        if (dodgyWAL != null) dodgyWAL.close();
      } catch (Exception e) {
        LOG.info("On way out", e);
      }
    }
  }
Example #21
  @Test
  public void testLogCleaning() throws Exception {
    Configuration conf = TEST_UTIL.getConfiguration();
    // set TTL
    long ttl = 10000;
    conf.setLong("hbase.master.logcleaner.ttl", ttl);
    conf.setBoolean(HConstants.REPLICATION_ENABLE_KEY, HConstants.REPLICATION_ENABLE_DEFAULT);
    Replication.decorateMasterConfiguration(conf);
    Server server = new DummyServer();
    ReplicationQueues repQueues =
        ReplicationFactory.getReplicationQueues(server.getZooKeeper(), conf, server);
    repQueues.init(server.getServerName().toString());
    final Path oldLogDir = new Path(TEST_UTIL.getDataTestDir(), HConstants.HREGION_OLDLOGDIR_NAME);
    String fakeMachineName = URLEncoder.encode(server.getServerName().toString(), "UTF8");

    final FileSystem fs = FileSystem.get(conf);

    // Create 2 invalid files, 1 "recent" file, 1 very new file and 30 old files
    long now = System.currentTimeMillis();
    fs.delete(oldLogDir, true);
    fs.mkdirs(oldLogDir);
    // Case 1: 2 invalid files, which would be deleted directly
    fs.createNewFile(new Path(oldLogDir, "a"));
    fs.createNewFile(new Path(oldLogDir, fakeMachineName + "." + "a"));
    // Case 2: 1 "recent" file, not even deletable for the first log cleaner
    // (TimeToLiveLogCleaner), so we are not going down the chain
    System.out.println("Now is: " + now);
    for (int i = 1; i < 31; i++) {
      // Case 3: old files which would be deletable for the first log cleaner
      // (TimeToLiveLogCleaner), and also for the second (ReplicationLogCleaner)
      Path fileName = new Path(oldLogDir, fakeMachineName + "." + (now - i));
      fs.createNewFile(fileName);
      // Case 4: put 3 old log files in ZK indicating that they are scheduled
      // for replication so these files would pass the first log cleaner
      // (TimeToLiveLogCleaner) but would be rejected by the second
      // (ReplicationLogCleaner)
      if (i % (30 / 3) == 1) {
        repQueues.addLog(fakeMachineName, fileName.getName());
        System.out.println("Replication log file: " + fileName);
      }
    }

    // Sleep for some time to get a newer modification time
    Thread.sleep(ttl);
    fs.createNewFile(new Path(oldLogDir, fakeMachineName + "." + now));

    // Case 2: 1 newer file, not even deletable for the first log cleaner
    // (TimeToLiveLogCleaner), so we are not going down the chain
    fs.createNewFile(new Path(oldLogDir, fakeMachineName + "." + (now + 10000)));

    for (FileStatus stat : fs.listStatus(oldLogDir)) {
      System.out.println(stat.getPath().toString());
    }

    assertEquals(34, fs.listStatus(oldLogDir).length);

    LogCleaner cleaner = new LogCleaner(1000, server, conf, fs, oldLogDir);
    cleaner.chore();

    // We end up with the current log file, a newer one and the 3 old log
    // files which are scheduled for replication
    TEST_UTIL.waitFor(
        1000,
        new Waiter.Predicate<Exception>() {
          @Override
          public boolean evaluate() throws Exception {
            return 5 == fs.listStatus(oldLogDir).length;
          }
        });

    for (FileStatus file : fs.listStatus(oldLogDir)) {
      System.out.println("Kept log files: " + file.getPath().getName());
    }
  }
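The chain the test exercises (TimeToLiveLogCleaner first, ReplicationLogCleaner second) is assembled from configuration. A hedged sketch of the standard wiring; the plugin key and class names are the usual ones but should be verified against your HBase version:

  Configuration conf = HBaseConfiguration.create();
  conf.set("hbase.master.logcleaner.plugins",
      "org.apache.hadoop.hbase.master.cleaner.TimeToLiveLogCleaner,"
          + "org.apache.hadoop.hbase.replication.master.ReplicationLogCleaner");
  // TTL consulted by TimeToLiveLogCleaner -- the same key the test sets above.
  conf.setLong("hbase.master.logcleaner.ttl", 10000);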
  /**
   * Prepare the merged region and region files.
   *
   * @param server Hosting server instance. Can be null when testing
   * @param services Used to online/offline regions.
   * @return merged region
   * @throws IOException If thrown, transaction failed. Call {@link #rollback(Server,
   *     RegionServerServices)}
   */
  HRegion createMergedRegion(final Server server, final RegionServerServices services)
      throws IOException {
    LOG.info(
        "Starting merge of "
            + region_a
            + " and "
            + region_b.getRegionNameAsString()
            + ", forcible="
            + forcible);
    if ((server != null && server.isStopped()) || (services != null && services.isStopping())) {
      throw new IOException("Server is stopped or stopping");
    }

    if (rsCoprocessorHost != null) {
      if (rsCoprocessorHost.preMerge(this.region_a, this.region_b)) {
        throw new IOException(
            "Coprocessor bypassing regions " + this.region_a + " " + this.region_b + " merge.");
      }
    }

    // If true, no cluster to write meta edits to or to use coordination.
    boolean testing =
        server == null
            ? true
            : server.getConfiguration().getBoolean("hbase.testing.nocluster", false);

    HRegion mergedRegion = stepsBeforePONR(server, services, testing);

    @MetaMutationAnnotation List<Mutation> metaEntries = new ArrayList<Mutation>();
    if (rsCoprocessorHost != null) {
      if (rsCoprocessorHost.preMergeCommit(this.region_a, this.region_b, metaEntries)) {
        throw new IOException(
            "Coprocessor bypassing regions " + this.region_a + " " + this.region_b + " merge.");
      }
      try {
        for (Mutation p : metaEntries) {
          HRegionInfo.parseRegionName(p.getRow());
        }
      } catch (IOException e) {
        LOG.error(
            "Row key of mutation from coprocessor is not parsable as region name."
                + "Mutations from coprocessor should only be for hbase:meta table.",
            e);
        throw e;
      }
    }

    // This is the point of no return. Similar with SplitTransaction.
    // IF we reach the PONR then subsequent failures need to crash out this
    // regionserver
    this.journal.add(JournalEntry.PONR);

    // Add the merged region and delete region_a and region_b
    // as an atomic update. See HBASE-7721. This update to hbase:meta is what
    // will determine whether the region is merged or not in case of failures.
    // If it is successful, the master will roll forward; if not, the master
    // will roll back.
    if (!testing && useCoordinationForAssignment) {
      if (metaEntries.isEmpty()) {
        MetaTableAccessor.mergeRegions(
            server.getConnection(),
            mergedRegion.getRegionInfo(),
            region_a.getRegionInfo(),
            region_b.getRegionInfo(),
            server.getServerName(),
            region_a.getTableDesc().getRegionReplication());
      } else {
        mergeRegionsAndPutMetaEntries(
            server.getConnection(),
            mergedRegion.getRegionInfo(),
            region_a.getRegionInfo(),
            region_b.getRegionInfo(),
            server.getServerName(),
            metaEntries,
            region_a.getTableDesc().getRegionReplication());
      }
    } else if (services != null && !useCoordinationForAssignment) {
      if (!services.reportRegionStateTransition(
          TransitionCode.MERGE_PONR,
          mergedRegionInfo,
          region_a.getRegionInfo(),
          region_b.getRegionInfo())) {
        // Passed PONR, let SSH clean it up
        throw new IOException(
            "Failed to notify master that merge passed PONR: "
                + region_a.getRegionInfo().getRegionNameAsString()
                + " and "
                + region_b.getRegionInfo().getRegionNameAsString());
      }
    }
    return mergedRegion;
  }
 private boolean useCoordination(final Server server) {
   return server != null
       && useCoordinationForAssignment
       && server.getCoordinatedStateManager() != null;
 }
Example #24
  /**
   * Reproduce the lockup that happens when there are no further syncs after an append fails,
   * causing an isolated sync and then an infinite wait. See HBASE-16960. If the code below is
   * broken, this test will time out because it is locked up.
   *
   * <p>Steps for reproduce:<br>
   * 1. Trigger server abort through dodgyWAL1<br>
   * 2. Add a {@link DummyWALActionsListener} to dodgyWAL2 to cause ringbuffer event handler thread
   * sleep for a while thus keeping {@code endOfBatch} false<br>
   * 3. Publish a sync then an append which will throw exception, check whether the sync could
   * return
   */
  @Test(timeout = 20000)
  public void testLockup16960() throws IOException {
    // A WAL that we can have throw exceptions when a flag is set.
    class DodgyFSLog extends FSHLog {
      // Set this when want the WAL to start throwing exceptions.
      volatile boolean throwException = false;

      public DodgyFSLog(FileSystem fs, Path root, String logDir, Configuration conf)
          throws IOException {
        super(fs, root, logDir, conf);
      }

      @Override
      protected Writer createWriterInstance(Path path) throws IOException {
        final Writer w = super.createWriterInstance(path);
        return new Writer() {
          @Override
          public void close() throws IOException {
            w.close();
          }

          @Override
          public void sync() throws IOException {
            if (throwException) {
              throw new IOException("FAKE! Failed to replace a bad datanode...SYNC");
            }
            w.sync();
          }

          @Override
          public void append(Entry entry) throws IOException {
            if (throwException) {
              throw new IOException("FAKE! Failed to replace a bad datanode...APPEND");
            }
            w.append(entry);
          }

          @Override
          public long getLength() {
            return w.getLength();
          }
        };
      }

      @Override
      protected long doReplaceWriter(Path oldPath, Path newPath, Writer nextWriter)
          throws IOException {
        if (throwException) {
          throw new FailedLogCloseException("oldPath=" + oldPath + ", newPath=" + newPath);
        }
        long oldFileLen = 0L;
        oldFileLen = super.doReplaceWriter(oldPath, newPath, nextWriter);
        return oldFileLen;
      }
    }

    // Mocked up server and regionserver services. Needed below.
    Server server =
        new DummyServer(CONF, ServerName.valueOf("hostname1.example.org", 1234, 1L).toString());
    RegionServerServices services = Mockito.mock(RegionServerServices.class);

    CONF.setLong("hbase.regionserver.hlog.sync.timeout", 10000);

    // OK. Now I have my mocked up Server & RegionServerServices and dodgy WAL,
    // go ahead with test.
    FileSystem fs = FileSystem.get(CONF);
    Path rootDir = new Path(dir + getName());
    DodgyFSLog dodgyWAL1 = new DodgyFSLog(fs, rootDir, getName(), CONF);

    Path rootDir2 = new Path(dir + getName() + "2");
    final DodgyFSLog dodgyWAL2 = new DodgyFSLog(fs, rootDir2, getName() + "2", CONF);
    // Add a listener to force ringbuffer event handler sleep for a while
    dodgyWAL2.registerWALActionsListener(new DummyWALActionsListener());

    // I need a log roller running.
    LogRoller logRoller = new LogRoller(server, services);
    logRoller.addWAL(dodgyWAL1);
    logRoller.addWAL(dodgyWAL2);
    // There is no 'stop' once a logRoller is running... it just dies.
    logRoller.start();
    // Now get a region and start adding in edits.
    HTableDescriptor htd = new HTableDescriptor(TableName.META_TABLE_NAME);
    final HRegion region = initHRegion(tableName, null, null, dodgyWAL1);
    byte[] bytes = Bytes.toBytes(getName());
    NavigableMap<byte[], Integer> scopes = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
    scopes.put(COLUMN_FAMILY_BYTES, 0);
    MultiVersionConcurrencyControl mvcc = new MultiVersionConcurrencyControl();
    try {
      Put put = new Put(bytes);
      put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), bytes);
      WALKey key =
          new WALKey(
              region.getRegionInfo().getEncodedNameAsBytes(),
              htd.getTableName(),
              System.currentTimeMillis(),
              mvcc,
              scopes);
      WALEdit edit = new WALEdit();
      CellScanner cellScanner = put.cellScanner();
      assertTrue(cellScanner.advance());
      edit.add(cellScanner.current());

      LOG.info("SET throwing of exception on append");
      dodgyWAL1.throwException = true;
      // This append provokes a WAL roll request
      dodgyWAL1.append(region.getRegionInfo(), key, edit, true);
      boolean exception = false;
      try {
        dodgyWAL1.sync();
      } catch (Exception e) {
        exception = true;
      }
      assertTrue("Did not get sync exception", exception);

      // The LogRoller calls dodgyWAL1.rollWriter, gets a FailedLogCloseException,
      // and causes a server abort.
      try {
        // Wait for the LogRoller to exit.
        Thread.sleep(50);
      } catch (InterruptedException e) {
        e.printStackTrace();
      }

      final CountDownLatch latch = new CountDownLatch(1);

      // Make the RingBufferEventHandler sleep for 1s, so that for the following
      // sync, endOfBatch stays false
      key =
          new WALKey(
              region.getRegionInfo().getEncodedNameAsBytes(),
              TableName.valueOf("sleep"),
              System.currentTimeMillis(),
              mvcc,
              scopes);
      dodgyWAL2.append(region.getRegionInfo(), key, edit, true);

      Thread t =
          new Thread("Sync") {
            public void run() {
              try {
                dodgyWAL2.sync();
              } catch (IOException e) {
                LOG.info("In sync", e);
              }
              latch.countDown();
              LOG.info("Sync exiting");
            };
          };
      t.setDaemon(true);
      t.start();
      try {
        // Make sure the sync has been published.
        Thread.sleep(100);
      } catch (InterruptedException e1) {
        e1.printStackTrace();
      }
      // make append throw DamagedWALException
      key =
          new WALKey(
              region.getRegionInfo().getEncodedNameAsBytes(),
              TableName.valueOf("DamagedWALException"),
              System.currentTimeMillis(),
              mvcc,
              scopes);
      dodgyWAL2.append(region.getRegionInfo(), key, edit, true);

      while (latch.getCount() > 0) {
        Threads.sleep(100);
      }
      assertTrue(server.isAborted());
    } finally {
      if (logRoller != null) {
        logRoller.close();
      }
      try {
        if (region != null) {
          region.close();
        }
        if (dodgyWAL1 != null) {
          dodgyWAL1.close();
        }
        if (dodgyWAL2 != null) {
          dodgyWAL2.close();
        }
      } catch (Exception e) {
        LOG.info("On way out", e);
      }
    }
  }
Example #25
  /**
   * Prepare the regions and region files.
   *
   * @param server Hosting server instance. Can be null when testing (won't try and update in zk if
   *     a null server)
   * @param services Used to online/offline regions.
   * @param user
   * @throws IOException If thrown, transaction failed. Call {@link #rollback(Server,
   *     RegionServerServices)}
   * @return Regions created
   */
  @VisibleForTesting
  PairOfSameType<Region> createDaughters(
      final Server server, final RegionServerServices services, User user) throws IOException {
    LOG.info("Starting split of region " + this.parent);
    if ((server != null && server.isStopped()) || (services != null && services.isStopping())) {
      throw new IOException("Server is stopped or stopping");
    }
    assert !this.parent.lock.writeLock().isHeldByCurrentThread()
        : "Unsafe to hold write lock while performing RPCs";

    transition(SplitTransactionPhase.BEFORE_PRE_SPLIT_HOOK);

    // Coprocessor callback
    if (this.parent.getCoprocessorHost() != null) {
      // TODO: Remove one of these
      parent.getCoprocessorHost().preSplit(user);
      parent.getCoprocessorHost().preSplit(splitrow, user);
    }

    transition(SplitTransactionPhase.AFTER_PRE_SPLIT_HOOK);

    // If true, no cluster to write meta edits to or to update znodes in.
    boolean testing =
        server == null
            ? true
            : server.getConfiguration().getBoolean("hbase.testing.nocluster", false);
    this.fileSplitTimeout =
        testing
            ? this.fileSplitTimeout
            : server
                .getConfiguration()
                .getLong("hbase.regionserver.fileSplitTimeout", this.fileSplitTimeout);

    PairOfSameType<Region> daughterRegions = stepsBeforePONR(server, services, testing);

    final List<Mutation> metaEntries = new ArrayList<Mutation>();
    boolean ret = false;
    if (this.parent.getCoprocessorHost() != null) {
      ret = parent.getCoprocessorHost().preSplitBeforePONR(splitrow, metaEntries, user);
      if (ret) {
        throw new IOException(
            "Coprocessor bypassing region "
                + parent.getRegionInfo().getRegionNameAsString()
                + " split.");
      }
      try {
        for (Mutation p : metaEntries) {
          HRegionInfo.parseRegionName(p.getRow());
        }
      } catch (IOException e) {
        LOG.error(
            "Row key of mutation from coprocessor is not parsable as region name. "
                + "Mutations from coprocessor should only be for the hbase:meta table.");
        throw e;
      }
    }

    // This is the point of no return.  Adding subsequent edits to .META. as we
    // do below when we do the daughter opens adding each to .META. can fail in
    // various interesting ways the most interesting of which is a timeout
    // BUT the edits all go through (See HBASE-3872).  IF we reach the PONR
    // then subsequent failures need to crash out this regionserver; the
    // server shutdown processing should be able to fix-up the incomplete split.
    // The offlined parent will have the daughters as extra columns.  If
    // we leave the daughter regions in place and do not remove them when we
    // crash out, then they will have their references to the parent in place
    // still and the server shutdown fixup of .META. will point to these
    // regions.
    // We should add the PONR JournalEntry before offlineParentInMeta, so even if
    // offlineParentInMeta times out, the regionserver will exit and the master's
    // ServerShutdownHandler will fix up the daughters and avoid data loss. (See
    // HBASE-4562.)

    transition(SplitTransactionPhase.PONR);

    // Edit parent in meta.  Offlines the parent region and adds splita and splitb
    // as an atomic update. See HBASE-7721. This update to META is what will
    // determine whether the region is split or not in case of failures.
    // If it is successful, the master will roll forward; if not, the master will
    // roll back and assign the parent region.
    if (services != null
        && !services.reportRegionStateTransition(
            TransitionCode.SPLIT_PONR, parent.getRegionInfo(), hri_a, hri_b)) {
      // Passed PONR, let SSH clean it up
      throw new IOException(
          "Failed to notify master that split passed PONR: "
              + parent.getRegionInfo().getRegionNameAsString());
    }
    return daughterRegions;
  }
  public HRegion stepsBeforePONR(
      final Server server, final RegionServerServices services, boolean testing)
      throws IOException {
    if (rmd == null) {
      rmd =
          server != null && server.getCoordinatedStateManager() != null
              ? ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
                  .getRegionMergeCoordination()
                  .getDefaultDetails()
              : null;
    }

    // If server doesn't have a coordination state manager, don't do coordination actions.
    if (useCoordination(server)) {
      try {
        ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
            .getRegionMergeCoordination()
            .startRegionMergeTransaction(
                mergedRegionInfo,
                server.getServerName(),
                region_a.getRegionInfo(),
                region_b.getRegionInfo());
      } catch (IOException e) {
        throw new IOException(
            "Failed to start region merge transaction for "
                + this.mergedRegionInfo.getRegionNameAsString(),
            e);
      }
    } else if (services != null && !useCoordinationForAssignment) {
      if (!services.reportRegionStateTransition(
          TransitionCode.READY_TO_MERGE,
          mergedRegionInfo,
          region_a.getRegionInfo(),
          region_b.getRegionInfo())) {
        throw new IOException(
            "Failed to get ok from master to merge "
                + region_a.getRegionInfo().getRegionNameAsString()
                + " and "
                + region_b.getRegionInfo().getRegionNameAsString());
      }
    }
    this.journal.add(JournalEntry.SET_MERGING);
    if (useCoordination(server)) {
      // After creating the merge node, wait for the master to transition it
      // from PENDING_MERGE to MERGING so that we can move on. We want the master
      // to know about the merge so it won't transition any region which is merging.
      ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
          .getRegionMergeCoordination()
          .waitForRegionMergeTransaction(services, mergedRegionInfo, region_a, region_b, rmd);
    }

    this.region_a.getRegionFileSystem().createMergesDir();
    this.journal.add(JournalEntry.CREATED_MERGE_DIR);

    Map<byte[], List<StoreFile>> hstoreFilesOfRegionA =
        closeAndOfflineRegion(services, this.region_a, true, testing);
    Map<byte[], List<StoreFile>> hstoreFilesOfRegionB =
        closeAndOfflineRegion(services, this.region_b, false, testing);

    assert hstoreFilesOfRegionA != null && hstoreFilesOfRegionB != null;

    // mergeStoreFiles creates merged region dirs under the region_a merges dir.
    // Nothing to unroll here on failure -- the CREATED_MERGE_DIR cleanup will
    // handle it.
    mergeStoreFiles(hstoreFilesOfRegionA, hstoreFilesOfRegionB);

    if (useCoordination(server)) {
      try {
        // Do the final check in case any merging region is moved somehow. If so, the transition
        // will fail.
        ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
            .getRegionMergeCoordination()
            .confirmRegionMergeTransaction(
                this.mergedRegionInfo,
                region_a.getRegionInfo(),
                region_b.getRegionInfo(),
                server.getServerName(),
                rmd);
      } catch (IOException e) {
        throw new IOException(
            "Failed setting MERGING on " + this.mergedRegionInfo.getRegionNameAsString(), e);
      }
    }

    // Log to the journal that we are creating merged region. We could fail
    // halfway through. If we do, we could have left
    // stuff in the fs that needs cleanup -- a storefile or two. That's why we
    // add entry to journal BEFORE rather than AFTER the change.
    this.journal.add(JournalEntry.STARTED_MERGED_REGION_CREATION);
    HRegion mergedRegion =
        createMergedRegionFromMerges(this.region_a, this.region_b, this.mergedRegionInfo);
    return mergedRegion;
  }
  /**
   * @param server Hosting server instance (May be null when testing).
   * @param services Services of regionserver, used to online regions.
   * @throws IOException If thrown, rollback failed. Take drastic action.
   * @return True if we successfully rolled back, false if we got to the point of no return and so
   *     now need to abort the server to minimize damage.
   */
  @SuppressWarnings("deprecation")
  public boolean rollback(final Server server, final RegionServerServices services)
      throws IOException {
    assert this.mergedRegionInfo != null;
    // Coprocessor callback
    if (rsCoprocessorHost != null) {
      rsCoprocessorHost.preRollBackMerge(this.region_a, this.region_b);
    }

    boolean result = true;
    ListIterator<JournalEntry> iterator = this.journal.listIterator(this.journal.size());
    // Iterate in reverse.
    while (iterator.hasPrevious()) {
      JournalEntry je = iterator.previous();
      switch (je) {
        case SET_MERGING:
          if (useCoordination(server)) {
            ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
                .getRegionMergeCoordination()
                .clean(this.mergedRegionInfo);
          } else if (services != null
              && !useCoordinationForAssignment
              && !services.reportRegionStateTransition(
                  TransitionCode.MERGE_REVERTED,
                  mergedRegionInfo,
                  region_a.getRegionInfo(),
                  region_b.getRegionInfo())) {
            return false;
          }
          break;

        case CREATED_MERGE_DIR:
          this.region_a.writestate.writesEnabled = true;
          this.region_b.writestate.writesEnabled = true;
          this.region_a.getRegionFileSystem().cleanupMergesDir();
          break;

        case CLOSED_REGION_A:
          try {
            // So, this returns a seqid but if we just closed and then reopened,
            // we should be ok. On close, we flushed using sequenceid obtained
            // from hosting regionserver so no need to propagate the sequenceid
            // returned out of initialize below up into regionserver as we
            // normally do.
            this.region_a.initialize();
          } catch (IOException e) {
            LOG.error(
                "Failed rollbacking CLOSED_REGION_A of region "
                    + this.region_a.getRegionNameAsString(),
                e);
            throw new RuntimeException(e);
          }
          break;

        case OFFLINED_REGION_A:
          if (services != null) services.addToOnlineRegions(this.region_a);
          break;

        case CLOSED_REGION_B:
          try {
            this.region_b.initialize();
          } catch (IOException e) {
            LOG.error(
                "Failed rollbacking CLOSED_REGION_A of region "
                    + this.region_b.getRegionNameAsString(),
                e);
            throw new RuntimeException(e);
          }
          break;

        case OFFLINED_REGION_B:
          if (services != null) services.addToOnlineRegions(this.region_b);
          break;

        case STARTED_MERGED_REGION_CREATION:
          this.region_a.getRegionFileSystem().cleanupMergedRegion(this.mergedRegionInfo);
          break;

        case PONR:
          // We got to the point-of-no-return so we need to just abort. Return
          // immediately. Do not clean up created merged regions.
          return false;

        default:
          throw new RuntimeException("Unhandled journal entry: " + je);
      }
    }
    // Coprocessor callback
    if (rsCoprocessorHost != null) {
      rsCoprocessorHost.postRollBackMerge(this.region_a, this.region_b);
    }

    return result;
  }