/**
   * Checks that a thread blocked in {@code waitForMeta()} is still waiting before the catalog
   * tracker is stopped, and that it can be joined after the tracker is stopped.
   *
   * @throws IOException declared by the test signature for the setup calls
   * @throws InterruptedException if the test thread is interrupted while joining the waiter
   */
  @Test
  public void testInterruptWaitOnMetaAndRoot() throws IOException, InterruptedException {
    HRegionInterface regionInterface = Mockito.mock(HRegionInterface.class);
    HConnection mockedConnection = mockConnection(regionInterface);

    final CatalogTracker ct = constructAndStartCatalogTracker(mockedConnection);
    // The test expects both catalog locations to be unknown at this point.
    ServerName rootLocation = ct.getRootLocation();
    Assert.assertNull(rootLocation);
    ServerName metaLocation = ct.getMetaLocation();
    Assert.assertNull(metaLocation);

    // Background thread that blocks waiting for meta; an interrupt is surfaced as a
    // RuntimeException in that thread.
    Thread waiter =
        new Thread() {
          @Override
          public void run() {
            try {
              ct.waitForMeta();
            } catch (InterruptedException e) {
              throw new RuntimeException("Interrupted", e);
            }
          }
        };
    waiter.start();
    while (!waiter.isAlive()) {
      Threads.sleep(1);
    }
    Threads.sleep(1);
    assertTrue(waiter.isAlive());

    // Stopping the tracker should let the waiter finish so the join returns.
    ct.stop();
    waiter.join();
  }
 /**
  * Starts {@code t}, pauses for {@code ms} (passed straight to {@code Threads.sleep}), and then
  * asserts that the thread is still alive, i.e. still waiting.
  *
  * @param t the not-yet-started thread to launch
  * @param ms how long to pause before checking that the thread is still alive
  */
 private void startWaitAliveThenWaitItLives(final Thread t, final int ms) {
   t.start();
   // Deliberately no spin on !t.isAlive(): once start() has returned, isAlive() is false only
   // when the thread has already terminated, so such a loop would burn CPU forever instead of
   // letting the assertion below report the failure.
   Threads.sleep(ms);
   Assert.assertTrue("Assert " + t.getName() + " still waiting", t.isAlive());
 }
  /**
   * Launches the configured number of worker threads, lets them run for the configured runtime
   * (table creation is switched off at the halfway point), waits for them to finish, and then
   * verifies namespaces, tables and hbck consistency.
   *
   * @return always 0; failures are reported through exceptions or assertions
   * @throws Exception if a worker recorded an exception or any verification step fails
   */
  private int runTest() throws Exception {
    LOG.info("Starting the test");

    // Both settings are looked up under keys derived from the concrete test class name.
    String runtimeKey = String.format(RUN_TIME_KEY, this.getClass().getSimpleName());
    long runtime = util.getConfiguration().getLong(runtimeKey, DEFAULT_RUN_TIME);

    String numThreadKey = String.format(NUM_THREADS_KEY, this.getClass().getSimpleName());
    numThreads = util.getConfiguration().getInt(numThreadKey, DEFAULT_NUM_THREADS);

    ArrayList<Worker> workers = new ArrayList<>();
    for (int i = 0; i < numThreads; i++) {
      // Check the already-started workers before launching the next one.
      checkException(workers);
      Worker worker = new Worker();
      LOG.info("Launching worker thread " + worker.getName());
      workers.add(worker);
      worker.start();
    }

    // First half of the runtime: workers may create tables. Second half: creation is disabled.
    Threads.sleep(runtime / 2);
    LOG.info("Stopping creating new tables");
    create_table.set(false);
    Threads.sleep(runtime / 2);
    LOG.info("Runtime is up");
    running.set(false);

    checkException(workers);

    for (Worker worker : workers) {
      worker.join();
    }
    LOG.info("All Worker threads stopped");

    // verify
    LOG.info("Verify actions of all threads succeeded");
    checkException(workers);
    LOG.info("Verify namespaces");
    verifyNamespaces();
    LOG.info("Verify states of all tables");
    verifyTables();

    // RUN HBCK

    HBaseFsck hbck = null;
    try {
      LOG.info("Running hbck");
      hbck = HbckTestingUtil.doFsck(util.getConfiguration(), false);
      if (HbckTestingUtil.inconsistencyFound(hbck)) {
        // Find the inconsistency during HBCK. Leave table and namespace undropped so that
        // we can check outside the test.
        keepObjectsAtTheEnd = true;
      }
      HbckTestingUtil.assertNoErrors(hbck);
      LOG.info("Finished hbck");
    } finally {
      // Close hbck even when the assertion above fails.
      if (hbck != null) {
        hbck.close();
      }
    }
    return 0;
  }
  /**
   * Moves the test table's first region to a freshly started region server, writes one row, kills
   * that server, and then verifies that the WAL-restore observer counters read back as zero for
   * the table.
   *
   * @throws Exception if any cluster operation or verification fails
   */
  @Test
  public void testPreWALRestoreSkip() throws Exception {
    LOG.info(TestRegionObserverInterface.class.getName() + ".testPreWALRestoreSkip");
    TableName tableName = TableName.valueOf(SimpleRegionObserver.TABLE_SKIPPED);
    HTable table = util.createTable(tableName, new byte[][] {A, B, C});
    // Clean up in a finally block so a failing step does not leave the table behind or the
    // HTable open.
    try {
      JVMClusterUtil.RegionServerThread rs1 = cluster.startRegionServer();
      ServerName sn2 = rs1.getRegionServer().getServerName();
      String regEN = table.getRegionLocations().firstEntry().getKey().getEncodedName();

      util.getHBaseAdmin().move(regEN.getBytes(), sn2.getServerName().getBytes());
      // Poll until the region is reported on the new server.
      while (!sn2.equals(table.getRegionLocations().firstEntry().getValue())) {
        Thread.sleep(100);
      }

      Put put = new Put(ROW);
      put.add(A, A, A);
      put.add(B, B, B);
      put.add(C, C, C);
      table.put(put);
      table.flushCommits();

      cluster.killRegionServer(rs1.getRegionServer().getServerName());
      Threads.sleep(20000); // just to be sure that the kill has fully started.
      util.waitUntilAllRegionsAssigned(tableName);

      verifyMethodResult(
          SimpleRegionObserver.class,
          new String[] {"getCtPreWALRestore", "getCtPostWALRestore"},
          tableName,
          new Integer[] {0, 0});
    } finally {
      util.deleteTable(tableName);
      table.close();
    }
  }
    /**
     * While {@code running} is set, repeatedly picks at random between starting a server and
     * stopping a random server, pausing between iterations. The first exception encountered is
     * recorded in {@code exception}; later ones are only logged.
     */
    @Override
    public void run() {
      while (running.get()) {
        // nextInt(2) yields exactly 0 or 1. The former nextInt() % 2 also produced -1 for
        // negative odd values, which matched neither case and skewed the start/stop balance.
        switch (random.nextInt(2)) {
          case 0: // start a server
            try {
              cluster.startServer();
            } catch (Exception e) {
              LOG.warn(e);
              exception.compareAndSet(null, e);
            }
            break;

          case 1: // stop a server
            try {
              cluster.stopRandomServer();
            } catch (Exception e) {
              LOG.warn(e);
              exception.compareAndSet(null, e);
            }
            break;

          default:
            break;
        }

        Threads.sleep(100);
      }
    }
  /**
   * Compares the values reported by {@code MetricsMasterWrapperImpl} with those read directly from
   * the master, then stops one region server and checks the wrapper's live/dead server counts and
   * WAL file count.
   */
  @Test(timeout = 30000)
  public void testInfo() {
    HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
    MetricsMasterWrapperImpl info = new MetricsMasterWrapperImpl(master);
    // The three-argument assertEquals calls use a delta of 0, i.e. exact equality.
    assertEquals(master.getSplitPlanCount(), info.getSplitPlanCount(), 0);
    assertEquals(master.getMergePlanCount(), info.getMergePlanCount(), 0);
    assertEquals(master.getAverageLoad(), info.getAverageLoad(), 0);
    assertEquals(master.getClusterId(), info.getClusterId());
    assertEquals(master.getMasterActiveTime(), info.getActiveTime());
    assertEquals(master.getMasterStartTime(), info.getStartTime());
    assertEquals(master.getMasterCoprocessors().length, info.getCoprocessors().length);
    assertEquals(
        master.getServerManager().getOnlineServersList().size(), info.getNumRegionServers());
    // NOTE(review): the literal 5 presumably matches the cluster size configured in the test
    // setup, which is not visible here -- confirm.
    assertEquals(5, info.getNumRegionServers());

    // The quorum string is split on commas and its entry count compared with the ZK server count.
    String zkServers = info.getZookeeperQuorum();
    assertEquals(zkServers.split(",").length, TEST_UTIL.getZkCluster().getZooKeeperServerNum());

    final int index = 3;
    LOG.info("Stopping " + TEST_UTIL.getMiniHBaseCluster().getRegionServer(index));
    TEST_UTIL.getMiniHBaseCluster().stopRegionServer(index, false);
    TEST_UTIL.getMiniHBaseCluster().waitOnRegionServer(index);
    // We stopped the regionserver but could take a while for the master to notice it so hang here
    // until it does... then move forward to see if metrics wrapper notices.
    // This loop has no bound of its own; the @Test timeout is what ends it if the count never
    // reaches 4.
    while (TEST_UTIL.getHBaseCluster().getMaster().getServerManager().getOnlineServers().size()
        != 4) {
      Threads.sleep(10);
    }
    assertEquals(4, info.getNumRegionServers());
    assertEquals(1, info.getNumDeadRegionServers());
    assertEquals(1, info.getNumWALFiles());
  }
 /**
  * Builds the executor used for RPCs to region replicas. Similar to Connection's thread pool.
  *
  * <p>Core and maximum thread counts and the keep-alive time are read from {@code conf}; a
  * configured count of 0 is replaced by eight times the number of available processors. The work
  * queue is bounded, worker threads come from a daemon thread factory, and core threads are
  * allowed to time out.
  *
  * @param conf configuration supplying the pool settings
  * @return the newly created thread pool
  */
 private ExecutorService getDefaultThreadPool(Configuration conf) {
   int maxThreads = conf.getInt("hbase.region.replica.replication.threads.max", 256);
   int coreThreads = conf.getInt("hbase.region.replica.replication.threads.core", 16);
   // A configured value of 0 means "derive the size from the processor count".
   maxThreads = (maxThreads == 0) ? Runtime.getRuntime().availableProcessors() * 8 : maxThreads;
   coreThreads =
       (coreThreads == 0) ? Runtime.getRuntime().availableProcessors() * 8 : coreThreads;
   long keepAliveTime = conf.getLong("hbase.region.replica.replication.threads.keepalivetime", 60);

   // Queue capacity scales with the maximum thread count.
   int tasksPerThread =
       conf.getInt(
           HConstants.HBASE_CLIENT_MAX_TOTAL_TASKS,
           HConstants.DEFAULT_HBASE_CLIENT_MAX_TOTAL_TASKS);
   LinkedBlockingQueue<Runnable> workQueue =
       new LinkedBlockingQueue<Runnable>(maxThreads * tasksPerThread);

   String threadNamePrefix = this.getClass().getSimpleName() + "-rpc-shared-";
   ThreadPoolExecutor pool =
       new ThreadPoolExecutor(
           coreThreads,
           maxThreads,
           keepAliveTime,
           TimeUnit.SECONDS,
           workQueue,
           Threads.newDaemonThreadFactory(threadNamePrefix));
   pool.allowCoreThreadTimeOut(true);
   return pool;
 }
Beispiel #8
0
      /**
       * Until stopped, repeatedly reads all regions of the table from meta and asserts that they
       * form an unbroken chain: each region starts where the previous one ended, and the last
       * region ends at the empty end row. Any throwable is stored in {@code ex} and turned into
       * a test failure.
       */
      @Override
      public void run() {
        while (!isStopped()) {
          try {
            NavigableMap<HRegionInfo, ServerName> regionMap =
                MetaScanner.allTableRegions(connection, TABLENAME);

            LOG.info("-------");
            byte[] previousEndKey = HConstants.EMPTY_START_ROW;
            for (HRegionInfo region : regionMap.navigableKeySet()) {
              // Numeric forms of the boundary keys are used for logging only.
              long startKey = 0;
              long endKey = Long.MAX_VALUE;
              if (!Bytes.equals(HConstants.EMPTY_START_ROW, region.getStartKey())) {
                startKey = Bytes.toLong(region.getStartKey());
              }
              if (!Bytes.equals(HConstants.EMPTY_END_ROW, region.getEndKey())) {
                endKey = Bytes.toLong(region.getEndKey());
              }
              LOG.info("start:" + startKey + " end:" + endKey + " hri:" + region);

              String mismatch =
                  "lastEndKey="
                      + Bytes.toString(previousEndKey)
                      + ", startKey="
                      + Bytes.toString(region.getStartKey());
              Assert.assertTrue(mismatch, Bytes.equals(previousEndKey, region.getStartKey()));
              previousEndKey = region.getEndKey();
            }
            Assert.assertTrue(Bytes.equals(previousEndKey, HConstants.EMPTY_END_ROW));
            LOG.info("-------");
            Threads.sleep(10 + random.nextInt(50));
          } catch (Throwable failure) {
            ex = failure;
            Assert.fail(StringUtils.stringifyException(failure));
          }
        }
      }
 /**
  * Delays every Get by a fixed pause before it is processed; the arguments are not inspected.
  *
  * @param e the observer context (unused)
  * @param get the Get being processed (unused)
  * @param results the result list (unused)
  * @throws IOException declared by the overridden signature; not thrown by this body
  */
 @Override
 public void preGetOp(
     final ObserverContext<RegionCoprocessorEnvironment> e,
     final Get get,
     final List<Cell> results)
     throws IOException {
   final long pause = 2500;
   Threads.sleep(pause);
 }
Beispiel #10
0
 /**
  * Sleeping logic for static methods. Keeping a static version for this to avoid re-looking for
  * the integer values. Interrupt handling is left to {@code Threads.sleep}.
  *
  * <p>When {@code sleepMultiplier} exceeds {@code hdfsClientRetriesNumber}, this only logs that
  * retries are exhausted and returns without sleeping.
  *
  * @param msg prefix for the log message
  * @param sleepMultiplier factor applied to {@code baseSleepBeforeRetries} to get the sleep time
  */
 protected static void sleepBeforeRetry(String msg, int sleepMultiplier) {
   if (sleepMultiplier > hdfsClientRetriesNumber) {
     LOG.warn(msg + ", retries exhausted");
     return;
   }
   LOG.info(msg + ", sleeping " + baseSleepBeforeRetries + " times " + sleepMultiplier);
   // Widen to long before multiplying so a large base sleep times a large multiplier cannot
   // overflow int arithmetic; this matches the four-argument overload.
   Threads.sleep((long) baseSleepBeforeRetries * sleepMultiplier);
 }
Beispiel #11
0
 /**
  * Sleeping logic for static methods. Keeping a static version for this to avoid re-looking for
  * the integer values. Interrupt handling is left to {@code Threads.sleep}.
  *
  * @param msg prefix for the log message
  * @param sleepMultiplier factor applied to the base sleep
  * @param baseSleepBeforeRetries base sleep, multiplied by {@code sleepMultiplier}
  * @param hdfsClientRetriesNumber multiplier above which retries count as exhausted
  */
 private static void sleepBeforeRetry(
     String msg, int sleepMultiplier, int baseSleepBeforeRetries, int hdfsClientRetriesNumber) {
   if (sleepMultiplier <= hdfsClientRetriesNumber) {
     LOG.debug(msg + ", sleeping " + baseSleepBeforeRetries + " times " + sleepMultiplier);
     // Multiply in long arithmetic to avoid int overflow.
     final long pause = (long) baseSleepBeforeRetries * sleepMultiplier;
     Threads.sleep(pause);
   } else {
     LOG.debug(msg + ", retries exhausted");
   }
 }
  /**
   * Moves the test table's first region to a freshly started region server, writes one row and
   * checks the observer's flags and counters, then kills that server and checks the put counters
   * again once all regions are reassigned. The table is deleted and closed in a finally block.
   *
   * @throws Exception if any cluster operation or verification fails
   */
  @Test
  public void testRecovery() throws Exception {
    LOG.info(TestRegionObserverInterface.class.getName() + ".testRecovery");
    TableName tableName = TableName.valueOf(TEST_TABLE.getNameAsString() + ".testRecovery");
    HTable table = util.createTable(tableName, new byte[][] {A, B, C});
    try {
      JVMClusterUtil.RegionServerThread rs1 = cluster.startRegionServer();
      ServerName sn2 = rs1.getRegionServer().getServerName();
      String regEN = table.getRegionLocations().firstEntry().getKey().getEncodedName();

      util.getHBaseAdmin().move(regEN.getBytes(), sn2.getServerName().getBytes());
      // Poll until the region is reported on the new server.
      while (!sn2.equals(table.getRegionLocations().firstEntry().getValue())) {
        Thread.sleep(100);
      }

      Put put = new Put(ROW);
      put.add(A, A, A);
      put.add(B, B, B);
      put.add(C, C, C);
      table.put(put);

      // After the single put: put and batch-mutate hooks are expected set, get and delete not.
      verifyMethodResult(
          SimpleRegionObserver.class,
          new String[] {
            "hadPreGet",
            "hadPostGet",
            "hadPrePut",
            "hadPostPut",
            "hadPreBatchMutate",
            "hadPostBatchMutate",
            "hadDelete"
          },
          tableName,
          new Boolean[] {false, false, true, true, true, true, false});

      // Before the kill: no WAL restore calls, one pre-put and one post-put.
      verifyMethodResult(
          SimpleRegionObserver.class,
          new String[] {"getCtPreWALRestore", "getCtPostWALRestore", "getCtPrePut", "getCtPostPut"},
          tableName,
          new Integer[] {0, 0, 1, 1});

      cluster.killRegionServer(rs1.getRegionServer().getServerName());
      Threads.sleep(1000); // Let the kill soak in.
      util.waitUntilAllRegionsAssigned(tableName);
      LOG.info("All regions assigned");

      // After reassignment the put counters are expected to read zero.
      verifyMethodResult(
          SimpleRegionObserver.class,
          new String[] {"getCtPrePut", "getCtPostPut"},
          tableName,
          new Integer[] {0, 0});
    } finally {
      util.deleteTable(tableName);
      table.close();
    }
  }
 /**
  * Watchdog body: sleeps for {@code timeout}; if the sleep completes, prints thread info to
  * standard error and exits the JVM with status 1. Being interrupted during the sleep is the
  * normal way to cancel the watchdog, in which case the method simply returns.
  */
 @Override
 public void run() {
   try {
     Thread.sleep(timeout);
     // Reaching this point means nobody cancelled the watchdog in time.
     Threads.printThreadInfo(System.err, "TEST TIMEOUT STACK DUMP");
     System.exit(1); // a timeout happened
   } catch (InterruptedException expected) {
     // this is what we want
   }
 }
Beispiel #14
0
 /**
  * Test hook: when {@code throwException} is set, logs "COUNTDOWN", polls in 1-unit sleeps while
  * the latch count is not positive, and then counts the latch down. Does nothing otherwise.
  */
 @Override
 protected void beforeWaitOnSafePoint() {
   if (!throwException) {
     return;
   }
   LOG.info("COUNTDOWN");
   // Don't countdown latch until someone waiting on it otherwise, the above
   // afterCreatingZigZagLatch will get to the latch and no one will ever free it and we'll
   // be stuck; test won't go down
   while (this.latch.getCount() <= 0) {
     Threads.sleep(1);
   }
   this.latch.countDown();
 }
 /**
  * Creates the executor used for scanning on behalf of consumers of the given stream. Threads
  * are daemon threads named after the stream's namespace and id; the pool has a core size of 1,
  * a maximum of 20, a 60-second keep-alive, a synchronous hand-off queue, and lets core threads
  * time out.
  *
  * @param streamId the stream whose namespace id and id are used in the thread names
  * @return the newly created executor
  */
 private ExecutorService createScanExecutor(Id.Stream streamId) {
   String namePrefix =
       String.format(
           "stream-%s-%s-consumer-scanner-", streamId.getNamespaceId(), streamId.getId());
   ThreadFactory daemonFactory = Threads.newDaemonThreadFactory(namePrefix);
   ThreadPoolExecutor scanPool =
       new ThreadPoolExecutor(
           1, 20, 60, TimeUnit.SECONDS, new SynchronousQueue<Runnable>(), daemonFactory);
   scanPool.allowCoreThreadTimeOut(true);
   return scanPool;
 }
 /**
  * Increments the invocation counter and, on the first invocation only, sleeps for the currently
  * configured {@code sleepTime}. The append result is passed through unchanged.
  *
  * @param e the observer context (unused)
  * @param append the Append that was executed (unused)
  * @param result the result of the append
  * @return {@code result}, unmodified
  * @throws IOException declared by the overridden signature; not thrown by this body
  */
 @Override
 public Result postAppend(
     final ObserverContext<RegionCoprocessorEnvironment> e,
     final Append append,
     final Result result)
     throws IOException {
   final boolean firstInvocation = ct.incrementAndGet() == 1;
   if (firstInvocation) {
     Threads.sleep(sleepTime.get());
   }
   return result;
 }
Beispiel #17
0
 /**
  * Shuts the cluster down: stops every non-active master first, then shuts down the active
  * master, stops and joins each live fserver, and finally waits until every master is no longer
  * alive.
  *
  * @param masters master threads to shut down; may be {@code null}
  * @param fservers fserver threads to stop; may be {@code null}
  * @throws IOException declared by the signature for the master stop/shutdown calls
  */
 public static void shutdown(final List<MasterThread> masters, final List<FServerThread> fservers)
     throws IOException {
   LOG.debug("Shutting down HBase Cluster");
   if (masters != null) {
     // Do backups first.
     JVMClusterUtil.MasterThread active = null;
     for (JVMClusterUtil.MasterThread candidate : masters) {
       if (candidate.master.isActiveMaster()) {
         active = candidate;
       } else {
         candidate.master.stopMaster();
       }
     }
     // Do active after.
     if (active != null) {
       active.master.shutdown();
     }
   }
   if (fservers != null) {
     for (FServerThread serverThread : fservers) {
       if (!serverThread.isAlive()) {
         continue;
       }
       try {
         serverThread.getFServer().stop("Shutdown requested");
         serverThread.join();
       } catch (InterruptedException e) {
         // continue
       }
     }
   }
   if (masters != null) {
     for (JVMClusterUtil.MasterThread masterThread : masters) {
       while (masterThread.master.isAlive()) {
         try {
           // The below has been replaced to debug sometime hangs on end of
           // tests.
           // this.master.join():
           Threads.threadDumpingIsAlive(masterThread.master.getThread());
         } catch (InterruptedException e) {
           // continue
         }
       }
     }
   }
   final String masterCount = (masters != null) ? String.valueOf(masters.size()) : "0";
   final String fserverCount = (fservers != null) ? String.valueOf(fservers.size()) : "0";
   LOG.info(
       "Shutdown of " + masterCount + " master(s) and " + fserverCount + " fserver(s) complete");
 }
 /**
  * Fake multi handler that tracks how many invocations are in flight. When the in-flight count
  * reaches {@code tooManyMultiRequests}, the call is rejected with a
  * {@code RegionTooBusyException} wrapped in a {@code ServiceException}; otherwise it pauses for
  * {@code multiPause} and builds the response. The in-flight counter is always decremented on
  * the way out.
  *
  * @param controller the RPC controller (unused)
  * @param request the multi request to answer
  * @return the generated response
  * @throws ServiceException when too many invocations are in flight
  */
 @Override
 public MultiResponse multi(RpcController controller, MultiRequest request)
     throws ServiceException {
   final int inFlight = this.multiInvocationsCount.incrementAndGet();
   try {
     if (inFlight < tooManyMultiRequests) {
       Threads.sleep(multiPause);
       return doMultiResponse(meta, sequenceids, request);
     }
     throw new ServiceException(
         new RegionTooBusyException("concurrentInvocations=" + inFlight));
   } finally {
     this.multiInvocationsCount.decrementAndGet();
   }
 }
 /**
  * Wait for Mini HBase Cluster to shut down. Presumes you've already called {@link #shutdown()}.
  *
  * <p>Region threads are waited on first, then master threads. An interrupt while waiting on a
  * thread is logged at debug level and the loop moves on to the next thread.
  */
 public void join() {
   if (this.regionThreads != null) {
     for (Thread regionThread : this.regionThreads) {
       if (!regionThread.isAlive()) {
         continue;
       }
       try {
         Threads.threadDumpingIsAlive(regionThread);
       } catch (InterruptedException e) {
         LOG.debug("Interrupted", e);
       }
     }
   }
   if (this.masterThreads != null) {
     for (Thread masterThread : this.masterThreads) {
       if (!masterThread.isAlive()) {
         continue;
       }
       try {
         Threads.threadDumpingIsAlive(masterThread);
       } catch (InterruptedException e) {
         LOG.debug("Interrupted", e);
       }
     }
   }
 }
  /**
   * Starts a ten-server cluster and thirty clients sharing one RPC client, lets a chaos monkey run
   * against the cluster for thirty seconds, and then stops everything, rethrowing any exception
   * recorded by the monkey or a client and asserting each client made more than ten calls.
   *
   * @param isSyncClient passed to {@code createRpcClient} to select the client implementation
   * @throws Throwable any exception recorded by the monkey or a client, or a setup failure
   */
  public void testRpcWithChaosMonkey(boolean isSyncClient) throws Throwable {
    LOG.info("Starting test");
    Cluster cluster = new Cluster(10, 100);
    for (int started = 0; started < 10; started++) {
      cluster.startServer();
    }

    ArrayList<SimpleClient> clients = new ArrayList<>();

    // all threads should share the same rpc client
    AbstractRpcClient<?> rpcClient = createRpcClient(conf, isSyncClient);

    for (int clientIndex = 0; clientIndex < 30; clientIndex++) {
      String clientId = "client_" + clientIndex + "_";
      LOG.info("Starting client: " + clientId);
      SimpleClient client = new SimpleClient(cluster, rpcClient, clientId);
      client.start();
      clients.add(client);
    }

    LOG.info("Starting MiniChaosMonkey");
    MiniChaosMonkey monkey = new MiniChaosMonkey(cluster);
    monkey.start();

    // Let the monkey and the clients run concurrently for a while.
    Threads.sleep(30000);

    LOG.info("Stopping MiniChaosMonkey");
    monkey.stopRunning();
    monkey.join();
    monkey.rethrowException();

    LOG.info("Stopping clients");
    for (SimpleClient client : clients) {
      LOG.info("Stopping client: " + client.id);
      LOG.info(client.id + " numCalls:" + client.numCalls);
      client.stopRunning();
      client.join();
      client.rethrowException();
      assertTrue(client.numCalls > 10);
    }

    LOG.info("Stopping RpcClient");
    rpcClient.close();

    LOG.info("Stopping Cluster");
    cluster.stopRunning();
  }
Beispiel #21
0
      /**
       * Until stopped, repeatedly picks a random region from meta and, if it belongs to the test
       * table and has distinct numeric start and end keys, records a split of it at the midpoint
       * in meta. Any throwable is stored in {@code ex} and turned into a test failure.
       */
      @Override
      public void run() {
        while (!isStopped()) {
          try {
            List<HRegionInfo> regions =
                MetaScanner.listAllRegions(TEST_UTIL.getConfiguration(), connection, false);

            // select a random region
            HRegionInfo parent = regions.get(random.nextInt(regions.size()));
            if (parent == null || !TABLENAME.equals(parent.getTable())) {
              // Not a region of the table under test; retry immediately (no sleep on this path).
              continue;
            }

            // An empty start/end key is treated as 0 / Long.MAX_VALUE for the midpoint computation.
            long startKey = 0, endKey = Long.MAX_VALUE;
            byte[] start = parent.getStartKey();
            byte[] end = parent.getEndKey();
            if (!Bytes.equals(HConstants.EMPTY_START_ROW, parent.getStartKey())) {
              startKey = Bytes.toLong(parent.getStartKey());
            }
            if (!Bytes.equals(HConstants.EMPTY_END_ROW, parent.getEndKey())) {
              endKey = Bytes.toLong(parent.getEndKey());
            }
            if (startKey == endKey) {
              continue;
            }

            // BigDecimal arithmetic avoids long overflow when adding the two keys.
            long midKey =
                BigDecimal.valueOf(startKey)
                    .add(BigDecimal.valueOf(endKey))
                    .divideToIntegralValue(BigDecimal.valueOf(2))
                    .longValue();

            // The daughters keep the parent's original boundary bytes and meet at midKey.
            HRegionInfo splita = new HRegionInfo(TABLENAME, start, Bytes.toBytes(midKey));
            HRegionInfo splitb = new HRegionInfo(TABLENAME, Bytes.toBytes(midKey), end);

            MetaTableAccessor.splitRegion(
                connection, parent, splita, splitb, ServerName.valueOf("fooserver", 1, 0));

            Threads.sleep(random.nextInt(200));
          } catch (Throwable e) {
            ex = e;
            Assert.fail(StringUtils.stringifyException(e));
          }
        }
      }
Beispiel #22
0
  /**
   * Starts a three-node mini cluster, creates the test tables, restarts the HBase cluster, and
   * checks that the same number of regions is listed afterwards and that re-creating each table
   * fails with {@code TableExistsException}.
   *
   * @throws Exception if any cluster operation fails
   */
  @Test(timeout = 300000)
  public void testClusterRestart() throws Exception {
    UTIL.startMiniCluster(3);
    while (!UTIL.getMiniHBaseCluster().getMaster().isInitialized()) {
      Threads.sleep(1);
    }
    LOG.info("\n\nCreating tables");
    for (byte[] table : TABLES) {
      UTIL.createTable(table, FAMILY);
    }
    for (byte[] table : TABLES) {
      UTIL.waitTableEnabled(table);
    }

    List<HRegionInfo> regionsBeforeRestart =
        MetaScanner.listAllRegions(UTIL.getConfiguration(), true);
    assertEquals(4, regionsBeforeRestart.size());

    LOG.info("\n\nShutting down cluster");
    UTIL.shutdownMiniHBaseCluster();

    LOG.info("\n\nSleeping a bit");
    Thread.sleep(2000);

    LOG.info("\n\nStarting cluster the second time");
    UTIL.restartHBaseCluster(3);

    // Need to use a new 'Configuration' so we make a new HConnection.
    // Otherwise we're reusing an HConnection that has gone stale because
    // the shutdown of the cluster also called shut of the connection.
    List<HRegionInfo> regionsAfterRestart =
        MetaScanner.listAllRegions(new Configuration(UTIL.getConfiguration()), true);
    assertEquals(4, regionsAfterRestart.size());
    LOG.info("\n\nWaiting for tables to be available");
    for (byte[] table : TABLES) {
      try {
        UTIL.createTable(table, FAMILY);
        // Reaching this line means the create unexpectedly succeeded.
        assertTrue("Able to create table that should already exist", false);
      } catch (TableExistsException tee) {
        LOG.info("Table already exists as expected");
      }
      UTIL.waitTableAvailable(table);
    }
  }
  /**
   * Stops the master, waits for its thread to exit, and then makes sure the meta region is hosted
   * by a region server: if meta is already located on a server other than the stopped master,
   * nothing more is done; otherwise meta is opened on the first live region server and the method
   * polls until that server is reported as the meta location and has the region online.
   *
   * @param HTU the testing utility whose cluster is manipulated
   * @throws IOException declared by the signature for the cluster and region-open calls
   * @throws InterruptedException if interrupted while polling for meta to come online
   */
  public static void stopMasterAndAssignMeta(HBaseTestingUtility HTU)
      throws IOException, InterruptedException {
    // Stop master
    HMaster stoppedMaster = HTU.getHBaseCluster().getMaster();
    ServerName masterName = stoppedMaster.getServerName();
    stoppedMaster.stopMaster();

    Log.info("Waiting until master thread exits");
    while (HTU.getHBaseCluster().getMasterThread() != null
        && HTU.getHBaseCluster().getMasterThread().isAlive()) {
      Threads.sleep(100);
    }

    HRegionServer.TEST_SKIP_REPORTING_TRANSITION = true;
    // Master is down, so is the meta. We need to assign it somewhere
    // so that regions can be assigned during the mocking phase.
    HRegionServer host = HTU.getHBaseCluster().getLiveRegionServerThreads().get(0).getRegionServer();
    ZooKeeperWatcher watcher = host.getZooKeeper();
    MetaTableLocator locator = new MetaTableLocator();
    ServerName metaLocation = locator.getMetaRegionLocation(watcher);
    if (metaLocation != null && !masterName.equals(metaLocation)) {
      return;
    }

    ProtobufUtil.openRegion(
        host.getRSRpcServices(), host.getServerName(), HRegionInfo.FIRST_META_REGIONINFO);
    boolean metaOnline = false;
    while (!metaOnline) {
      metaLocation = locator.getMetaRegionLocation(watcher);
      metaOnline =
          metaLocation != null
              && metaLocation.equals(host.getServerName())
              && host.onlineRegions.containsKey(
                  HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
      if (!metaOnline) {
        Thread.sleep(100);
      }
    }
  }
  /**
   * Configures a faked many-servers/many-regions connection through the tool's configuration,
   * then runs {@code cycle} on a fixed number of client threads that share one connection, and
   * waits for them to finish.
   *
   * @param arg0 command-line arguments; not read by this method
   * @return the error code, which this method never changes from 0
   * @throws Exception if the argument sanity check fails or connection setup/teardown fails
   */
  @Override
  public int run(String[] arg0) throws Exception {
    int errCode = 0;
    // TODO: Make command options.
    // How many servers to fake.
    final int servers = 1;
    // How many regions to put on the faked servers.
    final int regions = 100000;
    // How many 'keys' in the faked regions.
    final long namespaceSpan = 50000000;
    // How long to take to pause after doing a put; make this long if you want to fake a struggling
    // server.
    final long multiPause = 0;
    // Check args make basic sense.
    if ((namespaceSpan < regions) || (regions < servers)) {
      throw new IllegalArgumentException(
          "namespaceSpan="
              + namespaceSpan
              + " must be > regions="
              + regions
              + " which must be > servers="
              + servers);
    }

    // Set my many servers and many regions faking connection in place.
    getConf().set("hbase.client.connection.impl", ManyServersManyRegionsConnection.class.getName());
    // Use simple kv registry rather than zk
    getConf().set("hbase.client.registry.impl", SimpleRegistry.class.getName());
    // When to report fails.  Default is we report the 10th.  This means we'll see log everytime
    // an exception is thrown -- usually RegionTooBusyException when we have more than
    // hbase.test.multi.too.many requests outstanding at any time.
    getConf().setInt("hbase.client.start.log.errors.counter", 0);

    // Ugly but this is only way to pass in configs.into ManyServersManyRegionsConnection class.
    getConf().setInt("hbase.test.regions", regions);
    getConf().setLong("hbase.test.namespace.span", namespaceSpan);
    getConf().setLong("hbase.test.servers", servers);
    getConf().set("hbase.test.tablename", Bytes.toString(BIG_USER_TABLE));
    getConf().setLong("hbase.test.multi.pause.when.done", multiPause);
    // Let there be ten outstanding requests at a time before we throw RegionBusyException.
    getConf().setInt("hbase.test.multi.too.many", 10);
    final int clients = 2;

    // Have them all share the same connection so they all share the same instance of
    // ManyServersManyRegionsConnection so I can keep an eye on how many requests by server.
    // NOTE(review): 'pool' is created but not used in this method (the createConnection argument
    // below is commented out) and it is not shut down here.
    final ExecutorService pool = Executors.newCachedThreadPool(Threads.getNamedThreadFactory("p"));
    // Executors.newFixedThreadPool(servers * 10, Threads.getNamedThreadFactory("p"));
    // Share a connection so I can keep counts in the 'server' on concurrency.
    final HConnection sharedConnection = HConnectionManager.createConnection(getConf() /*, pool*/);
    try {
      Thread[] ts = new Thread[clients];
      for (int j = 0; j < ts.length; j++) {
        final int id = j;
        ts[j] =
            new Thread("" + j) {
              final Configuration c = getConf();

              @Override
              public void run() {
                try {
                  cycle(id, c, sharedConnection);
                } catch (IOException e) {
                  // An IOException from a client is only printed; it does not affect errCode.
                  e.printStackTrace();
                }
              }
            };
        ts[j].start();
      }
      // Wait for every client thread before closing the shared connection.
      for (int j = 0; j < ts.length; j++) {
        ts[j].join();
      }
    } finally {
      sharedConnection.close();
    }
    return errCode;
  }
  /**
   * Starts {@code nbThread} threads that each perform a Get, interrupts the first half of them,
   * and asserts that exactly half of the Gets completed without exception and that no unexpected
   * exception type was seen. Finally checks that a Get from the test thread still returns data.
   *
   * @throws IOException if the final Get fails
   * @throws InterruptedException if the test thread is interrupted while waiting
   */
  @Test
  public void testInterrupt50Percent() throws IOException, InterruptedException {
    // noEx: Gets that completed normally. badEx: IOExceptions that are not an
    // InterruptedIOException, or that are a SocketTimeoutException. noInt: threads that still had
    // the interrupt flag set after an InterruptedIOException. done: threads that finished.
    final AtomicInteger noEx = new AtomicInteger(0);
    final AtomicInteger badEx = new AtomicInteger(0);
    final AtomicInteger noInt = new AtomicInteger(0);
    final AtomicInteger done = new AtomicInteger(0);
    List<Thread> threads = new ArrayList<Thread>();

    final int nbThread = 100;

    for (int i = 0; i < nbThread; i++) {
      Thread t =
          new Thread() {
            @Override
            public void run() {
              try {
                Table ht = util.getConnection().getTable(tableName);
                Result r = ht.get(new Get(row1));
                noEx.incrementAndGet();
              } catch (IOException e) {
                LOG.info("exception", e);
                if (!(e instanceof InterruptedIOException)
                    || (e instanceof SocketTimeoutException)) {
                  badEx.incrementAndGet();
                } else {
                  if (Thread.currentThread().isInterrupted()) {
                    noInt.incrementAndGet();
                    LOG.info("The thread should NOT be with the 'interrupt' status.");
                  }
                }
              } finally {
                done.incrementAndGet();
              }
            }
          };
      t.setName("TestClientOperationInterrupt #" + i);
      threads.add(t);
      t.start();
    }

    // Interrupt the first half of the threads.
    for (int i = 0; i < nbThread / 2; i++) {
      threads.get(i).interrupt();
    }

    // Poll until no worker thread is alive any more.
    boolean stillAlive = true;
    while (stillAlive) {
      stillAlive = false;
      for (Thread t : threads) {
        if (t.isAlive()) {
          stillAlive = true;
        }
      }
      Threads.sleep(10);
    }

    // The test thread itself must not have picked up an interrupt.
    Assert.assertFalse(Thread.currentThread().isInterrupted());

    // noInt is reported in the failure message but is not part of the asserted condition.
    Assert.assertTrue(
        " noEx: " + noEx.get() + ", badEx=" + badEx.get() + ", noInt=" + noInt.get(),
        noEx.get() == nbThread / 2 && badEx.get() == 0);

    // The problem here is that we need the server to free its handlers to handle all operations
    while (done.get() != nbThread) {
      Thread.sleep(1);
    }

    Table ht = util.getConnection().getTable(tableName);
    Result r = ht.get(new Get(row1));
    Assert.assertFalse(r.isEmpty());
  }
Beispiel #26
0
  /**
   * Reproduce locking up that happens when we get an inopportune sync during setup for zigzaglatch
   * wait. See HBASE-14317. If below is broken, we will see this test timeout because it is locked
   * up.
   *
   * <p>First I need to set up some mocks for Server and RegionServerServices. I also need to set up
   * a dodgy WAL that will throw an exception when we go to append to it.
   */
  @Test(timeout = 20000)
  public void testLockupWhenSyncInMiddleOfZigZagSetup() throws IOException {
    // A WAL that we can have throw exceptions when a flag is set.
    class DodgyFSLog extends FSHLog {
      // Set this when want the WAL to start throwing exceptions.
      volatile boolean throwException = false;

      // Latch to hold up processing until after another operation has had time to run.
      CountDownLatch latch = new CountDownLatch(1);

      public DodgyFSLog(FileSystem fs, Path root, String logDir, Configuration conf)
          throws IOException {
        super(fs, root, logDir, conf);
      }

      @Override
      protected void afterCreatingZigZagLatch() {
        // If throwException set, then append will throw an exception causing the WAL to be
        // rolled. We'll come in here. Hold up processing until a sync can get in before
        // the zigzag has time to complete its setup and get its own sync in. This is what causes
        // the lock up we've seen in production.
        if (throwException) {
          try {
            LOG.info("LATCHED");
            // So, timing can have it that the test can run and the bad flush below happens
            // before we get here. In this case, we'll be stuck waiting on this latch but there
            // is nothing in the WAL pipeline to get us to the below beforeWaitOnSafePoint...
            // because all WALs have rolled. In this case, just give up on test.
            if (!this.latch.await(5, TimeUnit.SECONDS)) {
              LOG.warn("GIVE UP! Failed waiting on latch...Test is ABORTED!");
            }
          } catch (InterruptedException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
          }
        }
      }

      @Override
      protected void beforeWaitOnSafePoint() {
        if (throwException) {
          LOG.info("COUNTDOWN");
          // Don't countdown latch until someone waiting on it otherwise, the above
          // afterCreatingZigZagLatch will get to the latch and no one will ever free it and we'll
          // be stuck; test won't go down
          while (this.latch.getCount() <= 0) Threads.sleep(1);
          this.latch.countDown();
        }
      }

      @Override
      protected Writer createWriterInstance(Path path) throws IOException {
        final Writer w = super.createWriterInstance(path);
        return new Writer() {
          @Override
          public void close() throws IOException {
            w.close();
          }

          @Override
          public void sync() throws IOException {
            if (throwException) {
              throw new IOException("FAKE! Failed to replace a bad datanode...SYNC");
            }
            w.sync();
          }

          @Override
          public void append(Entry entry) throws IOException {
            if (throwException) {
              throw new IOException("FAKE! Failed to replace a bad datanode...APPEND");
            }
            w.append(entry);
          }

          @Override
          public long getLength() {
            return w.getLength();
          }
        };
      }
    }

    // Mocked up server and regionserver services. Needed below.
    Server server = Mockito.mock(Server.class);
    Mockito.when(server.getConfiguration()).thenReturn(CONF);
    Mockito.when(server.isStopped()).thenReturn(false);
    Mockito.when(server.isAborted()).thenReturn(false);
    RegionServerServices services = Mockito.mock(RegionServerServices.class);

    // OK. Now I have my mocked up Server & RegionServerServices and dodgy WAL, go ahead with test.
    FileSystem fs = FileSystem.get(CONF);
    Path rootDir = new Path(dir + getName());
    DodgyFSLog dodgyWAL = new DodgyFSLog(fs, rootDir, getName(), CONF);
    Path originalWAL = dodgyWAL.getCurrentFileName();
    // I need a log roller running.
    LogRoller logRoller = new LogRoller(server, services);
    logRoller.addWAL(dodgyWAL);
    // There is no 'stop' once a logRoller is running.. it just dies.
    logRoller.start();
    // Now get a region and start adding in edits.
    HTableDescriptor htd = new HTableDescriptor(TableName.META_TABLE_NAME);
    final HRegion region = initHRegion(tableName, null, null, dodgyWAL);
    byte[] bytes = Bytes.toBytes(getName());
    NavigableMap<byte[], Integer> scopes = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
    scopes.put(COLUMN_FAMILY_BYTES, 0);
    MultiVersionConcurrencyControl mvcc = new MultiVersionConcurrencyControl();
    try {
      // First get something into memstore. Make a Put and then pull the Cell out of it. Will
      // manage append and sync carefully in below to manufacture hang. We keep adding same
      // edit. WAL subsystem doesn't care.
      Put put = new Put(bytes);
      put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), bytes);
      WALKey key =
          new WALKey(
              region.getRegionInfo().getEncodedNameAsBytes(),
              htd.getTableName(),
              System.currentTimeMillis(),
              mvcc,
              scopes);
      WALEdit edit = new WALEdit();
      CellScanner CellScanner = put.cellScanner();
      assertTrue(CellScanner.advance());
      edit.add(CellScanner.current());
      // Put something in memstore and out in the WAL. Do a big number of appends so we push
      // out other side of the ringbuffer. If small numbers, stuff doesn't make it to WAL
      for (int i = 0; i < 1000; i++) {
        region.put(put);
      }
      // Set it so we start throwing exceptions.
      LOG.info("SET throwing of exception on append");
      dodgyWAL.throwException = true;
      // This append provokes a WAL roll request
      dodgyWAL.append(region.getRegionInfo(), key, edit, true);
      boolean exception = false;
      try {
        dodgyWAL.sync();
      } catch (Exception e) {
        exception = true;
      }
      assertTrue("Did not get sync exception", exception);

      // Get a memstore flush going too so we have same hung profile as up in the issue over
      // in HBASE-14317. Flush hangs trying to get sequenceid because the ringbuffer is held up
      // by the zigzaglatch waiting on syncs to come home.
      Thread t =
          new Thread("Flusher") {
            public void run() {
              try {
                if (region.getMemstoreSize() <= 0) {
                  throw new IOException("memstore size=" + region.getMemstoreSize());
                }
                region.flush(false);
              } catch (IOException e) {
                // Can fail trying to flush in middle of a roll. Not a failure. Will succeed later
                // when roll completes.
                LOG.info("In flush", e);
              }
              LOG.info("Exiting");
            };
          };
      t.setDaemon(true);
      t.start();
      // Wait until
      while (dodgyWAL.latch.getCount() > 0) Threads.sleep(1);
      // Now assert I got a new WAL file put in place even though loads of errors above.
      assertTrue(originalWAL != dodgyWAL.getCurrentFileName());
      // Can I append to it?
      dodgyWAL.throwException = false;
      try {
        region.put(put);
      } catch (Exception e) {
        LOG.info("In the put", e);
      }
    } finally {
      // To stop logRoller, its server has to say it is stopped.
      Mockito.when(server.isStopped()).thenReturn(true);
      if (logRoller != null) logRoller.close();
      try {
        if (region != null) region.close();
        if (dodgyWAL != null) dodgyWAL.close();
      } catch (Exception e) {
        LOG.info("On way out", e);
      }
    }
  }
Beispiel #27
0
  /**
   * Reproduce the lock-up that happens when there are no further syncs after an append fails,
   * leaving an isolated sync waiting forever. See HBASE-16960. If below is broken, we will see
   * this test timeout because it is locked up.
   *
   * <p>Steps to reproduce:<br>
   * 1. Trigger server abort through dodgyWAL1<br>
   * 2. Add a {@link DummyWALActionsListener} to dodgyWAL2 to cause ringbuffer event handler thread
   * sleep for a while thus keeping {@code endOfBatch} false<br>
   * 3. Publish a sync then an append which will throw exception, check whether the sync could
   * return
   *
   * @throws IOException if setting up the WALs or the region fails, or if the test thread is
   *     interrupted during one of its fixed pauses
   */
  @Test(timeout = 20000)
  public void testLockup16960() throws IOException {
    // A WAL that we can have throw exceptions when a flag is set.
    class DodgyFSLog extends FSHLog {
      // Set this when want the WAL to start throwing exceptions.
      volatile boolean throwException = false;

      public DodgyFSLog(FileSystem fs, Path root, String logDir, Configuration conf)
          throws IOException {
        super(fs, root, logDir, conf);
      }

      @Override
      protected Writer createWriterInstance(Path path) throws IOException {
        final Writer w = super.createWriterInstance(path);
        // Wrap the real writer: close/getLength always delegate, while sync/append fail for as
        // long as throwException is set.
        return new Writer() {
          @Override
          public void close() throws IOException {
            w.close();
          }

          @Override
          public void sync() throws IOException {
            if (throwException) {
              throw new IOException("FAKE! Failed to replace a bad datanode...SYNC");
            }
            w.sync();
          }

          @Override
          public void append(Entry entry) throws IOException {
            if (throwException) {
              throw new IOException("FAKE! Failed to replace a bad datanode...APPEND");
            }
            w.append(entry);
          }

          @Override
          public long getLength() {
            return w.getLength();
          }
        };
      }

      @Override
      protected long doReplaceWriter(Path oldPath, Path newPath, Writer nextWriter)
          throws IOException {
        // While the flag is set, replacing the writer fails too.
        if (throwException) {
          throw new FailedLogCloseException("oldPath=" + oldPath + ", newPath=" + newPath);
        }
        return super.doReplaceWriter(oldPath, newPath, nextWriter);
      }
    }

    // Mocked up server and regionserver services. Needed below.
    Server server =
        new DummyServer(CONF, ServerName.valueOf("hostname1.example.org", 1234, 1L).toString());
    RegionServerServices services = Mockito.mock(RegionServerServices.class);

    CONF.setLong("hbase.regionserver.hlog.sync.timeout", 10000);

    // OK. Now I have my mocked up Server & RegionServerServices and dodgy WAL,
    // go ahead with test.
    FileSystem fs = FileSystem.get(CONF);
    Path rootDir = new Path(dir + getName());
    DodgyFSLog dodgyWAL1 = new DodgyFSLog(fs, rootDir, getName(), CONF);

    Path rootDir2 = new Path(dir + getName() + "2");
    final DodgyFSLog dodgyWAL2 = new DodgyFSLog(fs, rootDir2, getName() + "2", CONF);
    // Add a listener to force ringbuffer event handler sleep for a while
    dodgyWAL2.registerWALActionsListener(new DummyWALActionsListener());

    // I need a log roller running.
    LogRoller logRoller = new LogRoller(server, services);
    logRoller.addWAL(dodgyWAL1);
    logRoller.addWAL(dodgyWAL2);
    // There is no 'stop' once a logRoller is running.. it just dies.
    logRoller.start();
    // Now get a region and start adding in edits.
    HTableDescriptor htd = new HTableDescriptor(TableName.META_TABLE_NAME);
    final HRegion region = initHRegion(tableName, null, null, dodgyWAL1);
    byte[] bytes = Bytes.toBytes(getName());
    NavigableMap<byte[], Integer> scopes = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
    scopes.put(COLUMN_FAMILY_BYTES, 0);
    MultiVersionConcurrencyControl mvcc = new MultiVersionConcurrencyControl();
    try {
      // Build a single-cell edit out of a Put; the same edit is reused for every append below.
      Put put = new Put(bytes);
      put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), bytes);
      WALKey key =
          new WALKey(
              region.getRegionInfo().getEncodedNameAsBytes(),
              htd.getTableName(),
              System.currentTimeMillis(),
              mvcc,
              scopes);
      WALEdit edit = new WALEdit();
      CellScanner cellScanner = put.cellScanner();
      assertTrue(cellScanner.advance());
      edit.add(cellScanner.current());

      LOG.info("SET throwing of exception on append");
      dodgyWAL1.throwException = true;
      // This append provokes a WAL roll request
      dodgyWAL1.append(region.getRegionInfo(), key, edit, true);
      boolean exception = false;
      try {
        dodgyWAL1.sync();
      } catch (Exception e) {
        exception = true;
      }
      assertTrue("Did not get sync exception", exception);

      // LogRoller call dodgyWAL1.rollWriter get FailedLogCloseException and
      // cause server abort.
      try {
        // wait LogRoller exit.
        Thread.sleep(50);
      } catch (InterruptedException e) {
        // Do not swallow the interrupt: restore the flag and stop the test. Cleanup still runs
        // in the finally block below.
        Thread.currentThread().interrupt();
        throw new IOException("Interrupted while waiting for LogRoller to exit", e);
      }

      final CountDownLatch latch = new CountDownLatch(1);

      // make RingBufferEventHandler sleep 1s, so the following sync
      // endOfBatch=false
      // NOTE(review): presumably DummyWALActionsListener keys its sleep off the "sleep" table
      // name used here; confirm against that class.
      key =
          new WALKey(
              region.getRegionInfo().getEncodedNameAsBytes(),
              TableName.valueOf("sleep"),
              System.currentTimeMillis(),
              mvcc,
              scopes);
      dodgyWAL2.append(region.getRegionInfo(), key, edit, true);

      // Run the sync on its own thread; the latch is released once sync() has returned or thrown.
      Thread t =
          new Thread("Sync") {
            @Override
            public void run() {
              try {
                dodgyWAL2.sync();
              } catch (IOException e) {
                LOG.info("In sync", e);
              }
              latch.countDown();
              LOG.info("Sync exiting");
            }
          };
      t.setDaemon(true);
      t.start();
      try {
        // make sure sync have published.
        Thread.sleep(100);
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new IOException("Interrupted while waiting for the sync to be published", e);
      }
      // make append throw DamagedWALException
      key =
          new WALKey(
              region.getRegionInfo().getEncodedNameAsBytes(),
              TableName.valueOf("DamagedWALException"),
              System.currentTimeMillis(),
              mvcc,
              scopes);
      dodgyWAL2.append(region.getRegionInfo(), key, edit, true);

      // If the sync never returns we spin here until the @Test timeout fires.
      while (latch.getCount() > 0) {
        Threads.sleep(100);
      }
      assertTrue(server.isAborted());
    } finally {
      // logRoller and both WALs come straight from 'new', so they cannot be null here.
      logRoller.close();
      // Close each resource in its own try so a failure in one does not leak the others.
      try {
        if (region != null) {
          region.close();
        }
      } catch (Exception e) {
        LOG.info("On way out", e);
      }
      try {
        dodgyWAL1.close();
      } catch (Exception e) {
        LOG.info("On way out", e);
      }
      try {
        dodgyWAL2.close();
      } catch (Exception e) {
        LOG.info("On way out", e);
      }
    }
  }
  /**
   * Runs one bulk-load round trip on a mini cluster: creates a pre-split table, generates HFiles
   * with {@code runIncrementalPELoad}, optionally recreates the table with a different region
   * count, bulk loads the files, then verifies row count, per-row cell consistency, block
   * locality per host, and that the data survives a disable/enable cycle.
   *
   * @param shouldChangeRegions when true, the table is deleted and recreated with 15 regions
   *     after the HFiles are generated and before they are loaded
   * @param shouldKeepLocality value stored under
   *     {@code HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY}; when true the cluster is started
   *     with 3 hosts and the table with 20 regions, otherwise 1 host and 5 regions
   * @throws Exception if any cluster, filesystem or load operation fails
   */
  private void doIncrementalLoadTest(boolean shouldChangeRegions, boolean shouldKeepLocality)
      throws Exception {
    util = new HBaseTestingUtility();
    Configuration conf = util.getConfiguration();
    conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, shouldKeepLocality);
    int hostCount = 1;
    int regionNum = 5;
    if (shouldKeepLocality) {
      // We should change host count higher than hdfs replica count when MiniHBaseCluster supports
      // explicit hostnames parameter just like MiniDFSCluster does.
      hostCount = 3;
      regionNum = 20;
    }

    byte[][] splitKeys = generateRandomSplitKeys(regionNum - 1);
    String[] hostnames = new String[hostCount];
    for (int i = 0; i < hostCount; ++i) {
      hostnames[i] = "datanode_" + i;
    }
    util.startMiniCluster(1, hostCount, hostnames);

    // Everything after cluster start-up sits inside this try so the cluster is always shut down,
    // even if table creation or the cleanup below throws.
    try {
      Table table = util.createTable(TABLE_NAME, FAMILIES, splitKeys);
      Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
      try (RegionLocator r = util.getConnection().getRegionLocator(TABLE_NAME);
          Admin admin = util.getConnection().getAdmin()) {
        assertEquals("Should start with empty table", 0, util.countRows(table));
        int numRegions = r.getStartKeys().length;
        // Expected value first, so a failure reports the two numbers the right way round.
        assertEquals("Should make " + regionNum + " regions", regionNum, numRegions);

        // Generate the bulk load files
        runIncrementalPELoad(conf, table.getTableDescriptor(), r, testDir);
        // This doesn't write into the table, just makes files
        assertEquals("HFOF should not touch actual table", 0, util.countRows(table));

        // Make sure that a directory was created for every CF
        int familyDirs = 0;
        for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) {
          for (byte[] family : FAMILIES) {
            if (Bytes.toString(family).equals(f.getPath().getName())) {
              ++familyDirs;
            }
          }
        }
        assertEquals("Column family not found in FS.", FAMILIES.length, familyDirs);

        // handle the split case
        if (shouldChangeRegions) {
          LOG.info("Changing regions in table");
          admin.disableTable(table.getName());
          while (util.getMiniHBaseCluster()
              .getMaster()
              .getAssignmentManager()
              .getRegionStates()
              .isRegionsInTransition()) {
            Threads.sleep(200);
            LOG.info("Waiting on table to finish disabling");
          }
          util.deleteTable(table.getName());
          final int newRegionCount = 15;
          byte[][] newSplitKeys = generateRandomSplitKeys(newRegionCount - 1);
          table = util.createTable(TABLE_NAME, FAMILIES, newSplitKeys);

          // Poll until all new regions are located and the table is available. A fresh locator
          // is opened for every probe and closed straight away so the polling does not pile up
          // unclosed locators.
          while (true) {
            int locatedRegions;
            try (RegionLocator locator = util.getConnection().getRegionLocator(TABLE_NAME)) {
              locatedRegions = locator.getAllRegionLocations().size();
            }
            if (locatedRegions == newRegionCount && admin.isTableAvailable(table.getName())) {
              break;
            }
            Thread.sleep(200);
            LOG.info("Waiting for new region assignment to happen");
          }
        }

        // Perform the actual load
        new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, r);

        // Ensure data shows up
        int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
        assertEquals(
            "LoadIncrementalHFiles should put expected data in table",
            expectedRows,
            util.countRows(table));
        // Every row must have one cell per family, all sharing the same row key and value.
        // try-with-resources closes the scanner even when an assertion fails mid-scan.
        try (ResultScanner results = table.getScanner(new Scan())) {
          for (Result res : results) {
            assertEquals(FAMILIES.length, res.rawCells().length);
            Cell first = res.rawCells()[0];
            for (Cell kv : res.rawCells()) {
              assertTrue(CellUtil.matchingRow(first, kv));
              assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
            }
          }
        }
        String tableDigestBefore = util.checksumRows(table);

        // Check region locality
        HDFSBlocksDistribution hbd = new HDFSBlocksDistribution();
        for (HRegion region : util.getHBaseCluster().getRegions(TABLE_NAME)) {
          hbd.add(region.getHDFSBlocksDistribution());
        }
        for (String hostname : hostnames) {
          float locality = hbd.getBlockLocalityIndex(hostname);
          LOG.info("locality of [" + hostname + "]: " + locality);
          assertEquals(100, (int) (locality * 100));
        }

        // Cause regions to reopen
        admin.disableTable(TABLE_NAME);
        while (!admin.isTableDisabled(TABLE_NAME)) {
          Thread.sleep(200);
          LOG.info("Waiting for table to disable");
        }
        admin.enableTable(TABLE_NAME);
        util.waitTableAvailable(TABLE_NAME);
        assertEquals(
            "Data should remain after reopening of regions",
            tableDigestBefore,
            util.checksumRows(table));
      } finally {
        testDir.getFileSystem(conf).delete(testDir, true);
        util.deleteTable(TABLE_NAME);
      }
    } finally {
      util.shutdownMiniCluster();
    }
  }
Beispiel #29
0
  @Test
  public void testConcurrentMetaScannerAndCatalogJanitor() throws Throwable {
    /*
     * TEST PLAN: start with only one region in a table. A splitter thread keeps recording region
     * splits in meta while a verifier thread continuously scans the meta table and checks that
     * the table's regions form an unbroken chain of keys. The catalog janitor interval is set to
     * 500 below so that it runs frequently in the meantime.
     */
    TEST_UTIL.getConfiguration().setLong("hbase.catalogjanitor.interval", 500);
    setUp();

    final long runtime = 30000L; // 30 sec
    LOG.info("Starting testConcurrentMetaScannerAndCatalogJanitor");
    final TableName tableUnderTest =
        TableName.valueOf("testConcurrentMetaScannerAndCatalogJanitor");
    TEST_UTIL.createTable(tableUnderTest, Bytes.toBytes("family"));

    // Shared skeleton of both workers: repeat one pass until stopped, remember any failure.
    abstract class MetaWorker extends StoppableImplementation implements Runnable {
      final Random random = new Random();
      Throwable ex = null;

      // A single pass of the worker; returning early moves straight on to the next pass.
      abstract void runOnce() throws Throwable;

      @Override
      public void run() {
        while (!isStopped()) {
          try {
            runOnce();
          } catch (Throwable e) {
            // Keep the failure for the main thread, then fail (and thereby end) this thread.
            ex = e;
            Assert.fail(StringUtils.stringifyException(e));
          }
        }
      }

      // Decodes a row key as a long, substituting valueWhenEmpty for the given empty-row marker.
      long keyAsLong(byte[] key, byte[] emptyMarker, long valueWhenEmpty) {
        if (Bytes.equals(emptyMarker, key)) {
          return valueWhenEmpty;
        }
        return Bytes.toLong(key);
      }

      void rethrowExceptionIfAny() throws Throwable {
        if (ex != null) {
          throw ex;
        }
      }
    }

    // Picks a random region of the table and records a split at the midpoint of its key range.
    class RegionMetaSplitter extends MetaWorker {
      @Override
      void runOnce() throws Throwable {
        List<HRegionInfo> allRegions =
            MetaScanner.listAllRegions(TEST_UTIL.getConfiguration(), connection, false);

        // select a random region
        HRegionInfo candidate = allRegions.get(random.nextInt(allRegions.size()));
        if (candidate == null || !tableUnderTest.equals(candidate.getTable())) {
          return;
        }

        byte[] startRow = candidate.getStartKey();
        byte[] endRow = candidate.getEndKey();
        long low = keyAsLong(startRow, HConstants.EMPTY_START_ROW, 0);
        long high = keyAsLong(endRow, HConstants.EMPTY_END_ROW, Long.MAX_VALUE);
        if (low == high) {
          return;
        }

        // BigDecimal keeps low + high from overflowing a long.
        BigDecimal keySum = BigDecimal.valueOf(low).add(BigDecimal.valueOf(high));
        long middle = keySum.divideToIntegralValue(BigDecimal.valueOf(2)).longValue();

        HRegionInfo lowerHalf = new HRegionInfo(tableUnderTest, startRow, Bytes.toBytes(middle));
        HRegionInfo upperHalf = new HRegionInfo(tableUnderTest, Bytes.toBytes(middle), endRow);

        ServerName fakeServer = ServerName.valueOf("fooserver", 1, 0);
        MetaTableAccessor.splitRegion(connection, candidate, lowerHalf, upperHalf, fakeServer);

        Threads.sleep(random.nextInt(200));
      }
    }

    // Scans meta for the table's regions and asserts each one starts where the last one ended.
    class MetaScannerVerifier extends MetaWorker {
      @Override
      void runOnce() throws Throwable {
        NavigableMap<HRegionInfo, ServerName> regionsOfTable =
            MetaScanner.allTableRegions(connection, tableUnderTest);

        LOG.info("-------");
        byte[] previousEnd = HConstants.EMPTY_START_ROW;
        for (HRegionInfo info : regionsOfTable.navigableKeySet()) {
          long startKey = keyAsLong(info.getStartKey(), HConstants.EMPTY_START_ROW, 0);
          long endKey = keyAsLong(info.getEndKey(), HConstants.EMPTY_END_ROW, Long.MAX_VALUE);
          LOG.info("start:" + startKey + " end:" + endKey + " hri:" + info);
          String gapMessage =
              "lastEndKey="
                  + Bytes.toString(previousEnd)
                  + ", startKey="
                  + Bytes.toString(info.getStartKey());
          Assert.assertTrue(gapMessage, Bytes.equals(previousEnd, info.getStartKey()));
          previousEnd = info.getEndKey();
        }
        // The last region seen must end at the empty end row.
        Assert.assertTrue(Bytes.equals(previousEnd, HConstants.EMPTY_END_ROW));
        LOG.info("-------");
        Threads.sleep(10 + random.nextInt(50));
      }
    }

    MetaWorker splitter = new RegionMetaSplitter();
    MetaWorker verifier = new MetaScannerVerifier();

    Thread splitterThread = new Thread(splitter);
    Thread verifierThread = new Thread(verifier);

    splitterThread.start();
    verifierThread.start();

    // Let both workers run for the whole test window before asking them to stop.
    Threads.sleep(runtime);

    splitter.stop("test finished");
    verifier.stop("test finished");

    splitterThread.join();
    verifierThread.join();

    // Surface whatever either worker captured, splitter first.
    splitter.rethrowExceptionIfAny();
    verifier.rethrowExceptionIfAny();
  }
  /**
   * Runs one bulk-load round trip on a mini cluster: creates a table pre-split into 5 regions,
   * generates HFiles with {@code runIncrementalPELoad} on a mini MapReduce cluster, optionally
   * recreates the table with 15 regions, bulk loads the files, then verifies the row count,
   * per-row cell consistency, and that the data survives a disable/enable cycle.
   *
   * @param shouldChangeRegions when true, the table is deleted and recreated with 15 regions
   *     after the HFiles are generated and before they are loaded
   * @throws Exception if any cluster, filesystem or load operation fails
   */
  private void doIncrementalLoadTest(boolean shouldChangeRegions) throws Exception {
    util = new HBaseTestingUtility();
    Configuration conf = util.getConfiguration();
    byte[][] splitKeys = generateRandomSplitKeys(4);
    util.startMiniCluster();
    try {
      HTable table = util.createTable(TABLE_NAME, FAMILIES, splitKeys);
      Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
      // try-with-resources so the Admin is closed whether the checks below pass or fail.
      try (Admin admin = table.getConnection().getAdmin()) {
        assertEquals("Should start with empty table", 0, util.countRows(table));
        int numRegions = -1;
        try (RegionLocator r = table.getRegionLocator()) {
          numRegions = r.getStartKeys().length;
        }
        // Expected value first, so a failure reports the two numbers the right way round.
        assertEquals("Should make 5 regions", 5, numRegions);

        // Generate the bulk load files
        util.startMiniMapReduceCluster();
        runIncrementalPELoad(conf, table.getTableDescriptor(), table.getRegionLocator(), testDir);
        // This doesn't write into the table, just makes files
        assertEquals("HFOF should not touch actual table", 0, util.countRows(table));

        // Make sure that a directory was created for every CF
        int familyDirs = 0;
        for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) {
          for (byte[] family : FAMILIES) {
            if (Bytes.toString(family).equals(f.getPath().getName())) {
              ++familyDirs;
            }
          }
        }
        assertEquals("Column family not found in FS.", FAMILIES.length, familyDirs);

        // handle the split case
        if (shouldChangeRegions) {
          LOG.info("Changing regions in table");
          admin.disableTable(table.getName());
          while (util.getMiniHBaseCluster()
              .getMaster()
              .getAssignmentManager()
              .getRegionStates()
              .isRegionsInTransition()) {
            Threads.sleep(200);
            LOG.info("Waiting on table to finish disabling");
          }
          util.deleteTable(table.getName());
          byte[][] newSplitKeys = generateRandomSplitKeys(14);
          table = util.createTable(TABLE_NAME, FAMILIES, newSplitKeys);

          // 14 split keys make 15 regions; wait until all are located and the table is available.
          while (table.getRegionLocator().getAllRegionLocations().size() != 15
              || !admin.isTableAvailable(table.getName())) {
            Thread.sleep(200);
            LOG.info("Waiting for new region assignment to happen");
          }
        }

        // Perform the actual load
        new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);

        // Ensure data shows up
        int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
        assertEquals(
            "LoadIncrementalHFiles should put expected data in table",
            expectedRows,
            util.countRows(table));
        // Every row must have one cell per family, all sharing the same row key and value.
        // try-with-resources closes the scanner even when an assertion fails mid-scan.
        try (ResultScanner results = table.getScanner(new Scan())) {
          for (Result res : results) {
            assertEquals(FAMILIES.length, res.rawCells().length);
            Cell first = res.rawCells()[0];
            for (Cell kv : res.rawCells()) {
              assertTrue(CellUtil.matchingRow(first, kv));
              assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
            }
          }
        }
        String tableDigestBefore = util.checksumRows(table);

        // Cause regions to reopen
        admin.disableTable(TABLE_NAME);
        while (!admin.isTableDisabled(TABLE_NAME)) {
          Thread.sleep(200);
          LOG.info("Waiting for table to disable");
        }
        admin.enableTable(TABLE_NAME);
        util.waitTableAvailable(TABLE_NAME);
        assertEquals(
            "Data should remain after reopening of regions",
            tableDigestBefore,
            util.checksumRows(table));
      }
    } finally {
      // Nested so the HBase mini cluster is shut down even if the MapReduce shutdown throws.
      try {
        util.shutdownMiniMapReduceCluster();
      } finally {
        util.shutdownMiniCluster();
      }
    }
  }