Пример #1
1
  /**
   * Test interruptable while blocking wait on root and meta.
   *
   * @throws IOException
   * @throws InterruptedException
   */
  @Test
  public void testInterruptWaitOnMetaAndRoot() throws IOException, InterruptedException {
    HRegionInterface implementation = Mockito.mock(HRegionInterface.class);
    HConnection connection = mockConnection(implementation);

    final CatalogTracker ct = constructAndStartCatalogTracker(connection);
    ServerName hsa = ct.getRootLocation();
    Assert.assertNull(hsa);
    ServerName meta = ct.getMetaLocation();
    Assert.assertNull(meta);
    Thread t =
        new Thread() {
          @Override
          public void run() {
            try {
              ct.waitForMeta();
            } catch (InterruptedException e) {
              throw new RuntimeException("Interrupted", e);
            }
          }
        };
    t.start();
    while (!t.isAlive()) Threads.sleep(1);
    Threads.sleep(1);
    assertTrue(t.isAlive());
    ct.stop();
    // Join the thread... should exit shortly.
    t.join();
  }
Пример #2
1
 private void startWaitAliveThenWaitItLives(final Thread t, final int ms) {
   t.start();
   while (!t.isAlive()) {
     // Wait
   }
   // Wait one second.
   Threads.sleep(ms);
   Assert.assertTrue("Assert " + t.getName() + " still waiting", t.isAlive());
 }
  private int runTest() throws Exception {
    LOG.info("Starting the test");

    String runtimeKey = String.format(RUN_TIME_KEY, this.getClass().getSimpleName());
    long runtime = util.getConfiguration().getLong(runtimeKey, DEFAULT_RUN_TIME);

    String numThreadKey = String.format(NUM_THREADS_KEY, this.getClass().getSimpleName());
    numThreads = util.getConfiguration().getInt(numThreadKey, DEFAULT_NUM_THREADS);

    ArrayList<Worker> workers = new ArrayList<>();
    for (int i = 0; i < numThreads; i++) {
      checkException(workers);
      Worker worker = new Worker();
      LOG.info("Launching worker thread " + worker.getName());
      workers.add(worker);
      worker.start();
    }

    Threads.sleep(runtime / 2);
    LOG.info("Stopping creating new tables");
    create_table.set(false);
    Threads.sleep(runtime / 2);
    LOG.info("Runtime is up");
    running.set(false);

    checkException(workers);

    for (Worker worker : workers) {
      worker.join();
    }
    LOG.info("All Worker threads stopped");

    // verify
    LOG.info("Verify actions of all threads succeeded");
    checkException(workers);
    LOG.info("Verify namespaces");
    verifyNamespaces();
    LOG.info("Verify states of all tables");
    verifyTables();

    // RUN HBCK

    HBaseFsck hbck = null;
    try {
      LOG.info("Running hbck");
      hbck = HbckTestingUtil.doFsck(util.getConfiguration(), false);
      if (HbckTestingUtil.inconsistencyFound(hbck)) {
        // Find the inconsistency during HBCK. Leave table and namespace undropped so that
        // we can check outside the test.
        keepObjectsAtTheEnd = true;
      }
      HbckTestingUtil.assertNoErrors(hbck);
      LOG.info("Finished hbck");
    } finally {
      if (hbck != null) {
        hbck.close();
      }
    }
    return 0;
  }
Пример #4
0
      @Override
      public void run() {
        while (!isStopped()) {
          try {
            NavigableMap<HRegionInfo, ServerName> regions =
                MetaScanner.allTableRegions(connection, TABLENAME);

            LOG.info("-------");
            byte[] lastEndKey = HConstants.EMPTY_START_ROW;
            for (HRegionInfo hri : regions.navigableKeySet()) {
              long startKey = 0, endKey = Long.MAX_VALUE;
              if (!Bytes.equals(HConstants.EMPTY_START_ROW, hri.getStartKey())) {
                startKey = Bytes.toLong(hri.getStartKey());
              }
              if (!Bytes.equals(HConstants.EMPTY_END_ROW, hri.getEndKey())) {
                endKey = Bytes.toLong(hri.getEndKey());
              }
              LOG.info("start:" + startKey + " end:" + endKey + " hri:" + hri);
              Assert.assertTrue(
                  "lastEndKey="
                      + Bytes.toString(lastEndKey)
                      + ", startKey="
                      + Bytes.toString(hri.getStartKey()),
                  Bytes.equals(lastEndKey, hri.getStartKey()));
              lastEndKey = hri.getEndKey();
            }
            Assert.assertTrue(Bytes.equals(lastEndKey, HConstants.EMPTY_END_ROW));
            LOG.info("-------");
            Threads.sleep(10 + random.nextInt(50));
          } catch (Throwable e) {
            ex = e;
            Assert.fail(StringUtils.stringifyException(e));
          }
        }
      }
  @Test
  public void testPreWALRestoreSkip() throws Exception {
    LOG.info(TestRegionObserverInterface.class.getName() + ".testPreWALRestoreSkip");
    TableName tableName = TableName.valueOf(SimpleRegionObserver.TABLE_SKIPPED);
    HTable table = util.createTable(tableName, new byte[][] {A, B, C});

    JVMClusterUtil.RegionServerThread rs1 = cluster.startRegionServer();
    ServerName sn2 = rs1.getRegionServer().getServerName();
    String regEN = table.getRegionLocations().firstEntry().getKey().getEncodedName();

    util.getHBaseAdmin().move(regEN.getBytes(), sn2.getServerName().getBytes());
    while (!sn2.equals(table.getRegionLocations().firstEntry().getValue())) {
      Thread.sleep(100);
    }

    Put put = new Put(ROW);
    put.add(A, A, A);
    put.add(B, B, B);
    put.add(C, C, C);
    table.put(put);
    table.flushCommits();

    cluster.killRegionServer(rs1.getRegionServer().getServerName());
    Threads.sleep(20000); // just to be sure that the kill has fully started.
    util.waitUntilAllRegionsAssigned(tableName);

    verifyMethodResult(
        SimpleRegionObserver.class,
        new String[] {"getCtPreWALRestore", "getCtPostWALRestore"},
        tableName,
        new Integer[] {0, 0});

    util.deleteTable(tableName);
    table.close();
  }
  @Test(timeout = 30000)
  public void testInfo() {
    HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
    MetricsMasterWrapperImpl info = new MetricsMasterWrapperImpl(master);
    assertEquals(master.getSplitPlanCount(), info.getSplitPlanCount(), 0);
    assertEquals(master.getMergePlanCount(), info.getMergePlanCount(), 0);
    assertEquals(master.getAverageLoad(), info.getAverageLoad(), 0);
    assertEquals(master.getClusterId(), info.getClusterId());
    assertEquals(master.getMasterActiveTime(), info.getActiveTime());
    assertEquals(master.getMasterStartTime(), info.getStartTime());
    assertEquals(master.getMasterCoprocessors().length, info.getCoprocessors().length);
    assertEquals(
        master.getServerManager().getOnlineServersList().size(), info.getNumRegionServers());
    assertEquals(5, info.getNumRegionServers());

    String zkServers = info.getZookeeperQuorum();
    assertEquals(zkServers.split(",").length, TEST_UTIL.getZkCluster().getZooKeeperServerNum());

    final int index = 3;
    LOG.info("Stopping " + TEST_UTIL.getMiniHBaseCluster().getRegionServer(index));
    TEST_UTIL.getMiniHBaseCluster().stopRegionServer(index, false);
    TEST_UTIL.getMiniHBaseCluster().waitOnRegionServer(index);
    // We stopped the regionserver but could take a while for the master to notice it so hang here
    // until it does... then move forward to see if metrics wrapper notices.
    while (TEST_UTIL.getHBaseCluster().getMaster().getServerManager().getOnlineServers().size()
        != 4) {
      Threads.sleep(10);
    }
    assertEquals(4, info.getNumRegionServers());
    assertEquals(1, info.getNumDeadRegionServers());
    assertEquals(1, info.getNumWALFiles());
  }
    @Override
    public void run() {
      while (running.get()) {
        switch (random.nextInt() % 2) {
          case 0: // start a server
            try {
              cluster.startServer();
            } catch (Exception e) {
              LOG.warn(e);
              exception.compareAndSet(null, e);
            }
            break;

          case 1: // stop a server
            try {
              cluster.stopRandomServer();
            } catch (Exception e) {
              LOG.warn(e);
              exception.compareAndSet(null, e);
            }
          default:
        }

        Threads.sleep(100);
      }
    }
 @Override
 public void preGetOp(
     final ObserverContext<RegionCoprocessorEnvironment> e,
     final Get get,
     final List<Cell> results)
     throws IOException {
   Threads.sleep(2500);
 }
Пример #9
0
 /**
  * sleeping logic for static methods; handles the interrupt exception. Keeping a static version
  * for this to avoid re-looking for the integer values.
  */
 protected static void sleepBeforeRetry(String msg, int sleepMultiplier) {
   if (sleepMultiplier > hdfsClientRetriesNumber) {
     LOG.warn(msg + ", retries exhausted");
     return;
   }
   LOG.info(msg + ", sleeping " + baseSleepBeforeRetries + " times " + sleepMultiplier);
   Threads.sleep(baseSleepBeforeRetries * sleepMultiplier);
 }
Пример #10
0
 /**
  * sleeping logic for static methods; handles the interrupt exception. Keeping a static version
  * for this to avoid re-looking for the integer values.
  */
 private static void sleepBeforeRetry(
     String msg, int sleepMultiplier, int baseSleepBeforeRetries, int hdfsClientRetriesNumber) {
   if (sleepMultiplier > hdfsClientRetriesNumber) {
     LOG.debug(msg + ", retries exhausted");
     return;
   }
   LOG.debug(msg + ", sleeping " + baseSleepBeforeRetries + " times " + sleepMultiplier);
   Threads.sleep((long) baseSleepBeforeRetries * sleepMultiplier);
 }
  @Test
  public void testRecovery() throws Exception {
    LOG.info(TestRegionObserverInterface.class.getName() + ".testRecovery");
    TableName tableName = TableName.valueOf(TEST_TABLE.getNameAsString() + ".testRecovery");
    HTable table = util.createTable(tableName, new byte[][] {A, B, C});
    try {
      JVMClusterUtil.RegionServerThread rs1 = cluster.startRegionServer();
      ServerName sn2 = rs1.getRegionServer().getServerName();
      String regEN = table.getRegionLocations().firstEntry().getKey().getEncodedName();

      util.getHBaseAdmin().move(regEN.getBytes(), sn2.getServerName().getBytes());
      while (!sn2.equals(table.getRegionLocations().firstEntry().getValue())) {
        Thread.sleep(100);
      }

      Put put = new Put(ROW);
      put.add(A, A, A);
      put.add(B, B, B);
      put.add(C, C, C);
      table.put(put);

      verifyMethodResult(
          SimpleRegionObserver.class,
          new String[] {
            "hadPreGet",
            "hadPostGet",
            "hadPrePut",
            "hadPostPut",
            "hadPreBatchMutate",
            "hadPostBatchMutate",
            "hadDelete"
          },
          tableName,
          new Boolean[] {false, false, true, true, true, true, false});

      verifyMethodResult(
          SimpleRegionObserver.class,
          new String[] {"getCtPreWALRestore", "getCtPostWALRestore", "getCtPrePut", "getCtPostPut"},
          tableName,
          new Integer[] {0, 0, 1, 1});

      cluster.killRegionServer(rs1.getRegionServer().getServerName());
      Threads.sleep(1000); // Let the kill soak in.
      util.waitUntilAllRegionsAssigned(tableName);
      LOG.info("All regions assigned");

      verifyMethodResult(
          SimpleRegionObserver.class,
          new String[] {"getCtPrePut", "getCtPostPut"},
          tableName,
          new Integer[] {0, 0});
    } finally {
      util.deleteTable(tableName);
      table.close();
    }
  }
Пример #12
0
 @Override
 public Result postAppend(
     final ObserverContext<RegionCoprocessorEnvironment> e,
     final Append append,
     final Result result)
     throws IOException {
   if (ct.incrementAndGet() == 1) {
     Threads.sleep(sleepTime.get());
   }
   return result;
 }
Пример #13
0
 @Override
 protected void beforeWaitOnSafePoint() {
   if (throwException) {
     LOG.info("COUNTDOWN");
     // Don't countdown latch until someone waiting on it otherwise, the above
     // afterCreatingZigZagLatch will get to the latch and no one will ever free it and we'll
     // be stuck; test won't go down
     while (this.latch.getCount() <= 0) Threads.sleep(1);
     this.latch.countDown();
   }
 }
Пример #14
0
 @Override
 public MultiResponse multi(RpcController controller, MultiRequest request)
     throws ServiceException {
   int concurrentInvocations = this.multiInvocationsCount.incrementAndGet();
   try {
     if (concurrentInvocations >= tooManyMultiRequests) {
       throw new ServiceException(
           new RegionTooBusyException("concurrentInvocations=" + concurrentInvocations));
     }
     Threads.sleep(multiPause);
     return doMultiResponse(meta, sequenceids, request);
   } finally {
     this.multiInvocationsCount.decrementAndGet();
   }
 }
  public void testRpcWithChaosMonkey(boolean isSyncClient) throws Throwable {
    LOG.info("Starting test");
    Cluster cluster = new Cluster(10, 100);
    for (int i = 0; i < 10; i++) {
      cluster.startServer();
    }

    ArrayList<SimpleClient> clients = new ArrayList<>();

    // all threads should share the same rpc client
    AbstractRpcClient<?> rpcClient = createRpcClient(conf, isSyncClient);

    for (int i = 0; i < 30; i++) {
      String clientId = "client_" + i + "_";
      LOG.info("Starting client: " + clientId);
      SimpleClient client = new SimpleClient(cluster, rpcClient, clientId);
      client.start();
      clients.add(client);
    }

    LOG.info("Starting MiniChaosMonkey");
    MiniChaosMonkey cm = new MiniChaosMonkey(cluster);
    cm.start();

    Threads.sleep(30000);

    LOG.info("Stopping MiniChaosMonkey");
    cm.stopRunning();
    cm.join();
    cm.rethrowException();

    LOG.info("Stopping clients");
    for (SimpleClient client : clients) {
      LOG.info("Stopping client: " + client.id);
      LOG.info(client.id + " numCalls:" + client.numCalls);
      client.stopRunning();
      client.join();
      client.rethrowException();
      assertTrue(client.numCalls > 10);
    }

    LOG.info("Stopping RpcClient");
    rpcClient.close();

    LOG.info("Stopping Cluster");
    cluster.stopRunning();
  }
Пример #16
0
      @Override
      public void run() {
        while (!isStopped()) {
          try {
            List<HRegionInfo> regions =
                MetaScanner.listAllRegions(TEST_UTIL.getConfiguration(), connection, false);

            // select a random region
            HRegionInfo parent = regions.get(random.nextInt(regions.size()));
            if (parent == null || !TABLENAME.equals(parent.getTable())) {
              continue;
            }

            long startKey = 0, endKey = Long.MAX_VALUE;
            byte[] start = parent.getStartKey();
            byte[] end = parent.getEndKey();
            if (!Bytes.equals(HConstants.EMPTY_START_ROW, parent.getStartKey())) {
              startKey = Bytes.toLong(parent.getStartKey());
            }
            if (!Bytes.equals(HConstants.EMPTY_END_ROW, parent.getEndKey())) {
              endKey = Bytes.toLong(parent.getEndKey());
            }
            if (startKey == endKey) {
              continue;
            }

            long midKey =
                BigDecimal.valueOf(startKey)
                    .add(BigDecimal.valueOf(endKey))
                    .divideToIntegralValue(BigDecimal.valueOf(2))
                    .longValue();

            HRegionInfo splita = new HRegionInfo(TABLENAME, start, Bytes.toBytes(midKey));
            HRegionInfo splitb = new HRegionInfo(TABLENAME, Bytes.toBytes(midKey), end);

            MetaTableAccessor.splitRegion(
                connection, parent, splita, splitb, ServerName.valueOf("fooserver", 1, 0));

            Threads.sleep(random.nextInt(200));
          } catch (Throwable e) {
            ex = e;
            Assert.fail(StringUtils.stringifyException(e));
          }
        }
      }
Пример #17
0
  @Test(timeout = 300000)
  public void testClusterRestart() throws Exception {
    UTIL.startMiniCluster(3);
    while (!UTIL.getMiniHBaseCluster().getMaster().isInitialized()) {
      Threads.sleep(1);
    }
    LOG.info("\n\nCreating tables");
    for (byte[] TABLE : TABLES) {
      UTIL.createTable(TABLE, FAMILY);
    }
    for (byte[] TABLE : TABLES) {
      UTIL.waitTableEnabled(TABLE);
    }

    List<HRegionInfo> allRegions = MetaScanner.listAllRegions(UTIL.getConfiguration(), true);
    assertEquals(4, allRegions.size());

    LOG.info("\n\nShutting down cluster");
    UTIL.shutdownMiniHBaseCluster();

    LOG.info("\n\nSleeping a bit");
    Thread.sleep(2000);

    LOG.info("\n\nStarting cluster the second time");
    UTIL.restartHBaseCluster(3);

    // Need to use a new 'Configuration' so we make a new HConnection.
    // Otherwise we're reusing an HConnection that has gone stale because
    // the shutdown of the cluster also called shut of the connection.
    allRegions = MetaScanner.listAllRegions(new Configuration(UTIL.getConfiguration()), true);
    assertEquals(4, allRegions.size());
    LOG.info("\n\nWaiting for tables to be available");
    for (byte[] TABLE : TABLES) {
      try {
        UTIL.createTable(TABLE, FAMILY);
        assertTrue("Able to create table that should already exist", false);
      } catch (TableExistsException tee) {
        LOG.info("Table already exists as expected");
      }
      UTIL.waitTableAvailable(TABLE);
    }
  }
Пример #18
0
  public static void stopMasterAndAssignMeta(HBaseTestingUtility HTU)
      throws IOException, InterruptedException {
    // Stop master
    HMaster master = HTU.getHBaseCluster().getMaster();
    ServerName masterAddr = master.getServerName();
    master.stopMaster();

    Log.info("Waiting until master thread exits");
    while (HTU.getHBaseCluster().getMasterThread() != null
        && HTU.getHBaseCluster().getMasterThread().isAlive()) {
      Threads.sleep(100);
    }

    HRegionServer.TEST_SKIP_REPORTING_TRANSITION = true;
    // Master is down, so is the meta. We need to assign it somewhere
    // so that regions can be assigned during the mocking phase.
    HRegionServer hrs = HTU.getHBaseCluster().getLiveRegionServerThreads().get(0).getRegionServer();
    ZooKeeperWatcher zkw = hrs.getZooKeeper();
    MetaTableLocator mtl = new MetaTableLocator();
    ServerName sn = mtl.getMetaRegionLocation(zkw);
    if (sn != null && !masterAddr.equals(sn)) {
      return;
    }

    ProtobufUtil.openRegion(
        hrs.getRSRpcServices(), hrs.getServerName(), HRegionInfo.FIRST_META_REGIONINFO);
    while (true) {
      sn = mtl.getMetaRegionLocation(zkw);
      if (sn != null
          && sn.equals(hrs.getServerName())
          && hrs.onlineRegions.containsKey(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName())) {
        break;
      }
      Thread.sleep(100);
    }
  }
  @Test
  public void testInterrupt50Percent() throws IOException, InterruptedException {
    final AtomicInteger noEx = new AtomicInteger(0);
    final AtomicInteger badEx = new AtomicInteger(0);
    final AtomicInteger noInt = new AtomicInteger(0);
    final AtomicInteger done = new AtomicInteger(0);
    List<Thread> threads = new ArrayList<Thread>();

    final int nbThread = 100;

    for (int i = 0; i < nbThread; i++) {
      Thread t =
          new Thread() {
            @Override
            public void run() {
              try {
                Table ht = util.getConnection().getTable(tableName);
                Result r = ht.get(new Get(row1));
                noEx.incrementAndGet();
              } catch (IOException e) {
                LOG.info("exception", e);
                if (!(e instanceof InterruptedIOException)
                    || (e instanceof SocketTimeoutException)) {
                  badEx.incrementAndGet();
                } else {
                  if (Thread.currentThread().isInterrupted()) {
                    noInt.incrementAndGet();
                    LOG.info("The thread should NOT be with the 'interrupt' status.");
                  }
                }
              } finally {
                done.incrementAndGet();
              }
            }
          };
      t.setName("TestClientOperationInterrupt #" + i);
      threads.add(t);
      t.start();
    }

    for (int i = 0; i < nbThread / 2; i++) {
      threads.get(i).interrupt();
    }

    boolean stillAlive = true;
    while (stillAlive) {
      stillAlive = false;
      for (Thread t : threads) {
        if (t.isAlive()) {
          stillAlive = true;
        }
      }
      Threads.sleep(10);
    }

    Assert.assertFalse(Thread.currentThread().isInterrupted());

    Assert.assertTrue(
        " noEx: " + noEx.get() + ", badEx=" + badEx.get() + ", noInt=" + noInt.get(),
        noEx.get() == nbThread / 2 && badEx.get() == 0);

    // The problem here is that we need the server to free its handlers to handle all operations
    while (done.get() != nbThread) {
      Thread.sleep(1);
    }

    Table ht = util.getConnection().getTable(tableName);
    Result r = ht.get(new Get(row1));
    Assert.assertFalse(r.isEmpty());
  }
Пример #20
0
  /**
   * Test the global mem store size in the region server is equal to sum of each region's mem store
   * size
   *
   * @throws Exception
   */
  @Test
  public void testGlobalMemStore() throws Exception {
    // Start the cluster
    LOG.info("Starting cluster");
    Configuration conf = HBaseConfiguration.create();
    TEST_UTIL = new HBaseTestingUtility(conf);
    TEST_UTIL.startMiniCluster(1, regionServerNum);
    cluster = TEST_UTIL.getHBaseCluster();
    LOG.info("Waiting for active/ready master");
    cluster.waitForActiveAndReadyMaster();

    // Create a table with regions
    TableName table = TableName.valueOf("TestGlobalMemStoreSize");
    byte[] family = Bytes.toBytes("family");
    LOG.info("Creating table with " + regionNum + " regions");
    Table ht = TEST_UTIL.createMultiRegionTable(table, family, regionNum);
    int numRegions = -1;
    try (RegionLocator r = TEST_UTIL.getConnection().getRegionLocator(table)) {
      numRegions = r.getStartKeys().length;
    }
    assertEquals(regionNum, numRegions);
    waitForAllRegionsAssigned();

    for (HRegionServer server : getOnlineRegionServers()) {
      long globalMemStoreSize = 0;
      for (HRegionInfo regionInfo : ProtobufUtil.getOnlineRegions(server.getRSRpcServices())) {
        globalMemStoreSize +=
            server.getFromOnlineRegions(regionInfo.getEncodedName()).getMemstoreSize();
      }
      assertEquals(server.getRegionServerAccounting().getGlobalMemstoreSize(), globalMemStoreSize);
    }

    // check the global memstore size after flush
    int i = 0;
    for (HRegionServer server : getOnlineRegionServers()) {
      LOG.info(
          "Starting flushes on "
              + server.getServerName()
              + ", size="
              + server.getRegionServerAccounting().getGlobalMemstoreSize());

      for (HRegionInfo regionInfo : ProtobufUtil.getOnlineRegions(server.getRSRpcServices())) {
        Region r = server.getFromOnlineRegions(regionInfo.getEncodedName());
        flush(r, server);
      }
      LOG.info("Post flush on " + server.getServerName());
      long now = System.currentTimeMillis();
      long timeout = now + 1000;
      while (server.getRegionServerAccounting().getGlobalMemstoreSize() != 0
          && timeout < System.currentTimeMillis()) {
        Threads.sleep(10);
      }
      long size = server.getRegionServerAccounting().getGlobalMemstoreSize();
      if (size > 0) {
        // If size > 0, see if its because the meta region got edits while
        // our test was running....
        for (HRegionInfo regionInfo : ProtobufUtil.getOnlineRegions(server.getRSRpcServices())) {
          Region r = server.getFromOnlineRegions(regionInfo.getEncodedName());
          long l = r.getMemstoreSize();
          if (l > 0) {
            // Only meta could have edits at this stage.  Give it another flush
            // clear them.
            assertTrue(regionInfo.isMetaRegion());
            LOG.info(r.toString() + " " + l + ", reflushing");
            r.flush(true);
          }
        }
      }
      size = server.getRegionServerAccounting().getGlobalMemstoreSize();
      assertEquals("Server=" + server.getServerName() + ", i=" + i++, 0, size);
    }

    ht.close();
    TEST_UTIL.shutdownMiniCluster();
  }
Пример #21
0
  private void doIncrementalLoadTest(boolean shouldChangeRegions, boolean shouldKeepLocality)
      throws Exception {
    util = new HBaseTestingUtility();
    Configuration conf = util.getConfiguration();
    conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, shouldKeepLocality);
    int hostCount = 1;
    int regionNum = 5;
    if (shouldKeepLocality) {
      // We should change host count higher than hdfs replica count when MiniHBaseCluster supports
      // explicit hostnames parameter just like MiniDFSCluster does.
      hostCount = 3;
      regionNum = 20;
    }

    byte[][] splitKeys = generateRandomSplitKeys(regionNum - 1);
    String[] hostnames = new String[hostCount];
    for (int i = 0; i < hostCount; ++i) {
      hostnames[i] = "datanode_" + i;
    }
    util.startMiniCluster(1, hostCount, hostnames);

    Table table = util.createTable(TABLE_NAME, FAMILIES, splitKeys);
    Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
    try (RegionLocator r = util.getConnection().getRegionLocator(TABLE_NAME);
        Admin admin = util.getConnection().getAdmin(); ) {
      assertEquals("Should start with empty table", 0, util.countRows(table));
      int numRegions = r.getStartKeys().length;
      assertEquals("Should make " + regionNum + " regions", numRegions, regionNum);

      // Generate the bulk load files
      runIncrementalPELoad(conf, table.getTableDescriptor(), r, testDir);
      // This doesn't write into the table, just makes files
      assertEquals("HFOF should not touch actual table", 0, util.countRows(table));

      // Make sure that a directory was created for every CF
      int dir = 0;
      for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) {
        for (byte[] family : FAMILIES) {
          if (Bytes.toString(family).equals(f.getPath().getName())) {
            ++dir;
          }
        }
      }
      assertEquals("Column family not found in FS.", FAMILIES.length, dir);

      // handle the split case
      if (shouldChangeRegions) {
        LOG.info("Changing regions in table");
        admin.disableTable(table.getName());
        while (util.getMiniHBaseCluster()
            .getMaster()
            .getAssignmentManager()
            .getRegionStates()
            .isRegionsInTransition()) {
          Threads.sleep(200);
          LOG.info("Waiting on table to finish disabling");
        }
        util.deleteTable(table.getName());
        byte[][] newSplitKeys = generateRandomSplitKeys(14);
        table = util.createTable(TABLE_NAME, FAMILIES, newSplitKeys);

        while (util.getConnection().getRegionLocator(TABLE_NAME).getAllRegionLocations().size()
                != 15
            || !admin.isTableAvailable(table.getName())) {
          Thread.sleep(200);
          LOG.info("Waiting for new region assignment to happen");
        }
      }

      // Perform the actual load
      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, r);

      // Ensure data shows up
      int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals(
          "LoadIncrementalHFiles should put expected data in table",
          expectedRows,
          util.countRows(table));
      Scan scan = new Scan();
      ResultScanner results = table.getScanner(scan);
      for (Result res : results) {
        assertEquals(FAMILIES.length, res.rawCells().length);
        Cell first = res.rawCells()[0];
        for (Cell kv : res.rawCells()) {
          assertTrue(CellUtil.matchingRow(first, kv));
          assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
        }
      }
      results.close();
      String tableDigestBefore = util.checksumRows(table);

      // Check region locality
      HDFSBlocksDistribution hbd = new HDFSBlocksDistribution();
      for (HRegion region : util.getHBaseCluster().getRegions(TABLE_NAME)) {
        hbd.add(region.getHDFSBlocksDistribution());
      }
      for (String hostname : hostnames) {
        float locality = hbd.getBlockLocalityIndex(hostname);
        LOG.info("locality of [" + hostname + "]: " + locality);
        assertEquals(100, (int) (locality * 100));
      }

      // Cause regions to reopen
      admin.disableTable(TABLE_NAME);
      while (!admin.isTableDisabled(TABLE_NAME)) {
        Thread.sleep(200);
        LOG.info("Waiting for table to disable");
      }
      admin.enableTable(TABLE_NAME);
      util.waitTableAvailable(TABLE_NAME);
      assertEquals(
          "Data should remain after reopening of regions",
          tableDigestBefore,
          util.checksumRows(table));
    } finally {
      testDir.getFileSystem(conf).delete(testDir, true);
      util.deleteTable(TABLE_NAME);
      util.shutdownMiniCluster();
    }
  }
Пример #22
0
  /**
   * Reproduce locking up that happens when we get an inopportune sync during setup for zigzaglatch
   * wait. See HBASE-14317. If below is broken, we will see this test timeout because it is locked
   * up.
   *
   * <p>First I need to set up some mocks for Server and RegionServerServices. I also need to set up
   * a dodgy WAL that will throw an exception when we go to append to it.
   */
  @Test(timeout = 20000)
  public void testLockupWhenSyncInMiddleOfZigZagSetup() throws IOException {
    // A WAL that we can have throw exceptions when a flag is set.
    class DodgyFSLog extends FSHLog {
      // Set this when want the WAL to start throwing exceptions.
      volatile boolean throwException = false;

      // Latch to hold up processing until after another operation has had time to run.
      CountDownLatch latch = new CountDownLatch(1);

      public DodgyFSLog(FileSystem fs, Path root, String logDir, Configuration conf)
          throws IOException {
        super(fs, root, logDir, conf);
      }

      @Override
      protected void afterCreatingZigZagLatch() {
        // If throwException set, then append will throw an exception causing the WAL to be
        // rolled. We'll come in here. Hold up processing until a sync can get in before
        // the zigzag has time to complete its setup and get its own sync in. This is what causes
        // the lock up we've seen in production.
        if (throwException) {
          try {
            LOG.info("LATCHED");
            // So, timing can have it that the test can run and the bad flush below happens
            // before we get here. In this case, we'll be stuck waiting on this latch but there
            // is nothing in the WAL pipeline to get us to the below beforeWaitOnSafePoint...
            // because all WALs have rolled. In this case, just give up on test.
            if (!this.latch.await(5, TimeUnit.SECONDS)) {
              LOG.warn("GIVE UP! Failed waiting on latch...Test is ABORTED!");
            }
          } catch (InterruptedException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
          }
        }
      }

      @Override
      protected void beforeWaitOnSafePoint() {
        if (throwException) {
          LOG.info("COUNTDOWN");
          // Don't countdown latch until someone waiting on it otherwise, the above
          // afterCreatingZigZagLatch will get to the latch and no one will ever free it and we'll
          // be stuck; test won't go down
          while (this.latch.getCount() <= 0) Threads.sleep(1);
          this.latch.countDown();
        }
      }

      @Override
      protected Writer createWriterInstance(Path path) throws IOException {
        final Writer w = super.createWriterInstance(path);
        return new Writer() {
          @Override
          public void close() throws IOException {
            w.close();
          }

          @Override
          public void sync() throws IOException {
            if (throwException) {
              throw new IOException("FAKE! Failed to replace a bad datanode...SYNC");
            }
            w.sync();
          }

          @Override
          public void append(Entry entry) throws IOException {
            if (throwException) {
              throw new IOException("FAKE! Failed to replace a bad datanode...APPEND");
            }
            w.append(entry);
          }

          @Override
          public long getLength() {
            return w.getLength();
          }
        };
      }
    }

    // Mocked up server and regionserver services. Needed below.
    Server server = Mockito.mock(Server.class);
    Mockito.when(server.getConfiguration()).thenReturn(CONF);
    Mockito.when(server.isStopped()).thenReturn(false);
    Mockito.when(server.isAborted()).thenReturn(false);
    RegionServerServices services = Mockito.mock(RegionServerServices.class);

    // OK. Now I have my mocked up Server & RegionServerServices and dodgy WAL, go ahead with test.
    FileSystem fs = FileSystem.get(CONF);
    Path rootDir = new Path(dir + getName());
    DodgyFSLog dodgyWAL = new DodgyFSLog(fs, rootDir, getName(), CONF);
    Path originalWAL = dodgyWAL.getCurrentFileName();
    // I need a log roller running.
    LogRoller logRoller = new LogRoller(server, services);
    logRoller.addWAL(dodgyWAL);
    // There is no 'stop' once a logRoller is running.. it just dies.
    logRoller.start();
    // Now get a region and start adding in edits.
    HTableDescriptor htd = new HTableDescriptor(TableName.META_TABLE_NAME);
    final HRegion region = initHRegion(tableName, null, null, dodgyWAL);
    byte[] bytes = Bytes.toBytes(getName());
    NavigableMap<byte[], Integer> scopes = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
    scopes.put(COLUMN_FAMILY_BYTES, 0);
    MultiVersionConcurrencyControl mvcc = new MultiVersionConcurrencyControl();
    try {
      // First get something into memstore. Make a Put and then pull the Cell out of it. Will
      // manage append and sync carefully in below to manufacture hang. We keep adding same
      // edit. WAL subsystem doesn't care.
      Put put = new Put(bytes);
      put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), bytes);
      WALKey key =
          new WALKey(
              region.getRegionInfo().getEncodedNameAsBytes(),
              htd.getTableName(),
              System.currentTimeMillis(),
              mvcc,
              scopes);
      WALEdit edit = new WALEdit();
      CellScanner CellScanner = put.cellScanner();
      assertTrue(CellScanner.advance());
      edit.add(CellScanner.current());
      // Put something in memstore and out in the WAL. Do a big number of appends so we push
      // out other side of the ringbuffer. If small numbers, stuff doesn't make it to WAL
      for (int i = 0; i < 1000; i++) {
        region.put(put);
      }
      // Set it so we start throwing exceptions.
      LOG.info("SET throwing of exception on append");
      dodgyWAL.throwException = true;
      // This append provokes a WAL roll request
      dodgyWAL.append(region.getRegionInfo(), key, edit, true);
      boolean exception = false;
      try {
        dodgyWAL.sync();
      } catch (Exception e) {
        exception = true;
      }
      assertTrue("Did not get sync exception", exception);

      // Get a memstore flush going too so we have same hung profile as up in the issue over
      // in HBASE-14317. Flush hangs trying to get sequenceid because the ringbuffer is held up
      // by the zigzaglatch waiting on syncs to come home.
      Thread t =
          new Thread("Flusher") {
            public void run() {
              try {
                if (region.getMemstoreSize() <= 0) {
                  throw new IOException("memstore size=" + region.getMemstoreSize());
                }
                region.flush(false);
              } catch (IOException e) {
                // Can fail trying to flush in middle of a roll. Not a failure. Will succeed later
                // when roll completes.
                LOG.info("In flush", e);
              }
              LOG.info("Exiting");
            };
          };
      t.setDaemon(true);
      t.start();
      // Wait until
      while (dodgyWAL.latch.getCount() > 0) Threads.sleep(1);
      // Now assert I got a new WAL file put in place even though loads of errors above.
      assertTrue(originalWAL != dodgyWAL.getCurrentFileName());
      // Can I append to it?
      dodgyWAL.throwException = false;
      try {
        region.put(put);
      } catch (Exception e) {
        LOG.info("In the put", e);
      }
    } finally {
      // To stop logRoller, its server has to say it is stopped.
      Mockito.when(server.isStopped()).thenReturn(true);
      if (logRoller != null) logRoller.close();
      try {
        if (region != null) region.close();
        if (dodgyWAL != null) dodgyWAL.close();
      } catch (Exception e) {
        LOG.info("On way out", e);
      }
    }
  }
Пример #23
0
  /**
   * Reproduce locking up that happens when there's no further syncs after append fails, and causing
   * an isolated sync then infinite wait. See HBASE-16960. If below is broken, we will see this test
   * timeout because it is locked up.
   *
   * <p>Steps for reproduce:<br>
   * 1. Trigger server abort through dodgyWAL1<br>
   * 2. Add a {@link DummyWALActionsListener} to dodgyWAL2 to cause ringbuffer event handler thread
   * sleep for a while thus keeping {@code endOfBatch} false<br>
   * 3. Publish a sync then an append which will throw exception, check whether the sync could
   * return
   */
  @Test(timeout = 20000)
  public void testLockup16960() throws IOException {
    // A WAL that we can have throw exceptions when a flag is set.
    class DodgyFSLog extends FSHLog {
      // Set this when want the WAL to start throwing exceptions.
      volatile boolean throwException = false;

      public DodgyFSLog(FileSystem fs, Path root, String logDir, Configuration conf)
          throws IOException {
        super(fs, root, logDir, conf);
      }

      @Override
      protected Writer createWriterInstance(Path path) throws IOException {
        final Writer w = super.createWriterInstance(path);
        return new Writer() {
          @Override
          public void close() throws IOException {
            w.close();
          }

          @Override
          public void sync() throws IOException {
            if (throwException) {
              throw new IOException("FAKE! Failed to replace a bad datanode...SYNC");
            }
            w.sync();
          }

          @Override
          public void append(Entry entry) throws IOException {
            if (throwException) {
              throw new IOException("FAKE! Failed to replace a bad datanode...APPEND");
            }
            w.append(entry);
          }

          @Override
          public long getLength() {
            return w.getLength();
          }
        };
      }

      @Override
      protected long doReplaceWriter(Path oldPath, Path newPath, Writer nextWriter)
          throws IOException {
        if (throwException) {
          throw new FailedLogCloseException("oldPath=" + oldPath + ", newPath=" + newPath);
        }
        long oldFileLen = 0L;
        oldFileLen = super.doReplaceWriter(oldPath, newPath, nextWriter);
        return oldFileLen;
      }
    }

    // Mocked up server and regionserver services. Needed below.
    Server server =
        new DummyServer(CONF, ServerName.valueOf("hostname1.example.org", 1234, 1L).toString());
    RegionServerServices services = Mockito.mock(RegionServerServices.class);

    CONF.setLong("hbase.regionserver.hlog.sync.timeout", 10000);

    // OK. Now I have my mocked up Server & RegionServerServices and dodgy WAL,
    // go ahead with test.
    FileSystem fs = FileSystem.get(CONF);
    Path rootDir = new Path(dir + getName());
    DodgyFSLog dodgyWAL1 = new DodgyFSLog(fs, rootDir, getName(), CONF);

    Path rootDir2 = new Path(dir + getName() + "2");
    final DodgyFSLog dodgyWAL2 = new DodgyFSLog(fs, rootDir2, getName() + "2", CONF);
    // Add a listener to force ringbuffer event handler sleep for a while
    dodgyWAL2.registerWALActionsListener(new DummyWALActionsListener());

    // I need a log roller running.
    LogRoller logRoller = new LogRoller(server, services);
    logRoller.addWAL(dodgyWAL1);
    logRoller.addWAL(dodgyWAL2);
    // There is no 'stop' once a logRoller is running.. it just dies.
    logRoller.start();
    // Now get a region and start adding in edits.
    HTableDescriptor htd = new HTableDescriptor(TableName.META_TABLE_NAME);
    final HRegion region = initHRegion(tableName, null, null, dodgyWAL1);
    byte[] bytes = Bytes.toBytes(getName());
    NavigableMap<byte[], Integer> scopes = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
    scopes.put(COLUMN_FAMILY_BYTES, 0);
    MultiVersionConcurrencyControl mvcc = new MultiVersionConcurrencyControl();
    try {
      Put put = new Put(bytes);
      put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), bytes);
      WALKey key =
          new WALKey(
              region.getRegionInfo().getEncodedNameAsBytes(),
              htd.getTableName(),
              System.currentTimeMillis(),
              mvcc,
              scopes);
      WALEdit edit = new WALEdit();
      CellScanner CellScanner = put.cellScanner();
      assertTrue(CellScanner.advance());
      edit.add(CellScanner.current());

      LOG.info("SET throwing of exception on append");
      dodgyWAL1.throwException = true;
      // This append provokes a WAL roll request
      dodgyWAL1.append(region.getRegionInfo(), key, edit, true);
      boolean exception = false;
      try {
        dodgyWAL1.sync();
      } catch (Exception e) {
        exception = true;
      }
      assertTrue("Did not get sync exception", exception);

      // LogRoller call dodgyWAL1.rollWriter get FailedLogCloseException and
      // cause server abort.
      try {
        // wait LogRoller exit.
        Thread.sleep(50);
      } catch (InterruptedException e) {
        e.printStackTrace();
      }

      final CountDownLatch latch = new CountDownLatch(1);

      // make RingBufferEventHandler sleep 1s, so the following sync
      // endOfBatch=false
      key =
          new WALKey(
              region.getRegionInfo().getEncodedNameAsBytes(),
              TableName.valueOf("sleep"),
              System.currentTimeMillis(),
              mvcc,
              scopes);
      dodgyWAL2.append(region.getRegionInfo(), key, edit, true);

      Thread t =
          new Thread("Sync") {
            public void run() {
              try {
                dodgyWAL2.sync();
              } catch (IOException e) {
                LOG.info("In sync", e);
              }
              latch.countDown();
              LOG.info("Sync exiting");
            };
          };
      t.setDaemon(true);
      t.start();
      try {
        // make sure sync have published.
        Thread.sleep(100);
      } catch (InterruptedException e1) {
        e1.printStackTrace();
      }
      // make append throw DamagedWALException
      key =
          new WALKey(
              region.getRegionInfo().getEncodedNameAsBytes(),
              TableName.valueOf("DamagedWALException"),
              System.currentTimeMillis(),
              mvcc,
              scopes);
      dodgyWAL2.append(region.getRegionInfo(), key, edit, true);

      while (latch.getCount() > 0) {
        Threads.sleep(100);
      }
      assertTrue(server.isAborted());
    } finally {
      if (logRoller != null) {
        logRoller.close();
      }
      try {
        if (region != null) {
          region.close();
        }
        if (dodgyWAL1 != null) {
          dodgyWAL1.close();
        }
        if (dodgyWAL2 != null) {
          dodgyWAL2.close();
        }
      } catch (Exception e) {
        LOG.info("On way out", e);
      }
    }
  }
Пример #24
0
  private void doIncrementalLoadTest(boolean shouldChangeRegions) throws Exception {
    util = new HBaseTestingUtility();
    Configuration conf = util.getConfiguration();
    byte[][] splitKeys = generateRandomSplitKeys(4);
    util.startMiniCluster();
    try {
      HTable table = util.createTable(TABLE_NAME, FAMILIES, splitKeys);
      Admin admin = table.getConnection().getAdmin();
      Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
      assertEquals("Should start with empty table", 0, util.countRows(table));
      int numRegions = -1;
      try (RegionLocator r = table.getRegionLocator()) {
        numRegions = r.getStartKeys().length;
      }
      assertEquals("Should make 5 regions", numRegions, 5);

      // Generate the bulk load files
      util.startMiniMapReduceCluster();
      runIncrementalPELoad(conf, table.getTableDescriptor(), table.getRegionLocator(), testDir);
      // This doesn't write into the table, just makes files
      assertEquals("HFOF should not touch actual table", 0, util.countRows(table));

      // Make sure that a directory was created for every CF
      int dir = 0;
      for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) {
        for (byte[] family : FAMILIES) {
          if (Bytes.toString(family).equals(f.getPath().getName())) {
            ++dir;
          }
        }
      }
      assertEquals("Column family not found in FS.", FAMILIES.length, dir);

      // handle the split case
      if (shouldChangeRegions) {
        LOG.info("Changing regions in table");
        admin.disableTable(table.getName());
        while (util.getMiniHBaseCluster()
            .getMaster()
            .getAssignmentManager()
            .getRegionStates()
            .isRegionsInTransition()) {
          Threads.sleep(200);
          LOG.info("Waiting on table to finish disabling");
        }
        util.deleteTable(table.getName());
        byte[][] newSplitKeys = generateRandomSplitKeys(14);
        table = util.createTable(TABLE_NAME, FAMILIES, newSplitKeys);

        while (table.getRegionLocator().getAllRegionLocations().size() != 15
            || !admin.isTableAvailable(table.getName())) {
          Thread.sleep(200);
          LOG.info("Waiting for new region assignment to happen");
        }
      }

      // Perform the actual load
      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);

      // Ensure data shows up
      int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals(
          "LoadIncrementalHFiles should put expected data in table",
          expectedRows,
          util.countRows(table));
      Scan scan = new Scan();
      ResultScanner results = table.getScanner(scan);
      for (Result res : results) {
        assertEquals(FAMILIES.length, res.rawCells().length);
        Cell first = res.rawCells()[0];
        for (Cell kv : res.rawCells()) {
          assertTrue(CellUtil.matchingRow(first, kv));
          assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
        }
      }
      results.close();
      String tableDigestBefore = util.checksumRows(table);

      // Cause regions to reopen
      admin.disableTable(TABLE_NAME);
      while (!admin.isTableDisabled(TABLE_NAME)) {
        Thread.sleep(200);
        LOG.info("Waiting for table to disable");
      }
      admin.enableTable(TABLE_NAME);
      util.waitTableAvailable(TABLE_NAME);
      assertEquals(
          "Data should remain after reopening of regions",
          tableDigestBefore,
          util.checksumRows(table));
    } finally {
      util.shutdownMiniMapReduceCluster();
      util.shutdownMiniCluster();
    }
  }
Пример #25
0
  /**
   * Test for HBASE-4288. Throw an IOE when trying to verify meta region and prove it doesn't cause
   * master shutdown.
   *
   * @see <a href="https://issues.apache.org/jira/browse/HBASE-4288">HBASE-4288</a>
   * @throws IOException
   * @throws InterruptedException
   * @throws KeeperException
   */
  @Test
  public void testServerNotRunningIOException()
      throws IOException, InterruptedException, KeeperException {
    // Mock an HRegionInterface.
    final HRegionInterface implementation = Mockito.mock(HRegionInterface.class);
    HConnection connection = mockConnection(implementation);

    // If a 'getRegionInfo' is called on mocked HRegionInterface, throw IOE
    // the first time.  'Succeed' the second time we are called.
    Mockito.when(implementation.getRegionInfo((byte[]) Mockito.any()))
        .thenThrow(new IOException("Server not running, aborting"))
        .thenReturn(new HRegionInfo());

    // After we encounter the above 'Server not running', we should catch the
    // IOE and go into retrying for the meta mode.  We'll do gets on -ROOT- to
    // get new meta location.  Return something so this 'get' succeeds
    // (here we mock up getRegionServerWithRetries, the wrapper around
    // the actual get).

    // TODO: Refactor.  This method has been moved out of HConnection.
    // It works for now but has been deprecated.
    Mockito.when(connection.getRegionServerWithRetries((ServerCallable<Result>) Mockito.any()))
        .thenReturn(getMetaTableRowResult());

    // Now start up the catalogtracker with our doctored Connection.
    final CatalogTracker ct = constructAndStartCatalogTracker(connection);
    try {
      // Set a location for root and meta.
      RootLocationEditor.setRootLocation(this.watcher, SN);
      ct.setMetaLocation(SN);
      // Call the method that HBASE-4288 calls.  It will try and verify the
      // meta location and will fail on first attempt then go into a long wait.
      // So, do this in a thread and then reset meta location to break it out
      // of its wait after a bit of time.
      final AtomicBoolean metaSet = new AtomicBoolean(false);
      final CountDownLatch latch = new CountDownLatch(1);
      Thread t =
          new Thread() {
            @Override
            public void run() {
              try {
                latch.countDown();
                metaSet.set(ct.waitForMeta(100000) != null);
              } catch (Exception e) {
                throw new RuntimeException(e);
              }
            }
          };
      t.start();
      latch.await();
      Threads.sleep(1);
      // Now reset the meta as though it were redeployed.
      ct.setMetaLocation(SN);
      t.join();
      Assert.assertTrue(metaSet.get());
    } finally {
      // Clean out root and meta locations or later tests will be confused...
      // they presume start fresh in zk.
      ct.resetMetaLocation();
    }
  }
Пример #26
0
  @Test
  public void testConcurrentMetaScannerAndCatalogJanitor() throws Throwable {
    /* TEST PLAN: start with only one region in a table. Have a splitter
     * thread  and metascanner threads that continously scan the meta table for regions.
     * CatalogJanitor from master will run frequently to clean things up
     */
    TEST_UTIL.getConfiguration().setLong("hbase.catalogjanitor.interval", 500);
    setUp();

    final long runtime = 30 * 1000; // 30 sec
    LOG.info("Starting testConcurrentMetaScannerAndCatalogJanitor");
    final TableName TABLENAME = TableName.valueOf("testConcurrentMetaScannerAndCatalogJanitor");
    final byte[] FAMILY = Bytes.toBytes("family");
    TEST_UTIL.createTable(TABLENAME, FAMILY);

    class RegionMetaSplitter extends StoppableImplementation implements Runnable {
      Random random = new Random();
      Throwable ex = null;

      @Override
      public void run() {
        while (!isStopped()) {
          try {
            List<HRegionInfo> regions =
                MetaScanner.listAllRegions(TEST_UTIL.getConfiguration(), connection, false);

            // select a random region
            HRegionInfo parent = regions.get(random.nextInt(regions.size()));
            if (parent == null || !TABLENAME.equals(parent.getTable())) {
              continue;
            }

            long startKey = 0, endKey = Long.MAX_VALUE;
            byte[] start = parent.getStartKey();
            byte[] end = parent.getEndKey();
            if (!Bytes.equals(HConstants.EMPTY_START_ROW, parent.getStartKey())) {
              startKey = Bytes.toLong(parent.getStartKey());
            }
            if (!Bytes.equals(HConstants.EMPTY_END_ROW, parent.getEndKey())) {
              endKey = Bytes.toLong(parent.getEndKey());
            }
            if (startKey == endKey) {
              continue;
            }

            long midKey =
                BigDecimal.valueOf(startKey)
                    .add(BigDecimal.valueOf(endKey))
                    .divideToIntegralValue(BigDecimal.valueOf(2))
                    .longValue();

            HRegionInfo splita = new HRegionInfo(TABLENAME, start, Bytes.toBytes(midKey));
            HRegionInfo splitb = new HRegionInfo(TABLENAME, Bytes.toBytes(midKey), end);

            MetaTableAccessor.splitRegion(
                connection, parent, splita, splitb, ServerName.valueOf("fooserver", 1, 0));

            Threads.sleep(random.nextInt(200));
          } catch (Throwable e) {
            ex = e;
            Assert.fail(StringUtils.stringifyException(e));
          }
        }
      }

      void rethrowExceptionIfAny() throws Throwable {
        if (ex != null) {
          throw ex;
        }
      }
    }

    class MetaScannerVerifier extends StoppableImplementation implements Runnable {
      Random random = new Random();
      Throwable ex = null;

      @Override
      public void run() {
        while (!isStopped()) {
          try {
            NavigableMap<HRegionInfo, ServerName> regions =
                MetaScanner.allTableRegions(connection, TABLENAME);

            LOG.info("-------");
            byte[] lastEndKey = HConstants.EMPTY_START_ROW;
            for (HRegionInfo hri : regions.navigableKeySet()) {
              long startKey = 0, endKey = Long.MAX_VALUE;
              if (!Bytes.equals(HConstants.EMPTY_START_ROW, hri.getStartKey())) {
                startKey = Bytes.toLong(hri.getStartKey());
              }
              if (!Bytes.equals(HConstants.EMPTY_END_ROW, hri.getEndKey())) {
                endKey = Bytes.toLong(hri.getEndKey());
              }
              LOG.info("start:" + startKey + " end:" + endKey + " hri:" + hri);
              Assert.assertTrue(
                  "lastEndKey="
                      + Bytes.toString(lastEndKey)
                      + ", startKey="
                      + Bytes.toString(hri.getStartKey()),
                  Bytes.equals(lastEndKey, hri.getStartKey()));
              lastEndKey = hri.getEndKey();
            }
            Assert.assertTrue(Bytes.equals(lastEndKey, HConstants.EMPTY_END_ROW));
            LOG.info("-------");
            Threads.sleep(10 + random.nextInt(50));
          } catch (Throwable e) {
            ex = e;
            Assert.fail(StringUtils.stringifyException(e));
          }
        }
      }

      void rethrowExceptionIfAny() throws Throwable {
        if (ex != null) {
          throw ex;
        }
      }
    }

    RegionMetaSplitter regionMetaSplitter = new RegionMetaSplitter();
    MetaScannerVerifier metaScannerVerifier = new MetaScannerVerifier();

    Thread regionMetaSplitterThread = new Thread(regionMetaSplitter);
    Thread metaScannerVerifierThread = new Thread(metaScannerVerifier);

    regionMetaSplitterThread.start();
    metaScannerVerifierThread.start();

    Threads.sleep(runtime);

    regionMetaSplitter.stop("test finished");
    metaScannerVerifier.stop("test finished");

    regionMetaSplitterThread.join();
    metaScannerVerifierThread.join();

    regionMetaSplitter.rethrowExceptionIfAny();
    metaScannerVerifier.rethrowExceptionIfAny();
  }
Пример #27
0
  /** This tests retaining assignments on a cluster restart */
  @Test(timeout = 300000)
  public void testRetainAssignmentOnRestart() throws Exception {
    UTIL.startMiniCluster(2);
    while (!UTIL.getMiniHBaseCluster().getMaster().isInitialized()) {
      Threads.sleep(1);
    }
    // Turn off balancer
    UTIL.getMiniHBaseCluster().getMaster().getMasterRpcServices().synchronousBalanceSwitch(false);
    LOG.info("\n\nCreating tables");
    for (byte[] TABLE : TABLES) {
      UTIL.createTable(TABLE, FAMILY);
    }
    for (byte[] TABLE : TABLES) {
      UTIL.waitTableEnabled(TABLE);
    }

    HMaster master = UTIL.getMiniHBaseCluster().getMaster();
    UTIL.waitUntilNoRegionsInTransition(120000);

    // We don't have to use SnapshotOfRegionAssignmentFromMeta.
    // We use it here because AM used to use it to load all user region placements
    SnapshotOfRegionAssignmentFromMeta snapshot =
        new SnapshotOfRegionAssignmentFromMeta(master.getShortCircuitConnection());
    snapshot.initialize();
    Map<HRegionInfo, ServerName> regionToRegionServerMap = snapshot.getRegionToRegionServerMap();

    MiniHBaseCluster cluster = UTIL.getHBaseCluster();
    List<JVMClusterUtil.RegionServerThread> threads = cluster.getLiveRegionServerThreads();
    assertEquals(2, threads.size());
    int[] rsPorts = new int[3];
    for (int i = 0; i < 2; i++) {
      rsPorts[i] = threads.get(i).getRegionServer().getServerName().getPort();
    }
    rsPorts[2] = cluster.getMaster().getServerName().getPort();
    for (ServerName serverName : regionToRegionServerMap.values()) {
      boolean found = false; // Test only, no need to optimize
      for (int k = 0; k < 3 && !found; k++) {
        found = serverName.getPort() == rsPorts[k];
      }
      assertTrue(found);
    }

    LOG.info("\n\nShutting down HBase cluster");
    cluster.shutdown();
    cluster.waitUntilShutDown();

    LOG.info("\n\nSleeping a bit");
    Thread.sleep(2000);

    LOG.info("\n\nStarting cluster the second time with the same ports");
    try {
      cluster.getConf().setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 4);
      master = cluster.startMaster().getMaster();
      for (int i = 0; i < 3; i++) {
        cluster.getConf().setInt(HConstants.REGIONSERVER_PORT, rsPorts[i]);
        cluster.startRegionServer();
      }
    } finally {
      // Reset region server port so as not to conflict with other tests
      cluster.getConf().setInt(HConstants.REGIONSERVER_PORT, 0);
      cluster.getConf().setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 2);
    }

    // Make sure live regionservers are on the same host/port
    List<ServerName> localServers = master.getServerManager().getOnlineServersList();
    assertEquals(4, localServers.size());
    for (int i = 0; i < 3; i++) {
      boolean found = false;
      for (ServerName serverName : localServers) {
        if (serverName.getPort() == rsPorts[i]) {
          found = true;
          break;
        }
      }
      assertTrue(found);
    }

    // Wait till master is initialized and all regions are assigned
    RegionStates regionStates = master.getAssignmentManager().getRegionStates();
    int expectedRegions = regionToRegionServerMap.size() + 1;
    while (!master.isInitialized()
        || regionStates.getRegionAssignments().size() != expectedRegions) {
      Threads.sleep(100);
    }

    snapshot = new SnapshotOfRegionAssignmentFromMeta(master.getShortCircuitConnection());
    snapshot.initialize();
    Map<HRegionInfo, ServerName> newRegionToRegionServerMap = snapshot.getRegionToRegionServerMap();
    assertEquals(regionToRegionServerMap.size(), newRegionToRegionServerMap.size());
    for (Map.Entry<HRegionInfo, ServerName> entry : newRegionToRegionServerMap.entrySet()) {
      if (TableName.NAMESPACE_TABLE_NAME.equals(entry.getKey().getTable())) continue;
      ServerName oldServer = regionToRegionServerMap.get(entry.getKey());
      ServerName currentServer = entry.getValue();
      assertEquals(oldServer.getHostAndPort(), currentServer.getHostAndPort());
      assertNotEquals(oldServer.getStartcode(), currentServer.getStartcode());
    }
  }