Beispiel #1
0
 /**
  * Marks a region as offline and takes it out of the regions-in-transition map.
  *
  * <p>A region that is currently in SPLITTING_NEW or MERGING_NEW state is handed to
  * deleteRegion instead and nothing else happens. Otherwise the region state is updated to
  * {@code expectedState} (OFFLINE when {@code expectedState} is null) and its assignment is
  * removed. The region is dropped from the previous server's holdings right away only when the
  * new state is MERGED or SPLIT, the region is a meta region, or its table is
  * DISABLED/DISABLING; in every other case the previous server is remembered in oldAssignments
  * so the region can be offlined from it once it is online on a different server.
  *
  * @param hri the region going offline
  * @param expectedState the state to record; must be null or accepted by
  *     RegionState.isUnassignable
  */
 public void regionOffline(final HRegionInfo hri, final State expectedState) {
   final boolean acceptableState =
       expectedState == null || RegionState.isUnassignable(expectedState);
   Preconditions.checkArgument(acceptableState, "Offlined region should not be " + expectedState);

   if (isRegionInState(hri, State.SPLITTING_NEW, State.MERGING_NEW)) {
     // Remove it from all region maps
     deleteRegion(hri);
     return;
   }

   final State targetState = (expectedState != null) ? expectedState : State.OFFLINE;
   updateRegionState(hri, targetState);
   final String encodedName = hri.getEncodedName();

   synchronized (this) {
     regionsInTransition.remove(encodedName);
     final ServerName previousServer = regionAssignments.remove(hri);
     if (previousServer == null || !serverHoldings.containsKey(previousServer)) {
       // Nothing was assigned, or the previous server holds nothing we track.
       return;
     }

     // Offline the region right away only if it's merged/split, it is a meta region, or the
     // table is disabled/disabling.
     final boolean offlineImmediately =
         targetState == State.MERGED
             || targetState == State.SPLIT
             || hri.isMetaRegion()
             || tableStateManager.isTableState(
                 hri.getTable(), TableState.State.DISABLED, TableState.State.DISABLING);

     if (!offlineImmediately) {
       // Need to remember it so that we can offline it from this
       // server when it is online on a different server.
       oldAssignments.put(encodedName, previousServer);
       return;
     }

     LOG.info("Offlined " + hri.getShortNameToLog() + " from " + previousServer);
     removeFromServerHoldings(previousServer, hri);
     removeFromReplicaMapping(hri);
   }
 }
Beispiel #2
0
  /**
   * Builds a fresh copy of the current region assignments, grouped by table and then by server.
   *
   * <p>This is an EXPENSIVE clone, but the internal maps cannot be handed out because they may
   * change while the caller (for example the load balancer) iterates the result. The copy of
   * serverHoldings is taken while holding this object's monitor.
   *
   * <p>When HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE is false (the default used here), all
   * servers and their regions are placed under the single HConstants.ENSEMBLE_TABLE_NAME key.
   * Otherwise regions are grouped under their own table and meta regions are skipped.
   *
   * <p>Afterwards every per-table map gets an empty list for each online server that has no
   * entry yet, and servers in the draining list are removed.
   *
   * @return A clone of current assignments by table.
   */
  protected Map<TableName, Map<ServerName, List<HRegionInfo>>> getAssignmentsByTable() {
    Map<TableName, Map<ServerName, List<HRegionInfo>>> assignments =
        new HashMap<TableName, Map<ServerName, List<HRegionInfo>>>();

    synchronized (this) {
      boolean balanceByTable =
          server.getConfiguration().getBoolean(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE, false);
      if (balanceByTable) {
        for (Map.Entry<ServerName, Set<HRegionInfo>> holding : serverHoldings.entrySet()) {
          for (HRegionInfo region : holding.getValue()) {
            if (region.isMetaRegion()) {
              continue;
            }
            TableName table = region.getTable();
            Map<ServerName, List<HRegionInfo>> perServer = assignments.get(table);
            if (perServer == null) {
              perServer = new HashMap<ServerName, List<HRegionInfo>>(serverHoldings.size());
              assignments.put(table, perServer);
            }
            List<HRegionInfo> held = perServer.get(holding.getKey());
            if (held == null) {
              held = new ArrayList<HRegionInfo>();
              perServer.put(holding.getKey(), held);
            }
            held.add(region);
          }
        }
      } else {
        // Not balancing by table: lump everything under one synthetic table name.
        Map<ServerName, List<HRegionInfo>> everything =
            new HashMap<ServerName, List<HRegionInfo>>(serverHoldings.size());
        for (Map.Entry<ServerName, Set<HRegionInfo>> holding : serverHoldings.entrySet()) {
          List<HRegionInfo> copy = new ArrayList<HRegionInfo>(holding.getValue());
          everything.put(holding.getKey(), copy);
        }
        assignments.put(TableName.valueOf(HConstants.ENSEMBLE_TABLE_NAME), everything);
      }
    }

    Map<ServerName, ServerLoad> onlineServers = serverManager.getOnlineServers();
    List<ServerName> draining = this.serverManager.getDrainingServersList();
    for (Map<ServerName, List<HRegionInfo>> perServer : assignments.values()) {
      // Make sure servers without any assignment still show up ...
      for (ServerName online : onlineServers.keySet()) {
        if (perServer.containsKey(online)) {
          continue;
        }
        perServer.put(online, new ArrayList<HRegionInfo>());
      }
      // ... and drop the ones that are being drained.
      perServer.keySet().removeAll(draining);
    }
    return assignments;
  }
Beispiel #3
0
  /**
   * Closing a region by encoded name with a blank server name is expected to be rejected with
   * an IllegalArgumentException.
   */
  @Test
  public void testCloseRegionWhenServerNameIsEmpty() throws Exception {
    final String tableLabel = "TestHBACloseRegionWhenServerNameIsEmpty";
    final byte[] tableBytes = Bytes.toBytes(tableLabel);
    HBaseAdmin admin = createTable(tableBytes);

    HRegionServer regionServer = TEST_UTIL.getRSForFirstRegionInTable(tableBytes);

    try {
      for (HRegionInfo candidate : regionServer.getOnlineRegions()) {
        if (candidate.isMetaRegion() || candidate.isRootRegion()) {
          continue;
        }
        if (!candidate.getRegionNameAsString().contains(tableLabel)) {
          continue;
        }
        admin.closeRegionWithEncodedRegionName(candidate.getEncodedName(), " ");
      }
      fail("The test should throw exception if the servername passed is empty.");
    } catch (IllegalArgumentException expected) {
      // This is the outcome the test is looking for; nothing more to check.
    }
  }
Beispiel #4
0
  /**
   * Asks the admin to close a region using the bogus encoded name "sample" and checks that the
   * table's real region is still reported as online by the region server afterwards.
   */
  @Test
  public void testCloseRegionIfInvalidRegionNameIsPassed() throws Exception {
    final String tableLabel = "TestCloseRegionIfInvalidRegionName";
    final byte[] tableBytes = Bytes.toBytes(tableLabel);
    HBaseAdmin admin = createTable(tableBytes);

    HRegionServer regionServer = TEST_UTIL.getRSForFirstRegionInTable(tableBytes);
    HRegionInfo lastMatch = null;
    for (HRegionInfo candidate : regionServer.getOnlineRegions()) {
      if (candidate.isMetaRegion() || candidate.isRootRegion()) {
        continue;
      }
      if (!candidate.getRegionNameAsString().contains(tableLabel)) {
        continue;
      }
      lastMatch = candidate;
      admin.closeRegionWithEncodedRegionName(
          "sample", regionServer.getServerInfo().getHostnamePort());
    }

    // Re-read the online regions after the close attempts.
    List<HRegionInfo> stillOnline = regionServer.getOnlineRegions();
    assertTrue(
        "The region should be present in online regions list.", stillOnline.contains(lastMatch));
  }
Beispiel #5
0
  /**
   * Closes every non-catalog region hosted by the table's region server via its encoded name,
   * waits one second, and checks that the last region closed is no longer listed as online.
   */
  @Test
  public void testShouldCloseTheRegionBasedOnTheEncodedRegionName() throws Exception {
    final String tableLabel = "TestHBACloseRegion";
    final byte[] tableBytes = Bytes.toBytes(tableLabel);
    HBaseAdmin admin = createTable(tableBytes);

    HRegionServer regionServer = TEST_UTIL.getRSForFirstRegionInTable(tableBytes);
    HRegionInfo lastClosed = null;
    for (HRegionInfo candidate : regionServer.getOnlineRegions()) {
      if (candidate.isMetaRegion() || candidate.isRootRegion()) {
        continue;
      }
      lastClosed = candidate;
      admin.closeRegionWithEncodedRegionName(
          candidate.getEncodedName(), regionServer.getServerInfo().getHostnamePort());
    }

    // Give the region server a moment to process the close requests.
    Thread.sleep(1000);

    List<HRegionInfo> remaining = regionServer.getOnlineRegions();
    assertFalse(
        "The region should not be present in online regions list.", remaining.contains(lastClosed));
  }
Beispiel #6
0
  /**
   * Generate a global load balancing plan according to the specified map of server information to
   * the most loaded regions of each server.
   *
   * <p>If balanceMasterRegions(clusterMap) produces a non-null plan, that plan is returned as-is
   * and none of the balancing described below is attempted.
   *
   * <p>The load balancing invariant is that all servers are within 1 region of the average number
   * of regions per server. If the average is an integer number, all servers will be balanced to the
   * average. Otherwise, all servers will have either floor(average) or ceiling(average) regions.
   *
   * <p>HBASE-3609 Modeled regionsToMove using Guava's MinMaxPriorityQueue so that we can fetch from
   * both ends of the queue. At the beginning, we check whether there was empty region server just
   * discovered by Master. If so, we alternately choose new / old regions from head / tail of
   * regionsToMove, respectively. This alternation avoids clustering young regions on the newly
   * discovered region server. Otherwise, we choose new regions from head of regionsToMove.
   *
   * <p>Another improvement from HBASE-3609 is that we assign regions from regionsToMove to
   * underloaded servers in round-robin fashion. Previously one underloaded server would be filled
   * before we move onto the next underloaded server, leading to clustering of young regions.
   *
   * <p>Finally, we randomly shuffle underloaded servers so that they receive offloaded regions
   * relatively evenly across calls to balanceCluster().
   *
   * <p>The algorithm is currently implemented as such:
   *
   * <ol>
   *   <li>Determine the two valid numbers of regions each server should have,
   *       <b>MIN</b>=floor(average) and <b>MAX</b>=ceiling(average).
   *   <li>Iterate down the most loaded servers, shedding regions from each so each server hosts
   *       exactly <b>MAX</b> regions. Stop once you reach a server that already has &lt;=
   *       <b>MAX</b> regions.
   *       <p>Order the regions to move from most recent to least.
   *   <li>Iterate down the least loaded servers, assigning regions so each server has exactly
   *       <b>MIN</b> regions. Stop once you reach a server that already has &gt;= <b>MIN</b>
   *       regions.
   *       <p>Regions being assigned to underloaded servers are those that were shed in the previous
   *       step. It is possible that there were not enough regions shed to fill each underloaded
   *       server to <b>MIN</b>. If so we end up with a number of regions required to do so,
   *       <b>neededRegions</b>.
   *       <p>It is also possible that we were able to fill each underloaded but ended up with
   *       regions that were unassigned from overloaded servers but that still do not have
   *       assignment.
   *       <p>If neither of these conditions hold (no regions needed to fill the underloaded
   *       servers, no regions leftover from overloaded servers), we are done and return. Otherwise
   *       we handle these cases below.
   *   <li>If <b>neededRegions</b> is non-zero (still have underloaded servers), we iterate the most
   *       loaded servers again, shedding a single region from each (this brings them from having
   *       <b>MAX</b> regions to having <b>MIN</b> regions).
   *   <li>We now definitely have more regions that need assignment, either from the previous step
   *       or from the original shedding from overloaded servers. Iterate the least loaded servers
   *       filling each to <b>MIN</b>.
   *   <li>If we still have more regions that need assignment, again iterate the least loaded
   *       servers, this time giving each one (filling them to <b>MAX</b>) until we run out.
   *   <li>All servers will now either host <b>MIN</b> or <b>MAX</b> regions.
   *       <p>In addition, any server hosting &gt;= <b>MAX</b> regions is guaranteed to end up with
   *       <b>MAX</b> regions at the end of the balancing. This ensures the minimal number of
   *       regions possible are moved.
   * </ol>
   *
   * TODO: We can at-most reassign the number of regions away from a particular server to be how
   * many they report as most loaded. Should we just keep all assignment in memory? Any objections?
   * Does this mean we need HeapSize on HMaster? Or just careful monitor? (current thinking is we
   * will hold all assignments in memory)
   *
   * @param clusterMap Map of regionservers and their load/region information to a list of their
   *     most loaded regions
   * @return the plan from balanceMasterRegions when that is non-null; otherwise a list of regions
   *     to be moved, including source and destination, or null if needsBalance reports that the
   *     cluster is already balanced
   */
  @Override
  public List<RegionPlan> balanceCluster(Map<ServerName, List<HRegionInfo>> clusterMap) {
    // Master-hosted regions are handled first; a non-null plan short-circuits everything else.
    List<RegionPlan> regionsToReturn = balanceMasterRegions(clusterMap);
    if (regionsToReturn != null) {
      return regionsToReturn;
    }
    filterExcludedServers(clusterMap);
    // NOTE(review): this flag is never set to true anywhere in this method, so every
    // "fetchFromTail = !fetchFromTail" toggle below is currently dead and regions are always
    // taken from the head. Confirm whether empty-server detection was meant to happen here.
    boolean emptyRegionServerPresent = false;
    long startTime = System.currentTimeMillis();

    Collection<ServerName> backupMasters = getBackupMasters();
    ClusterLoadState cs =
        new ClusterLoadState(masterServerName, backupMasters, backupMasterWeight, clusterMap);
    // construct a Cluster object with clusterMap and rest of the
    // argument as defaults
    // NOTE(review): getBackupMasters() is invoked a second time here instead of reusing the
    // backupMasters local captured above.
    Cluster c =
        new Cluster(
            masterServerName,
            clusterMap,
            null,
            this.regionFinder,
            getBackupMasters(),
            tablesOnMaster,
            this.rackManager);
    if (!this.needsBalance(c)) return null;

    int numServers = cs.getNumServers();
    NavigableMap<ServerAndLoad, List<HRegionInfo>> serversByLoad = cs.getServersByLoad();
    int numRegions = cs.getNumRegions();
    float average = cs.getLoadAverage();
    // max = ceiling(average); min = average truncated toward zero by the int cast.
    int max = (int) Math.ceil(average);
    int min = (int) average;

    // Using to check balance result.
    StringBuilder strBalanceParam = new StringBuilder();
    strBalanceParam
        .append("Balance parameter: numRegions=")
        .append(numRegions)
        .append(", numServers=")
        .append(numServers)
        .append(", numBackupMasters=")
        .append(cs.getNumBackupMasters())
        .append(", backupMasterWeight=")
        .append(backupMasterWeight)
        .append(", max=")
        .append(max)
        .append(", min=")
        .append(min);
    LOG.debug(strBalanceParam.toString());

    // Balance the cluster
    // TODO: Look at data block locality or a more complex load to do this
    MinMaxPriorityQueue<RegionPlan> regionsToMove =
        MinMaxPriorityQueue.orderedBy(rpComparator).create();
    regionsToReturn = new ArrayList<RegionPlan>();

    // Walk down most loaded, pruning each to the max
    int serversOverloaded = 0;
    // flag used to fetch regions from head and tail of list, alternately
    boolean fetchFromTail = false;
    Map<ServerName, BalanceInfo> serverBalanceInfo = new TreeMap<ServerName, BalanceInfo>();
    for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server :
        serversByLoad.descendingMap().entrySet()) {
      ServerAndLoad sal = server.getKey();
      int load = sal.getLoad();
      if (load <= max) {
        // First server at or below max: record it and stop, since the walk is in descending
        // load order.
        serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(0, 0));
        break;
      }
      serversOverloaded++;
      List<HRegionInfo> regions = server.getValue();
      int w = 1; // Normal region server has weight 1
      if (backupMasters != null && backupMasters.contains(sal.getServerName())) {
        w = backupMasterWeight; // Backup master has heavier weight
      }
      int numToOffload = Math.min((load - max) / w, regions.size());
      // account for the out-of-band regions which were assigned to this server
      // after some other region server crashed
      Collections.sort(regions, riComparator);
      int numTaken = 0;
      // NOTE(review): the bound is "i <= numToOffload" and the numTaken check runs only after a
      // region has been added, so when numToOffload is 0 one region is still queued. When special
      // regions are skipped, i can also reach numToOffload; if that equals regions.size() the
      // get(i) below would be out of range. Confirm both are intended/unreachable.
      for (int i = 0; i <= numToOffload; ) {
        HRegionInfo hri = regions.get(i); // fetch from head
        if (fetchFromTail) {
          hri = regions.get(regions.size() - 1 - i);
        }
        i++;
        // Don't rebalance special regions.
        if (shouldBeOnMaster(hri) && masterServerName.equals(sal.getServerName())) continue;
        // Destination is left null here; it is filled in when the plan is handed out.
        regionsToMove.add(new RegionPlan(hri, sal.getServerName(), null));
        numTaken++;
        if (numTaken >= numToOffload) break;
        // fetch in alternate order if there is new region server
        if (emptyRegionServerPresent) {
          fetchFromTail = !fetchFromTail;
        }
      }
      serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(numToOffload, (-1) * numTaken));
    }
    int totalNumMoved = regionsToMove.size();

    // Walk down least loaded, filling each to the min
    int neededRegions = 0; // number of regions needed to bring all up to min
    fetchFromTail = false;

    // Maps each underloaded server to the number of regions it should still receive.
    Map<ServerName, Integer> underloadedServers = new HashMap<ServerName, Integer>();
    int maxToTake = numRegions - min;
    for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.entrySet()) {
      if (maxToTake == 0) break; // no more to take
      int load = server.getKey().getLoad();
      if (load >= min && load > 0) {
        continue; // look for other servers which haven't reached min
      }
      int w = 1; // Normal region server has weight 1
      if (backupMasters != null && backupMasters.contains(server.getKey().getServerName())) {
        w = backupMasterWeight; // Backup master has heavier weight
      }
      int regionsToPut = (min - load) / w;
      // Every server selected here is asked to take at least one region.
      if (regionsToPut == 0) {
        regionsToPut = 1;
      }
      maxToTake -= regionsToPut;
      underloadedServers.put(server.getKey().getServerName(), regionsToPut);
    }
    // number of servers that get new regions
    int serversUnderloaded = underloadedServers.size();
    int incr = 1;
    List<ServerName> sns =
        Arrays.asList(underloadedServers.keySet().toArray(new ServerName[serversUnderloaded]));
    Collections.shuffle(sns, RANDOM);
    // Hand out queued regions one per server per sweep, reversing the sweep direction each time,
    // until the queue is empty or a full sweep assigns nothing.
    while (regionsToMove.size() > 0) {
      int cnt = 0;
      int i = incr > 0 ? 0 : underloadedServers.size() - 1;
      for (; i >= 0 && i < underloadedServers.size(); i += incr) {
        if (regionsToMove.isEmpty()) break;
        ServerName si = sns.get(i);
        int numToTake = underloadedServers.get(si);
        if (numToTake == 0) continue;

        addRegionPlan(regionsToMove, fetchFromTail, si, regionsToReturn);
        if (emptyRegionServerPresent) {
          fetchFromTail = !fetchFromTail;
        }

        underloadedServers.put(si, numToTake - 1);
        cnt++;
        BalanceInfo bi = serverBalanceInfo.get(si);
        if (bi == null) {
          bi = new BalanceInfo(0, 0);
          serverBalanceInfo.put(si, bi);
        }
        bi.setNumRegionsAdded(bi.getNumRegionsAdded() + 1);
      }
      if (cnt == 0) break;
      // iterates underloadedServers in the other direction
      incr = -incr;
    }
    for (Integer i : underloadedServers.values()) {
      // If we still want to take some, increment needed
      neededRegions += i;
    }

    // If none needed to fill all to min and none left to drain all to max,
    // we are done
    if (neededRegions == 0 && regionsToMove.isEmpty()) {
      long endTime = System.currentTimeMillis();
      LOG.info(
          "Calculated a load balance in "
              + (endTime - startTime)
              + "ms. "
              + "Moving "
              + totalNumMoved
              + " regions off of "
              + serversOverloaded
              + " overloaded servers onto "
              + serversUnderloaded
              + " less loaded servers");
      return regionsToReturn;
    }

    // Need to do a second pass.
    // Either more regions to assign out or servers that are still underloaded

    // If we need more to fill min, grab one from each most loaded until enough
    if (neededRegions != 0) {
      // Walk down most loaded, grabbing one from each until we get enough
      for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server :
          serversByLoad.descendingMap().entrySet()) {
        BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName());
        // Servers with no recorded balance info start unloading from index 0.
        int idx = balanceInfo == null ? 0 : balanceInfo.getNextRegionForUnload();
        if (idx >= server.getValue().size()) break;
        HRegionInfo region = server.getValue().get(idx);
        if (region.isMetaRegion()) continue; // Don't move meta regions.
        regionsToMove.add(new RegionPlan(region, server.getKey().getServerName(), null));
        totalNumMoved++;
        if (--neededRegions == 0) {
          // No more regions needed, done shedding
          break;
        }
      }
    }

    // Now we have a set of regions that must be all assigned out
    // Assign each underloaded up to the min, then if leftovers, assign to max

    // Walk down least loaded, assigning to each to fill up to min
    for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.entrySet()) {
      int regionCount = server.getKey().getLoad();
      if (regionCount >= min) break;
      BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName());
      if (balanceInfo != null) {
        // Include regions already handed to this server in the first pass.
        regionCount += balanceInfo.getNumRegionsAdded();
      }
      if (regionCount >= min) {
        continue;
      }
      int numToTake = min - regionCount;
      int numTaken = 0;
      while (numTaken < numToTake && 0 < regionsToMove.size()) {
        addRegionPlan(
            regionsToMove, fetchFromTail, server.getKey().getServerName(), regionsToReturn);
        numTaken++;
        if (emptyRegionServerPresent) {
          fetchFromTail = !fetchFromTail;
        }
      }
    }

    // If we still have regions to dish out, assign underloaded to max
    if (0 < regionsToMove.size()) {
      for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.entrySet()) {
        int regionCount = server.getKey().getLoad();
        BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName());
        if (balanceInfo != null) {
          regionCount += balanceInfo.getNumRegionsAdded();
        }
        if (regionCount >= max) {
          break;
        }
        // One extra region per server in this pass.
        addRegionPlan(
            regionsToMove, fetchFromTail, server.getKey().getServerName(), regionsToReturn);
        if (emptyRegionServerPresent) {
          fetchFromTail = !fetchFromTail;
        }
        if (regionsToMove.isEmpty()) {
          break;
        }
      }
    }

    long endTime = System.currentTimeMillis();

    // Leftover queued regions or unmet demand mean the passes above did not converge.
    if (!regionsToMove.isEmpty() || neededRegions != 0) {
      // Emit data so can diagnose how balancer went astray.
      LOG.warn(
          "regionsToMove="
              + totalNumMoved
              + ", numServers="
              + numServers
              + ", serversOverloaded="
              + serversOverloaded
              + ", serversUnderloaded="
              + serversUnderloaded);
      StringBuilder sb = new StringBuilder();
      for (Map.Entry<ServerName, List<HRegionInfo>> e : clusterMap.entrySet()) {
        if (sb.length() > 0) sb.append(", ");
        sb.append(e.getKey().toString());
        sb.append(" ");
        sb.append(e.getValue().size());
      }
      LOG.warn("Input " + sb.toString());
    }

    // All done!
    LOG.info(
        "Done. Calculated a load balance in "
            + (endTime - startTime)
            + "ms. "
            + "Moving "
            + totalNumMoved
            + " regions off of "
            + serversOverloaded
            + " overloaded servers onto "
            + serversUnderloaded
            + " less loaded servers");

    return regionsToReturn;
  }
  /**
   * Test the global mem store size in the region server is equal to sum of each region's mem store
   * size, and that it is zero on every region server once all of its online regions have been
   * flushed.
   *
   * <p>The table is closed and the mini cluster is shut down in {@code finally} blocks so that a
   * failed assertion does not leave them running.
   *
   * @throws Exception if the mini cluster cannot be started or a cluster operation fails
   */
  @Test
  public void testGlobalMemStore() throws Exception {
    // Start the cluster
    LOG.info("Starting cluster");
    Configuration conf = HBaseConfiguration.create();
    TEST_UTIL = new HBaseTestingUtility(conf);
    TEST_UTIL.startMiniCluster(1, regionServerNum);
    try {
      cluster = TEST_UTIL.getHBaseCluster();
      LOG.info("Waiting for active/ready master");
      cluster.waitForActiveAndReadyMaster();

      // Create a table with regions
      TableName table = TableName.valueOf("TestGlobalMemStoreSize");
      byte[] family = Bytes.toBytes("family");
      LOG.info("Creating table with " + regionNum + " regions");
      Table ht = TEST_UTIL.createMultiRegionTable(table, family, regionNum);
      try {
        int numRegions = -1;
        try (RegionLocator r = TEST_UTIL.getConnection().getRegionLocator(table)) {
          numRegions = r.getStartKeys().length;
        }
        assertEquals(regionNum, numRegions);
        waitForAllRegionsAssigned();

        // The accounted global size must match the sum over the server's online regions.
        for (HRegionServer server : getOnlineRegionServers()) {
          long globalMemStoreSize = 0;
          for (HRegionInfo regionInfo :
              ProtobufUtil.getOnlineRegions(server.getRSRpcServices())) {
            globalMemStoreSize +=
                server.getFromOnlineRegions(regionInfo.getEncodedName()).getMemstoreSize();
          }
          assertEquals(
              server.getRegionServerAccounting().getGlobalMemstoreSize(), globalMemStoreSize);
        }

        // check the global memstore size after flush
        int i = 0;
        for (HRegionServer server : getOnlineRegionServers()) {
          LOG.info(
              "Starting flushes on "
                  + server.getServerName()
                  + ", size="
                  + server.getRegionServerAccounting().getGlobalMemstoreSize());

          for (HRegionInfo regionInfo :
              ProtobufUtil.getOnlineRegions(server.getRSRpcServices())) {
            Region r = server.getFromOnlineRegions(regionInfo.getEncodedName());
            flush(r, server);
          }
          LOG.info("Post flush on " + server.getServerName());

          // Wait up to one second for the accounting to drop to zero. The loop must keep going
          // while the deadline is still in the future; the previous comparison was inverted, so
          // the loop body never ran.
          long deadline = System.currentTimeMillis() + 1000;
          while (server.getRegionServerAccounting().getGlobalMemstoreSize() != 0
              && System.currentTimeMillis() < deadline) {
            Threads.sleep(10);
          }

          long size = server.getRegionServerAccounting().getGlobalMemstoreSize();
          if (size > 0) {
            // If size > 0, see if its because the meta region got edits while
            // our test was running....
            for (HRegionInfo regionInfo :
                ProtobufUtil.getOnlineRegions(server.getRSRpcServices())) {
              Region r = server.getFromOnlineRegions(regionInfo.getEncodedName());
              long l = r.getMemstoreSize();
              if (l > 0) {
                // Only meta could have edits at this stage.  Give it another flush
                // clear them.
                assertTrue(regionInfo.isMetaRegion());
                LOG.info(r.toString() + " " + l + ", reflushing");
                r.flush(true);
              }
            }
          }
          size = server.getRegionServerAccounting().getGlobalMemstoreSize();
          assertEquals("Server=" + server.getServerName() + ", i=" + i++, 0, size);
        }
      } finally {
        ht.close();
      }
    } finally {
      TEST_UTIL.shutdownMiniCluster();
    }
  }