protected Map<ServerName, List<HRegionInfo>> createServerMap(int numNodes,
    int numRegions,
    int numRegionsPerServer,
    int replication,
    int numTables) {
  // construct a cluster of numNodes, having a total of numRegions. Each RS will hold
  // numRegionsPerServer many regions except for the last one, which will host all the
  // remaining regions
  int[] cluster = new int[numNodes];
  for (int i = 0; i < numNodes; i++) {
    cluster[i] = numRegionsPerServer;
  }
  cluster[cluster.length - 1] = numRegions - ((cluster.length - 1) * numRegionsPerServer);
  Map<ServerName, List<HRegionInfo>> clusterState = mockClusterServers(cluster, numTables);
  if (replication > 0) {
    // replicate the regions to the same servers
    for (List<HRegionInfo> regions : clusterState.values()) {
      int length = regions.size();
      for (int i = 0; i < length; i++) {
        for (int r = 1; r < replication; r++) {
          regions.add(RegionReplicaUtil.getRegionInfoForReplica(regions.get(i), r));
        }
      }
    }
  }
  return clusterState;
}
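// A minimal sketch of how the map produced above might be sanity-checked. It is assumed
// to live in the same test class (so createServerMap and JUnit's Assert are in scope);
// the test name is illustrative and the "one map entry per mocked server" expectation is
// an assumption about mockClusterServers, not something the code above guarantees.
@Test
public void testCreateServerMapShape() {
  // 4 servers, 10 primary regions (2 per server, the last server takes the remaining 4),
  // replication factor 2, 1 table
  Map<ServerName, List<HRegionInfo>> map = createServerMap(4, 10, 2, 2, 1);
  int totalReplicas = 0;
  for (List<HRegionInfo> regionsOnServer : map.values()) {
    totalReplicas += regionsOnServer.size();
  }
  Assert.assertEquals(4, map.size());         // assumed: one entry per mocked server
  Assert.assertEquals(10 * 2, totalReplicas); // every primary region plus its one replica
}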
/**
 * Record the given replica under its default (primary) replica, so that all replicas of a
 * region can later be looked up from the default one.
 */
private void addToReplicaMapping(HRegionInfo hri) {
  HRegionInfo defaultReplica = RegionReplicaUtil.getRegionInfoForDefaultReplica(hri);
  Set<HRegionInfo> replicas = defaultReplicaToOtherReplicas.get(defaultReplica);
  if (replicas == null) {
    replicas = new HashSet<HRegionInfo>();
    defaultReplicaToOtherReplicas.put(defaultReplica, replicas);
  }
  replicas.add(hri);
}
/**
 * @param zkw ZooKeeper watcher to use for the location lookup
 * @param replicaId the ID of the hbase:meta replica to look up
 * @return meta table regions and their locations.
 */
public List<Pair<HRegionInfo, ServerName>> getMetaRegionsAndLocations(ZooKeeperWatcher zkw,
    int replicaId) {
  ServerName serverName = getMetaRegionLocation(zkw, replicaId);
  List<Pair<HRegionInfo, ServerName>> list = new ArrayList<Pair<HRegionInfo, ServerName>>();
  list.add(new Pair<HRegionInfo, ServerName>(
      RegionReplicaUtil.getRegionInfoForReplica(HRegionInfo.FIRST_META_REGIONINFO, replicaId),
      serverName));
  return list;
}
/**
 * Remove the given replica from the mapping kept under its default (primary) replica,
 * dropping the whole entry once no replicas remain.
 */
private void removeFromReplicaMapping(HRegionInfo hri) {
  HRegionInfo defaultReplica = RegionReplicaUtil.getRegionInfoForDefaultReplica(hri);
  Set<HRegionInfo> replicas = defaultReplicaToOtherReplicas.get(defaultReplica);
  if (replicas != null) {
    replicas.remove(hri);
    if (replicas.isEmpty()) {
      defaultReplicaToOtherReplicas.remove(defaultReplica);
    }
  }
}
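// For context, a sketch of the state these two helpers maintain. The field types are
// inferred from the code above; the regionAssigned/regionUnassigned wrappers are purely
// illustrative call sites, not the original methods.
private final Map<HRegionInfo, ServerName> regionAssignments =
    new HashMap<HRegionInfo, ServerName>();
// default replica -> all replicas currently tracked for that region
private final Map<HRegionInfo, Set<HRegionInfo>> defaultReplicaToOtherReplicas =
    new HashMap<HRegionInfo, Set<HRegionInfo>>();

synchronized void regionAssigned(HRegionInfo hri, ServerName sn) { // hypothetical helper
  regionAssignments.put(hri, sn);
  addToReplicaMapping(hri);
}

synchronized void regionUnassigned(HRegionInfo hri) {              // hypothetical helper
  regionAssignments.remove(hri);
  removeFromReplicaMapping(hri);
}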
/**
 * Asserts that no two replicas of the same region are hosted on the same host, and, if a
 * RackManager is supplied, that no two replicas share a rack either.
 */
public void assertRegionReplicaPlacement(Map<ServerName, List<HRegionInfo>> serverMap,
    RackManager rackManager) {
  TreeMap<String, Set<HRegionInfo>> regionsPerHost = new TreeMap<String, Set<HRegionInfo>>();
  TreeMap<String, Set<HRegionInfo>> regionsPerRack = new TreeMap<String, Set<HRegionInfo>>();

  for (Entry<ServerName, List<HRegionInfo>> entry : serverMap.entrySet()) {
    String hostname = entry.getKey().getHostname();
    Set<HRegionInfo> infos = regionsPerHost.get(hostname);
    if (infos == null) {
      infos = new HashSet<HRegionInfo>();
      regionsPerHost.put(hostname, infos);
    }
    for (HRegionInfo info : entry.getValue()) {
      HRegionInfo primaryInfo = RegionReplicaUtil.getRegionInfoForDefaultReplica(info);
      if (!infos.add(primaryInfo)) {
        Assert.fail("Two or more region replicas are hosted on the same host after balance");
      }
    }
  }

  if (rackManager == null) {
    return;
  }

  for (Entry<ServerName, List<HRegionInfo>> entry : serverMap.entrySet()) {
    String rack = rackManager.getRack(entry.getKey());
    Set<HRegionInfo> infos = regionsPerRack.get(rack);
    if (infos == null) {
      infos = new HashSet<HRegionInfo>();
      regionsPerRack.put(rack, infos);
    }
    for (HRegionInfo info : entry.getValue()) {
      HRegionInfo primaryInfo = RegionReplicaUtil.getRegionInfoForDefaultReplica(info);
      if (!infos.add(primaryInfo)) {
        Assert.fail("Two or more region replicas are hosted on the same rack after balance");
      }
    }
  }
}
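// A sketch of how createServerMap and assertRegionReplicaPlacement might be combined in a
// balancer round-trip test. The loadBalancer field and the applyRegionPlans helper are
// assumptions (not part of the code above); the test name is illustrative.
@Test
public void testReplicaPlacementAfterBalance() {
  // 10 hosts, 100 primary regions with 3 replicas each, all in one table
  Map<ServerName, List<HRegionInfo>> serverMap = createServerMap(10, 100, 10, 3, 1);
  List<RegionPlan> plans = loadBalancer.balanceCluster(serverMap); // assumed field
  applyRegionPlans(serverMap, plans);                              // hypothetical helper
  // no RackManager supplied, so only the per-host constraint is checked
  assertRegionReplicaPlacement(serverMap, null);
}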
@Test
public void testMergeWithReplicas() throws Exception {
  final TableName tableName = TableName.valueOf("testMergeWithReplicas");
  // Create table and load data.
  createTableAndLoadData(master, tableName, 5, 2);
  List<Pair<HRegionInfo, ServerName>> initialRegionToServers =
      MetaTableAccessor.getTableRegionsAndLocations(master.getConnection(), tableName);
  // Merge 1st and 2nd region
  PairOfSameType<HRegionInfo> mergedRegions =
      mergeRegionsAndVerifyRegionNum(master, tableName, 0, 2, 5 * 2 - 2);
  List<Pair<HRegionInfo, ServerName>> currentRegionToServers =
      MetaTableAccessor.getTableRegionsAndLocations(master.getConnection(), tableName);
  List<HRegionInfo> initialRegions = new ArrayList<HRegionInfo>();
  for (Pair<HRegionInfo, ServerName> p : initialRegionToServers) {
    initialRegions.add(p.getFirst());
  }
  List<HRegionInfo> currentRegions = new ArrayList<HRegionInfo>();
  for (Pair<HRegionInfo, ServerName> p : currentRegionToServers) {
    currentRegions.add(p.getFirst());
  }
  assertTrue(initialRegions.contains(mergedRegions.getFirst())); // this is the first region
  assertTrue(initialRegions.contains(RegionReplicaUtil.getRegionInfoForReplica(
      mergedRegions.getFirst(), 1))); // this is the replica of the first region
  assertTrue(initialRegions.contains(mergedRegions.getSecond())); // this is the second region
  assertTrue(initialRegions.contains(RegionReplicaUtil.getRegionInfoForReplica(
      mergedRegions.getSecond(), 1))); // this is the replica of the second region
  assertTrue(!initialRegions.contains(currentRegions.get(0))); // this is the new region
  assertTrue(!initialRegions.contains(RegionReplicaUtil.getRegionInfoForReplica(
      currentRegions.get(0), 1))); // replica of the new region
  assertTrue(currentRegions.contains(RegionReplicaUtil.getRegionInfoForReplica(
      currentRegions.get(0), 1))); // replica of the new region
  assertTrue(!currentRegions.contains(RegionReplicaUtil.getRegionInfoForReplica(
      mergedRegions.getFirst(), 1))); // replica of the merged region
  assertTrue(!currentRegions.contains(RegionReplicaUtil.getRegionInfoForReplica(
      mergedRegions.getSecond(), 1))); // replica of the merged region
}
/**
 * Return the replicas (including the default replica) of the given regions, grouped by the
 * ServerName they are currently assigned to.
 *
 * @param regions the regions (of any replica ID) to look up
 * @return a map from server name to the list of replicas hosted on that server
 */
synchronized Map<ServerName, List<HRegionInfo>> getRegionAssignments(
    Collection<HRegionInfo> regions) {
  Map<ServerName, List<HRegionInfo>> map = new HashMap<ServerName, List<HRegionInfo>>();
  for (HRegionInfo region : regions) {
    HRegionInfo defaultReplica = RegionReplicaUtil.getRegionInfoForDefaultReplica(region);
    Set<HRegionInfo> allReplicas = defaultReplicaToOtherReplicas.get(defaultReplica);
    if (allReplicas != null) {
      for (HRegionInfo hri : allReplicas) {
        ServerName server = regionAssignments.get(hri);
        if (server != null) {
          List<HRegionInfo> regionsOnServer = map.get(server);
          if (regionsOnServer == null) {
            regionsOnServer = new ArrayList<HRegionInfo>(1);
            map.put(server, regionsOnServer);
          }
          regionsOnServer.add(hri);
        }
      }
    }
  }
  return map;
}
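// A small illustrative caller for getRegionAssignments. The tableRegions collection and
// the LOG field are assumed to be available in the surrounding code; nothing here is taken
// from the original class beyond the method being called.
Map<ServerName, List<HRegionInfo>> byServer = getRegionAssignments(tableRegions);
for (Map.Entry<ServerName, List<HRegionInfo>> e : byServer.entrySet()) {
  LOG.debug(e.getKey() + " currently hosts " + e.getValue().size()
      + " replica(s) of the requested regions");
}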
/**
 * Verify <code>hbase:meta</code> is deployed and accessible.
 *
 * @param connection the cluster connection to use
 * @param zkw ZooKeeper watcher to use for the location lookup
 * @param timeout How long to wait on zk for the meta address (passed through to the internal
 *          call to getMetaServerConnection)
 * @param replicaId the ID of the hbase:meta replica to verify
 * @return True if the <code>hbase:meta</code> location is healthy.
 * @throws InterruptedException
 * @throws IOException
 */
public boolean verifyMetaRegionLocation(ClusterConnection connection, ZooKeeperWatcher zkw,
    final long timeout, int replicaId) throws InterruptedException, IOException {
  AdminProtos.AdminService.BlockingInterface service = null;
  try {
    service = getMetaServerConnection(connection, zkw, timeout, replicaId);
  } catch (NotAllMetaRegionsOnlineException e) {
    // Pass
  } catch (ServerNotRunningYetException e) {
    // Pass -- remote server is not up so can't be carrying hbase:meta
  } catch (UnknownHostException e) {
    // Pass -- server name doesn't resolve so it can't be assigned anything.
  } catch (RegionServerStoppedException e) {
    // Pass -- server name sends us to a server that is dying or already dead.
  }
  return (service != null)
      && verifyRegionLocation(connection, service, getMetaRegionLocation(zkw, replicaId),
          RegionReplicaUtil.getRegionInfoForReplica(HRegionInfo.FIRST_META_REGIONINFO, replicaId)
              .getRegionName());
}
/**
 * Load the meta region state from the meta server ZNode.
 *
 * @param zkw ZooKeeper watcher to read the meta ZNode from
 * @param replicaId the ID of the hbase:meta replica whose state to load
 * @return the RegionState of the requested meta region replica
 * @throws KeeperException if the ZooKeeper read fails or the data cannot be deserialized
 */
public static RegionState getMetaRegionState(ZooKeeperWatcher zkw, int replicaId)
    throws KeeperException {
  RegionState.State state = RegionState.State.OPEN;
  ServerName serverName = null;
  try {
    byte[] data = ZKUtil.getData(zkw, zkw.getZNodeForReplica(replicaId));
    if (data != null && data.length > 0 && ProtobufUtil.isPBMagicPrefix(data)) {
      try {
        int prefixLen = ProtobufUtil.lengthOfPBMagic();
        ZooKeeperProtos.MetaRegionServer rl =
            ZooKeeperProtos.MetaRegionServer.PARSER.parseFrom(data, prefixLen,
                data.length - prefixLen);
        if (rl.hasState()) {
          state = RegionState.State.convert(rl.getState());
        }
        HBaseProtos.ServerName sn = rl.getServer();
        serverName = ServerName.valueOf(sn.getHostName(), sn.getPort(), sn.getStartCode());
      } catch (InvalidProtocolBufferException e) {
        throw new DeserializationException("Unable to parse meta region location");
      }
    } else {
      // old style of meta region location?
      serverName = ServerName.parseFrom(data);
    }
  } catch (DeserializationException e) {
    throw ZKUtil.convert(e);
  } catch (InterruptedException e) {
    Thread.currentThread().interrupt();
  }
  if (serverName == null) {
    state = RegionState.State.OFFLINE;
  }
  return new RegionState(
      RegionReplicaUtil.getRegionInfoForReplica(HRegionInfo.FIRST_META_REGIONINFO, replicaId),
      state, serverName);
}
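// A minimal sketch that uses getMetaRegionState to report any configured hbase:meta replica
// that is not OPEN. The method itself, its name, the numReplicas parameter and the LOG field
// are assumptions for illustration; only getMetaRegionState comes from the code above.
public static void logUnhealthyMetaReplicas(ZooKeeperWatcher zkw, int numReplicas)
    throws KeeperException {
  for (int replicaId = 0; replicaId < numReplicas; replicaId++) {
    RegionState rs = getMetaRegionState(zkw, replicaId);
    if (!rs.isOpened()) {
      LOG.warn("hbase:meta replica " + replicaId + " is " + rs.getState()
          + (rs.getServerName() == null ? " (no server)" : " on " + rs.getServerName()));
    }
  }
}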
public void append(TableName tableName, byte[] encodedRegionName, byte[] row,
    List<Entry> entries) throws IOException {

  if (disabledAndDroppedTables.getIfPresent(tableName) != null) {
    if (LOG.isTraceEnabled()) {
      LOG.trace("Skipping " + entries.size() + " entries because table " + tableName
          + " is cached as a disabled or dropped table");
      for (Entry entry : entries) {
        LOG.trace("Skipping : " + entry);
      }
    }
    sink.getSkippedEditsCounter().addAndGet(entries.size());
    return;
  }

  // If the table is disabled or dropped, we should not replay the entries, and we can skip
  // replaying them. However, we might not know whether the table is disabled until we
  // invalidate the cache and check from meta
  RegionLocations locations = null;
  boolean useCache = true;
  while (true) {
    // get the replicas of the primary region
    try {
      locations = RegionReplicaReplayCallable.getRegionLocations(connection, tableName, row,
          useCache, 0);
      if (locations == null) {
        throw new HBaseIOException("Cannot locate locations for " + tableName + ", row:"
            + Bytes.toStringBinary(row));
      }
    } catch (TableNotFoundException e) {
      if (LOG.isTraceEnabled()) {
        LOG.trace("Skipping " + entries.size() + " entries because table " + tableName
            + " is dropped. Adding table to cache.");
        for (Entry entry : entries) {
          LOG.trace("Skipping : " + entry);
        }
      }
      disabledAndDroppedTables.put(tableName, Boolean.TRUE); // put to cache. Value ignored
      // skip this entry
      sink.getSkippedEditsCounter().addAndGet(entries.size());
      return;
    }

    // check whether we should still replay this entry. If the regions are changed, or the
    // entry is not coming from the primary region, filter it out.
    HRegionLocation primaryLocation = locations.getDefaultRegionLocation();
    if (!Bytes.equals(primaryLocation.getRegionInfo().getEncodedNameAsBytes(),
        encodedRegionName)) {
      if (useCache) {
        useCache = false;
        continue; // this will retry location lookup
      }
      if (LOG.isTraceEnabled()) {
        LOG.trace("Skipping " + entries.size() + " entries in table " + tableName
            + " because located region " + primaryLocation.getRegionInfo().getEncodedName()
            + " is different than the original region "
            + Bytes.toStringBinary(encodedRegionName) + " from WALEdit");
        for (Entry entry : entries) {
          LOG.trace("Skipping : " + entry);
        }
      }
      sink.getSkippedEditsCounter().addAndGet(entries.size());
      return;
    }
    break;
  }

  if (locations.size() == 1) {
    // only the primary location is known; there are no secondary replicas to replay to
    return;
  }

  ArrayList<Future<ReplicateWALEntryResponse>> tasks =
      new ArrayList<Future<ReplicateWALEntryResponse>>(locations.size() - 1);

  // All passed entries should belong to one region because it is coming from the EntryBuffers
  // split per region. But the regions might split and merge (unlike log recovery case).
  for (int replicaId = 0; replicaId < locations.size(); replicaId++) {
    HRegionLocation location = locations.getRegionLocation(replicaId);
    if (!RegionReplicaUtil.isDefaultReplica(replicaId)) {
      HRegionInfo regionInfo = location == null
          ? RegionReplicaUtil.getRegionInfoForReplica(
              locations.getDefaultRegionLocation().getRegionInfo(), replicaId)
          : location.getRegionInfo();
      RegionReplicaReplayCallable callable = new RegionReplicaReplayCallable(connection,
          rpcControllerFactory, tableName, location, regionInfo, row, entries,
          sink.getSkippedEditsCounter());
      Future<ReplicateWALEntryResponse> task = pool.submit(
          new RetryingRpcCallable<ReplicateWALEntryResponse>(rpcRetryingCallerFactory, callable,
              operationTimeout));
      tasks.add(task);
    }
  }

  boolean tasksCancelled = false;
  for (Future<ReplicateWALEntryResponse> task : tasks) {
    try {
      task.get();
    } catch (InterruptedException e) {
      throw new InterruptedIOException(e.getMessage());
    } catch (ExecutionException e) {
      Throwable cause = e.getCause();
      if (cause instanceof IOException) {
        // The table can be disabled or dropped at this time. For disabled tables, we have no
        // cheap mechanism to detect this case because meta does not contain this information.
        // HConnection.isTableDisabled() is a zk call which we cannot do for every replay RPC.
        // So instead we start the replay RPC with retries and check whether the table is
        // dropped or disabled which might cause SocketTimeoutException, or
        // RetriesExhaustedException or similar if we get IOE.
        if (cause instanceof TableNotFoundException || connection.isTableDisabled(tableName)) {
          if (LOG.isTraceEnabled()) {
            LOG.trace("Skipping " + entries.size() + " entries in table " + tableName
                + " because received exception for dropped or disabled table", cause);
            for (Entry entry : entries) {
              LOG.trace("Skipping : " + entry);
            }
          }
          disabledAndDroppedTables.put(tableName, Boolean.TRUE); // put to cache for later.
          if (!tasksCancelled) {
            sink.getSkippedEditsCounter().addAndGet(entries.size());
            tasksCancelled = true; // so that we do not add to skipped counter again
          }
          continue;
        }
        // otherwise rethrow
        throw (IOException) cause;
      }
      // unexpected exception
      throw new IOException(cause);
    }
  }
}