/**
 * Builds one call per replica id in the given range and submits each of them to the completion
 * service.
 *
 * @param cs - the completion service the calls are submitted to
 * @param rl - the region locations, used to look up the location of each replica
 * @param min - the id of the first replica, inclusive
 * @param max - the id of the last replica, inclusive.
 */
private void addCallsForReplica(
    ResultBoundedCompletionService<Result> cs, RegionLocations rl, int min, int max) {
  int replicaId = min;
  while (replicaId <= max) {
    HRegionLocation replicaLocation = rl.getRegionLocation(replicaId);
    // The replica id is also passed to submit() as the third argument.
    cs.submit(new ReplicaRegionServerCallable(replicaId, replicaLocation), callTimeout, replicaId);
    replicaId++;
  }
}
/** * Two responsibilities - if the call is already completed (by another replica) stops the * retries. - set the location to the right region, depending on the replica. */ @Override public void prepare(final boolean reload) throws IOException { if (controller.isCanceled()) return; if (Thread.interrupted()) { throw new InterruptedIOException(); } if (reload || location == null) { RegionLocations rl = getRegionLocations(false, id, cConnection, tableName, get.getRow()); location = id < rl.size() ? rl.getRegionLocation(id) : null; } if (location == null || location.getServerName() == null) { // With this exception, there will be a retry. The location can be null for a replica // when the table is created or after a split. throw new HBaseIOException("There is no location for replica id #" + id); } ServerName dest = location.getServerName(); setStub(cConnection.getClient(dest)); }
public void append( TableName tableName, byte[] encodedRegionName, byte[] row, List<Entry> entries) throws IOException { if (disabledAndDroppedTables.getIfPresent(tableName) != null) { if (LOG.isTraceEnabled()) { LOG.trace( "Skipping " + entries.size() + " entries because table " + tableName + " is cached as a disabled or dropped table"); for (Entry entry : entries) { LOG.trace("Skipping : " + entry); } } sink.getSkippedEditsCounter().addAndGet(entries.size()); return; } // If the table is disabled or dropped, we should not replay the entries, and we can skip // replaying them. However, we might not know whether the table is disabled until we // invalidate the cache and check from meta RegionLocations locations = null; boolean useCache = true; while (true) { // get the replicas of the primary region try { locations = RegionReplicaReplayCallable.getRegionLocations( connection, tableName, row, useCache, 0); if (locations == null) { throw new HBaseIOException( "Cannot locate locations for " + tableName + ", row:" + Bytes.toStringBinary(row)); } } catch (TableNotFoundException e) { if (LOG.isTraceEnabled()) { LOG.trace( "Skipping " + entries.size() + " entries because table " + tableName + " is dropped. Adding table to cache."); for (Entry entry : entries) { LOG.trace("Skipping : " + entry); } } disabledAndDroppedTables.put(tableName, Boolean.TRUE); // put to cache. Value ignored // skip this entry sink.getSkippedEditsCounter().addAndGet(entries.size()); return; } // check whether we should still replay this entry. If the regions are changed, or the // entry is not coming from the primary region, filter it out. 
HRegionLocation primaryLocation = locations.getDefaultRegionLocation(); if (!Bytes.equals( primaryLocation.getRegionInfo().getEncodedNameAsBytes(), encodedRegionName)) { if (useCache) { useCache = false; continue; // this will retry location lookup } if (LOG.isTraceEnabled()) { LOG.trace( "Skipping " + entries.size() + " entries in table " + tableName + " because located region " + primaryLocation.getRegionInfo().getEncodedName() + " is different than the original region " + Bytes.toStringBinary(encodedRegionName) + " from WALEdit"); for (Entry entry : entries) { LOG.trace("Skipping : " + entry); } } sink.getSkippedEditsCounter().addAndGet(entries.size()); return; } break; } if (locations.size() == 1) { return; } ArrayList<Future<ReplicateWALEntryResponse>> tasks = new ArrayList<Future<ReplicateWALEntryResponse>>(locations.size() - 1); // All passed entries should belong to one region because it is coming from the EntryBuffers // split per region. But the regions might split and merge (unlike log recovery case). for (int replicaId = 0; replicaId < locations.size(); replicaId++) { HRegionLocation location = locations.getRegionLocation(replicaId); if (!RegionReplicaUtil.isDefaultReplica(replicaId)) { HRegionInfo regionInfo = location == null ? 
RegionReplicaUtil.getRegionInfoForReplica( locations.getDefaultRegionLocation().getRegionInfo(), replicaId) : location.getRegionInfo(); RegionReplicaReplayCallable callable = new RegionReplicaReplayCallable( connection, rpcControllerFactory, tableName, location, regionInfo, row, entries, sink.getSkippedEditsCounter()); Future<ReplicateWALEntryResponse> task = pool.submit( new RetryingRpcCallable<ReplicateWALEntryResponse>( rpcRetryingCallerFactory, callable, operationTimeout)); tasks.add(task); } } boolean tasksCancelled = false; for (Future<ReplicateWALEntryResponse> task : tasks) { try { task.get(); } catch (InterruptedException e) { throw new InterruptedIOException(e.getMessage()); } catch (ExecutionException e) { Throwable cause = e.getCause(); if (cause instanceof IOException) { // The table can be disabled or dropped at this time. For disabled tables, we have no // cheap mechanism to detect this case because meta does not contain this information. // HConnection.isTableDisabled() is a zk call which we cannot do for every replay RPC. // So instead we start the replay RPC with retries and // check whether the table is dropped or disabled which might cause // SocketTimeoutException, or RetriesExhaustedException or similar if we get IOE. if (cause instanceof TableNotFoundException || connection.isTableDisabled(tableName)) { if (LOG.isTraceEnabled()) { LOG.trace( "Skipping " + entries.size() + " entries in table " + tableName + " because received exception for dropped or disabled table", cause); for (Entry entry : entries) { LOG.trace("Skipping : " + entry); } } disabledAndDroppedTables.put(tableName, Boolean.TRUE); // put to cache for later. if (!tasksCancelled) { sink.getSkippedEditsCounter().addAndGet(entries.size()); tasksCancelled = true; // so that we do not add to skipped counter again } continue; } // otherwise rethrow throw (IOException) cause; } // unexpected exception throw new IOException(cause); } } }