@Override public Result[] call(int timeout) throws IOException { // If the active replica callable was closed somewhere, invoke the RPC to // really close it. In the case of regular scanners, this applies. We make couple // of RPCs to a RegionServer, and when that region is exhausted, we set // the closed flag. Then an RPC is required to actually close the scanner. if (currentScannerCallable != null && currentScannerCallable.closed) { // For closing we target that exact scanner (and not do replica fallback like in // the case of normal reads) if (LOG.isDebugEnabled()) { LOG.debug("Closing scanner " + currentScannerCallable.scannerId); } Result[] r = currentScannerCallable.call(timeout); currentScannerCallable = null; return r; } // We need to do the following: // 1. When a scan goes out to a certain replica (default or not), we need to // continue to hit that until there is a failure. So store the last successfully invoked // replica // 2. We should close the "losing" scanners (scanners other than the ones we hear back // from first) // RegionLocations rl = RpcRetryingCallerWithReadReplicas.getRegionLocations( true, RegionReplicaUtil.DEFAULT_REPLICA_ID, cConnection, tableName, currentScannerCallable.getRow()); // allocate a boundedcompletion pool of some multiple of number of replicas. // We want to accomodate some RPCs for redundant replica scans (but are still in progress) BoundedCompletionService<Pair<Result[], ScannerCallable>> cs = new BoundedCompletionService<Pair<Result[], ScannerCallable>>(pool, rl.size() * 5); List<ExecutionException> exceptions = null; int submitted = 0, completed = 0; AtomicBoolean done = new AtomicBoolean(false); replicaSwitched.set(false); // submit call for the primary replica. submitted += addCallsForCurrentReplica(cs, rl); try { // wait for the timeout to see whether the primary responds back Future<Pair<Result[], ScannerCallable>> f = cs.poll(timeBeforeReplicas, TimeUnit.MICROSECONDS); // Yes, microseconds if (f != null) { Pair<Result[], ScannerCallable> r = f.get(); if (r != null && r.getSecond() != null) { updateCurrentlyServingReplica(r.getSecond(), r.getFirst(), done, pool); } return r == null ? null : r.getFirst(); // great we got a response } } catch (ExecutionException e) { // the primary call failed with RetriesExhaustedException or DoNotRetryIOException // but the secondaries might still succeed. Continue on the replica RPCs. exceptions = new ArrayList<ExecutionException>(rl.size()); exceptions.add(e); completed++; } catch (CancellationException e) { throw new InterruptedIOException(e.getMessage()); } catch (InterruptedException e) { throw new InterruptedIOException(e.getMessage()); } // submit call for the all of the secondaries at once // TODO: this may be an overkill for large region replication submitted += addCallsForOtherReplicas(cs, rl, 0, rl.size() - 1); try { while (completed < submitted) { try { Future<Pair<Result[], ScannerCallable>> f = cs.take(); Pair<Result[], ScannerCallable> r = f.get(); if (r != null && r.getSecond() != null) { updateCurrentlyServingReplica(r.getSecond(), r.getFirst(), done, pool); } return r == null ? null : r.getFirst(); // great we got an answer } catch (ExecutionException e) { // if not cancel or interrupt, wait until all RPC's are done // one of the tasks failed. Save the exception for later. if (exceptions == null) exceptions = new ArrayList<ExecutionException>(rl.size()); exceptions.add(e); completed++; } } } catch (CancellationException e) { throw new InterruptedIOException(e.getMessage()); } catch (InterruptedException e) { throw new InterruptedIOException(e.getMessage()); } finally { // We get there because we were interrupted or because one or more of the // calls succeeded or failed. In all case, we stop all our tasks. cs.cancelAll(true); } if (exceptions != null && !exceptions.isEmpty()) { RpcRetryingCallerWithReadReplicas.throwEnrichedException( exceptions.get(0), retries); // just rethrow the first exception for now. } return null; // unreachable }