/**
   * Algorithm:
   *
   * <ul>
   *   <li>we put the query into the execution pool;
   *   <li>after x ms, if we don't have a result, we add the queries for the secondary replicas;
   *   <li>we take the first answer;
   *   <li>when done, we cancel what's left.
   * </ul>
   *
   * <p>Cancelling means:
   *
   * <ul>
   *   <li>removing the call from the pool if it has not actually started;
   *   <li>interrupting the call if it has started.
   * </ul>
   *
   * <p>Client side, we need to take into account that a call is not executed immediately after
   * being put into the pool, and that a call runs on its own thread, so we should not multiply
   * the number of threads by the number of replicas. Server side, it is much better if we can
   * cancel a call while it is still in the handler pool, as a call can involve some i/o.
   *
   * <p>Globally, the usual retries, timeouts and so on still apply, but per replica rather than
   * globally: we continue until all retries are exhausted or all timeouts are exceeded. (A
   * simplified, self-contained sketch of this hedged pattern follows this method.)
   */
  public synchronized Result call()
      throws DoNotRetryIOException, InterruptedIOException, RetriesExhaustedException {
    boolean isTargetReplicaSpecified = (get.getReplicaId() >= 0);

    RegionLocations rl =
        getRegionLocations(
            true,
            (isTargetReplicaSpecified ? get.getReplicaId() : RegionReplicaUtil.DEFAULT_REPLICA_ID),
            cConnection,
            tableName,
            get.getRow());
    ResultBoundedCompletionService<Result> cs =
        new ResultBoundedCompletionService<Result>(this.rpcRetryingCallerFactory, pool, rl.size());

    if (isTargetReplicaSpecified) {
      addCallsForReplica(cs, rl, get.getReplicaId(), get.getReplicaId());
    } else {
      addCallsForReplica(cs, rl, 0, 0);
      try {
        // wait for the timeout to see whether the primary responds
        Future<Result> f = cs.poll(timeBeforeReplicas, TimeUnit.MICROSECONDS); // Yes, microseconds
        if (f != null) {
          return f.get(); // great we got a response
        }
      } catch (ExecutionException e) {
        throwEnrichedException(e, retries);
      } catch (CancellationException e) {
        throw new InterruptedIOException();
      } catch (InterruptedException e) {
        throw new InterruptedIOException();
      }

      // submit calls for all of the secondaries at once
      addCallsForReplica(cs, rl, 1, rl.size() - 1);
    }

    try {
      try {
        Future<Result> f = cs.take();
        return f.get();
      } catch (ExecutionException e) {
        throwEnrichedException(e, retries);
      }
    } catch (CancellationException e) {
      throw new InterruptedIOException();
    } catch (InterruptedException e) {
      throw new InterruptedIOException();
    } finally {
      // We get there because we were interrupted or because one or more of the
      // calls succeeded or failed. In all cases, we stop all our tasks.
      cs.cancelAll();
    }

    return null; // unreachable
  }
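// A minimal, self-contained sketch of the hedged-read pattern described in the Javadoc above,
// using only java.util.concurrent. The names (HedgedReadSketch, primaryTimeoutMicros, ...) are
// assumptions for illustration; this is not the HBase implementation, only the
// "primary first, then hedge with replicas, then cancel the rest" idea.
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;

final class HedgedReadSketch<R> {
  private final ExecutorService pool;

  HedgedReadSketch(ExecutorService pool) {
    this.pool = pool;
  }

  R read(Callable<R> primary, List<Callable<R>> replicas, long primaryTimeoutMicros)
      throws InterruptedException, ExecutionException {
    CompletionService<R> cs = new ExecutorCompletionService<>(pool);
    List<Future<R>> submitted = new ArrayList<>();
    submitted.add(cs.submit(primary));
    try {
      // Give the primary a head start; if it answers within the delay we are done.
      Future<R> first = cs.poll(primaryTimeoutMicros, TimeUnit.MICROSECONDS);
      if (first != null) {
        return first.get();
      }
      // The primary is slow: hedge by submitting the replica reads as well.
      for (Callable<R> replica : replicas) {
        submitted.add(cs.submit(replica));
      }
      // Take whichever call, primary or replica, finishes first.
      return cs.take().get();
    } finally {
      // Cancel everything still queued or running; interruption stops calls that already started.
      for (Future<R> f : submitted) {
        f.cancel(true);
      }
    }
  }
}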
 /**
  * Creates the calls and submits them.
  *
  * @param cs - the completion service to use for submitting
  * @param rl - the region locations
  * @param min - the id of the first replica, inclusive
  * @param max - the id of the last replica, inclusive.
  */
 private void addCallsForReplica(
     ResultBoundedCompletionService<Result> cs, RegionLocations rl, int min, int max) {
   for (int id = min; id <= max; id++) {
     HRegionLocation hrl = rl.getRegionLocation(id);
     ReplicaRegionServerCallable callOnReplica = new ReplicaRegionServerCallable(id, hrl);
     cs.submit(callOnReplica, callTimeout, id);
   }
 }
 private void printLocations(Result r) {
   RegionLocations rl = null;
   if (r == null) {
     LOG.info("FAILED FOR null Result");
     return;
   }
   LOG.info("FAILED FOR " + resultToString(r) + " Stale " + r.isStale());
   if (r.getRow() == null) {
     return;
   }
   try {
     rl = ((ClusterConnection) connection).locateRegion(tableName, r.getRow(), true, true);
   } catch (IOException e) {
     LOG.warn("Couldn't get locations for row " + Bytes.toString(r.getRow()));
   }
   if (rl == null) {
     // The lookup failed and was already logged above; there is nothing more to print.
     return;
   }
   HRegionLocation[] locations = rl.getRegionLocations();
   for (HRegionLocation h : locations) {
     LOG.info("LOCATION " + h);
   }
 }
    /**
     * Two responsibilities:
     *
     * <ul>
     *   <li>if the call has already been completed by another replica, stop the retries;
     *   <li>set the location to the right region, depending on the replica.
     * </ul>
     *
     * <p>(A standalone sketch of the first point follows this method.)
     */
    @Override
    public void prepare(final boolean reload) throws IOException {
      if (controller.isCanceled()) return;

      if (Thread.interrupted()) {
        throw new InterruptedIOException();
      }

      if (reload || location == null) {
        RegionLocations rl = getRegionLocations(false, id, cConnection, tableName, get.getRow());
        location = id < rl.size() ? rl.getRegionLocation(id) : null;
      }

      if (location == null || location.getServerName() == null) {
        // With this exception, there will be a retry. The location can be null for a replica
        //  when the table is created or after a split.
        throw new HBaseIOException("There is no location for replica id #" + id);
      }

      ServerName dest = location.getServerName();

      setStub(cConnection.getClient(dest));
    }
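// A small sketch of the first responsibility above: stop retrying once another replica has
// already answered. The names here (ReplicaAttemptSketch, done) are assumptions for
// illustration; the real code consults the RPC controller's cancellation state rather than
// a shared flag.
import java.io.InterruptedIOException;
import java.util.concurrent.CancellationException;
import java.util.concurrent.atomic.AtomicBoolean;

final class ReplicaAttemptSketch {
  private final AtomicBoolean done; // shared by every replica callable of one logical read

  ReplicaAttemptSketch(AtomicBoolean done) {
    this.done = done;
  }

  /** Invoked before each attempt, like prepare() above: bail out if a sibling already won. */
  void prepare() throws InterruptedIOException {
    if (done.get()) {
      // Another replica already produced the result; abort instead of retrying.
      throw new CancellationException("call already completed by another replica");
    }
    if (Thread.interrupted()) {
      throw new InterruptedIOException();
    }
    // The real callable also resolves this replica's region location here and sets the
    // client stub for the destination server; that part is omitted from this sketch.
  }
}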
  @Override
  public Result[] call(int timeout) throws IOException {
    // If the active replica callable was closed somewhere, invoke the RPC to
    // really close it. This applies to regular scanners: we make a couple of RPCs
    // to a RegionServer, and when that region is exhausted, we set the closed flag.
    // Then an RPC is required to actually close the scanner.
    if (currentScannerCallable != null && currentScannerCallable.closed) {
      // For closing we target that exact scanner (and not do replica fallback like in
      // the case of normal reads)
      if (LOG.isDebugEnabled()) {
        LOG.debug("Closing scanner " + currentScannerCallable.scannerId);
      }
      Result[] r = currentScannerCallable.call(timeout);
      currentScannerCallable = null;
      return r;
    }
    // We need to do the following:
    // 1. When a scan goes out to a certain replica (default or not), we need to keep
    //    hitting that replica until there is a failure. So store the last successfully
    //    invoked replica.
    // 2. We should close the "losing" scanners (the scanners other than the one we hear
    //    back from first).
    // (A standalone sketch of this selection rule follows this method.)
    RegionLocations rl =
        RpcRetryingCallerWithReadReplicas.getRegionLocations(
            true,
            RegionReplicaUtil.DEFAULT_REPLICA_ID,
            cConnection,
            tableName,
            currentScannerCallable.getRow());

    // Allocate a bounded-completion pool sized at some multiple of the number of replicas.
    // We want to accommodate some RPCs for redundant replica scans that are still in progress.
    BoundedCompletionService<Pair<Result[], ScannerCallable>> cs =
        new BoundedCompletionService<Pair<Result[], ScannerCallable>>(pool, rl.size() * 5);

    List<ExecutionException> exceptions = null;
    int submitted = 0, completed = 0;
    AtomicBoolean done = new AtomicBoolean(false);
    replicaSwitched.set(false);
    // submit call for the primary replica.
    submitted += addCallsForCurrentReplica(cs, rl);
    try {
      // wait for the timeout to see whether the primary responds
      Future<Pair<Result[], ScannerCallable>> f =
          cs.poll(timeBeforeReplicas, TimeUnit.MICROSECONDS); // Yes, microseconds
      if (f != null) {
        Pair<Result[], ScannerCallable> r = f.get();
        if (r != null && r.getSecond() != null) {
          updateCurrentlyServingReplica(r.getSecond(), r.getFirst(), done, pool);
        }
        return r == null ? null : r.getFirst(); // great we got a response
      }
    } catch (ExecutionException e) {
      // the primary call failed with RetriesExhaustedException or DoNotRetryIOException
      // but the secondaries might still succeed. Continue on the replica RPCs.
      exceptions = new ArrayList<ExecutionException>(rl.size());
      exceptions.add(e);
      completed++;
    } catch (CancellationException e) {
      throw new InterruptedIOException(e.getMessage());
    } catch (InterruptedException e) {
      throw new InterruptedIOException(e.getMessage());
    }
    // submit calls for all of the secondaries at once
    // TODO: this may be overkill for large region replication
    submitted += addCallsForOtherReplicas(cs, rl, 0, rl.size() - 1);
    try {
      while (completed < submitted) {
        try {
          Future<Pair<Result[], ScannerCallable>> f = cs.take();
          Pair<Result[], ScannerCallable> r = f.get();
          if (r != null && r.getSecond() != null) {
            updateCurrentlyServingReplica(r.getSecond(), r.getFirst(), done, pool);
          }
          return r == null ? null : r.getFirst(); // great we got an answer
        } catch (ExecutionException e) {
          // One of the tasks failed. Save the exception for later; unless we are cancelled
          // or interrupted, keep waiting until all RPCs are done.
          if (exceptions == null) exceptions = new ArrayList<ExecutionException>(rl.size());
          exceptions.add(e);
          completed++;
        }
      }
    } catch (CancellationException e) {
      throw new InterruptedIOException(e.getMessage());
    } catch (InterruptedException e) {
      throw new InterruptedIOException(e.getMessage());
    } finally {
      // We get there because we were interrupted or because one or more of the
      // calls succeeded or failed. In all cases, we stop all our tasks.
      cs.cancelAll(true);
    }

    if (exceptions != null && !exceptions.isEmpty()) {
      RpcRetryingCallerWithReadReplicas.throwEnrichedException(
          exceptions.get(0), retries); // just rethrow the first exception for now.
    }
    return null; // unreachable
  }
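// A standalone sketch of the replica-stickiness rule from the comments above: keep scanning the
// replica that answered last until it fails, and cancel ("close") the calls that lost the race.
// All names here (StickyReplicaPicker, recordWinner, ...) are assumptions for illustration, not
// HBase classes.
import java.util.List;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicInteger;

final class StickyReplicaPicker {
  private static final int PRIMARY = 0;
  private final AtomicInteger lastSuccessful = new AtomicInteger(PRIMARY);

  /** Which replica the next scan RPC should target. */
  int nextReplica() {
    return lastSuccessful.get();
  }

  /** Record the winner of a hedged round and cancel the losing in-flight calls. */
  <T> void recordWinner(int winningReplica, List<Future<T>> inFlight) {
    lastSuccessful.set(winningReplica);
    for (Future<T> f : inFlight) {
      f.cancel(true); // cancelling the winner is a no-op since it is already done
    }
  }

  /** On a failure of the sticky replica, fall back to the primary and hedge again. */
  void onFailure() {
    lastSuccessful.set(PRIMARY);
  }
}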
    public void append(
        TableName tableName, byte[] encodedRegionName, byte[] row, List<Entry> entries)
        throws IOException {

      if (disabledAndDroppedTables.getIfPresent(tableName) != null) {
        if (LOG.isTraceEnabled()) {
          LOG.trace(
              "Skipping "
                  + entries.size()
                  + " entries because table "
                  + tableName
                  + " is cached as a disabled or dropped table");
          for (Entry entry : entries) {
            LOG.trace("Skipping : " + entry);
          }
        }
        sink.getSkippedEditsCounter().addAndGet(entries.size());
        return;
      }

      // If the table is disabled or dropped, we should not replay the entries and can skip them.
      // However, we might not know whether the table is disabled until we invalidate the cache
      // and check against meta. (A standalone sketch of the two-pass lookup below follows this
      // method.)
      RegionLocations locations = null;
      boolean useCache = true;
      while (true) {
        // get the replicas of the primary region
        try {
          locations =
              RegionReplicaReplayCallable.getRegionLocations(
                  connection, tableName, row, useCache, 0);

          if (locations == null) {
            throw new HBaseIOException(
                "Cannot locate locations for " + tableName + ", row:" + Bytes.toStringBinary(row));
          }
        } catch (TableNotFoundException e) {
          if (LOG.isTraceEnabled()) {
            LOG.trace(
                "Skipping "
                    + entries.size()
                    + " entries because table "
                    + tableName
                    + " is dropped. Adding table to cache.");
            for (Entry entry : entries) {
              LOG.trace("Skipping : " + entry);
            }
          }
          disabledAndDroppedTables.put(tableName, Boolean.TRUE); // put to cache. Value ignored
          // skip this entry
          sink.getSkippedEditsCounter().addAndGet(entries.size());
          return;
        }

        // Check whether we should still replay this entry. If the regions have changed, or the
        // entry does not come from the primary region, filter it out.
        HRegionLocation primaryLocation = locations.getDefaultRegionLocation();
        if (!Bytes.equals(
            primaryLocation.getRegionInfo().getEncodedNameAsBytes(), encodedRegionName)) {
          if (useCache) {
            useCache = false;
            continue; // this will retry location lookup
          }
          if (LOG.isTraceEnabled()) {
            LOG.trace(
                "Skipping "
                    + entries.size()
                    + " entries in table "
                    + tableName
                    + " because located region "
                    + primaryLocation.getRegionInfo().getEncodedName()
                    + " is different than the original region "
                    + Bytes.toStringBinary(encodedRegionName)
                    + " from WALEdit");
            for (Entry entry : entries) {
              LOG.trace("Skipping : " + entry);
            }
          }
          sink.getSkippedEditsCounter().addAndGet(entries.size());
          return;
        }
        break;
      }

      // Only the default (primary) replica exists; there is nothing to replicate to.
      if (locations.size() == 1) {
        return;
      }

      ArrayList<Future<ReplicateWALEntryResponse>> tasks =
          new ArrayList<Future<ReplicateWALEntryResponse>>(locations.size() - 1);

      // All passed entries should belong to one region, because they come from EntryBuffers,
      // which is split per region. But regions might split and merge (unlike the log recovery
      // case).
      for (int replicaId = 0; replicaId < locations.size(); replicaId++) {
        HRegionLocation location = locations.getRegionLocation(replicaId);
        if (!RegionReplicaUtil.isDefaultReplica(replicaId)) {
          HRegionInfo regionInfo =
              location == null
                  ? RegionReplicaUtil.getRegionInfoForReplica(
                      locations.getDefaultRegionLocation().getRegionInfo(), replicaId)
                  : location.getRegionInfo();
          RegionReplicaReplayCallable callable =
              new RegionReplicaReplayCallable(
                  connection,
                  rpcControllerFactory,
                  tableName,
                  location,
                  regionInfo,
                  row,
                  entries,
                  sink.getSkippedEditsCounter());
          Future<ReplicateWALEntryResponse> task =
              pool.submit(
                  new RetryingRpcCallable<ReplicateWALEntryResponse>(
                      rpcRetryingCallerFactory, callable, operationTimeout));
          tasks.add(task);
        }
      }

      boolean tasksCancelled = false;
      for (Future<ReplicateWALEntryResponse> task : tasks) {
        try {
          task.get();
        } catch (InterruptedException e) {
          throw new InterruptedIOException(e.getMessage());
        } catch (ExecutionException e) {
          Throwable cause = e.getCause();
          if (cause instanceof IOException) {
            // The table can be disabled or dropped at this time. For disabled tables, we have no
            // cheap mechanism to detect this case because meta does not contain this information.
            // HConnection.isTableDisabled() is a zk call which we cannot do for every replay RPC.
            // So instead we start the replay RPC with retries, and only when we get an IOException
            // (which might be a SocketTimeoutException, a RetriesExhaustedException or similar)
            // do we check whether the table has been dropped or disabled.
            if (cause instanceof TableNotFoundException || connection.isTableDisabled(tableName)) {
              if (LOG.isTraceEnabled()) {
                LOG.trace(
                    "Skipping "
                        + entries.size()
                        + " entries in table "
                        + tableName
                        + " because received exception for dropped or disabled table",
                    cause);
                for (Entry entry : entries) {
                  LOG.trace("Skipping : " + entry);
                }
              }
              disabledAndDroppedTables.put(tableName, Boolean.TRUE); // put to cache for later.
              if (!tasksCancelled) {
                sink.getSkippedEditsCounter().addAndGet(entries.size());
                tasksCancelled = true; // so that we do not add to skipped counter again
              }
              continue;
            }
            // otherwise rethrow
            throw (IOException) cause;
          }
          // unexpected exception
          throw new IOException(cause);
        }
      }
    }
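// A standalone sketch of the two-pass location check used in append() above: trust the location
// cache first, and only if the located region does not match the region recorded in the WAL
// entry, retry once bypassing the cache. The Resolver interface and all names below are
// assumptions for illustration, not part of the HBase client API.
import java.io.IOException;
import java.util.Arrays;

final class TwoPassLocatorSketch {

  interface Resolver {
    /** Returns the encoded name of the region covering the row; may serve from a cache. */
    byte[] locateEncodedRegion(byte[] row, boolean useCache) throws IOException;
  }

  /**
   * Returns true if the expected region still hosts the row: check the cached location first,
   * then fall back to one fresh (uncached) lookup before giving up.
   */
  static boolean regionStillMatches(Resolver resolver, byte[] row, byte[] expectedRegion)
      throws IOException {
    boolean useCache = true;
    while (true) {
      byte[] located = resolver.locateEncodedRegion(row, useCache);
      if (Arrays.equals(located, expectedRegion)) {
        return true; // the located region agrees with the WAL entry's region
      }
      if (useCache) {
        useCache = false; // the cache may be stale: retry once against the authoritative source
        continue;
      }
      return false; // a fresh lookup still disagrees: the region split or merged, skip the edits
    }
  }
}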