/**
 * Creates a writer that replays edits through the given connection on behalf of {@code sink}.
 *
 * @param sink the output sink this writer belongs to
 * @param connection cluster connection; its configuration also supplies the RPC factories and
 *     the cache expiry
 * @param pool executor kept for later use by this writer
 * @param operationTimeout timeout value kept for later use by this writer
 */
public RegionReplicaSinkWriter(
        RegionReplicaOutputSink sink,
        ClusterConnection connection,
        ExecutorService pool,
        int operationTimeout) {
      // Collaborators handed in by the caller.
      this.sink = sink;
      this.connection = connection;
      this.pool = pool;
      this.operationTimeout = operationTimeout;

      // RPC factories are built from the connection's configuration.
      this.rpcRetryingCallerFactory =
          RpcRetryingCallerFactory.instantiate(connection.getConfiguration());
      this.rpcControllerFactory = RpcControllerFactory.instantiate(connection.getConfiguration());

      // Cache of tables found to be disabled or dropped. Entries expire after a configurable
      // interval (5 seconds by default), so a table re-created under the same name may go
      // unreplicated for up to that long. In exchange, edits for a dropped table do not each
      // trigger a lookup.
      int expiryMs =
          connection
              .getConfiguration()
              .getInt(
                  "hbase.region.replica.replication.cache.disabledAndDroppedTables.expiryMs", 5000);
      this.disabledAndDroppedTables =
          CacheBuilder.newBuilder()
              .initialCapacity(10)
              .maximumSize(1000)
              .expireAfterWrite(expiryMs, TimeUnit.MILLISECONDS)
              .build();
    }
    /**
     * Creates the output sink together with its {@link RegionReplicaSinkWriter}.
     *
     * @param controller passed to the superclass constructor
     * @param tableDescriptors table descriptors kept for later use by this sink
     * @param entryBuffers passed to the superclass constructor
     * @param connection cluster connection handed to the sink writer; its configuration also
     *     supplies the cache expiry
     * @param pool executor handed to the sink writer
     * @param numWriters passed to the superclass constructor
     * @param operationTimeout timeout value handed to the sink writer
     */
    public RegionReplicaOutputSink(
        PipelineController controller,
        TableDescriptors tableDescriptors,
        EntryBuffers entryBuffers,
        ClusterConnection connection,
        ExecutorService pool,
        int numWriters,
        int operationTimeout) {
      super(controller, entryBuffers, numWriters);
      this.tableDescriptors = tableDescriptors;
      this.sinkWriter = new RegionReplicaSinkWriter(this, connection, pool, operationTimeout);

      // Per-table cache of the "memstore replication enabled" flag, so the TableDescriptor
      // does not have to be looked up and parsed each time. Entries expire after a
      // configurable interval (5 seconds by default); after a table is altered, the old flag
      // value may therefore still be used for up to that long.
      int expiryMs =
          connection
              .getConfiguration()
              .getInt(
                  "hbase.region.replica.replication.cache.memstoreReplicationEnabled.expiryMs",
                  5000);
      this.memstoreReplicationEnabled =
          CacheBuilder.newBuilder()
              .initialCapacity(10)
              .maximumSize(1000)
              .expireAfterWrite(expiryMs, TimeUnit.MILLISECONDS)
              .build();
    }
 /**
  * Releases the resources held by this instance and then delegates to {@code super.doStop()}.
  *
  * <p>In order: closes the output sink, shuts down the thread pool (waiting up to 10 seconds
  * for it to terminate) and closes the connection. Failures in any step are logged and do not
  * prevent the following steps from running. If the calling thread is interrupted while waiting
  * for the pool, the remaining cleanup still runs and the interrupt status is restored just
  * before this method returns.
  */
 @Override
 protected void doStop() {
   // Remembered so the interrupt can be re-asserted after cleanup; setting it right away could
   // disturb the close calls below.
   boolean interrupted = false;
   try {
     if (outputSink != null) {
       try {
         outputSink.finishWritingAndClose();
       } catch (IOException ex) {
         // One call, so the message and the stack trace stay together in the log.
         LOG.warn("Got exception while trying to close OutputSink", ex);
       }
     }
     if (this.pool != null) {
       this.pool.shutdownNow();
       try {
         // wait for 10 sec
         boolean shutdown = this.pool.awaitTermination(10000, TimeUnit.MILLISECONDS);
         if (!shutdown) {
           LOG.warn("Failed to shutdown the thread pool after 10 seconds");
         }
       } catch (InterruptedException e) {
         LOG.warn("Got interrupted while waiting for the thread pool to shut down", e);
         interrupted = true;
       }
     }
     if (connection != null) {
       try {
         connection.close();
       } catch (IOException ex) {
         LOG.warn("Got exception closing connection", ex);
       }
     }
     super.doStop();
   } finally {
     if (interrupted) {
       // Do not swallow the interruption: let callers further up observe it.
       Thread.currentThread().interrupt();
     }
   }
 }
Example #4
0
 /**
  * Closes the connection if there is one. This is best effort: any exception thrown by the
  * close is logged as a warning and not propagated.
  */
 public void close() {
   if (connection == null) {
     return;
   }
   try {
     connection.close();
   } catch (Exception ex) {
     LOG.warn("Could not close the connection: " + ex);
   }
 }
Example #5
0
 /**
  * Fetches the admin-service stub for {@code sn} from {@code connection}, treating a set of
  * connection-level failures as "no stub available" instead of propagating them.
  *
  * @param connection connection used to obtain the stub
  * @param sn ServerName to get a connection against.
  * @return The AdminProtocol we got when we connected to <code>sn</code>. May have come from
  *     cache, may not be good, may have been setup by this invocation, or may be null (when
  *     {@code sn} is null or one of the tolerated failures occurred).
  * @throws IOException if obtaining the stub fails for a reason that is not tolerated here
  */
 @SuppressWarnings("deprecation")
 private static AdminService.BlockingInterface getCachedConnection(
     ClusterConnection connection, ServerName sn) throws IOException {
   if (sn == null) {
     return null;
   }
   try {
     return connection.getAdmin(sn);
   } catch (RetriesExhaustedException e) {
     // instanceof is false for a null cause, so no separate null check is needed.
     if (!(e.getCause() instanceof ConnectException)) {
       throw e;
     }
     // Otherwise fall through; presume it means the cached connection has gone bad.
   } catch (SocketTimeoutException e) {
     LOG.debug("Timed out connecting to " + sn);
   } catch (NoRouteToHostException e) {
     LOG.debug("Connecting to " + sn, e);
   } catch (SocketException e) {
     LOG.debug("Exception connecting to " + sn);
   } catch (UnknownHostException e) {
     LOG.debug("Unknown host exception connecting to  " + sn);
   } catch (FailedServerException e) {
     if (LOG.isDebugEnabled()) {
       LOG.debug("Server " + sn + " is in failed server list.");
     }
   } catch (IOException ioe) {
     Throwable rootCause = ioe.getCause();
     // Tolerated: connect refused, the other end disconnected us (EOF), or connection reset.
     // NOTE(review): if ConnectException here is java.net.ConnectException it is a
     // SocketException and would already be handled by the earlier catch - confirm whether
     // the first test can ever be true.
     boolean tolerated =
         ioe instanceof ConnectException
             || rootCause instanceof EOFException
             || (rootCause != null
                 && rootCause.getMessage() != null
                 && rootCause.getMessage().toLowerCase(Locale.ROOT).contains("connection reset"));
     if (!tolerated) {
       throw ioe;
     }
   }
   // Reached only when a tolerated failure was swallowed above.
   return null;
 }
Example #6
0
 // TODO: The ServerName should be obtainable from the AdminProtocol itself instead of being
 // passed in. That is awkward today because the HRI is likely a proxy against a remote server,
 // so getServerName would first have to be changed to use a local method or a cache.
 /**
  * Asks {@code hostingServer} for the region info of {@code regionName} and reports whether it
  * returned one.
  *
  * @param connection used to create the RPC controller for the call
  * @param hostingServer admin stub of the server expected to host the region; if null the
  *     method logs that and returns false
  * @param address server name, used only in the failure log message
  * @param regionName name of the region to look up
  * @return true if the server returned a non-null region info; false if {@code hostingServer}
  *     is null or the lookup threw one of the exceptions handled below
  * @throws IOException declared by the signature; the IOExceptions raised by the lookup itself
  *     are caught and logged here
  */
 private boolean verifyRegionLocation(
     final ClusterConnection connection,
     AdminService.BlockingInterface hostingServer,
     final ServerName address,
     final byte[] regionName)
     throws IOException {
   if (hostingServer == null) {
     LOG.info("Passed hostingServer is null");
     return false;
   }
   PayloadCarryingRpcController controller = connection.getRpcControllerFactory().newController();
   Throwable failure;
   try {
     // Try and get regioninfo from the hosting server.
     boolean regionInfoFound =
         ProtobufUtil.getRegionInfo(controller, hostingServer, regionName) != null;
     return regionInfoFound;
   } catch (ConnectException e) {
     failure = e;
   } catch (RetriesExhaustedException e) {
     failure = e;
   } catch (RemoteException e) {
     failure = e.unwrapRemoteException();
   } catch (IOException e) {
     // Report the underlying cause when it is an EOF or a connection reset; otherwise report
     // the IOException itself.
     Throwable cause = e.getCause();
     boolean reportCause =
         cause instanceof EOFException
             || (cause != null
                 && cause.getMessage() != null
                 && cause.getMessage().contains("Connection reset"));
     failure = reportCause ? cause : e;
   }
   StringBuilder message = new StringBuilder("Failed verification of ");
   message.append(Bytes.toStringBinary(regionName));
   message.append(" at address=").append(address);
   message.append(", exception=").append(failure.getMessage());
   LOG.info(message.toString());
   return false;
 }
    /**
     * Submits the given entries for replay on every non-default replica of the region that
     * contains {@code row}, then waits for all submitted tasks to finish.
     *
     * <p>Nothing is submitted and the entries are added to the sink's skipped-edits counter when:
     *
     * <ul>
     *   <li>the table is present in the disabled/dropped-tables cache;
     *   <li>the location lookup throws {@code TableNotFoundException} (the table is then added to
     *       that cache);
     *   <li>the default location's region does not match {@code encodedRegionName}, even after a
     *       second lookup made with {@code useCache == false}.
     * </ul>
     *
     * <p>If the lookup returns a single location the method returns without doing anything.
     *
     * @param tableName table the entries belong to
     * @param encodedRegionName encoded name of the region the entries are expected to come from
     * @param row row used to look up the region locations
     * @param entries entries to replay
     * @throws IOException if no locations can be found, or if a replay task fails for a reason
     *     other than the table being dropped or disabled; an {@link InterruptedIOException}
     *     (with the original {@code InterruptedException} as its cause) is thrown if the calling
     *     thread is interrupted while waiting for a task
     */
    public void append(
        TableName tableName, byte[] encodedRegionName, byte[] row, List<Entry> entries)
        throws IOException {

      if (disabledAndDroppedTables.getIfPresent(tableName) != null) {
        if (LOG.isTraceEnabled()) {
          LOG.trace(
              "Skipping "
                  + entries.size()
                  + " entries because table "
                  + tableName
                  + " is cached as a disabled or dropped table");
          for (Entry entry : entries) {
            LOG.trace("Skipping : " + entry);
          }
        }
        sink.getSkippedEditsCounter().addAndGet(entries.size());
        return;
      }

      // If the table is disabled or dropped, we should not replay the entries, and we can skip
      // replaying them. However, we might not know whether the table is disabled until we
      // invalidate the cache and check from meta
      RegionLocations locations = null;
      boolean useCache = true;
      while (true) {
        // get the replicas of the primary region
        try {
          locations =
              RegionReplicaReplayCallable.getRegionLocations(
                  connection, tableName, row, useCache, 0);

          if (locations == null) {
            throw new HBaseIOException(
                "Cannot locate locations for " + tableName + ", row:" + Bytes.toStringBinary(row));
          }
        } catch (TableNotFoundException e) {
          if (LOG.isTraceEnabled()) {
            LOG.trace(
                "Skipping "
                    + entries.size()
                    + " entries because table "
                    + tableName
                    + " is dropped. Adding table to cache.");
            for (Entry entry : entries) {
              LOG.trace("Skipping : " + entry);
            }
          }
          disabledAndDroppedTables.put(tableName, Boolean.TRUE); // put to cache. Value ignored
          // skip this entry
          sink.getSkippedEditsCounter().addAndGet(entries.size());
          return;
        }

        // check whether we should still replay this entry. If the regions are changed, or the
        // entry is not coming from the primary region, filter it out.
        HRegionLocation primaryLocation = locations.getDefaultRegionLocation();
        if (!Bytes.equals(
            primaryLocation.getRegionInfo().getEncodedNameAsBytes(), encodedRegionName)) {
          if (useCache) {
            // The mismatch may come from a stale cached location: look up once more with
            // useCache == false before giving up.
            useCache = false;
            continue; // this will retry location lookup
          }
          if (LOG.isTraceEnabled()) {
            LOG.trace(
                "Skipping "
                    + entries.size()
                    + " entries in table "
                    + tableName
                    + " because located region "
                    + primaryLocation.getRegionInfo().getEncodedName()
                    + " is different than the original region "
                    + Bytes.toStringBinary(encodedRegionName)
                    + " from WALEdit");
            for (Entry entry : entries) {
              LOG.trace("Skipping : " + entry);
            }
          }
          sink.getSkippedEditsCounter().addAndGet(entries.size());
          return;
        }
        break;
      }

      // A single location means there is only the default replica: nothing to replay to.
      if (locations.size() == 1) {
        return;
      }

      List<Future<ReplicateWALEntryResponse>> tasks =
          new ArrayList<Future<ReplicateWALEntryResponse>>(locations.size() - 1);

      // All passed entries should belong to one region because it is coming from the EntryBuffers
      // split per region. But the regions might split and merge (unlike log recovery case).
      for (int replicaId = 0; replicaId < locations.size(); replicaId++) {
        HRegionLocation location = locations.getRegionLocation(replicaId);
        if (!RegionReplicaUtil.isDefaultReplica(replicaId)) {
          // When the replica has no known location, derive its region info from the default
          // replica's region info.
          HRegionInfo regionInfo =
              location == null
                  ? RegionReplicaUtil.getRegionInfoForReplica(
                      locations.getDefaultRegionLocation().getRegionInfo(), replicaId)
                  : location.getRegionInfo();
          RegionReplicaReplayCallable callable =
              new RegionReplicaReplayCallable(
                  connection,
                  rpcControllerFactory,
                  tableName,
                  location,
                  regionInfo,
                  row,
                  entries,
                  sink.getSkippedEditsCounter());
          Future<ReplicateWALEntryResponse> task =
              pool.submit(
                  new RetryingRpcCallable<ReplicateWALEntryResponse>(
                      rpcRetryingCallerFactory, callable, operationTimeout));
          tasks.add(task);
        }
      }

      // True once the entries have been added to the skipped counter, so that several failing
      // replica tasks do not count the same entries more than once.
      boolean skippedCounted = false;
      for (Future<ReplicateWALEntryResponse> task : tasks) {
        try {
          task.get();
        } catch (InterruptedException e) {
          // Keep the original exception as the cause; its message alone is usually null and
          // would leave nothing to diagnose from.
          InterruptedIOException iioe = new InterruptedIOException(e.getMessage());
          iioe.initCause(e);
          throw iioe;
        } catch (ExecutionException e) {
          Throwable cause = e.getCause();
          if (cause instanceof IOException) {
            // The table can be disabled or dropped at this time. For disabled tables, we have no
            // cheap mechanism to detect this case because meta does not contain this information.
            // HConnection.isTableDisabled() is a zk call which we cannot do for every replay RPC.
            // So instead we start the replay RPC with retries and
            // check whether the table is dropped or disabled which might cause
            // SocketTimeoutException, or RetriesExhaustedException or similar if we get IOE.
            if (cause instanceof TableNotFoundException || connection.isTableDisabled(tableName)) {
              if (LOG.isTraceEnabled()) {
                LOG.trace(
                    "Skipping "
                        + entries.size()
                        + " entries in table "
                        + tableName
                        + " because received exception for dropped or disabled table",
                    cause);
                for (Entry entry : entries) {
                  LOG.trace("Skipping : " + entry);
                }
              }
              disabledAndDroppedTables.put(tableName, Boolean.TRUE); // put to cache for later.
              if (!skippedCounted) {
                sink.getSkippedEditsCounter().addAndGet(entries.size());
                skippedCounted = true; // so that we do not add to skipped counter again
              }
              continue;
            }
            // otherwise rethrow
            throw (IOException) cause;
          }
          // unexpected exception
          throw new IOException(cause);
        }
      }
    }