예제 #1
0
 /**
  * Routes an error reported by a master to the matching recovery path, relying on the mechanisms
  * already used for tablet server errors wherever they apply.
  *
  * @param rpc The RPC whose response carried the error.
  * @param error The error payload received from the master.
  * @return {@code null} when the error was passed on to the client for handling, otherwise the
  *     exception that the caller should propagate.
  */
 private Exception dispatchMasterErrorOrReturnException(KuduRpc rpc, Master.MasterErrorPB error) {
   WireProtocol.AppStatusPB.ErrorCode statusCode = error.getStatus().getCode();
   MasterErrorException masterException = new MasterErrorException(uuid, error);

   if (error.getCode() == Master.MasterErrorPB.Code.NOT_THE_LEADER) {
     kuduClient.handleNotLeader(rpc, masterException, this);
     return null;
   }

   boolean serviceUnavailable =
       statusCode == WireProtocol.AppStatusPB.ErrorCode.SERVICE_UNAVAILABLE;
   if (serviceUnavailable && !(rpc instanceof GetMasterRegistrationRequest)) {
     // TODO: This is a crutch until we either don't have to retry RPCs going to the
     // same server or use retry policies.
     kuduClient.handleRetryableError(rpc, masterException);
     return null;
   }

   // No dedicated handling applies: let the caller deal with the exception.
   return masterException;
 }
예제 #2
0
  /**
   * Reacts to an exception raised on this connection's channel: logs it according to its type,
   * applies the type-specific recovery step, then makes sure the channel is closed and cleaned up.
   *
   * @param ctx The channel handler context (not used here).
   * @param event The event carrying the exception and the channel it occurred on.
   */
  @Override
  public void exceptionCaught(final ChannelHandlerContext ctx, final ExceptionEvent event) {
    final Throwable cause = event.getCause();
    final Channel channel = event.getChannel();

    if (cause instanceof RejectedExecutionException) {
      final String warning =
          getPeerUuidLoggingString()
              + "RPC rejected by the executor, ignore this if we're shutting down";
      LOG.warn(warning, cause);
    } else if (cause instanceof ReadTimeoutException) {
      LOG.debug(getPeerUuidLoggingString() + "Encountered a read timeout");
      // Clean up immediately: all the RPCs must be invalidated right now rather than letting the
      // ReplayingDecoder keep decoding while Channels.close() below runs its course.
      cleanup(channel);
    } else {
      final String failure =
          getPeerUuidLoggingString() + "Unexpected exception from downstream on " + channel;
      LOG.error(failure, cause);
      // Any other exception is likely a connection error, so drop the cached leader state for
      // every tablet this TS is recorded as leading.
      kuduClient.demoteAsLeaderForAllTablets(this);
    }

    if (!channel.isOpen()) {
      // Presumably a connection timeout: cleanup() has to be invoked directly from here.
      cleanup(channel);
    } else {
      // Closing triggers channelClosed(), which in turn runs cleanup().
      Channels.close(channel);
    }
  }
예제 #3
0
 /**
  * Builds a client that remembers the owning {@link AsyncKuduClient} together with the given UUID,
  * host and port, and adopts the owning client's default socket read timeout.
  *
  * @param client The client this instance reports to and takes its read timeout from.
  * @param uuid The UUID stored for this instance.
  * @param host The host stored for this instance.
  * @param port The port stored for this instance.
  */
 public TabletClient(AsyncKuduClient client, String uuid, String host, int port) {
   this.uuid = uuid;
   this.host = host;
   this.port = port;
   this.kuduClient = client;
   this.socketReadTimeoutMs = client.getDefaultSocketReadTimeoutMs();
 }
예제 #4
0
  /**
   * Creates a table using {@code basicSchema} with one split row per entry of {@code KEYS}, then
   * inserts three rows for every key (primary key values {@code key + 1} to {@code key + 3}).
   *
   * <p>As the method name suggests, this is expected to produce one empty tablet plus three
   * tablets holding three rows each, which presumes {@code KEYS} has three entries.
   *
   * @param tableName Name of the table to create.
   * @return The newly created table.
   * @throws Exception If applying an insert or closing the session fails or times out.
   */
  protected static KuduTable createFourTabletsTableWithNineRows(String tableName) throws Exception {
    CreateTableBuilder tableBuilder = new CreateTableBuilder();
    PartialRow split = basicSchema.newPartialRow();
    for (int splitKey : KEYS) {
      split.addInt(0, splitKey);
      tableBuilder.addSplitRow(split);
    }
    KuduTable created = createTable(tableName, basicSchema, tableBuilder);
    AsyncKuduSession writer = client.newSession();

    // Populate the table: three consecutive rows right after each split key.
    for (int base : KEYS) {
      for (int offset = 1; offset < 4; offset++) {
        Insert op = created.newInsert();
        PartialRow values = op.getRow();
        values.addInt(0, base + offset);
        values.addInt(1, base);
        values.addInt(2, offset);
        values.addString(3, "a string");
        values.addBoolean(4, true);
        writer.apply(op).join(DEFAULT_SLEEP);
      }
    }
    writer.close().join(DEFAULT_SLEEP);
    return created;
  }
예제 #5
0
 /**
  * Creates a table through the shared client and waits up to {@code DEFAULT_SLEEP} for the
  * returned Deferred to complete, failing the running test if the wait or the creation fails.
  *
  * <p>On success the table name is added to {@code tableNames}.
  *
  * @param tableName Name of the table to create.
  * @param schema Schema of the table.
  * @param builder Builder carrying the additional creation options.
  * @return The created table.
  */
 protected static KuduTable createTable(
     String tableName, Schema schema, CreateTableBuilder builder) {
   LOG.info("Creating table: {}", tableName);
   Deferred<KuduTable> d = client.createTable(tableName, schema, builder);
   final AtomicBoolean gotError = new AtomicBoolean(false);
   d.addErrback(
       new Callback<Object, Object>() {
         @Override
         public Object call(Object arg) throws Exception {
           gotError.set(true);
           if (arg instanceof Throwable) {
             // Hand the throwable to the logger separately so its stack trace is kept.
             LOG.error("Error : " + arg, (Throwable) arg);
           } else {
             LOG.error("Error : " + arg);
           }
           return null;
         }
       });
   KuduTable table = null;
   try {
     table = d.join(DEFAULT_SLEEP);
   } catch (Exception e) {
     if (e instanceof InterruptedException) {
       // Keep the interrupt visible to whoever runs this thread.
       Thread.currentThread().interrupt();
     }
     // Report the real cause: the wait can fail for reasons other than a timeout.
     LOG.error("Failed while waiting for table {} to be created", tableName, e);
     fail("Failed while waiting for creation of table " + tableName + ": " + e);
   }
   if (gotError.get()) {
     fail(
         "Got error during table creation, is the Kudu master running at "
             + masterAddresses
             + "?");
   }
   tableNames.add(tableName);
   return table;
 }
예제 #6
0
 /**
  * Handles the tablet server errors that have a dedicated recovery path (a tablet that was not
  * found, an unavailable service, a server that is not the leader) and converts anything else into
  * an exception for the caller.
  *
  * @param rpc The RPC whose response carried the error.
  * @param error The error payload received from the tablet server.
  * @return {@code null} when the error was passed on to the client for handling, otherwise the
  *     exception that the caller should propagate.
  */
 private Exception dispatchTSErrorOrReturnException(
     KuduRpc rpc, Tserver.TabletServerErrorPB error) {
   WireProtocol.AppStatusPB.ErrorCode statusCode = error.getStatus().getCode();
   TabletServerErrorException tsException =
       new TabletServerErrorException(uuid, error.getStatus());

   if (error.getCode() == Tserver.TabletServerErrorPB.Code.TABLET_NOT_FOUND) {
     // rpc.callback() is deliberately not invoked: the client is relied upon to retry the RPC.
     kuduClient.handleTabletNotFound(rpc, tsException, this);
     return null;
   }

   if (statusCode == WireProtocol.AppStatusPB.ErrorCode.SERVICE_UNAVAILABLE) {
     kuduClient.handleRetryableError(rpc, tsException);
     return null;
   }

   // Both of these status codes are an indication that the tablet isn't a leader.
   boolean notLeader =
       statusCode == WireProtocol.AppStatusPB.ErrorCode.ILLEGAL_STATE
           || statusCode == WireProtocol.AppStatusPB.ErrorCode.ABORTED;
   if (notLeader) {
     kuduClient.handleNotLeader(rpc, tsException, this);
     return null;
   }

   return tsException;
 }
예제 #7
0
 /**
  * Hands the RPC back to the client for a retry, or fails it when it has no tablet attached.
  *
  * @param rpc an RPC to retry or fail
  * @param exception an exception to propagate with the RPC
  */
 private void failOrRetryRpc(final KuduRpc<?> rpc, final ConnectionResetException exception) {
   // Note: as of the time of writing (03/11/16), a null tablet doesn't make sense; seeing one
   // means it wasn't set properly before calling sendRpc().
   if (rpc.getTablet() == null) {
     // Without a tablet there is no way to know where this RPC should go, so it can't be retried.
     rpc.errback(exception);
     return;
   }
   kuduClient.handleRetryableError(rpc, exception);
 }
예제 #8
0
 /**
  * Polls the list of tablet servers, pausing 200 ms before each request, until enough of them are
  * reported or DEFAULT_SLEEP has elapsed.
  *
  * @param expected How many TS are expected
  * @return true if there are at least as many TS as expected, otherwise false
  * @throws Exception If the thread is interrupted or a listing request fails or times out.
  */
 static boolean waitForTabletServers(int expected) throws Exception {
   final Stopwatch timer = new Stopwatch().start();
   int seen = 0;
   for (;;) {
     if (seen >= expected) {
       return true;
     }
     if (timer.elapsedMillis() >= DEFAULT_SLEEP) {
       // Out of time without ever reaching the expected count.
       return false;
     }
     Thread.sleep(200);
     Deferred<ListTabletServersResponse> listing = client.listTabletServers();
     listing.addErrback(defaultErrorCB);
     seen = listing.join(DEFAULT_SLEEP).getTabletServersCount();
   }
 }
예제 #9
0
 /**
  * Looks up the port of the leader master so that it can be used as a key into the port to
  * process map.
  *
  * <p>The port is read from the first RPC address of the first replica of the first tablet
  * location returned by {@code client.getMasterTableLocationsPB()}.
  *
  * @return The port of the leader master.
  * @throws Exception If we are unable to find the leader master.
  */
 protected static int findLeaderMasterPort() throws Exception {
   final Stopwatch timer = new Stopwatch().start();
   int port = -1;
   while (port == -1 && timer.elapsedMillis() < DEFAULT_SLEEP) {
     Master.GetTableLocationsResponsePB response =
         client.getMasterTableLocationsPB().join(DEFAULT_SLEEP);
     port =
         response
             .getTabletLocations(0)
             .getReplicas(0)
             .getTsInfo()
             .getRpcAddresses(0)
             .getPort();
   }
   if (port == -1) {
     fail("No leader master found after " + DEFAULT_SLEEP + " ms.");
   }
   return port;
 }
예제 #10
0
 /**
  * Shuts down the shared client and then, whether or not that succeeded, releases what the test
  * class created: the master and tablet server processes and their output-printing threads (only
  * when {@code startCluster} is set), followed by every path listed in {@code pathsToDelete}.
  *
  * <p>Path deletion is best effort: failures are logged and never abort the tear down.
  *
  * @throws Exception If shutting down the client fails or times out.
  */
 @AfterClass
 public static void tearDownAfterClass() throws Exception {
   try {
     if (client != null) {
       Deferred<ArrayList<Void>> d = client.shutdown();
       d.addErrback(defaultErrorCB);
       d.join(DEFAULT_SLEEP);
       // No need to explicitly shutdown the sync client,
       // shutting down the async client effectively does that.
     }
   } finally {
     if (startCluster) {
       // Destroy each process and drop it from its map through the iterator.
       for (Iterator<Process> masterIter = MASTERS.values().iterator(); masterIter.hasNext(); ) {
         masterIter.next().destroy();
         masterIter.remove();
       }
       for (Iterator<Process> tsIter = TABLET_SERVERS.values().iterator(); tsIter.hasNext(); ) {
         tsIter.next().destroy();
         tsIter.remove();
       }
       for (Thread thread : PROCESS_INPUT_PRINTERS) {
         thread.interrupt();
       }
     }
     for (String path : pathsToDelete) {
       try {
         File f = new File(path);
         if (f.isDirectory()) {
           FileUtils.deleteDirectory(f);
         } else if (!f.delete() && f.exists()) {
           // File.delete() signals failure through its return value, not an exception.
           LOG.warn("Could not delete path {}", path);
         }
       } catch (Exception e) {
         LOG.warn("Could not delete path {}", path, e);
       }
     }
   }
 }
예제 #11
0
  /**
   * Decodes one RPC response read from the channel, matches it to the in-flight RPC with the same
   * call ID, and completes that RPC through its callback or errback (or hands it back to the
   * client when the error is one that gets retried or re-dispatched).
   *
   * <p>The reason we are suppressing the unchecked conversions is because the KuduRpc is coming
   * from a collection that has RPCs with different generics, and there's no way to get "decoded"
   * casted correctly. The best we can do is to rely on the RPC to decode correctly, and to not pass
   * an Exception in the callback.
   *
   * @param ctx The channel handler context (not used by this method).
   * @param chan The channel the response arrived on; passed to the SASL response handling.
   * @param buf The buffer holding the received bytes.
   * @param voidEnum Decoder state placeholder (not used by this method).
   * @return Always {@code null}; results are delivered through the RPC rather than returned.
   * @throws NonRecoverableException If the SASL handshake fails, the response has no call ID, or
   *     no in-flight RPC matches the call ID.
   */
  @Override
  @SuppressWarnings("unchecked")
  protected Object decode(
      ChannelHandlerContext ctx, Channel chan, ChannelBuffer buf, VoidEnum voidEnum) {
    final long start = System.nanoTime();
    final int rdx = buf.readerIndex();
    LOG.debug("------------------>> ENTERING DECODE >>------------------");

    // Let the security helper process the buffer first; it may replace it or return null.
    try {
      buf = secureRpcHelper.handleResponse(buf, chan);
    } catch (SaslException e) {
      String message = getPeerUuidLoggingString() + "Couldn't complete the SASL handshake";
      LOG.error(message);
      throw new NonRecoverableException(message, e);
    }
    // A null buffer means there is nothing to decode at this point.
    if (buf == null) {
      return null;
    }

    CallResponse response = new CallResponse(buf);

    // Without a call ID the response can't be tied to any RPC.
    RpcHeader.ResponseHeader header = response.getHeader();
    if (!header.hasCallId()) {
      final int size = response.getTotalResponseSize();
      final String msg =
          getPeerUuidLoggingString()
              + "RPC response (size: "
              + size
              + ") doesn't"
              + " have a call ID: "
              + header
              + ", buf="
              + Bytes.pretty(buf);
      LOG.error(msg);
      throw new NonRecoverableException(msg);
    }
    final int rpcid = header.getCallId();

    // Only looked up here; the entry is removed from 'rpcs_inflight' further down.
    @SuppressWarnings("rawtypes")
    final KuduRpc rpc = rpcs_inflight.get(rpcid);

    if (rpc == null) {
      final String msg =
          getPeerUuidLoggingString()
              + "Invalid rpcid: "
              + rpcid
              + " found in "
              + buf
              + '='
              + Bytes.pretty(buf);
      LOG.error(msg);
      // The problem here is that we don't know which Deferred corresponds to
      // this RPC, since we don't have a valid ID.  So we're hopeless, we'll
      // never be able to recover because responses are not framed, we don't
      // know where the next response will start...  We have to give up here
      // and throw this outside of our Netty handler, so Netty will call our
      // exception handler where we'll close this channel, which will cause
      // all RPCs in flight to be failed.
      throw new NonRecoverableException(msg);
    }

    // Exactly one of these ends up driving the outcome: 'decoded' on success, 'exception' for the
    // errback route, 'retryableHeaderException' when the RPC is handed back for a retry.
    Pair<Object, Object> decoded = null;
    Exception exception = null;
    KuduException retryableHeaderException = null;
    if (header.hasIsError() && header.getIsError()) {
      // The header flags an error, so the body is an ErrorStatusPB rather than the RPC's response.
      RpcHeader.ErrorStatusPB.Builder errorBuilder = RpcHeader.ErrorStatusPB.newBuilder();
      KuduRpc.readProtobuf(response.getPBMessage(), errorBuilder);
      RpcHeader.ErrorStatusPB error = errorBuilder.build();
      if (error.getCode().equals(RpcHeader.ErrorStatusPB.RpcErrorCodePB.ERROR_SERVER_TOO_BUSY)) {
        // We can't return right away, we still need to remove ourselves from 'rpcs_inflight', so we
        // populate 'retryableHeaderException'.
        retryableHeaderException = new TabletServerErrorException(uuid, error);
      } else {
        String message =
            getPeerUuidLoggingString() + "Tablet server sent error " + error.getMessage();
        exception = new NonRecoverableException(message);
        LOG.error(message); // can be useful
      }
    } else {
      // Normal response: let the RPC deserialize it; a failure is reported through the errback.
      try {
        decoded = rpc.deserialize(response, this.uuid);
      } catch (Exception ex) {
        exception = ex;
      }
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug(
          getPeerUuidLoggingString()
              + "rpcid="
              + rpcid
              + ", response size="
              + (buf.readerIndex() - rdx)
              + " bytes"
              + ", "
              + actualReadableBytes()
              + " readable bytes left"
              + ", rpc="
              + rpc);
    }

    // The response has been consumed, so the RPC is no longer in flight.
    {
      final KuduRpc<?> removed = rpcs_inflight.remove(rpcid);
      if (removed == null) {
        // The RPC we were decoding was cleaned up already, give up.
        throw new NonRecoverableException("RPC not found");
      }
    }

    // This check is specifically for the ERROR_SERVER_TOO_BUSY case above.
    if (retryableHeaderException != null) {
      kuduClient.handleRetryableError(rpc, retryableHeaderException);
      return null;
    }

    // We can get this Message from within the RPC's expected type,
    // so convert it into an exception and nullify decoded so that we use the errback route.
    // Have to do it for both TS and Master errors.
    if (decoded != null) {
      if (decoded.getSecond() instanceof Tserver.TabletServerErrorPB) {
        Tserver.TabletServerErrorPB error = (Tserver.TabletServerErrorPB) decoded.getSecond();
        exception = dispatchTSErrorOrReturnException(rpc, error);
        if (exception == null) {
          // It was taken care of.
          return null;
        } else {
          // We're going to errback.
          decoded = null;
        }

      } else if (decoded.getSecond() instanceof Master.MasterErrorPB) {
        Master.MasterErrorPB error = (Master.MasterErrorPB) decoded.getSecond();
        exception = dispatchMasterErrorOrReturnException(rpc, error);
        if (exception == null) {
          // Exception was taken care of.
          return null;
        } else {
          // We're going to errback.
          decoded = null;
        }
      }
    }

    // Complete the RPC: callback with the decoded result, or errback with the exception. Anything
    // thrown while doing so is logged and does not escape this method.
    try {
      if (decoded != null) {
        assert !(decoded.getFirst() instanceof Exception);
        if (kuduClient.isStatisticsEnabled()) {
          rpc.updateStatistics(kuduClient.getStatistics(), decoded.getFirst());
        }
        rpc.callback(decoded.getFirst());
      } else {
        if (kuduClient.isStatisticsEnabled()) {
          rpc.updateStatistics(kuduClient.getStatistics(), null);
        }
        rpc.errback(exception);
      }
    } catch (Exception e) {
      LOG.debug(
          getPeerUuidLoggingString()
              + "Unexpected exception while handling RPC #"
              + rpcid
              + ", rpc="
              + rpc
              + ", buf="
              + Bytes.pretty(buf),
          e);
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug(
          "------------------<< LEAVING  DECODE <<------------------"
              + " time elapsed: "
              + ((System.nanoTime() - start) / 1000)
              + "us");
    }
    return null; // Stop processing here.  The Deferred does everything else.
  }
예제 #12
0
 /**
  * Initializes the builder for the given table, starting with an empty list of column range
  * predicates and the client's default operation timeout as the scan request timeout.
  *
  * @param client The client the builder takes its default timeout from.
  * @param table The table the builder is created for.
  */
 AbstractKuduScannerBuilder(AsyncKuduClient client, KuduTable table) {
   this.table = table;
   this.client = client;
   this.scanRequestTimeout = client.getDefaultOperationTimeoutMs();
   this.columnRangePredicates = new ArrayList<>();
 }
예제 #13
0
 /**
  * Opens a table through the shared client, waiting at most DEFAULT_SLEEP for the result.
  *
  * @param name Name of the table
  * @return A KuduTable
  * @throws Exception MasterErrorException if the table doesn't exist
  */
 protected static KuduTable openTable(String name) throws Exception {
   return client.openTable(name).join(DEFAULT_SLEEP);
 }