/** * Provides different handling for various kinds of master errors: re-uses the mechanisms already * in place for handling tablet server errors as much as possible. * * @param rpc The original RPC call that triggered the error. * @param error The error the master sent. * @return An exception if we couldn't dispatch the error, or null. */ private Exception dispatchMasterErrorOrReturnException(KuduRpc rpc, Master.MasterErrorPB error) { WireProtocol.AppStatusPB.ErrorCode code = error.getStatus().getCode(); MasterErrorException ex = new MasterErrorException(uuid, error); if (error.getCode() == Master.MasterErrorPB.Code.NOT_THE_LEADER) { kuduClient.handleNotLeader(rpc, ex, this); } else if (code == WireProtocol.AppStatusPB.ErrorCode.SERVICE_UNAVAILABLE && (!(rpc instanceof GetMasterRegistrationRequest))) { // TODO: This is a crutch until we either don't have to retry RPCs going to the // same server or use retry policies. kuduClient.handleRetryableError(rpc, ex); } else { return ex; } return null; }
@Override public void exceptionCaught(final ChannelHandlerContext ctx, final ExceptionEvent event) { final Throwable e = event.getCause(); final Channel c = event.getChannel(); if (e instanceof RejectedExecutionException) { LOG.warn( getPeerUuidLoggingString() + "RPC rejected by the executor," + " ignore this if we're shutting down", e); } else if (e instanceof ReadTimeoutException) { LOG.debug(getPeerUuidLoggingString() + "Encountered a read timeout"); // Doing the cleanup here since we want to invalidate all the RPCs right _now_, and not let // the ReplayingDecoder continue decoding through Channels.close() below. cleanup(c); } else { LOG.error(getPeerUuidLoggingString() + "Unexpected exception from downstream on " + c, e); // For any other exception, likely a connection error, we clear the leader state // for those tablets that this TS is the cached leader of. kuduClient.demoteAsLeaderForAllTablets(this); } if (c.isOpen()) { Channels.close(c); // Will trigger channelClosed(), which will cleanup() } else { // else: presumably a connection timeout. cleanup(c); // => need to cleanup() from here directly. } }
/**
 * Builds a client for one remote server.
 *
 * @param client the owning client; also supplies the default socket read timeout
 * @param uuid identifier stored in this client's {@code uuid} field
 * @param host host stored in this client's {@code host} field
 * @param port port stored in this client's {@code port} field
 */
public TabletClient(AsyncKuduClient client, String uuid, String host, int port) {
  this.host = host;
  this.port = port;
  this.uuid = uuid;
  this.kuduClient = client;
  this.socketReadTimeoutMs = client.getDefaultSocketReadTimeoutMs();
}
protected static KuduTable createFourTabletsTableWithNineRows(String tableName) throws Exception { CreateTableBuilder builder = new CreateTableBuilder(); PartialRow splitRow = basicSchema.newPartialRow(); for (int i : KEYS) { splitRow.addInt(0, i); builder.addSplitRow(splitRow); } KuduTable table = createTable(tableName, basicSchema, builder); AsyncKuduSession session = client.newSession(); // create a table with on empty tablet and 3 tablets of 3 rows each for (int key1 : KEYS) { for (int key2 = 1; key2 <= 3; key2++) { Insert insert = table.newInsert(); PartialRow row = insert.getRow(); row.addInt(0, key1 + key2); row.addInt(1, key1); row.addInt(2, key2); row.addString(3, "a string"); row.addBoolean(4, true); session.apply(insert).join(DEFAULT_SLEEP); } } session.close().join(DEFAULT_SLEEP); return table; }
/**
 * Creates a table through the shared client and waits up to {@code DEFAULT_SLEEP} for the
 * result.
 *
 * <p>The calling test is failed with an {@link AssertionError} if waiting on the Deferred throws
 * (the caught exception is attached as the cause) or if the Deferred's errback fired. On
 * success the table name is added to {@code tableNames}.
 *
 * @param tableName name of the table to create
 * @param schema schema of the table
 * @param builder creation options handed to the client
 * @return the table returned by the client
 */
protected static KuduTable createTable(
    String tableName, Schema schema, CreateTableBuilder builder) {
  LOG.info("Creating table: {}", tableName);
  Deferred<KuduTable> d = client.createTable(tableName, schema, builder);

  // The errback returns null, so an error is only remembered through this flag and is checked
  // explicitly after the join.
  final AtomicBoolean gotError = new AtomicBoolean(false);
  d.addErrback(
      new Callback<Object, Object>() {
        @Override
        public Object call(Object arg) throws Exception {
          gotError.set(true);
          LOG.error("Error : " + arg);
          return null;
        }
      });

  KuduTable table;
  try {
    table = d.join(DEFAULT_SLEEP);
  } catch (InterruptedException e) {
    // Restore the interrupt flag so code further up the stack can still observe it.
    Thread.currentThread().interrupt();
    throw new AssertionError("Interrupted while waiting for table creation", e);
  } catch (Exception e) {
    // Keep the original exception as the cause instead of dropping it.
    throw new AssertionError("Timed out", e);
  }

  if (gotError.get()) {
    fail(
        "Got error during table creation, is the Kudu master running at "
            + masterAddresses
            + "?");
  }
  tableNames.add(tableName);
  return table;
}
/** * Takes care of a few kinds of TS errors that we handle differently, like tablets or leaders * moving. Builds and returns an exception if we don't know what to do with it. * * @param rpc The original RPC call that triggered the error. * @param error The error the TS sent. * @return An exception if we couldn't dispatch the error, or null. */ private Exception dispatchTSErrorOrReturnException( KuduRpc rpc, Tserver.TabletServerErrorPB error) { WireProtocol.AppStatusPB.ErrorCode code = error.getStatus().getCode(); TabletServerErrorException ex = new TabletServerErrorException(uuid, error.getStatus()); if (error.getCode() == Tserver.TabletServerErrorPB.Code.TABLET_NOT_FOUND) { kuduClient.handleTabletNotFound(rpc, ex, this); // we're not calling rpc.callback() so we rely on the client to retry that RPC } else if (code == WireProtocol.AppStatusPB.ErrorCode.SERVICE_UNAVAILABLE) { kuduClient.handleRetryableError(rpc, ex); // The following two error codes are an indication that the tablet isn't a leader. } else if (code == WireProtocol.AppStatusPB.ErrorCode.ILLEGAL_STATE || code == WireProtocol.AppStatusPB.ErrorCode.ABORTED) { kuduClient.handleNotLeader(rpc, ex, this); } else { return ex; } return null; }
/** * Retry the given RPC. * * @param rpc an RPC to retry or fail * @param exception an exception to propagate with the RPC */ private void failOrRetryRpc(final KuduRpc<?> rpc, final ConnectionResetException exception) { AsyncKuduClient.RemoteTablet tablet = rpc.getTablet(); // Note As of the time of writing (03/11/16), a null tablet doesn't make sense, if we see a null // tablet it's because we didn't set it properly before calling sendRpc(). if (tablet == null) { // Can't retry, dunno where this RPC should go. rpc.errback(exception); } else { kuduClient.handleRetryableError(rpc, exception); } }
/**
 * Polls the master until the expected number of tablet servers is reported or about
 * DEFAULT_SLEEP milliseconds have elapsed.
 *
 * @param expected How many TS are expected
 * @return true if there are at least as many TS as expected, otherwise false
 * @throws Exception if sleeping is interrupted or listing the tablet servers fails
 */
static boolean waitForTabletServers(int expected) throws Exception {
  final Stopwatch timer = new Stopwatch().start();
  int connected = 0;
  while (connected < expected && timer.elapsedMillis() < DEFAULT_SLEEP) {
    // Pause before every poll, including the first one.
    Thread.sleep(200);
    Deferred<ListTabletServersResponse> pending = client.listTabletServers();
    pending.addErrback(defaultErrorCB);
    ListTabletServersResponse response = pending.join(DEFAULT_SLEEP);
    connected = response.getTabletServersCount();
  }
  return connected >= expected;
}
/**
 * Looks up the RPC port of the leader master, so that it can be used as a key into the port to
 * process map.
 *
 * @return The port of the leader master.
 * @throws Exception If we are unable to find the leader master.
 */
protected static int findLeaderMasterPort() throws Exception {
  int port = -1;
  final Stopwatch timer = new Stopwatch().start();
  while (port == -1 && timer.elapsedMillis() < DEFAULT_SLEEP) {
    Master.GetTableLocationsResponsePB locations =
        client.getMasterTableLocationsPB().join(DEFAULT_SLEEP);
    // The port is read from the first RPC address of the first replica of the first location.
    port =
        locations
            .getTabletLocations(0)
            .getReplicas(0)
            .getTsInfo()
            .getRpcAddresses(0)
            .getPort();
  }
  if (port == -1) {
    fail("No leader master found after " + DEFAULT_SLEEP + " ms.");
  }
  return port;
}
@AfterClass public static void tearDownAfterClass() throws Exception { try { if (client != null) { Deferred<ArrayList<Void>> d = client.shutdown(); d.addErrback(defaultErrorCB); d.join(DEFAULT_SLEEP); // No need to explicitly shutdown the sync client, // shutting down the async client effectively does that. } } finally { if (startCluster) { for (Iterator<Process> masterIter = MASTERS.values().iterator(); masterIter.hasNext(); ) { masterIter.next().destroy(); masterIter.remove(); } for (Iterator<Process> tsIter = TABLET_SERVERS.values().iterator(); tsIter.hasNext(); ) { tsIter.next().destroy(); tsIter.remove(); } for (Thread thread : PROCESS_INPUT_PRINTERS) { thread.interrupt(); } } for (String path : pathsToDelete) { try { File f = new File(path); if (f.isDirectory()) { FileUtils.deleteDirectory(f); } else { f.delete(); } } catch (Exception e) { LOG.warn("Could not delete path {}", path, e); } } } }
/**
 * Decodes one RPC response from the given buffer and completes the matching in-flight RPC.
 *
 * <p>The response is matched to an entry of {@code rpcs_inflight} through the call ID found in
 * its header. Depending on the response, the RPC is then handed to the client for a retry or
 * for error dispatching, or its callback / errback is invoked.
 *
 * <p>The reason we are suppressing the unchecked conversions is because the KuduRpc is coming
 * from a collection that has RPCs with different generics, and there's no way to get "decoded"
 * casted correctly. The best we can do is to rely on the RPC to decode correctly, and to not
 * pass an Exception in the callback.
 *
 * @param ctx the channel handler context (not used in this method)
 * @param chan the channel, handed to the secure RPC helper together with the buffer
 * @param buf the buffer holding the response bytes
 * @param voidEnum not used in this method
 * @return always {@code null}
 * @throws NonRecoverableException if the SASL handshake fails, if the response has no call ID,
 *     if no in-flight RPC matches the call ID, or if the RPC is gone from {@code rpcs_inflight}
 *     by the time it gets removed
 */
@Override
@SuppressWarnings("unchecked")
protected Object decode(
    ChannelHandlerContext ctx, Channel chan, ChannelBuffer buf, VoidEnum voidEnum) {
  final long start = System.nanoTime();
  // Reader index on entry, only used for the "response size" debug log further down.
  // NOTE(review): 'buf' is reassigned by handleResponse() below, so this index may belong to a
  // different buffer than the one it is later subtracted from — confirm.
  final int rdx = buf.readerIndex();
  LOG.debug("------------------>> ENTERING DECODE >>------------------");

  try {
    buf = secureRpcHelper.handleResponse(buf, chan);
  } catch (SaslException e) {
    String message = getPeerUuidLoggingString() + "Couldn't complete the SASL handshake";
    LOG.error(message);
    throw new NonRecoverableException(message, e);
  }
  // The helper handed back no buffer: there is nothing to decode in this call.
  if (buf == null) {
    return null;
  }

  CallResponse response = new CallResponse(buf);

  RpcHeader.ResponseHeader header = response.getHeader();
  if (!header.hasCallId()) {
    final int size = response.getTotalResponseSize();
    final String msg =
        getPeerUuidLoggingString()
            + "RPC response (size: "
            + size
            + ") doesn't"
            + " have a call ID: "
            + header
            + ", buf="
            + Bytes.pretty(buf);
    LOG.error(msg);
    throw new NonRecoverableException(msg);
  }
  final int rpcid = header.getCallId();

  // Look the RPC up without removing it yet; the removal happens after decoding.
  @SuppressWarnings("rawtypes")
  final KuduRpc rpc = rpcs_inflight.get(rpcid);

  if (rpc == null) {
    final String msg =
        getPeerUuidLoggingString()
            + "Invalid rpcid: "
            + rpcid
            + " found in "
            + buf
            + '='
            + Bytes.pretty(buf);
    LOG.error(msg);
    // The problem here is that we don't know which Deferred corresponds to
    // this RPC, since we don't have a valid ID. So we're hopeless, we'll
    // never be able to recover because responses are not framed, we don't
    // know where the next response will start... We have to give up here
    // and throw this outside of our Netty handler, so Netty will call our
    // exception handler where we'll close this channel, which will cause
    // all RPCs in flight to be failed.
    throw new NonRecoverableException(msg);
  }

  // Exactly one of these ends up driving the outcome: 'decoded' (callback route), 'exception'
  // (errback route) or 'retryableHeaderException' (retry through the client).
  Pair<Object, Object> decoded = null;
  Exception exception = null;
  KuduException retryableHeaderException = null;
  if (header.hasIsError() && header.getIsError()) {
    // The header flags an error: the payload is read as an ErrorStatusPB instead of being
    // deserialized by the RPC.
    RpcHeader.ErrorStatusPB.Builder errorBuilder = RpcHeader.ErrorStatusPB.newBuilder();
    KuduRpc.readProtobuf(response.getPBMessage(), errorBuilder);
    RpcHeader.ErrorStatusPB error = errorBuilder.build();
    if (error.getCode().equals(RpcHeader.ErrorStatusPB.RpcErrorCodePB.ERROR_SERVER_TOO_BUSY)) {
      // We can't return right away, we still need to remove ourselves from 'rpcs_inflight', so we
      // populate 'retryableHeaderException'.
      retryableHeaderException = new TabletServerErrorException(uuid, error);
    } else {
      String message =
          getPeerUuidLoggingString() + "Tablet server sent error " + error.getMessage();
      exception = new NonRecoverableException(message);
      LOG.error(message); // can be useful
    }
  } else {
    try {
      decoded = rpc.deserialize(response, this.uuid);
    } catch (Exception ex) {
      // A deserialization failure is delivered to the RPC through the errback route below.
      exception = ex;
    }
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug(
        getPeerUuidLoggingString()
            + "rpcid="
            + rpcid
            + ", response size="
            + (buf.readerIndex() - rdx)
            + " bytes"
            + ", "
            + actualReadableBytes()
            + " readable bytes left"
            + ", rpc="
            + rpc);
  }

  {
    final KuduRpc<?> removed = rpcs_inflight.remove(rpcid);
    if (removed == null) {
      // The RPC we were decoding was cleaned up already, give up.
      throw new NonRecoverableException("RPC not found");
    }
  }

  // This check is specifically for the ERROR_SERVER_TOO_BUSY case above.
  if (retryableHeaderException != null) {
    kuduClient.handleRetryableError(rpc, retryableHeaderException);
    return null;
  }

  // We can get this Message from within the RPC's expected type,
  // so convert it into an exception and nullify decoded so that we use the errback route.
  // Have to do it for both TS and Master errors.
  if (decoded != null) {
    if (decoded.getSecond() instanceof Tserver.TabletServerErrorPB) {
      Tserver.TabletServerErrorPB error = (Tserver.TabletServerErrorPB) decoded.getSecond();
      exception = dispatchTSErrorOrReturnException(rpc, error);
      if (exception == null) {
        // It was taken care of.
        return null;
      } else {
        // We're going to errback.
        decoded = null;
      }
    } else if (decoded.getSecond() instanceof Master.MasterErrorPB) {
      Master.MasterErrorPB error = (Master.MasterErrorPB) decoded.getSecond();
      exception = dispatchMasterErrorOrReturnException(rpc, error);
      if (exception == null) {
        // Exception was taken care of.
        return null;
      } else {
        decoded = null;
      }
    }
  }

  try {
    if (decoded != null) {
      assert !(decoded.getFirst() instanceof Exception);
      if (kuduClient.isStatisticsEnabled()) {
        rpc.updateStatistics(kuduClient.getStatistics(), decoded.getFirst());
      }
      rpc.callback(decoded.getFirst());
    } else {
      if (kuduClient.isStatisticsEnabled()) {
        rpc.updateStatistics(kuduClient.getStatistics(), null);
      }
      rpc.errback(exception);
    }
  } catch (Exception e) {
    // Anything thrown while updating statistics or running the callback / errback is only
    // logged at debug level; it does not propagate out of decode().
    LOG.debug(
        getPeerUuidLoggingString()
            + "Unexpected exception while handling RPC #"
            + rpcid
            + ", rpc="
            + rpc
            + ", buf="
            + Bytes.pretty(buf),
        e);
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug(
        "------------------<< LEAVING DECODE <<------------------"
            + " time elapsed: "
            + ((System.nanoTime() - start) / 1000)
            + "us");
  }
  return null; // Stop processing here. The Deferred does everything else.
}
/**
 * Starts a builder for the given table, with no column range predicates and with the scan
 * request timeout taken from the client's default operation timeout.
 *
 * @param client the client stored in this builder; also supplies the default timeout
 * @param table the table stored in this builder
 */
AbstractKuduScannerBuilder(AsyncKuduClient client, KuduTable table) {
  this.table = table;
  this.client = client;
  this.scanRequestTimeout = client.getDefaultOperationTimeoutMs();
  this.columnRangePredicates = new ArrayList<>();
}
/**
 * Opens a table through the shared client, waiting at most DEFAULT_SLEEP on the Deferred for the
 * result.
 *
 * @param name Name of the table
 * @return A KuduTable
 * @throws Exception MasterErrorException if the table doesn't exist
 */
protected static KuduTable openTable(String name) throws Exception {
  return client.openTable(name).join(DEFAULT_SLEEP);
}