/** * Starts communication. * * @throws IgniteCheckedException If failed. */ public void start() throws IgniteCheckedException { try { locHost = U.getLocalHost(); } catch (IOException e) { throw new IgniteCheckedException("Failed to initialize local address.", e); } try { shmemSrv = resetShmemServer(); } catch (IgniteCheckedException e) { U.warn(log, "Failed to start shared memory communication server.", e); } try { // This method potentially resets local port to the value // local node was bound to. nioSrvr = resetNioServer(); } catch (IgniteCheckedException e) { throw new IgniteCheckedException("Failed to initialize TCP server: " + locHost, e); } locProcDesc.address(locHost.getHostAddress()); locProcDesc.sharedMemoryPort(boundTcpShmemPort); locProcDesc.tcpPort(boundTcpPort); locIdMsg = new ProcessHandshakeMessage(locProcDesc); if (shmemSrv != null) { shmemAcceptWorker = new ShmemAcceptWorker(shmemSrv); new IgniteThread(shmemAcceptWorker).start(); } nioSrvr.start(); }
/** * Closes all opened connections. * * @param waitCompletion If {@code true} waits for all pending requests to be proceeded. */ @SuppressWarnings("TooBroadScope") @Override public void stop(boolean waitCompletion) { Collection<GridClientConnection> closeConns; if (closed) return; // Mark manager as closed. closed = true; // Remove all connections from cache. closeConns = new ArrayList<>(conns.values()); conns.clear(); nodeConns.clear(); // Close old connection outside the writer lock. for (GridClientConnection conn : closeConns) conn.close(CLIENT_CLOSED, waitCompletion); if (pingExecutor != null) GridClientUtils.shutdownNow(GridClientConnectionManager.class, pingExecutor, log); GridClientUtils.shutdownNow(GridClientConnectionManager.class, executor, log); if (srv != null) srv.stop(); }
/** * Stops the server. * * @throws IgniteCheckedException */ public void stop() throws IgniteCheckedException { // Stop TCP server. if (nioSrvr != null) nioSrvr.stop(); U.cancel(shmemAcceptWorker); U.join(shmemAcceptWorker, log); U.cancel(shmemWorkers); U.join(shmemWorkers, log); shmemWorkers.clear(); // Force closing on stop (safety). for (HadoopCommunicationClient client : clients.values()) client.forceClose(); // Clear resources. nioSrvr = null; boundTcpPort = -1; }
/** * @param clientId Client ID. * @param sslCtx SSL context to enable secured connection or {@code null} to use unsecured one. * @param cfg Client configuration. * @param routers Routers or empty collection to use endpoints from topology info. * @param top Topology. * @param marshId Marshaller ID. * @throws GridClientException In case of error. */ @SuppressWarnings("unchecked") protected GridClientConnectionManagerAdapter( UUID clientId, SSLContext sslCtx, GridClientConfiguration cfg, Collection<InetSocketAddress> routers, GridClientTopology top, @Nullable Byte marshId, boolean routerClient) throws GridClientException { assert clientId != null : "clientId != null"; assert cfg != null : "cfg != null"; assert routers != null : "routers != null"; assert top != null : "top != null"; this.clientId = clientId; this.sslCtx = sslCtx; this.cfg = cfg; this.routers = new ArrayList<>(routers); this.top = top; log = Logger.getLogger(getClass().getName()); executor = cfg.getExecutorService() != null ? cfg.getExecutorService() : Executors.newCachedThreadPool(new GridClientThreadFactory("exec", true)); pingExecutor = cfg.getProtocol() == GridClientProtocol.TCP ? Executors.newScheduledThreadPool( Runtime.getRuntime().availableProcessors(), new GridClientThreadFactory("exec", true)) : null; this.marshId = marshId; if (marshId == null && cfg.getMarshaller() == null) throw new GridClientException("Failed to start client (marshaller is not configured)."); if (cfg.getProtocol() == GridClientProtocol.TCP) { try { IgniteLogger gridLog = new JavaLogger(false); GridNioFilter[] filters; GridNioFilter codecFilter = new GridNioCodecFilter(new GridTcpRestParser(routerClient), gridLog, false); if (sslCtx != null) { GridNioSslFilter sslFilter = new GridNioSslFilter(sslCtx, true, ByteOrder.nativeOrder(), gridLog); sslFilter.directMode(false); sslFilter.clientMode(true); filters = new GridNioFilter[] {codecFilter, sslFilter}; } else filters = new GridNioFilter[] {codecFilter}; srv = GridNioServer.builder() .address(U.getLocalHost()) .port(-1) .listener(new NioListener(log)) .filters(filters) .logger(gridLog) .selectorCount(Runtime.getRuntime().availableProcessors()) .sendQueueLimit(1024) .byteOrder(ByteOrder.nativeOrder()) .tcpNoDelay(cfg.isTcpNoDelay()) .directBuffer(true) .directMode(false) .socketReceiveBufferSize(0) .socketSendBufferSize(0) .idleTimeout(Long.MAX_VALUE) .gridName(routerClient ? "routerClient" : "gridClient") .daemon(cfg.isDaemon()) .build(); srv.start(); } catch (IOException | IgniteCheckedException e) { throw new GridClientException("Failed to start connection server.", e); } } }
/** * Establish TCP connection to remote hadoop process and returns client. * * @param desc Process descriptor. * @return Client. * @throws IgniteCheckedException If failed. */ protected HadoopCommunicationClient createTcpClient(HadoopProcessDescriptor desc) throws IgniteCheckedException { String addr = desc.address(); int port = desc.tcpPort(); if (log.isDebugEnabled()) log.debug( "Trying to connect to remote process [locProcDesc=" + locProcDesc + ", desc=" + desc + ']'); boolean conn = false; HadoopTcpNioCommunicationClient client = null; IgniteCheckedException errs = null; int connectAttempts = 1; long connTimeout0 = connTimeout; int attempt = 1; while (!conn) { // Reconnection on handshake timeout. try { SocketChannel ch = SocketChannel.open(); ch.configureBlocking(true); ch.socket().setTcpNoDelay(tcpNoDelay); ch.socket().setKeepAlive(true); if (sockRcvBuf > 0) ch.socket().setReceiveBufferSize(sockRcvBuf); if (sockSndBuf > 0) ch.socket().setSendBufferSize(sockSndBuf); ch.socket().connect(new InetSocketAddress(addr, port), (int) connTimeout); HandshakeFinish fin = new HandshakeFinish(); GridNioSession ses = nioSrvr.createSession(ch, F.asMap(HANDSHAKE_FINISH_META, fin)).get(); client = new HadoopTcpNioCommunicationClient(ses); if (log.isDebugEnabled()) log.debug("Waiting for handshake finish for client: " + client); fin.await(connTimeout0); conn = true; } catch (HadoopHandshakeTimeoutException e) { if (client != null) { client.forceClose(); client = null; } if (log.isDebugEnabled()) log.debug( "Handshake timedout (will retry with increased timeout) [timeout=" + connTimeout0 + ", desc=" + desc + ", port=" + port + ", err=" + e + ']'); if (attempt == reconCnt || connTimeout0 > maxConnTimeout) { if (log.isDebugEnabled()) log.debug( "Handshake timed out (will stop attempts to perform the handshake) " + "[timeout=" + connTimeout0 + ", maxConnTimeout=" + maxConnTimeout + ", attempt=" + attempt + ", reconCnt=" + reconCnt + ", err=" + e.getMessage() + ", addr=" + addr + ']'); if (errs == null) errs = new IgniteCheckedException( "Failed to connect to remote Hadoop process " + "(is process still running?) [desc=" + desc + ", addrs=" + addr + ']'); errs.addSuppressed(e); break; } else { attempt++; connTimeout0 *= 2; // Continue loop. } } catch (Exception e) { if (client != null) { client.forceClose(); client = null; } if (log.isDebugEnabled()) log.debug("Client creation failed [addr=" + addr + ", port=" + port + ", err=" + e + ']'); if (X.hasCause(e, SocketTimeoutException.class)) LT.warn( log, null, "Connect timed out (consider increasing 'connTimeout' " + "configuration property) [addr=" + addr + ", port=" + port + ']'); if (errs == null) errs = new IgniteCheckedException( "Failed to connect to remote Hadoop process (is process still running?) " + "[desc=" + desc + ", addrs=" + addr + ']'); errs.addSuppressed(e); // Reconnect for the second time, if connection is not established. if (connectAttempts < 2 && (e instanceof ConnectException || X.hasCause(e, ConnectException.class))) { connectAttempts++; continue; } break; } } if (client == null) { assert errs != null; if (X.hasCause(errs, ConnectException.class)) LT.warn( log, null, "Failed to connect to a remote Hadoop process (is process still running?). " + "Make sure operating system firewall is disabled on local and remote host) " + "[addrs=" + addr + ", port=" + port + ']'); throw errs; } if (log.isDebugEnabled()) log.debug("Created client: " + client); return client; }
/** * Recreates tpcSrvr socket instance. * * @return Server instance. * @throws IgniteCheckedException Thrown if it's not possible to create server. */ private GridNioServer<HadoopMessage> resetNioServer() throws IgniteCheckedException { if (boundTcpPort >= 0) throw new IgniteCheckedException( "Tcp NIO server was already created on port " + boundTcpPort); IgniteCheckedException lastEx = null; // If configured TCP port is busy, find first available in range. for (int port = locPort; port < locPort + locPortRange; port++) { try { GridNioServer<HadoopMessage> srvr = GridNioServer.<HadoopMessage>builder() .address(locHost) .port(port) .listener(srvLsnr) .logger(log.getLogger(GridNioServer.class)) .selectorCount(selectorsCnt) .gridName(gridName) .tcpNoDelay(tcpNoDelay) .directBuffer(directBuf) .byteOrder(ByteOrder.nativeOrder()) .socketSendBufferSize(sockSndBuf) .socketReceiveBufferSize(sockRcvBuf) .sendQueueLimit(msgQueueLimit) .directMode(false) .filters(filters()) .build(); boundTcpPort = port; // Ack Port the TCP server was bound to. if (log.isInfoEnabled()) log.info( "Successfully bound to TCP port [port=" + boundTcpPort + ", locHost=" + locHost + ']'); return srvr; } catch (IgniteCheckedException e) { lastEx = e; if (log.isDebugEnabled()) log.debug( "Failed to bind to local port (will try next port within range) [port=" + port + ", locHost=" + locHost + ']'); } } // If free port wasn't found. throw new IgniteCheckedException( "Failed to bind to any port within range [startPort=" + locPort + ", portRange=" + locPortRange + ", locHost=" + locHost + ']', lastEx); }
/** @return Outbound message queue size. */ public int getOutboundMessagesQueueSize() { return nioSrvr.outboundMessagesQueueSize(); }