/** * Starts communication. * * @throws IgniteCheckedException If failed. */ public void start() throws IgniteCheckedException { try { locHost = U.getLocalHost(); } catch (IOException e) { throw new IgniteCheckedException("Failed to initialize local address.", e); } try { shmemSrv = resetShmemServer(); } catch (IgniteCheckedException e) { U.warn(log, "Failed to start shared memory communication server.", e); } try { // This method potentially resets local port to the value // local node was bound to. nioSrvr = resetNioServer(); } catch (IgniteCheckedException e) { throw new IgniteCheckedException("Failed to initialize TCP server: " + locHost, e); } locProcDesc.address(locHost.getHostAddress()); locProcDesc.sharedMemoryPort(boundTcpShmemPort); locProcDesc.tcpPort(boundTcpPort); locIdMsg = new ProcessHandshakeMessage(locProcDesc); if (shmemSrv != null) { shmemAcceptWorker = new ShmemAcceptWorker(shmemSrv); new IgniteThread(shmemAcceptWorker).start(); } nioSrvr.start(); }
/** * Establish TCP connection to remote hadoop process and returns client. * * @param desc Process descriptor. * @return Client. * @throws IgniteCheckedException If failed. */ protected HadoopCommunicationClient createTcpClient(HadoopProcessDescriptor desc) throws IgniteCheckedException { String addr = desc.address(); int port = desc.tcpPort(); if (log.isDebugEnabled()) log.debug( "Trying to connect to remote process [locProcDesc=" + locProcDesc + ", desc=" + desc + ']'); boolean conn = false; HadoopTcpNioCommunicationClient client = null; IgniteCheckedException errs = null; int connectAttempts = 1; long connTimeout0 = connTimeout; int attempt = 1; while (!conn) { // Reconnection on handshake timeout. try { SocketChannel ch = SocketChannel.open(); ch.configureBlocking(true); ch.socket().setTcpNoDelay(tcpNoDelay); ch.socket().setKeepAlive(true); if (sockRcvBuf > 0) ch.socket().setReceiveBufferSize(sockRcvBuf); if (sockSndBuf > 0) ch.socket().setSendBufferSize(sockSndBuf); ch.socket().connect(new InetSocketAddress(addr, port), (int) connTimeout); HandshakeFinish fin = new HandshakeFinish(); GridNioSession ses = nioSrvr.createSession(ch, F.asMap(HANDSHAKE_FINISH_META, fin)).get(); client = new HadoopTcpNioCommunicationClient(ses); if (log.isDebugEnabled()) log.debug("Waiting for handshake finish for client: " + client); fin.await(connTimeout0); conn = true; } catch (HadoopHandshakeTimeoutException e) { if (client != null) { client.forceClose(); client = null; } if (log.isDebugEnabled()) log.debug( "Handshake timedout (will retry with increased timeout) [timeout=" + connTimeout0 + ", desc=" + desc + ", port=" + port + ", err=" + e + ']'); if (attempt == reconCnt || connTimeout0 > maxConnTimeout) { if (log.isDebugEnabled()) log.debug( "Handshake timed out (will stop attempts to perform the handshake) " + "[timeout=" + connTimeout0 + ", maxConnTimeout=" + maxConnTimeout + ", attempt=" + attempt + ", reconCnt=" + reconCnt + ", err=" + e.getMessage() + ", addr=" + addr + ']'); if (errs == null) errs = new IgniteCheckedException( "Failed to connect to remote Hadoop process " + "(is process still running?) [desc=" + desc + ", addrs=" + addr + ']'); errs.addSuppressed(e); break; } else { attempt++; connTimeout0 *= 2; // Continue loop. } } catch (Exception e) { if (client != null) { client.forceClose(); client = null; } if (log.isDebugEnabled()) log.debug("Client creation failed [addr=" + addr + ", port=" + port + ", err=" + e + ']'); if (X.hasCause(e, SocketTimeoutException.class)) LT.warn( log, null, "Connect timed out (consider increasing 'connTimeout' " + "configuration property) [addr=" + addr + ", port=" + port + ']'); if (errs == null) errs = new IgniteCheckedException( "Failed to connect to remote Hadoop process (is process still running?) " + "[desc=" + desc + ", addrs=" + addr + ']'); errs.addSuppressed(e); // Reconnect for the second time, if connection is not established. if (connectAttempts < 2 && (e instanceof ConnectException || X.hasCause(e, ConnectException.class))) { connectAttempts++; continue; } break; } } if (client == null) { assert errs != null; if (X.hasCause(errs, ConnectException.class)) LT.warn( log, null, "Failed to connect to a remote Hadoop process (is process still running?). " + "Make sure operating system firewall is disabled on local and remote host) " + "[addrs=" + addr + ", port=" + port + ']'); throw errs; } if (log.isDebugEnabled()) log.debug("Created client: " + client); return client; }