/**
 * Starts communication.
 *
 * @throws IgniteCheckedException If failed.
 */
public void start() throws IgniteCheckedException {
    try {
        locHost = U.getLocalHost();
    }
    catch (IOException e) {
        throw new IgniteCheckedException("Failed to initialize local address.", e);
    }

    try {
        shmemSrv = resetShmemServer();
    }
    catch (IgniteCheckedException e) {
        U.warn(log, "Failed to start shared memory communication server.", e);
    }

    try {
        // This method potentially resets local port to the value
        // local node was bound to.
        nioSrvr = resetNioServer();
    }
    catch (IgniteCheckedException e) {
        throw new IgniteCheckedException("Failed to initialize TCP server: " + locHost, e);
    }

    locProcDesc.address(locHost.getHostAddress());
    locProcDesc.sharedMemoryPort(boundTcpShmemPort);
    locProcDesc.tcpPort(boundTcpPort);

    locIdMsg = new ProcessHandshakeMessage(locProcDesc);

    if (shmemSrv != null) {
        shmemAcceptWorker = new ShmemAcceptWorker(shmemSrv);

        new IgniteThread(shmemAcceptWorker).start();
    }

    nioSrvr.start();
}
/**
 * Sends message to Hadoop process.
 *
 * @param desc Destination process descriptor.
 * @param msg Message to send.
 * @throws IgniteCheckedException If failed.
 */
public void sendMessage(HadoopProcessDescriptor desc, HadoopMessage msg) throws IgniteCheckedException {
    assert desc != null;
    assert msg != null;

    if (log.isTraceEnabled())
        log.trace("Sending message to Hadoop process [desc=" + desc + ", msg=" + msg + ']');

    HadoopCommunicationClient client = null;

    boolean closeOnRelease = true;

    try {
        client = reserveClient(desc);

        client.sendMessage(desc, msg);

        closeOnRelease = false;
    }
    finally {
        if (client != null) {
            if (closeOnRelease) {
                client.forceClose();

                clients.remove(desc.processId(), client);
            }
            else
                client.release();
        }
    }
}
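// Usage sketch (illustrative only; 'comm', 'rmtDesc' and 'taskMsg' are hypothetical names, not
// part of this file): callers hand over a process descriptor and any HadoopMessage
// implementation, while client reservation, release and forced close on failure are handled
// entirely inside sendMessage():
//
//     HadoopProcessDescriptor rmtDesc = ...; // descriptor of the target Hadoop process
//     HadoopMessage taskMsg = ...;           // any HadoopMessage implementation
//
//     comm.sendMessage(rmtDesc, taskMsg);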
/** {@inheritDoc} */
@Override public void onDisconnected(GridNioSession ses, @Nullable Exception e) {
    if (log.isDebugEnabled())
        log.debug("Closed connection for session: " + ses);

    if (e != null)
        U.error(log, "Session disconnected due to exception: " + ses, e);

    HadoopProcessDescriptor desc = ses.meta(PROCESS_META);

    if (desc != null) {
        HadoopCommunicationClient rmv = clients.remove(desc.processId());

        if (rmv != null)
            rmv.forceClose();
    }

    HadoopMessageListener lsnr0 = lsnr;

    if (lsnr0 != null)
        // Notify listener about connection close.
        lsnr0.onConnectionLost(desc);
}
/**
 * @param desc Process descriptor.
 * @return Client.
 * @throws IgniteCheckedException If failed.
 */
@Nullable protected HadoopCommunicationClient createNioClient(HadoopProcessDescriptor desc)
    throws IgniteCheckedException {
    assert desc != null;

    int shmemPort = desc.sharedMemoryPort();

    // If the remote process has shared memory server enabled and was started by the same parent
    // node, we are running on the same host and shared memory communication can be tried.
    if (shmemPort != -1 && locProcDesc.parentNodeId().equals(desc.parentNodeId())) {
        try {
            return createShmemClient(desc, shmemPort);
        }
        catch (IgniteCheckedException e) {
            if (e.hasCause(IpcOutOfSystemResourcesException.class))
                // Has cause or is itself the IpcOutOfSystemResourcesException.
                LT.warn(log, null, OUT_OF_RESOURCES_TCP_MSG);
            else if (log.isDebugEnabled())
                log.debug("Failed to establish shared memory connection with local Hadoop process: " + desc);
        }
    }

    return createTcpClient(desc);
}
/**
 * Returns existing or just created client to process.
 *
 * @param desc Process to which the client should be opened.
 * @return The existing or just created client.
 * @throws IgniteCheckedException Thrown if any exception occurs.
 */
private HadoopCommunicationClient reserveClient(HadoopProcessDescriptor desc) throws IgniteCheckedException {
    assert desc != null;

    UUID procId = desc.processId();

    while (true) {
        HadoopCommunicationClient client = clients.get(procId);

        if (client == null) {
            if (log.isDebugEnabled())
                log.debug("Did not find client for remote process [locProcDesc=" + locProcDesc +
                    ", desc=" + desc + ']');

            // Do not allow concurrent connects.
            Object sync = locks.lock(procId);

            try {
                client = clients.get(procId);

                if (client == null) {
                    HadoopCommunicationClient old = clients.put(procId, client = createNioClient(desc));

                    assert old == null;
                }
            }
            finally {
                locks.unlock(procId, sync);
            }

            assert client != null;
        }

        if (client.reserve())
            return client;
        else
            // Client has just been closed by idle worker. Help it and try again.
            clients.remove(procId, client);
    }
}
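// Note on reserveClient(): the per-process lock plus the second clients.get() check prevents two
// threads from opening parallel connections to the same process, and the outer while loop retries
// when reserve() loses the race with the idle worker that has just closed the cached client.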
/**
 * Establishes TCP connection to remote Hadoop process and returns client.
 *
 * @param desc Process descriptor.
 * @return Client.
 * @throws IgniteCheckedException If failed.
 */
protected HadoopCommunicationClient createTcpClient(HadoopProcessDescriptor desc) throws IgniteCheckedException {
    String addr = desc.address();

    int port = desc.tcpPort();

    if (log.isDebugEnabled())
        log.debug("Trying to connect to remote process [locProcDesc=" + locProcDesc + ", desc=" + desc + ']');

    boolean conn = false;
    HadoopTcpNioCommunicationClient client = null;
    IgniteCheckedException errs = null;

    int connectAttempts = 1;

    long connTimeout0 = connTimeout;

    int attempt = 1;

    while (!conn) { // Reconnection on handshake timeout.
        try {
            SocketChannel ch = SocketChannel.open();

            ch.configureBlocking(true);

            ch.socket().setTcpNoDelay(tcpNoDelay);
            ch.socket().setKeepAlive(true);

            if (sockRcvBuf > 0)
                ch.socket().setReceiveBufferSize(sockRcvBuf);

            if (sockSndBuf > 0)
                ch.socket().setSendBufferSize(sockSndBuf);

            ch.socket().connect(new InetSocketAddress(addr, port), (int)connTimeout);

            HandshakeFinish fin = new HandshakeFinish();

            GridNioSession ses = nioSrvr.createSession(ch, F.asMap(HANDSHAKE_FINISH_META, fin)).get();

            client = new HadoopTcpNioCommunicationClient(ses);

            if (log.isDebugEnabled())
                log.debug("Waiting for handshake finish for client: " + client);

            fin.await(connTimeout0);

            conn = true;
        }
        catch (HadoopHandshakeTimeoutException e) {
            if (client != null) {
                client.forceClose();

                client = null;
            }

            if (log.isDebugEnabled())
                log.debug("Handshake timed out (will retry with increased timeout) [timeout=" + connTimeout0 +
                    ", desc=" + desc + ", port=" + port + ", err=" + e + ']');

            if (attempt == reconCnt || connTimeout0 > maxConnTimeout) {
                if (log.isDebugEnabled())
                    log.debug("Handshake timed out (will stop attempts to perform the handshake) " +
                        "[timeout=" + connTimeout0 + ", maxConnTimeout=" + maxConnTimeout +
                        ", attempt=" + attempt + ", reconCnt=" + reconCnt +
                        ", err=" + e.getMessage() + ", addr=" + addr + ']');

                if (errs == null)
                    errs = new IgniteCheckedException("Failed to connect to remote Hadoop process " +
                        "(is process still running?) [desc=" + desc + ", addrs=" + addr + ']');

                errs.addSuppressed(e);

                break;
            }
            else {
                attempt++;

                connTimeout0 *= 2;

                // Continue loop.
            }
        }
        catch (Exception e) {
            if (client != null) {
                client.forceClose();

                client = null;
            }

            if (log.isDebugEnabled())
                log.debug("Client creation failed [addr=" + addr + ", port=" + port + ", err=" + e + ']');

            if (X.hasCause(e, SocketTimeoutException.class))
                LT.warn(log, null, "Connect timed out (consider increasing 'connTimeout' " +
                    "configuration property) [addr=" + addr + ", port=" + port + ']');

            if (errs == null)
                errs = new IgniteCheckedException("Failed to connect to remote Hadoop process " +
                    "(is process still running?) [desc=" + desc + ", addrs=" + addr + ']');

            errs.addSuppressed(e);

            // Reconnect for the second time, if connection is not established.
            if (connectAttempts < 2 && (e instanceof ConnectException || X.hasCause(e, ConnectException.class))) {
                connectAttempts++;

                continue;
            }

            break;
        }
    }

    if (client == null) {
        assert errs != null;

        if (X.hasCause(errs, ConnectException.class))
            LT.warn(log, null, "Failed to connect to a remote Hadoop process (is process still running?). " +
                "Make sure operating system firewall is disabled on local and remote host [addrs=" + addr +
                ", port=" + port + ']');

        throw errs;
    }

    if (log.isDebugEnabled())
        log.debug("Created client: " + client);

    return client;
}
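// Note on the retry logic in createTcpClient(): on a handshake timeout the handshake wait doubles
// on every attempt (connTimeout0 *= 2) until either 'reconCnt' attempts are exhausted or the wait
// exceeds 'maxConnTimeout'; e.g. with connTimeout = 1000 ms (an illustrative value, not a default
// taken from this file) the waits would be 1000, 2000, 4000, ... ms. Plain connect failures are
// retried at most once more (connectAttempts < 2), and only when the cause is a ConnectException.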
/**
 * Creates new shared memory communication server.
 *
 * @return Server.
 * @throws IgniteCheckedException If failed.
 */
@Nullable private IpcSharedMemoryServerEndpoint resetShmemServer() throws IgniteCheckedException {
    if (boundTcpShmemPort >= 0)
        throw new IgniteCheckedException("Shared memory server was already created on port " + boundTcpShmemPort);

    if (shmemPort == -1 || U.isWindows())
        return null;

    IgniteCheckedException lastEx = null;

    // If configured TCP port is busy, find first available in range.
    for (int port = shmemPort; port < shmemPort + locPortRange; port++) {
        try {
            IpcSharedMemoryServerEndpoint srv = new IpcSharedMemoryServerEndpoint(
                log.getLogger(IpcSharedMemoryServerEndpoint.class), locProcDesc.processId(), gridName);

            srv.setPort(port);

            srv.omitOutOfResourcesWarning(true);

            srv.start();

            boundTcpShmemPort = port;

            // Ack the port the shared memory server was bound to.
            if (log.isInfoEnabled())
                log.info("Successfully bound shared memory communication to TCP port [port=" + boundTcpShmemPort +
                    ", locHost=" + locHost + ']');

            return srv;
        }
        catch (IgniteCheckedException e) {
            lastEx = e;

            if (log.isDebugEnabled())
                log.debug("Failed to bind to local port (will try next port within range) [port=" + port +
                    ", locHost=" + locHost + ']');
        }
    }

    // If free port wasn't found.
    throw new IgniteCheckedException("Failed to bind shared memory communication to any port within range " +
        "[startPort=" + shmemPort + ", portRange=" + locPortRange + ", locHost=" + locHost + ']', lastEx);
}
/** {@inheritDoc} */
@Override public void onMessageReceived(GridNioSession ses, Object msg) throws IgniteCheckedException {
    HadoopProcessDescriptor desc = ses.meta(PROCESS_META);

    UUID rmtProcId = desc == null ? null : desc.processId();

    if (rmtProcId == null) {
        if (!(msg instanceof ProcessHandshakeMessage)) {
            log.warning("Invalid handshake message received, will close connection [ses=" + ses +
                ", msg=" + msg + ']');

            ses.close();

            return;
        }

        ProcessHandshakeMessage nId = (ProcessHandshakeMessage)msg;

        if (log.isDebugEnabled())
            log.debug("Received handshake message [ses=" + ses + ", msg=" + msg + ']');

        ses.addMeta(PROCESS_META, nId.processDescriptor());

        if (!ses.accepted())
            // Send handshake reply.
            ses.send(locIdMsg);
        else {
            rmtProcId = nId.processDescriptor().processId();

            if (log.isDebugEnabled())
                log.debug("Finished handshake with remote client: " + ses);

            Object sync = locks.tryLock(rmtProcId);

            if (sync != null) {
                try {
                    if (clients.get(rmtProcId) == null) {
                        if (log.isDebugEnabled())
                            log.debug("Will reuse session for descriptor: " + rmtProcId);

                        // Handshake finished flag is true.
                        clients.put(rmtProcId, new HadoopTcpNioCommunicationClient(ses));
                    }
                    else {
                        if (log.isDebugEnabled())
                            log.debug("Will not reuse client as another already exists [locProcDesc=" +
                                locProcDesc + ", desc=" + desc + ']');
                    }
                }
                finally {
                    locks.unlock(rmtProcId, sync);
                }
            }
            else {
                if (log.isDebugEnabled())
                    log.debug("Concurrent connection is being established, will not reuse client session [" +
                        "locProcDesc=" + locProcDesc + ", desc=" + desc + ']');
            }
        }

        if (log.isDebugEnabled())
            log.debug("Handshake is finished for session [ses=" + ses + ", locProcDesc=" + locProcDesc + ']');

        HandshakeFinish to = ses.meta(HANDSHAKE_FINISH_META);

        if (to != null)
            to.finish();

        // Notify session opened (both parties).
        proceedSessionOpened(ses);
    }
    else {
        if (msgQueueLimit > 0) {
            GridNioMessageTracker tracker = ses.meta(TRACKER_META);

            if (tracker == null) {
                GridNioMessageTracker old = ses.addMeta(TRACKER_META,
                    tracker = new GridNioMessageTracker(ses, msgQueueLimit));

                assert old == null;
            }

            tracker.onMessageReceived();
        }

        proceedMessageReceived(ses, msg);
    }
}