/** * @param desc Process descriptor. * @param port Port. * @return Client. * @throws IgniteCheckedException If failed. */ @Nullable protected HadoopCommunicationClient createShmemClient(HadoopProcessDescriptor desc, int port) throws IgniteCheckedException { int attempt = 1; int connectAttempts = 1; long connTimeout0 = connTimeout; while (true) { IpcEndpoint clientEndpoint; try { clientEndpoint = new IpcSharedMemoryClientEndpoint(port, (int) connTimeout, log); } catch (IgniteCheckedException e) { // Reconnect for the second time, if connection is not established. if (connectAttempts < 2 && X.hasCause(e, ConnectException.class)) { connectAttempts++; continue; } throw e; } HadoopCommunicationClient client = null; try { ShmemWorker worker = new ShmemWorker(clientEndpoint, false); shmemWorkers.add(worker); GridNioSession ses = worker.session(); HandshakeFinish fin = new HandshakeFinish(); // We are in lock, it is safe to get session and attach ses.addMeta(HANDSHAKE_FINISH_META, fin); client = new HadoopTcpNioCommunicationClient(ses); new IgniteThread(worker).start(); fin.await(connTimeout0); } catch (HadoopHandshakeTimeoutException e) { if (log.isDebugEnabled()) log.debug( "Handshake timed out (will retry with increased timeout) [timeout=" + connTimeout0 + ", err=" + e.getMessage() + ", client=" + client + ']'); if (client != null) client.forceClose(); if (attempt == reconCnt || connTimeout0 > maxConnTimeout) { if (log.isDebugEnabled()) log.debug( "Handshake timedout (will stop attempts to perform the handshake) " + "[timeout=" + connTimeout0 + ", maxConnTimeout=" + maxConnTimeout + ", attempt=" + attempt + ", reconCnt=" + reconCnt + ", err=" + e.getMessage() + ", client=" + client + ']'); throw e; } else { attempt++; connTimeout0 *= 2; continue; } } catch (RuntimeException | Error e) { if (log.isDebugEnabled()) log.debug( "Caught exception (will close client) [err=" + e.getMessage() + ", client=" + client + ']'); if (client != null) client.forceClose(); throw e; } 
return client; } }
/** * Establish TCP connection to remote hadoop process and returns client. * * @param desc Process descriptor. * @return Client. * @throws IgniteCheckedException If failed. */ protected HadoopCommunicationClient createTcpClient(HadoopProcessDescriptor desc) throws IgniteCheckedException { String addr = desc.address(); int port = desc.tcpPort(); if (log.isDebugEnabled()) log.debug( "Trying to connect to remote process [locProcDesc=" + locProcDesc + ", desc=" + desc + ']'); boolean conn = false; HadoopTcpNioCommunicationClient client = null; IgniteCheckedException errs = null; int connectAttempts = 1; long connTimeout0 = connTimeout; int attempt = 1; while (!conn) { // Reconnection on handshake timeout. try { SocketChannel ch = SocketChannel.open(); ch.configureBlocking(true); ch.socket().setTcpNoDelay(tcpNoDelay); ch.socket().setKeepAlive(true); if (sockRcvBuf > 0) ch.socket().setReceiveBufferSize(sockRcvBuf); if (sockSndBuf > 0) ch.socket().setSendBufferSize(sockSndBuf); ch.socket().connect(new InetSocketAddress(addr, port), (int) connTimeout); HandshakeFinish fin = new HandshakeFinish(); GridNioSession ses = nioSrvr.createSession(ch, F.asMap(HANDSHAKE_FINISH_META, fin)).get(); client = new HadoopTcpNioCommunicationClient(ses); if (log.isDebugEnabled()) log.debug("Waiting for handshake finish for client: " + client); fin.await(connTimeout0); conn = true; } catch (HadoopHandshakeTimeoutException e) { if (client != null) { client.forceClose(); client = null; } if (log.isDebugEnabled()) log.debug( "Handshake timedout (will retry with increased timeout) [timeout=" + connTimeout0 + ", desc=" + desc + ", port=" + port + ", err=" + e + ']'); if (attempt == reconCnt || connTimeout0 > maxConnTimeout) { if (log.isDebugEnabled()) log.debug( "Handshake timed out (will stop attempts to perform the handshake) " + "[timeout=" + connTimeout0 + ", maxConnTimeout=" + maxConnTimeout + ", attempt=" + attempt + ", reconCnt=" + reconCnt + ", err=" + e.getMessage() + ", addr=" + addr + 
']'); if (errs == null) errs = new IgniteCheckedException( "Failed to connect to remote Hadoop process " + "(is process still running?) [desc=" + desc + ", addrs=" + addr + ']'); errs.addSuppressed(e); break; } else { attempt++; connTimeout0 *= 2; // Continue loop. } } catch (Exception e) { if (client != null) { client.forceClose(); client = null; } if (log.isDebugEnabled()) log.debug("Client creation failed [addr=" + addr + ", port=" + port + ", err=" + e + ']'); if (X.hasCause(e, SocketTimeoutException.class)) LT.warn( log, null, "Connect timed out (consider increasing 'connTimeout' " + "configuration property) [addr=" + addr + ", port=" + port + ']'); if (errs == null) errs = new IgniteCheckedException( "Failed to connect to remote Hadoop process (is process still running?) " + "[desc=" + desc + ", addrs=" + addr + ']'); errs.addSuppressed(e); // Reconnect for the second time, if connection is not established. if (connectAttempts < 2 && (e instanceof ConnectException || X.hasCause(e, ConnectException.class))) { connectAttempts++; continue; } break; } } if (client == null) { assert errs != null; if (X.hasCause(errs, ConnectException.class)) LT.warn( log, null, "Failed to connect to a remote Hadoop process (is process still running?). " + "Make sure operating system firewall is disabled on local and remote host) " + "[addrs=" + addr + ", port=" + port + ']'); throw errs; } if (log.isDebugEnabled()) log.debug("Created client: " + client); return client; }
/**
 * {@inheritDoc}
 * <p>
 * While the session has no process descriptor with a process id attached, the message is treated as
 * a handshake: anything other than a {@code ProcessHandshakeMessage} closes the session. After the
 * handshake the message is passed on via {@code proceedMessageReceived}, optionally counted by a
 * per-session message tracker when {@code msgQueueLimit} is positive.
 */
@Override public void onMessageReceived(GridNioSession ses, Object msg) throws IgniteCheckedException {
    HadoopProcessDescriptor desc = ses.meta(PROCESS_META);

    UUID rmtProcId = desc == null ? null : desc.processId();

    if (rmtProcId == null) {
        if (!(msg instanceof ProcessHandshakeMessage)) {
            log.warning("Invalid handshake message received, will close connection [ses=" + ses +
                ", msg=" + msg + ']');

            ses.close();

            return;
        }

        ProcessHandshakeMessage nId = (ProcessHandshakeMessage)msg;

        if (log.isDebugEnabled())
            log.debug("Received handshake message [ses=" + ses + ", msg=" + msg + ']');

        ses.addMeta(PROCESS_META, nId.processDescriptor());

        if (!ses.accepted())
            // Send handshake reply.
            ses.send(locIdMsg);
        else {
            // The process id must come from the handshake message: the value read from the session
            // above is null in this branch, and it is used as the key for 'locks' and 'clients' below.
            rmtProcId = nId.processDescriptor().processId();

            if (log.isDebugEnabled())
                log.debug("Finished handshake with remote client: " + ses);

            Object sync = locks.tryLock(rmtProcId);

            if (sync != null) {
                try {
                    if (clients.get(rmtProcId) == null) {
                        if (log.isDebugEnabled())
                            log.debug("Will reuse session for descriptor: " + rmtProcId);

                        // Handshake finished flag is true.
                        clients.put(rmtProcId, new HadoopTcpNioCommunicationClient(ses));
                    }
                    else {
                        // Log the descriptor from the handshake message; 'desc' read from the session
                        // carries no process id on this path.
                        if (log.isDebugEnabled())
                            log.debug("Will not reuse client as another already exists [locProcDesc=" +
                                locProcDesc + ", desc=" + nId.processDescriptor() + ']');
                    }
                }
                finally {
                    locks.unlock(rmtProcId, sync);
                }
            }
            else {
                if (log.isDebugEnabled())
                    log.debug("Concurrent connection is being established, will not reuse client session [" +
                        "locProcDesc=" + locProcDesc + ", desc=" + nId.processDescriptor() + ']');
            }
        }

        if (log.isDebugEnabled())
            log.debug("Handshake is finished for session [ses=" + ses + ", locProcDesc=" + locProcDesc + ']');

        // Release whoever is waiting on the handshake for this session, if anyone.
        HandshakeFinish to = ses.meta(HANDSHAKE_FINISH_META);

        if (to != null)
            to.finish();

        // Notify session opened (both parties).
        proceedSessionOpened(ses);
    }
    else {
        if (msgQueueLimit > 0) {
            GridNioMessageTracker tracker = ses.meta(TRACKER_META);

            if (tracker == null) {
                // First tracked message on this session: attach a new tracker.
                GridNioMessageTracker old = ses.addMeta(TRACKER_META, tracker =
                    new GridNioMessageTracker(ses, msgQueueLimit));

                assert old == null;
            }

            tracker.onMessageReceived();
        }

        proceedMessageReceived(ses, msg);
    }
}