/**
 * Sends the given heartbeat to the address returned by {@code getHeartbeatAddress()} and applies
 * any node statuses carried back in the response.
 *
 * <p>For every status in the response, the cluster coordinator is asked to reset that node's
 * status, passing along the update identifier this node reported for it in the heartbeat payload
 * (or {@code -1} when the payload had no entry for that node). Finally, a summary line with the
 * heartbeat creation time, send time and elapsed milliseconds is logged.
 *
 * @param heartbeatMessage the heartbeat to transmit
 * @throws IOException propagated from the calls made while sending the heartbeat or reading its
 *     payload
 */
@Override
public synchronized void send(final HeartbeatMessage heartbeatMessage) throws IOException {
    final long startNanos = System.nanoTime();

    final String heartbeatAddress = getHeartbeatAddress();
    final HeartbeatResponseMessage response = protocolSender.heartbeat(heartbeatMessage, heartbeatAddress);

    // Index the update identifiers contained in the payload we just sent, keyed by node, so that
    // each status in the response can be matched against the version this node reported.
    final HeartbeatPayload sentPayload = HeartbeatPayload.unmarshal(heartbeatMessage.getHeartbeat().getPayload());
    final List<NodeConnectionStatus> reportedStatuses = sentPayload.getClusterStatus();
    final Map<NodeIdentifier, Long> reportedUpdateIds = reportedStatuses.stream()
            .collect(Collectors.toMap(
                    NodeConnectionStatus::getNodeIdentifier,
                    NodeConnectionStatus::getUpdateIdentifier));

    final List<NodeConnectionStatus> statusesFromResponse = response.getUpdatedNodeStatuses();
    if (statusesFromResponse != null) {
        for (final NodeConnectionStatus incoming : statusesFromResponse) {
            final NodeIdentifier nodeId = incoming.getNodeIdentifier();

            // -1 is passed when the sent payload carried no entry for this node.
            final Long reportedId = reportedUpdateIds.get(nodeId);
            final long baselineUpdateId;
            if (reportedId == null) {
                baselineUpdateId = -1L;
            } else {
                baselineUpdateId = reportedId;
            }

            if (clusterCoordinator.resetNodeStatus(incoming, baselineUpdateId)) {
                logger.info("After receiving heartbeat response, updated status of {} to {}", nodeId, incoming);
            } else {
                logger.debug(
                        "After receiving heartbeat response, did not update status of {} to {} because the update is out-of-date",
                        nodeId,
                        incoming);
            }
        }
    }

    final long elapsedMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);

    final DateFormat timestampFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS", Locale.US);

    // The flow election message is optional; when present it is appended after a "; " separator.
    final String flowElectionMessage = response.getFlowElectionMessage();
    final String electionSuffix;
    if (flowElectionMessage == null) {
        electionSuffix = "";
    } else {
        electionSuffix = "; " + flowElectionMessage;
    }

    final String createdAt = timestampFormat.format(new Date(heartbeatMessage.getHeartbeat().getCreatedTimestamp()));
    final String sentAt = timestampFormat.format(new Date());
    logger.info(
            "Heartbeat created at {} and sent to {} at {}; send took {} millis{}",
            createdAt,
            heartbeatAddress,
            sentAt,
            elapsedMillis,
            electionSuffix);
}
private void processHeartbeat(final NodeHeartbeat heartbeat) { final NodeIdentifier nodeId = heartbeat.getNodeIdentifier(); // Do not process heartbeat if it's blocked by firewall. if (clusterCoordinator.isBlockedByFirewall(nodeId.getSocketAddress())) { clusterCoordinator.reportEvent( nodeId, Severity.WARNING, "Firewall blocked received heartbeat. Issuing disconnection request."); // request node to disconnect clusterCoordinator.requestNodeDisconnect( nodeId, DisconnectionCode.BLOCKED_BY_FIREWALL, "Blocked by Firewall"); removeHeartbeat(nodeId); return; } final NodeConnectionStatus connectionStatus = clusterCoordinator.getConnectionStatus(nodeId); if (connectionStatus == null) { // Unknown node. Issue reconnect request clusterCoordinator.reportEvent( nodeId, Severity.INFO, "Received heartbeat from unknown node. Removing heartbeat and requesting that node connect to cluster."); removeHeartbeat(nodeId); clusterCoordinator.requestNodeConnect(nodeId, null); return; } final NodeConnectionState connectionState = connectionStatus.getState(); if (heartbeat.getConnectionStatus().getState() != NodeConnectionState.CONNECTED && connectionState == NodeConnectionState.CONNECTED) { // Cluster Coordinator believes that node is connected, but node does not believe so. clusterCoordinator.reportEvent( nodeId, Severity.WARNING, "Received heartbeat from node that thinks it is not yet part of the cluster," + "though the Cluster Coordinator thought it was (node claimed state was " + heartbeat.getConnectionStatus().getState() + "). Marking as Disconnected and requesting that Node reconnect to cluster"); clusterCoordinator.requestNodeConnect(nodeId, null); return; } if (NodeConnectionState.DISCONNECTED == connectionState) { // ignore heartbeats from nodes disconnected by means other than lack of heartbeat, unless it // is // the only node. We allow it if it is the only node because if we have a one-node cluster, // then // we cannot manually reconnect it. 
final DisconnectionCode disconnectionCode = connectionStatus.getDisconnectCode(); // Determine whether or not the node should be allowed to be in the cluster still, depending // on its reason for disconnection. if (disconnectionCode == DisconnectionCode.LACK_OF_HEARTBEAT || disconnectionCode == DisconnectionCode.UNABLE_TO_COMMUNICATE) { clusterCoordinator.reportEvent( nodeId, Severity.INFO, "Received heartbeat from node previously " + "disconnected due to " + disconnectionCode + ". Issuing reconnection request."); clusterCoordinator.requestNodeConnect(nodeId, null); } else { // disconnected nodes should not heartbeat, so we need to issue a disconnection request. logger.info( "Ignoring received heartbeat from disconnected node " + nodeId + ". Issuing disconnection request."); clusterCoordinator.requestNodeDisconnect( nodeId, DisconnectionCode.HEARTBEAT_RECEIVED_FROM_DISCONNECTED_NODE, DisconnectionCode.HEARTBEAT_RECEIVED_FROM_DISCONNECTED_NODE.toString()); removeHeartbeat(nodeId); } return; } if (NodeConnectionState.DISCONNECTING == connectionStatus.getState()) { // ignore spurious heartbeat removeHeartbeat(nodeId); return; } // first heartbeat causes status change from connecting to connected if (NodeConnectionState.CONNECTING == connectionState) { final Long connectionRequestTime = connectionStatus.getConnectionRequestTime(); if (connectionRequestTime != null && heartbeat.getTimestamp() < connectionRequestTime) { clusterCoordinator.reportEvent( nodeId, Severity.INFO, "Received heartbeat but ignoring because it was reported before the node was last asked to reconnect."); removeHeartbeat(nodeId); return; } // connection complete clusterCoordinator.finishNodeConnection(nodeId); clusterCoordinator.reportEvent( nodeId, Severity.INFO, "Received first heartbeat from connecting node. Node connected."); } clusterCoordinator.updateNodeRoles(nodeId, heartbeat.getRoles()); }