Ejemplo n.º 1
0
  /**
   * Sends the given heartbeat to the address returned by {@code getHeartbeatAddress()} and applies
   * any node status updates that come back in the response.
   *
   * <p>For each status in the response, the update identifier that the heartbeat's own payload
   * listed for the same node is looked up ({@code -1} when the payload has no entry for it) and
   * handed to {@code clusterCoordinator.resetNodeStatus} together with the new status. Afterwards a
   * summary line is logged with the heartbeat's creation time, the destination, the current time,
   * the elapsed milliseconds since this method was entered, and the flow election message when the
   * response carries one.
   *
   * @param heartbeatMessage the heartbeat to send; its payload is unmarshalled to read the cluster
   *     status it reported
   * @throws IOException declared by this method; presumably raised when sending the heartbeat or
   *     unmarshalling its payload fails (confirm against the callee declarations)
   */
  @Override
  public synchronized void send(final HeartbeatMessage heartbeatMessage) throws IOException {
    final long startNanos = System.nanoTime();

    final String destination = getHeartbeatAddress();
    final HeartbeatResponseMessage response =
        protocolSender.heartbeat(heartbeatMessage, destination);

    // Index the update identifiers listed in the heartbeat's own payload, keyed by node.
    final HeartbeatPayload sentPayload =
        HeartbeatPayload.unmarshal(heartbeatMessage.getHeartbeat().getPayload());
    final List<NodeConnectionStatus> reportedStatuses = sentPayload.getClusterStatus();
    final Map<NodeIdentifier, Long> reportedUpdateIds =
        reportedStatuses.stream()
            .collect(
                Collectors.toMap(
                    NodeConnectionStatus::getNodeIdentifier,
                    NodeConnectionStatus::getUpdateIdentifier));

    final List<NodeConnectionStatus> statusUpdates = response.getUpdatedNodeStatuses();
    if (statusUpdates != null) {
      for (final NodeConnectionStatus statusUpdate : statusUpdates) {
        final NodeIdentifier nodeId = statusUpdate.getNodeIdentifier();

        // -1 stands in for "the payload did not mention this node".
        final Long reportedId = reportedUpdateIds.get(nodeId);
        final long baselineId = (reportedId != null) ? reportedId : -1L;

        if (clusterCoordinator.resetNodeStatus(statusUpdate, baselineId)) {
          logger.info(
              "After receiving heartbeat response, updated status of {} to {}",
              nodeId,
              statusUpdate);
        } else {
          logger.debug(
              "After receiving heartbeat response, did not update status of {} to {} because the update is out-of-date",
              nodeId,
              statusUpdate);
        }
      }
    }

    // The measured duration covers the send itself plus the status processing above.
    final long elapsedMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);

    final String electionMessage = response.getFlowElectionMessage();
    final String electionSuffix;
    if (electionMessage == null) {
      electionSuffix = "";
    } else {
      electionSuffix = "; " + electionMessage;
    }

    final DateFormat timestampFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS", Locale.US);
    final String createdAt =
        timestampFormat.format(new Date(heartbeatMessage.getHeartbeat().getCreatedTimestamp()));
    final String sentAt = timestampFormat.format(new Date());

    logger.info(
        "Heartbeat created at {} and sent to {} at {}; send took {} millis{}",
        createdAt,
        destination,
        sentAt,
        elapsedMillis,
        electionSuffix);
  }
Ejemplo n.º 2
0
  /**
   * Decides how the cluster coordinator should react to a single heartbeat received from a node.
   *
   * <p>The checks run in this order, and the first one that matches ends processing:
   *
   * <ol>
   *   <li>The node's socket address is blocked by the firewall: report a warning, request a
   *       disconnect, and drop the heartbeat.
   *   <li>The coordinator has no connection status for the node: report it, drop the heartbeat, and
   *       request that the node connect.
   *   <li>The coordinator considers the node CONNECTED but the heartbeat reports another state:
   *       report a warning and request that the node connect.
   *   <li>The coordinator considers the node DISCONNECTED: request a reconnect if the disconnect
   *       code was LACK_OF_HEARTBEAT or UNABLE_TO_COMMUNICATE; otherwise request a disconnect and
   *       drop the heartbeat.
   *   <li>The coordinator considers the node DISCONNECTING: drop the heartbeat.
   *   <li>The coordinator considers the node CONNECTING: drop the heartbeat if it predates the
   *       connection request time; otherwise finish the node's connection.
   * </ol>
   *
   * <p>If none of the checks ended processing, the node's roles are updated from the heartbeat.
   *
   * @param heartbeat the heartbeat to process
   */
  private void processHeartbeat(final NodeHeartbeat heartbeat) {
    final NodeIdentifier nodeId = heartbeat.getNodeIdentifier();

    // Do not process heartbeat if it's blocked by firewall.
    if (clusterCoordinator.isBlockedByFirewall(nodeId.getSocketAddress())) {
      clusterCoordinator.reportEvent(
          nodeId,
          Severity.WARNING,
          "Firewall blocked received heartbeat. Issuing disconnection request.");

      // request node to disconnect
      clusterCoordinator.requestNodeDisconnect(
          nodeId, DisconnectionCode.BLOCKED_BY_FIREWALL, "Blocked by Firewall");
      removeHeartbeat(nodeId);
      return;
    }

    final NodeConnectionStatus connectionStatus = clusterCoordinator.getConnectionStatus(nodeId);
    if (connectionStatus == null) {
      // Unknown node. Issue reconnect request
      clusterCoordinator.reportEvent(
          nodeId,
          Severity.INFO,
          "Received heartbeat from unknown node. Removing heartbeat and requesting that node connect to cluster.");
      removeHeartbeat(nodeId);

      clusterCoordinator.requestNodeConnect(nodeId, null);
      return;
    }

    // State as tracked by the coordinator vs. state the node claims in its heartbeat.
    final NodeConnectionState connectionState = connectionStatus.getState();
    final NodeConnectionState claimedState = heartbeat.getConnectionStatus().getState();
    if (claimedState != NodeConnectionState.CONNECTED
        && connectionState == NodeConnectionState.CONNECTED) {
      // Cluster Coordinator believes that node is connected, but node does not believe so.
      clusterCoordinator.reportEvent(
          nodeId,
          Severity.WARNING,
          "Received heartbeat from node that thinks it is not yet part of the cluster, "
              + "though the Cluster Coordinator thought it was (node claimed state was "
              + claimedState
              + "). Marking as Disconnected and requesting that Node reconnect to cluster");
      clusterCoordinator.requestNodeConnect(nodeId, null);
      return;
    }

    if (NodeConnectionState.DISCONNECTED == connectionState) {
      // Whether a disconnected node is brought back depends on why it was disconnected: nodes
      // dropped for lack of heartbeat or for communication failure are asked to reconnect, while
      // nodes disconnected for any other reason are told to disconnect again.
      final DisconnectionCode disconnectionCode = connectionStatus.getDisconnectCode();

      if (disconnectionCode == DisconnectionCode.LACK_OF_HEARTBEAT
          || disconnectionCode == DisconnectionCode.UNABLE_TO_COMMUNICATE) {
        clusterCoordinator.reportEvent(
            nodeId,
            Severity.INFO,
            "Received heartbeat from node previously "
                + "disconnected due to "
                + disconnectionCode
                + ". Issuing reconnection request.");

        clusterCoordinator.requestNodeConnect(nodeId, null);
      } else {
        // disconnected nodes should not heartbeat, so we need to issue a disconnection request.
        logger.info(
            "Ignoring received heartbeat from disconnected node {}.  Issuing disconnection request.",
            nodeId);
        clusterCoordinator.requestNodeDisconnect(
            nodeId,
            DisconnectionCode.HEARTBEAT_RECEIVED_FROM_DISCONNECTED_NODE,
            DisconnectionCode.HEARTBEAT_RECEIVED_FROM_DISCONNECTED_NODE.toString());
        removeHeartbeat(nodeId);
      }

      return;
    }

    if (NodeConnectionState.DISCONNECTING == connectionState) {
      // ignore spurious heartbeat
      removeHeartbeat(nodeId);
      return;
    }

    // first heartbeat causes status change from connecting to connected
    if (NodeConnectionState.CONNECTING == connectionState) {
      final Long connectionRequestTime = connectionStatus.getConnectionRequestTime();
      if (connectionRequestTime != null && heartbeat.getTimestamp() < connectionRequestTime) {
        // Heartbeat predates the most recent connection request, so it says nothing about whether
        // the node has acted on that request yet.
        clusterCoordinator.reportEvent(
            nodeId,
            Severity.INFO,
            "Received heartbeat but ignoring because it was reported before the node was last asked to reconnect.");
        removeHeartbeat(nodeId);
        return;
      }

      // connection complete
      clusterCoordinator.finishNodeConnection(nodeId);
      clusterCoordinator.reportEvent(
          nodeId, Severity.INFO, "Received first heartbeat from connecting node. Node connected.");
    }

    clusterCoordinator.updateNodeRoles(nodeId, heartbeat.getRoles());
  }