/** * Fetches all of the latest heartbeats and updates the Cluster Coordinator as appropriate, based * on the heartbeats received. * * <p>Visible for testing. */ protected synchronized void monitorHeartbeats() { final Map<NodeIdentifier, NodeHeartbeat> latestHeartbeats = getLatestHeartbeats(); if (latestHeartbeats == null || latestHeartbeats.isEmpty()) { logger.debug( "Received no new heartbeats. Will not disconnect any nodes due to lack of heartbeat"); return; } final StopWatch procStopWatch = new StopWatch(true); for (final NodeHeartbeat heartbeat : latestHeartbeats.values()) { try { processHeartbeat(heartbeat); } catch (final Exception e) { clusterCoordinator.reportEvent( null, Severity.ERROR, "Received heartbeat from " + heartbeat.getNodeIdentifier() + " but failed to process heartbeat due to " + e); logger.error( "Failed to process heartbeat from {} due to {}", heartbeat.getNodeIdentifier(), e.toString()); logger.error("", e); } } procStopWatch.stop(); logger.info( "Finished processing {} heartbeats in {}", latestHeartbeats.size(), procStopWatch.getDuration()); // Disconnect any node that hasn't sent a heartbeat in a long time (8 times the heartbeat // interval) final long maxMillis = heartbeatIntervalMillis * 8; final long threshold = System.currentTimeMillis() - maxMillis; for (final NodeHeartbeat heartbeat : latestHeartbeats.values()) { if (heartbeat.getTimestamp() < threshold) { clusterCoordinator.requestNodeDisconnect( heartbeat.getNodeIdentifier(), DisconnectionCode.LACK_OF_HEARTBEAT, "Latest heartbeat from Node has expired"); try { removeHeartbeat(heartbeat.getNodeIdentifier()); } catch (final Exception e) { logger.warn( "Failed to remove heartbeat for {} due to {}", heartbeat.getNodeIdentifier(), e.toString()); logger.warn("", e); } } } }
/**
 * Sends the given heartbeat to the current heartbeat address and applies any node status updates
 * contained in the response.
 *
 * <p>For each updated status in the response, the update identifier that this node reported for
 * the same node in the heartbeat payload (or -1 if none was reported) is passed to the cluster
 * coordinator along with the new status. Finally a summary line with the creation time, send
 * time and duration is logged.
 *
 * @param heartbeatMessage the heartbeat to send
 * @throws IOException declared by the method contract; not thrown directly by this body
 */
@Override
public synchronized void send(final HeartbeatMessage heartbeatMessage) throws IOException {
  final long startNanos = System.nanoTime();

  final String destination = getHeartbeatAddress();
  final HeartbeatResponseMessage response =
      protocolSender.heartbeat(heartbeatMessage, destination);

  // Recover the cluster status that was embedded in the outgoing heartbeat so that the update
  // identifier known at send time can be looked up per node.
  final byte[] rawPayload = heartbeatMessage.getHeartbeat().getPayload();
  final HeartbeatPayload sentPayload = HeartbeatPayload.unmarshal(rawPayload);
  final List<NodeConnectionStatus> sentStatuses = sentPayload.getClusterStatus();
  final Map<NodeIdentifier, Long> sentUpdateIds =
      sentStatuses
          .stream()
          .collect(
              Collectors.toMap(
                  NodeConnectionStatus::getNodeIdentifier,
                  NodeConnectionStatus::getUpdateIdentifier));

  final List<NodeConnectionStatus> statusesFromResponse = response.getUpdatedNodeStatuses();
  if (statusesFromResponse != null) {
    for (final NodeConnectionStatus incoming : statusesFromResponse) {
      final NodeIdentifier target = incoming.getNodeIdentifier();
      final Long sentUpdateId = sentUpdateIds.get(target);

      // -1 signals that no update identifier was sent for this node.
      final long baselineUpdateId;
      if (sentUpdateId == null) {
        baselineUpdateId = -1L;
      } else {
        baselineUpdateId = sentUpdateId;
      }

      final boolean applied = clusterCoordinator.resetNodeStatus(incoming, baselineUpdateId);
      if (!applied) {
        logger.debug(
            "After receiving heartbeat response, did not update status of {} to {} because the update is out-of-date",
            incoming.getNodeIdentifier(),
            incoming);
      } else {
        logger.info(
            "After receiving heartbeat response, updated status of {} to {}",
            incoming.getNodeIdentifier(),
            incoming);
      }
    }
  }

  final long elapsedNanos = System.nanoTime() - startNanos;
  final long elapsedMillis = TimeUnit.NANOSECONDS.toMillis(elapsedNanos);

  final DateFormat timestampFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS", Locale.US);

  // The flow election message is optional; when present it is appended after a "; " separator.
  final String electionMessage = response.getFlowElectionMessage();
  final String electionSuffix;
  if (electionMessage == null) {
    electionSuffix = "";
  } else {
    electionSuffix = "; " + electionMessage;
  }

  final String createdAt =
      timestampFormat.format(new Date(heartbeatMessage.getHeartbeat().getCreatedTimestamp()));
  final String sentAt = timestampFormat.format(new Date());
  logger.info(
      "Heartbeat created at {} and sent to {} at {}; send took {} millis{}",
      createdAt,
      destination,
      sentAt,
      elapsedMillis,
      electionSuffix);
}
/** * Replicates the request to the given node * * @param method the HTTP method * @param entity the Entity to replicate * @param nodeUuid the UUID of the node to replicate the request to * @return the response from the node * @throws UnknownNodeException if the nodeUuid given does not map to any node in the cluster */ protected Response replicate( final String method, final Object entity, final String nodeUuid, final Map<String, String> headersToOverride) { // since we're cluster we must specify the cluster node identifier if (nodeUuid == null) { throw new IllegalArgumentException("The cluster node identifier must be specified."); } final NodeIdentifier nodeId = clusterCoordinator.getNodeIdentifier(nodeUuid); if (nodeId == null) { throw new UnknownNodeException( "Cannot replicate request " + method + " " + getAbsolutePath() + " to node with ID " + nodeUuid + " because the specified node does not exist."); } final Set<NodeIdentifier> targetNodes = Collections.singleton(nodeId); final URI path = getAbsolutePath(); try { final Map<String, String> headers = headersToOverride == null ? getHeaders() : getHeaders(headersToOverride); return requestReplicator .replicate(targetNodes, method, path, entity, headers) .awaitMergedResponse() .getResponse(); } catch (final InterruptedException ie) { return Response.status(Response.Status.INTERNAL_SERVER_ERROR) .entity("Request to " + method + " " + path + " was interrupted") .type("text/plain") .build(); } }
/** * Returns the available Peers and its status of this NiFi. * * @return A peersEntity. */ @GET @Path("/peers") @Consumes(MediaType.WILDCARD) @Produces({MediaType.APPLICATION_JSON, MediaType.APPLICATION_XML}) @ApiOperation( value = "Returns the available Peers and its status of this NiFi", response = PeersEntity.class, authorizations = {@Authorization(value = "Read - /site-to-site", type = "")}) @ApiResponses( value = { @ApiResponse( code = 400, message = "NiFi was unable to complete the request because it was invalid. The request should not be retried without modification."), @ApiResponse(code = 401, message = "Client could not be authenticated."), @ApiResponse(code = 403, message = "Client is not authorized to make this request."), @ApiResponse( code = 409, message = "The request was valid but NiFi was not in the appropriate state to process it. Retrying the same request later may be successful.") }) public Response getPeers(@Context HttpServletRequest req) { authorizeSiteToSite(); if (!properties.isSiteToSiteHttpEnabled()) { return responseCreator.httpSiteToSiteIsNotEnabledResponse(); } final Integer transportProtocolVersion; try { transportProtocolVersion = negotiateTransportProtocolVersion(req, transportProtocolVersionNegotiator); } catch (BadRequestException e) { return responseCreator.badRequestResponse(e); } final List<PeerDTO> peers = new ArrayList<>(); if (properties.isNode()) { final Set<NodeIdentifier> nodeIds = clusterCoordinator.getNodeIdentifiers(NodeConnectionState.CONNECTED); // TODO: Get total number of FlowFiles for each node for (final NodeIdentifier nodeId : nodeIds) { final PeerDTO peer = new PeerDTO(); final String siteToSiteAddress = nodeId.getSiteToSiteAddress(); peer.setHostname(siteToSiteAddress == null ? nodeId.getApiAddress() : siteToSiteAddress); peer.setPort( nodeId.getSiteToSiteHttpApiPort() == null ? 
nodeId.getApiPort() : nodeId.getSiteToSiteHttpApiPort()); peer.setSecure(nodeId.isSiteToSiteSecure()); peer.setFlowFileCount(0); peers.add(peer); } } else { // Standalone mode. final PeerDTO peer = new PeerDTO(); // Private IP address or hostname may not be accessible from client in some environments. // So, use the value defined in nifi.properties instead when it is defined. final String remoteInputHost = properties.getRemoteInputHost(); String localName; try { // Get local host name using InetAddress if available, same as RAW socket does. localName = InetAddress.getLocalHost().getHostName(); } catch (UnknownHostException e) { if (logger.isDebugEnabled()) { logger.debug("Failed to get local host name using InetAddress.", e); } localName = req.getLocalName(); } peer.setHostname(isEmpty(remoteInputHost) ? localName : remoteInputHost); peer.setPort(properties.getRemoteInputHttpPort()); peer.setSecure(properties.isSiteToSiteSecure()); peer.setFlowFileCount( 0); // doesn't matter how many FlowFiles we have, because we're the only host. peers.add(peer); } final PeersEntity entity = new PeersEntity(); entity.setPeers(peers); return clusterContext( noCache( setCommonHeaders( Response.ok(entity), transportProtocolVersion, transactionManager))) .build(); }
private void processHeartbeat(final NodeHeartbeat heartbeat) { final NodeIdentifier nodeId = heartbeat.getNodeIdentifier(); // Do not process heartbeat if it's blocked by firewall. if (clusterCoordinator.isBlockedByFirewall(nodeId.getSocketAddress())) { clusterCoordinator.reportEvent( nodeId, Severity.WARNING, "Firewall blocked received heartbeat. Issuing disconnection request."); // request node to disconnect clusterCoordinator.requestNodeDisconnect( nodeId, DisconnectionCode.BLOCKED_BY_FIREWALL, "Blocked by Firewall"); removeHeartbeat(nodeId); return; } final NodeConnectionStatus connectionStatus = clusterCoordinator.getConnectionStatus(nodeId); if (connectionStatus == null) { // Unknown node. Issue reconnect request clusterCoordinator.reportEvent( nodeId, Severity.INFO, "Received heartbeat from unknown node. Removing heartbeat and requesting that node connect to cluster."); removeHeartbeat(nodeId); clusterCoordinator.requestNodeConnect(nodeId, null); return; } final NodeConnectionState connectionState = connectionStatus.getState(); if (heartbeat.getConnectionStatus().getState() != NodeConnectionState.CONNECTED && connectionState == NodeConnectionState.CONNECTED) { // Cluster Coordinator believes that node is connected, but node does not believe so. clusterCoordinator.reportEvent( nodeId, Severity.WARNING, "Received heartbeat from node that thinks it is not yet part of the cluster," + "though the Cluster Coordinator thought it was (node claimed state was " + heartbeat.getConnectionStatus().getState() + "). Marking as Disconnected and requesting that Node reconnect to cluster"); clusterCoordinator.requestNodeConnect(nodeId, null); return; } if (NodeConnectionState.DISCONNECTED == connectionState) { // ignore heartbeats from nodes disconnected by means other than lack of heartbeat, unless it // is // the only node. We allow it if it is the only node because if we have a one-node cluster, // then // we cannot manually reconnect it. 
final DisconnectionCode disconnectionCode = connectionStatus.getDisconnectCode(); // Determine whether or not the node should be allowed to be in the cluster still, depending // on its reason for disconnection. if (disconnectionCode == DisconnectionCode.LACK_OF_HEARTBEAT || disconnectionCode == DisconnectionCode.UNABLE_TO_COMMUNICATE) { clusterCoordinator.reportEvent( nodeId, Severity.INFO, "Received heartbeat from node previously " + "disconnected due to " + disconnectionCode + ". Issuing reconnection request."); clusterCoordinator.requestNodeConnect(nodeId, null); } else { // disconnected nodes should not heartbeat, so we need to issue a disconnection request. logger.info( "Ignoring received heartbeat from disconnected node " + nodeId + ". Issuing disconnection request."); clusterCoordinator.requestNodeDisconnect( nodeId, DisconnectionCode.HEARTBEAT_RECEIVED_FROM_DISCONNECTED_NODE, DisconnectionCode.HEARTBEAT_RECEIVED_FROM_DISCONNECTED_NODE.toString()); removeHeartbeat(nodeId); } return; } if (NodeConnectionState.DISCONNECTING == connectionStatus.getState()) { // ignore spurious heartbeat removeHeartbeat(nodeId); return; } // first heartbeat causes status change from connecting to connected if (NodeConnectionState.CONNECTING == connectionState) { final Long connectionRequestTime = connectionStatus.getConnectionRequestTime(); if (connectionRequestTime != null && heartbeat.getTimestamp() < connectionRequestTime) { clusterCoordinator.reportEvent( nodeId, Severity.INFO, "Received heartbeat but ignoring because it was reported before the node was last asked to reconnect."); removeHeartbeat(nodeId); return; } // connection complete clusterCoordinator.finishNodeConnection(nodeId); clusterCoordinator.reportEvent( nodeId, Severity.INFO, "Received first heartbeat from connecting node. Node connected."); } clusterCoordinator.updateNodeRoles(nodeId, heartbeat.getRoles()); }
@Override public DownloadableContent getContent(final ContentRequestContext request) { // if clustered, send request to cluster manager if (properties.isClustered() && clusterCoordinator != null && clusterCoordinator.isConnected()) { // get the URI URI dataUri; try { dataUri = new URI(request.getDataUri()); } catch (final URISyntaxException use) { throw new ClusterRequestException(use); } // set the request parameters final MultivaluedMap<String, String> parameters = new MultivaluedMapImpl(); parameters.add(CLIENT_ID_PARAM, request.getClientId()); // set the headers final Map<String, String> headers = new HashMap<>(); if (StringUtils.isNotBlank(request.getProxiedEntitiesChain())) { headers.put("X-ProxiedEntitiesChain", request.getProxiedEntitiesChain()); } // add the user's authorities (if any) to the headers final Authentication authentication = SecurityContextHolder.getContext().getAuthentication(); if (authentication != null) { final Object userDetailsObj = authentication.getPrincipal(); if (userDetailsObj instanceof NiFiUserDetails) { // serialize user details object final String hexEncodedUserDetails = WebUtils.serializeObjectToHex((Serializable) userDetailsObj); // put serialized user details in header headers.put("X-ProxiedEntityUserDetails", hexEncodedUserDetails); } } // ensure we were able to detect the cluster node id if (request.getClusterNodeId() == null) { throw new IllegalArgumentException("Unable to determine the which node has the content."); } // get the target node and ensure it exists final NodeIdentifier nodeId = clusterCoordinator.getNodeIdentifier(request.getClusterNodeId()); final Set<NodeIdentifier> targetNodes = Collections.singleton(nodeId); // replicate the request to the specific node NodeResponse nodeResponse; try { nodeResponse = requestReplicator .replicate(targetNodes, HttpMethod.GET, dataUri, parameters, headers) .awaitMergedResponse(); } catch (InterruptedException e) { throw new IllegalClusterStateException( "Interrupted while 
waiting for a response from node"); } final ClientResponse clientResponse = nodeResponse.getClientResponse(); final MultivaluedMap<String, String> responseHeaders = clientResponse.getHeaders(); // ensure an appropriate response if (Status.NOT_FOUND.getStatusCode() == clientResponse.getStatusInfo().getStatusCode()) { throw new ResourceNotFoundException(clientResponse.getEntity(String.class)); } else if (Status.FORBIDDEN.getStatusCode() == clientResponse.getStatusInfo().getStatusCode() || Status.UNAUTHORIZED.getStatusCode() == clientResponse.getStatusInfo().getStatusCode()) { throw new AccessDeniedException(clientResponse.getEntity(String.class)); } else if (Status.OK.getStatusCode() != clientResponse.getStatusInfo().getStatusCode()) { throw new IllegalStateException(clientResponse.getEntity(String.class)); } // get the file name final String contentDisposition = responseHeaders.getFirst("Content-Disposition"); final String filename = StringUtils.substringBetween(contentDisposition, "filename=\"", "\""); // get the content type final String contentType = responseHeaders.getFirst("Content-Type"); // create the downloadable content return new DownloadableContent(filename, contentType, clientResponse.getEntityInputStream()); } else { // example URIs: // http://localhost:8080/nifi-api/provenance/events/{id}/content/{input|output} // http://localhost:8080/nifi-api/flowfile-queues/{uuid}/flowfiles/{uuid}/content // get just the context path for comparison final String dataUri = StringUtils.substringAfter(request.getDataUri(), "/nifi-api"); if (StringUtils.isBlank(dataUri)) { throw new IllegalArgumentException("The specified data reference URI is not valid."); } // flowfile listing content final Matcher flowFileMatcher = FLOWFILE_CONTENT_URI_PATTERN.matcher(dataUri); if (flowFileMatcher.matches()) { final String connectionId = flowFileMatcher.group(1); final String flowfileId = flowFileMatcher.group(2); return getFlowFileContent(connectionId, flowfileId, dataUri); } // 
provenance event content final Matcher provenanceMatcher = PROVENANCE_CONTENT_URI_PATTERN.matcher(dataUri); if (provenanceMatcher.matches()) { try { final Long eventId = Long.parseLong(provenanceMatcher.group(1)); final ContentDirection direction = ContentDirection.valueOf(provenanceMatcher.group(2).toUpperCase()); return getProvenanceEventContent(eventId, dataUri, direction); } catch (final IllegalArgumentException iae) { throw new IllegalArgumentException("The specified data reference URI is not valid."); } } // invalid uri throw new IllegalArgumentException("The specified data reference URI is not valid."); } }
/**
 * Reports whether this instance is currently connected to a cluster.
 *
 * @return <code>true</code> if a cluster coordinator is present and reports being connected,
 *     <code>false</code> if running in standalone mode or disconnected from cluster
 */
boolean isConnectedToCluster() {
  if (clusterCoordinator == null) {
    return false;
  }
  return clusterCoordinator.isConnected();
}