@Override public boolean process(Message<? extends MessageType> message) { if (!message.isInternal() && !message.getMessageType().equals(HeartbeatMessage.i_am_alive)) { // We assume the FROM header always exists. String from = message.getHeader(Message.FROM); if (!from.equals(message.getHeader(Message.TO))) { InstanceId theId; if (message.hasHeader(Message.INSTANCE_ID)) { // INSTANCE_ID is there since after 1.9.6 theId = new InstanceId(Integer.parseInt(message.getHeader(Message.INSTANCE_ID))); } else { theId = clusterContext.getConfiguration().getIdForUri(URI.create(from)); } if (theId != null && clusterContext.getConfiguration().getMembers().containsKey(theId) && !clusterContext.isMe(theId)) { output.offer( message.copyHeadersTo( Message.internal( HeartbeatMessage.i_am_alive, new HeartbeatMessage.IAmAliveState(theId)), Message.FROM, Message.INSTANCE_ID)); } } } return true; }
@Override public State<?, ?> handle( ClusterContext context, Message<ClusterMessage> message, MessageHolder outgoing) throws Throwable { switch (message.getMessageType()) { case configurationChanged: { ClusterMessage.ConfigurationChangeState state = message.getPayload(); if (context.getMe().equals(state.getJoin())) { context.timeouts.cancelTimeout("join"); context.joined(); outgoing.offer( message.copyHeadersTo( internal(ClusterMessage.joinResponse, context.getConfiguration()))); return entered; } else { state.apply(context); return this; } } case joiningTimeout: { context .getLogger(ClusterState.class) .info("Join timeout for " + message.getHeader(Message.CONVERSATION_ID)); // Go back to requesting configurations from potential members for (URI potentialClusterInstanceUri : context.getJoiningInstances()) { outgoing.offer(to(ClusterMessage.configurationRequest, potentialClusterInstanceUri)); } context.timeouts.setTimeout( "discovery", timeout( ClusterMessage.configurationTimeout, message, new ClusterMessage.ConfigurationTimeoutState(4))); return acquiringConfiguration; } case joinFailure: { // This causes an exception from the join() method return start; } } return this; }
public int tick() { // Deliver messages whose delivery time has passed now += tickDuration; // logger.debug( "tick:"+now ); Iterator<MessageDelivery> iter = messageDeliveries.iterator(); while (iter.hasNext()) { MessageDelivery messageDelivery = iter.next(); if (messageDelivery.getMessageDeliveryTime() <= now) { long delay = strategy.messageDelay( messageDelivery.getMessage(), messageDelivery.getServer().toString()); if (delay != NetworkLatencyStrategy.LOST) { messageDelivery.getServer().process(messageDelivery.getMessage()); } iter.remove(); } } // Check and trigger timeouts for (TestProtocolServer testServer : participants.values()) { testServer.tick(now); } // Get all sent messages from all test servers List<Message> messages = new ArrayList<Message>(); for (TestProtocolServer testServer : participants.values()) { testServer.sendMessages(messages); } // Now send them and figure out latency for (Message message : messages) { String to = message.getHeader(Message.TO); if (to.equals(Message.BROADCAST)) { for (Map.Entry<String, TestProtocolServer> testServer : participants.entrySet()) { if (!testServer.getKey().equals(message.getHeader(Message.FROM))) { long delay = strategy.messageDelay(message, testServer.getKey()); if (delay == NetworkLatencyStrategy.LOST) { logger.info("Broadcasted message to " + testServer.getKey() + " was lost"); } else { logger.info("Broadcast to " + testServer.getKey() + ": " + message); messageDeliveries.add( new MessageDelivery(now + delay, message, testServer.getValue())); } } } } else { long delay = 0; if (message.getHeader(Message.TO).equals(message.getHeader(Message.FROM))) { logger.info("Sending message to itself; zero latency"); } else { delay = strategy.messageDelay(message, to); } if (delay == NetworkLatencyStrategy.LOST) { logger.info("Send message to " + to + " was lost"); } else { TestProtocolServer server = participants.get(to); logger.info("Send to " + to + ": " + message); messageDeliveries.add(new MessageDelivery(now + delay, message, server)); } } } return messageDeliveries.size(); }
@Override public HeartbeatState handle( HeartbeatContext context, Message<HeartbeatMessage> message, MessageHolder outgoing) throws Throwable { switch (message.getMessageType()) { case i_am_alive: { HeartbeatMessage.IAmAliveState state = message.getPayload(); if (context.getClusterContext().isMe(state.getServer())) { break; } context.getClusterContext().getLogger(HeartbeatState.class).debug("Received " + state); if (state.getServer() == null) { break; } if (context.alive(state.getServer())) { // Send suspicions messages to all non-failed servers for (InstanceId aliveServer : context.getAlive()) { if (!aliveServer.equals(context.getClusterContext().getMyId())) { URI aliveServerUri = context.getClusterContext().configuration.getUriForId(aliveServer); outgoing.offer( Message.to( HeartbeatMessage.suspicions, aliveServerUri, new HeartbeatMessage.SuspicionsState( context.getSuspicionsFor(context.getClusterContext().getMyId())))); } } } context .getClusterContext() .timeouts .cancelTimeout(HeartbeatMessage.i_am_alive + "-" + state.getServer()); context .getClusterContext() .timeouts .setTimeout( HeartbeatMessage.i_am_alive + "-" + state.getServer(), timeout(HeartbeatMessage.timed_out, message, state.getServer())); // Check if this server knows something that we don't if (message.hasHeader("last-learned")) { long lastLearned = Long.parseLong(message.getHeader("last-learned")); if (lastLearned > context.getLearnerContext().getLastKnownLearnedInstanceInCluster()) { outgoing.offer(internal(LearnerMessage.catchUp, lastLearned)); } } break; } case timed_out: { InstanceId server = message.getPayload(); context .getClusterContext() .getLogger(HeartbeatState.class) .debug("Received timed out for server " + server); // Check if this node is no longer a part of the cluster if (context.getClusterContext().getConfiguration().getMembers().containsKey(server)) { context.suspect(server); context .getClusterContext() .timeouts .setTimeout( HeartbeatMessage.i_am_alive + "-" + server, timeout(HeartbeatMessage.timed_out, message, server)); // Send suspicions messages to all non-failed servers for (InstanceId aliveServer : context.getAlive()) { if (!aliveServer.equals(context.getClusterContext().getMyId())) { URI sendTo = context.getClusterContext().getConfiguration().getUriForId(aliveServer); outgoing.offer( Message.to( HeartbeatMessage.suspicions, sendTo, new HeartbeatMessage.SuspicionsState( context.getSuspicionsFor(context.getClusterContext().getMyId())))); } } } else { // If no longer part of cluster, then don't bother context.serverLeftCluster(server); } break; } case sendHeartbeat: { InstanceId to = message.getPayload(); // Check if this node is no longer a part of the cluster if (!context.getClusterContext().isMe(to) && context.getClusterContext().getConfiguration().getMembers().containsKey(to)) { URI toSendTo = context.getClusterContext().getConfiguration().getUriForId(to); // Send heartbeat message to given server outgoing.offer( to( HeartbeatMessage.i_am_alive, toSendTo, new HeartbeatMessage.IAmAliveState(context.getClusterContext().getMyId())) .setHeader( "last-learned", context.getLearnerContext().getLastLearnedInstanceId() + "")); // Set new timeout to send heartbeat to this host context .getClusterContext() .timeouts .setTimeout( HeartbeatMessage.sendHeartbeat + "-" + to, timeout(HeartbeatMessage.sendHeartbeat, message, to)); } break; } case reset_send_heartbeat: { InstanceId to = message.getPayload(); if (!context.getClusterContext().isMe(to)) { String timeoutName = HeartbeatMessage.sendHeartbeat + "-" + to; context.getClusterContext().timeouts.cancelTimeout(timeoutName); context .getClusterContext() .timeouts .setTimeout(timeoutName, timeout(HeartbeatMessage.sendHeartbeat, message, to)); } break; } case suspicions: { HeartbeatMessage.SuspicionsState suspicions = message.getPayload(); context .getClusterContext() .getLogger(HeartbeatState.class) .debug("Received suspicions as " + suspicions); URI from = new URI(message.getHeader(Message.FROM)); InstanceId fromId = context.getClusterContext().getConfiguration().getServerId(from); /* * Remove ourselves from the suspicions received - we just received a message, * it's not normal to be considered failed. Whatever it was, it was transient and now it has * passed. */ suspicions.getSuspicions().remove(context.getClusterContext().getMyId()); context.suspicions(fromId, suspicions.getSuspicions()); break; } case leave: { context.getClusterContext().getLogger(HeartbeatState.class).debug("Received leave"); return start; } case addHeartbeatListener: { context.addHeartbeatListener((HeartbeatListener) message.getPayload()); break; } case removeHeartbeatListener: { context.removeHeartbeatListener((HeartbeatListener) message.getPayload()); break; } } return this; }
@Override public State<?, ?> handle( ClusterContext context, Message<ClusterMessage> message, MessageHolder outgoing) throws Throwable { List<URI> discoveredInstances = context.getDiscoveredInstances(); switch (message.getMessageType()) { case configurationResponse: { context.timeouts.cancelTimeout("discovery"); ClusterMessage.ConfigurationResponseState state = message.getPayload(); context.getLogger(ClusterState.class).info("Joining cluster " + state.getClusterName()); if (!context.getConfiguration().getName().equals(state.getClusterName())) { context .getLogger(ClusterState.class) .warn( "Joined cluster name is different than " + "the one configured. Expected " + context.getConfiguration().getName() + ", got " + state.getClusterName() + "."); } List<URI> memberList = new ArrayList<URI>(state.getMembers()); context.learnerContext.setLastDeliveredInstanceId( state.getLatestReceivedInstanceId().getId()); context.learnerContext.learnedInstanceId(state.getLatestReceivedInstanceId().getId()); context.proposerContext.nextInstanceId = state.getLatestReceivedInstanceId().getId() + 1; context.acquiredConfiguration(memberList, state.getRoles()); if (!memberList.contains(context.me)) { context .getLogger(ClusterState.class) .info( String.format( "%s joining:%s, " + "last delivered:%d", context.me.toString(), context.getConfiguration().toString(), state.getLatestReceivedInstanceId().getId())); ClusterMessage.ConfigurationChangeState newState = new ClusterMessage.ConfigurationChangeState(); newState.join(context.me); // Let the coordinator propose this if possible URI coordinator = state.getRoles().get(ClusterConfiguration.COORDINATOR); if (coordinator != null) { outgoing.offer(to(ProposerMessage.propose, coordinator, newState)); } else { outgoing.offer( to( ProposerMessage.propose, new URI(message.getHeader(Message.FROM)), newState)); } context .getLogger(ClusterState.class) .debug("Setup join timeout for " + message.getHeader(Message.CONVERSATION_ID)); context.timeouts.setTimeout( "join", timeout( ClusterMessage.joiningTimeout, message, new URI(message.getHeader(Message.FROM)))); return joining; } else { // Already in (probably due to crash of this server previously), go to entered state context.joined(); outgoing.offer(internal(ClusterMessage.joinResponse, context.getConfiguration())); return entered; } } case configurationTimeout: { ClusterMessage.ConfigurationTimeoutState state = message.getPayload(); if (state.getRemainingPings() > 0) { // Send out requests again for (URI potentialClusterInstanceUri : context.getJoiningInstances()) { outgoing.offer( to(ClusterMessage.configurationRequest, potentialClusterInstanceUri)); } context.timeouts.setTimeout( "join", timeout( ClusterMessage.configurationTimeout, message, new ClusterMessage.ConfigurationTimeoutState(state.getRemainingPings() - 1))); } else { // No responses // Check if we picked up any other instances' requests during this phase if (!discoveredInstances.isEmpty()) { Collections.sort(discoveredInstances); /* * The assumption here is that the lowest in the list of discovered instances * will create the cluster. Keep in mind that this is run on all instances so * everyone will pick the same one. * If the one picked up is configured to not init a cluster then the timeout * set in else{} will take care of that. */ if (discoveredInstances.get(0).compareTo(context.getMe()) >= 0) { discoveredInstances.clear(); // I'm supposed to create the cluster - fail the join outgoing.offer( internal( ClusterMessage.joinFailure, new TimeoutException("Join failed, timeout waiting for configuration"))); return start; } else { discoveredInstances.clear(); // Someone else is supposed to create the cluster - restart the join discovery for (URI potentialClusterInstanceUri : context.getJoiningInstances()) { outgoing.offer( to(ClusterMessage.configurationRequest, potentialClusterInstanceUri)); } context.timeouts.setTimeout( "discovery", timeout( ClusterMessage.configurationTimeout, message, new ClusterMessage.ConfigurationTimeoutState(4))); } } else { // Join failed outgoing.offer( internal( ClusterMessage.joinFailure, new TimeoutException("Join failed, timeout waiting for configuration"))); return start; } } return this; } case configurationRequest: { // We're listening for existing clusters, but if all instances start up at the same time // and look for each other, this allows us to pick that up URI joiningInstanceUri = new URI(message.getHeader(Message.FROM)); if (!discoveredInstances.contains(joiningInstanceUri)) { discoveredInstances.add(joiningInstanceUri); } } } return this; }