@Override
public State<?, ?> handle(
    ClusterContext context, Message<ClusterMessage> message, MessageHolder outgoing)
    throws Throwable {
  // Leaving state: wait for the configuration change that removes us to be
  // delivered, or for the leave attempt to time out.
  ClusterMessage type = message.getMessageType();

  if (type == ClusterMessage.configurationChanged) {
    ClusterMessage.ConfigurationChangeState change = message.getPayload();
    if (change.isLeaving(context.getMe())) {
      // Our own departure was agreed upon - we are out of the cluster.
      context.timeouts.cancelTimeout("leave");
      context.left();
      return start;
    }
    // Some other membership change; apply it and keep waiting for ours.
    change.apply(context);
    return leaving;
  }

  if (type == ClusterMessage.leaveTimedout) {
    // Could not get our departure agreed upon in time; give up and reset.
    context
        .getLogger(ClusterState.class)
        .warn("Failed to leave. Cluster may consider this instance still a member");
    context.left();
    return start;
  }

  return this;
}
@Override
public State<?, ?> handle(
    ClusterContext context, Message<ClusterMessage> message, MessageHolder outgoing)
    throws Throwable {
  // Joining state: our membership has been proposed; wait for the configuration
  // change that includes us to come back, or for the join attempt to time out.
  switch (message.getMessageType()) {
    case configurationChanged: {
      ClusterMessage.ConfigurationChangeState state = message.getPayload();
      if (context.getMe().equals(state.getJoin())) {
        // The agreed-upon change is our own join - we are now a member.
        context.timeouts.cancelTimeout("join");
        context.joined();
        // Unblock the client's join() call with the final configuration.
        outgoing.offer(
            message.copyHeadersTo(
                internal(ClusterMessage.joinResponse, context.getConfiguration())));
        return entered;
      } else {
        // Somebody else's membership change; apply it and keep waiting for ours.
        state.apply(context);
        return this;
      }
    }
    case joiningTimeout: {
      context
          .getLogger(ClusterState.class)
          .info("Join timeout for " + message.getHeader(Message.CONVERSATION_ID));
      // Go back to requesting configurations from potential members
      for (URI potentialClusterInstanceUri : context.getJoiningInstances()) {
        outgoing.offer(to(ClusterMessage.configurationRequest, potentialClusterInstanceUri));
      }
      // Allow 4 more configuration-request rounds before giving up on discovery.
      context.timeouts.setTimeout(
          "discovery",
          timeout(
              ClusterMessage.configurationTimeout,
              message,
              new ClusterMessage.ConfigurationTimeoutState(4)));
      return acquiringConfiguration;
    }
    case joinFailure: {
      // This causes an exception from the join() method
      return start;
    }
  }
  return this;
}
@Override public boolean process(Message<? extends MessageType> message) { if (!message.isInternal() && !message.getMessageType().equals(HeartbeatMessage.i_am_alive)) { // We assume the FROM header always exists. String from = message.getHeader(Message.FROM); if (!from.equals(message.getHeader(Message.TO))) { InstanceId theId; if (message.hasHeader(Message.INSTANCE_ID)) { // INSTANCE_ID is there since after 1.9.6 theId = new InstanceId(Integer.parseInt(message.getHeader(Message.INSTANCE_ID))); } else { theId = clusterContext.getConfiguration().getIdForUri(URI.create(from)); } if (theId != null && clusterContext.getConfiguration().getMembers().containsKey(theId) && !clusterContext.isMe(theId)) { output.offer( message.copyHeadersTo( Message.internal( HeartbeatMessage.i_am_alive, new HeartbeatMessage.IAmAliveState(theId)), Message.FROM, Message.INSTANCE_ID)); } } } return true; }
@Override
public State<?, ?> handle(
    ClusterContext context, Message<ClusterMessage> message, MessageHolder outgoing)
    throws Throwable {
  // Initial state: wait for a create or join request from the client API.
  switch (message.getMessageType()) {
    case addClusterListener: {
      context.addClusterListener(message.<ClusterListener>getPayload());
      break;
    }
    case removeClusterListener: {
      context.removeClusterListener(message.<ClusterListener>getPayload());
      break;
    }
    case create: {
      // Create a brand-new cluster with the given name; we are its only member.
      String name = message.getPayload();
      context.getLogger(ClusterState.class).info("Creating cluster: " + name);
      context.created(name);
      return entered;
    }
    case join: {
      // Send configuration request to all instances
      Object[] args = message.<Object[]>getPayload();
      String name = (String) args[0];
      URI[] clusterInstanceUris = (URI[]) args[1];
      context.joining(name, Iterables.<URI, URI>iterable(clusterInstanceUris));
      for (URI potentialClusterInstanceUri : clusterInstanceUris) {
        outgoing.offer(to(ClusterMessage.configurationRequest, potentialClusterInstanceUri));
      }
      // Allow 4 configuration-request rounds before discovery times out.
      context.timeouts.setTimeout(
          "discovery",
          timeout(
              ClusterMessage.configurationTimeout,
              message,
              new ClusterMessage.ConfigurationTimeoutState(4)));
      return acquiringConfiguration;
    }
  }
  return this;
}
@Override
public void bookInstance(InstanceId instanceId, Message message) {
  // A booking must carry a payload; reject bogus bookings loudly.
  Object payload = message.getPayload();
  if (payload == null) {
    throw new IllegalArgumentException("null payload for booking instance: " + message);
  }
  bookedInstances.put(instanceId, message);
}
@Override
public String toString() {
  // Human-readable description of this pending delivery, used in test logs.
  StringBuilder description = new StringBuilder();
  description
      .append("Deliver ")
      .append(message.getMessageType().name())
      .append(" to ")
      .append(server.getServer().getServerId())
      .append(" at ")
      .append(messageDeliveryTime);
  return description.toString();
}
@Override
public AcceptorState handle(
    AcceptorContext context, Message<AcceptorMessage> message, MessageHolder outgoing)
    throws Throwable {
  // The only transition out of the initial state: joining makes us an acceptor.
  if (message.getMessageType() == AcceptorMessage.join) {
    return acceptor;
  }
  return this;
}
@Override public HeartbeatState handle( HeartbeatContext context, Message<HeartbeatMessage> message, MessageHolder outgoing) throws Throwable { switch (message.getMessageType()) { case addHeartbeatListener: { context.addHeartbeatListener((HeartbeatListener) message.getPayload()); break; } case removeHeartbeatListener: { context.removeHeartbeatListener((HeartbeatListener) message.getPayload()); break; } case join: { for (InstanceId instanceId : context.getClusterContext().getOtherInstances()) { // Setup heartbeat timeouts for the other instance context .getClusterContext() .timeouts .setTimeout( HeartbeatMessage.i_am_alive + "-" + instanceId, timeout(HeartbeatMessage.timed_out, message, instanceId)); // Send first heartbeat immediately outgoing.offer(timeout(HeartbeatMessage.sendHeartbeat, message, instanceId)); } return heartbeat; } } return this; }
public int tick() {
  // Advance simulated time, deliver messages that have come due, fire server
  // timeouts, then route all newly produced messages with per-link latency.
  // Returns the number of deliveries still in flight.
  // Deliver messages whose delivery time has passed
  now += tickDuration;

  // logger.debug( "tick:"+now );

  Iterator<MessageDelivery> iter = messageDeliveries.iterator();
  while (iter.hasNext()) {
    MessageDelivery messageDelivery = iter.next();
    if (messageDelivery.getMessageDeliveryTime() <= now) {
      // Latency is re-sampled at delivery time, so the strategy may declare a
      // message lost even though it was accepted when first enqueued.
      long delay =
          strategy.messageDelay(
              messageDelivery.getMessage(), messageDelivery.getServer().toString());
      if (delay != NetworkLatencyStrategy.LOST) {
        messageDelivery.getServer().process(messageDelivery.getMessage());
      }
      iter.remove();
    }
  }

  // Check and trigger timeouts
  for (TestProtocolServer testServer : participants.values()) {
    testServer.tick(now);
  }

  // Get all sent messages from all test servers
  List<Message> messages = new ArrayList<Message>();
  for (TestProtocolServer testServer : participants.values()) {
    testServer.sendMessages(messages);
  }

  // Now send them and figure out latency
  for (Message message : messages) {
    String to = message.getHeader(Message.TO);
    if (to.equals(Message.BROADCAST)) {
      // Fan out to every participant except the sender, each with its own
      // independently sampled latency (and independent chance of loss).
      for (Map.Entry<String, TestProtocolServer> testServer : participants.entrySet()) {
        if (!testServer.getKey().equals(message.getHeader(Message.FROM))) {
          long delay = strategy.messageDelay(message, testServer.getKey());
          if (delay == NetworkLatencyStrategy.LOST) {
            logger.info("Broadcasted message to " + testServer.getKey() + " was lost");
          } else {
            logger.info("Broadcast to " + testServer.getKey() + ": " + message);
            messageDeliveries.add(
                new MessageDelivery(now + delay, message, testServer.getValue()));
          }
        }
      }
    } else {
      // Point-to-point: self-sends skip the latency strategy entirely.
      long delay = 0;
      if (message.getHeader(Message.TO).equals(message.getHeader(Message.FROM))) {
        logger.info("Sending message to itself; zero latency");
      } else {
        delay = strategy.messageDelay(message, to);
      }
      if (delay == NetworkLatencyStrategy.LOST) {
        logger.info("Send message to " + to + " was lost");
      } else {
        TestProtocolServer server = participants.get(to);
        logger.info("Send to " + to + ": " + message);
        messageDeliveries.add(new MessageDelivery(now + delay, message, server));
      }
    }
  }
  return messageDeliveries.size();
}
@Override
public HeartbeatState handle(
    HeartbeatContext context, Message<HeartbeatMessage> message, MessageHolder outgoing)
    throws Throwable {
  // Active heartbeat state: track peer liveness via i_am_alive messages,
  // suspect peers whose heartbeats time out, and gossip suspicions to the
  // servers we still believe are alive.
  switch (message.getMessageType()) {
    case i_am_alive: {
      HeartbeatMessage.IAmAliveState state = message.getPayload();
      if (context.getClusterContext().isMe(state.getServer())) {
        break;
      }
      context.getClusterContext().getLogger(HeartbeatState.class).debug("Received " + state);
      // NOTE(review): this null guard runs after isMe()/debug already used
      // state.getServer() - assumes both tolerate null; confirm.
      if (state.getServer() == null) {
        break;
      }
      if (context.alive(state.getServer())) {
        // A previously suspected server came back: broadcast our (now reduced)
        // suspicion set so other servers can clear it too.
        // Send suspicions messages to all non-failed servers
        for (InstanceId aliveServer : context.getAlive()) {
          if (!aliveServer.equals(context.getClusterContext().getMyId())) {
            URI aliveServerUri =
                context.getClusterContext().configuration.getUriForId(aliveServer);
            outgoing.offer(
                Message.to(
                    HeartbeatMessage.suspicions,
                    aliveServerUri,
                    new HeartbeatMessage.SuspicionsState(
                        context.getSuspicionsFor(context.getClusterContext().getMyId()))));
          }
        }
      }
      // Restart the liveness timeout for this server.
      context
          .getClusterContext()
          .timeouts
          .cancelTimeout(HeartbeatMessage.i_am_alive + "-" + state.getServer());
      context
          .getClusterContext()
          .timeouts
          .setTimeout(
              HeartbeatMessage.i_am_alive + "-" + state.getServer(),
              timeout(HeartbeatMessage.timed_out, message, state.getServer()));
      // Check if this server knows something that we don't
      if (message.hasHeader("last-learned")) {
        long lastLearned = Long.parseLong(message.getHeader("last-learned"));
        if (lastLearned > context.getLearnerContext().getLastKnownLearnedInstanceInCluster()) {
          // The peer has learned instances beyond ours - ask to catch up.
          outgoing.offer(internal(LearnerMessage.catchUp, lastLearned));
        }
      }
      break;
    }
    case timed_out: {
      InstanceId server = message.getPayload();
      context
          .getClusterContext()
          .getLogger(HeartbeatState.class)
          .debug("Received timed out for server " + server);
      // Check if this node is no longer a part of the cluster
      if (context.getClusterContext().getConfiguration().getMembers().containsKey(server)) {
        context.suspect(server);
        // Re-arm the timeout so the suspicion keeps being refreshed/gossiped.
        context
            .getClusterContext()
            .timeouts
            .setTimeout(
                HeartbeatMessage.i_am_alive + "-" + server,
                timeout(HeartbeatMessage.timed_out, message, server));
        // Send suspicions messages to all non-failed servers
        for (InstanceId aliveServer : context.getAlive()) {
          if (!aliveServer.equals(context.getClusterContext().getMyId())) {
            URI sendTo =
                context.getClusterContext().getConfiguration().getUriForId(aliveServer);
            outgoing.offer(
                Message.to(
                    HeartbeatMessage.suspicions,
                    sendTo,
                    new HeartbeatMessage.SuspicionsState(
                        context.getSuspicionsFor(context.getClusterContext().getMyId()))));
          }
        }
      } else {
        // If no longer part of cluster, then don't bother
        context.serverLeftCluster(server);
      }
      break;
    }
    case sendHeartbeat: {
      InstanceId to = message.getPayload();
      // Check if this node is no longer a part of the cluster
      if (!context.getClusterContext().isMe(to)
          && context.getClusterContext().getConfiguration().getMembers().containsKey(to)) {
        URI toSendTo = context.getClusterContext().getConfiguration().getUriForId(to);
        // Send heartbeat message to given server
        // Piggyback our last learned instance id so the peer can detect it has
        // fallen behind (see the "last-learned" handling in i_am_alive above).
        outgoing.offer(
            to(
                    HeartbeatMessage.i_am_alive,
                    toSendTo,
                    new HeartbeatMessage.IAmAliveState(context.getClusterContext().getMyId()))
                .setHeader(
                    "last-learned",
                    context.getLearnerContext().getLastLearnedInstanceId() + ""));
        // Set new timeout to send heartbeat to this host
        context
            .getClusterContext()
            .timeouts
            .setTimeout(
                HeartbeatMessage.sendHeartbeat + "-" + to,
                timeout(HeartbeatMessage.sendHeartbeat, message, to));
      }
      break;
    }
    case reset_send_heartbeat: {
      InstanceId to = message.getPayload();
      if (!context.getClusterContext().isMe(to)) {
        // Push the next outgoing heartbeat further into the future, e.g.
        // because some other outgoing message already proved us alive.
        String timeoutName = HeartbeatMessage.sendHeartbeat + "-" + to;
        context.getClusterContext().timeouts.cancelTimeout(timeoutName);
        context
            .getClusterContext()
            .timeouts
            .setTimeout(timeoutName, timeout(HeartbeatMessage.sendHeartbeat, message, to));
      }
      break;
    }
    case suspicions: {
      HeartbeatMessage.SuspicionsState suspicions = message.getPayload();
      context
          .getClusterContext()
          .getLogger(HeartbeatState.class)
          .debug("Received suspicions as " + suspicions);
      URI from = new URI(message.getHeader(Message.FROM));
      InstanceId fromId = context.getClusterContext().getConfiguration().getServerId(from);
      /*
       * Remove ourselves from the suspicions received - we just received a message,
       * it's not normal to be considered failed. Whatever it was, it was transient and now it has
       * passed.
       */
      suspicions.getSuspicions().remove(context.getClusterContext().getMyId());
      context.suspicions(fromId, suspicions.getSuspicions());
      break;
    }
    case leave: {
      context.getClusterContext().getLogger(HeartbeatState.class).debug("Received leave");
      return start;
    }
    case addHeartbeatListener: {
      context.addHeartbeatListener((HeartbeatListener) message.getPayload());
      break;
    }
    case removeHeartbeatListener: {
      context.removeHeartbeatListener((HeartbeatListener) message.getPayload());
      break;
    }
  }
  return this;
}
@Override
public AcceptorState handle(
    AcceptorContext context, Message<AcceptorMessage> message, MessageHolder outgoing)
    throws Throwable {
  // Paxos acceptor: answer prepare (phase 1) and accept (phase 2) requests,
  // sending explicit rejects as an optimization so proposers need not wait
  // for a timeout to learn they lost.
  switch (message.getMessageType()) {
    case prepare: {
      AcceptorMessage.PrepareState prepareState = message.getPayload();
      InstanceId instanceId = new InstanceId(message);
      AcceptorInstance instance = context.getAcceptorInstance(instanceId);
      if (prepareState.getBallot() >= instance.getBallot()) {
        // Promise not to accept anything below this ballot, returning any
        // value we may already have accepted for this instance.
        context.promise(instance, prepareState.getBallot());
        outgoing.offer(
            message.copyHeadersTo(
                Message.respond(
                    ProposerMessage.promise,
                    message,
                    new ProposerMessage.PromiseState(
                        prepareState.getBallot(), instance.getValue())),
                InstanceId.INSTANCE));
      } else {
        // Optimization - explicit reject
        context
            .getLogger(AcceptorState.class)
            .debug("Reject " + instanceId + " ballot:" + instance.getBallot());
        outgoing.offer(
            message.copyHeadersTo(
                Message.respond(
                    ProposerMessage.rejectPrepare,
                    message,
                    new ProposerMessage.RejectPrepare(instance.getBallot())),
                InstanceId.INSTANCE));
      }
      break;
    }
    case accept: {
      // Task 4
      AcceptorMessage.AcceptState acceptState = message.getPayload();
      InstanceId instanceId = new InstanceId(message);
      AcceptorInstance instance = context.getAcceptorInstance(instanceId);
      // NOTE(review): only the exact promised ballot is accepted (==), not any
      // ballot >= the promise as in textbook Paxos - confirm this is intended.
      if (acceptState.getBallot() == instance.getBallot()) {
        context.accept(instance, acceptState.getValue());
        instance.accept(acceptState.getValue());
        outgoing.offer(
            message.copyHeadersTo(
                Message.respond(
                    ProposerMessage.accepted, message, new ProposerMessage.AcceptedState()),
                InstanceId.INSTANCE));
      } else {
        context
            .getLogger(AcceptorState.class)
            .debug(
                "Reject "
                    + instanceId
                    + " accept ballot:"
                    + acceptState.getBallot()
                    + " actual ballot:"
                    + instance.getBallot());
        outgoing.offer(
            message.copyHeadersTo(
                Message.respond(
                    ProposerMessage.rejectAccept,
                    message,
                    new ProposerMessage.RejectAcceptState()),
                InstanceId.INSTANCE));
      }
      break;
    }
    case leave: {
      context.leave();
      return start;
    }
  }
  return this;
}
@Override
public State<?, ?> handle(
    ClusterContext context, Message<ClusterMessage> message, MessageHolder outgoing)
    throws Throwable {
  // Discovery state: we have broadcast configuration requests and now wait for
  // an existing cluster to answer, for the discovery timeout to expire, or for
  // other simultaneously starting instances to show up.
  List<URI> discoveredInstances = context.getDiscoveredInstances();
  switch (message.getMessageType()) {
    case configurationResponse: {
      // An existing cluster answered - try to join it.
      context.timeouts.cancelTimeout("discovery");
      ClusterMessage.ConfigurationResponseState state = message.getPayload();
      context.getLogger(ClusterState.class).info("Joining cluster " + state.getClusterName());
      if (!context.getConfiguration().getName().equals(state.getClusterName())) {
        context
            .getLogger(ClusterState.class)
            .warn(
                "Joined cluster name is different than "
                    + "the one configured. Expected "
                    + context.getConfiguration().getName()
                    + ", got "
                    + state.getClusterName()
                    + ".");
      }
      List<URI> memberList = new ArrayList<URI>(state.getMembers());
      // Fast-forward our learner/proposer state to where the cluster is.
      context.learnerContext.setLastDeliveredInstanceId(
          state.getLatestReceivedInstanceId().getId());
      context.learnerContext.learnedInstanceId(state.getLatestReceivedInstanceId().getId());
      context.proposerContext.nextInstanceId = state.getLatestReceivedInstanceId().getId() + 1;
      context.acquiredConfiguration(memberList, state.getRoles());
      if (!memberList.contains(context.me)) {
        context
            .getLogger(ClusterState.class)
            .info(
                String.format(
                    "%s joining:%s, " + "last delivered:%d",
                    context.me.toString(),
                    context.getConfiguration().toString(),
                    state.getLatestReceivedInstanceId().getId()));
        // Propose ourselves as a new member.
        ClusterMessage.ConfigurationChangeState newState =
            new ClusterMessage.ConfigurationChangeState();
        newState.join(context.me);
        // Let the coordinator propose this if possible
        URI coordinator = state.getRoles().get(ClusterConfiguration.COORDINATOR);
        if (coordinator != null) {
          outgoing.offer(to(ProposerMessage.propose, coordinator, newState));
        } else {
          // No known coordinator - propose via whoever answered us.
          outgoing.offer(
              to(ProposerMessage.propose, new URI(message.getHeader(Message.FROM)), newState));
        }
        context
            .getLogger(ClusterState.class)
            .debug("Setup join timeout for " + message.getHeader(Message.CONVERSATION_ID));
        context.timeouts.setTimeout(
            "join",
            timeout(
                ClusterMessage.joiningTimeout,
                message,
                new URI(message.getHeader(Message.FROM))));
        return joining;
      } else {
        // Already in (probably due to crash of this server previously), go to entered state
        context.joined();
        outgoing.offer(internal(ClusterMessage.joinResponse, context.getConfiguration()));
        return entered;
      }
    }
    case configurationTimeout: {
      ClusterMessage.ConfigurationTimeoutState state = message.getPayload();
      if (state.getRemainingPings() > 0) {
        // Send out requests again
        for (URI potentialClusterInstanceUri : context.getJoiningInstances()) {
          outgoing.offer(
              to(ClusterMessage.configurationRequest, potentialClusterInstanceUri));
        }
        // NOTE(review): this retry timeout is registered under "join" while the
        // initial one (and the restart below) use "discovery" - confirm the
        // differing timeout name is intentional.
        context.timeouts.setTimeout(
            "join",
            timeout(
                ClusterMessage.configurationTimeout,
                message,
                new ClusterMessage.ConfigurationTimeoutState(state.getRemainingPings() - 1)));
      } else {
        // No responses
        // Check if we picked up any other instances' requests during this phase
        if (!discoveredInstances.isEmpty()) {
          Collections.sort(discoveredInstances);
          /*
           * The assumption here is that the lowest in the list of discovered instances
           * will create the cluster. Keep in mind that this is run on all instances so
           * everyone will pick the same one.
           * If the one picked up is configured to not init a cluster then the timeout
           * set in else{} will take care of that.
           */
          if (discoveredInstances.get(0).compareTo(context.getMe()) >= 0) {
            discoveredInstances.clear();
            // I'm supposed to create the cluster - fail the join
            outgoing.offer(
                internal(
                    ClusterMessage.joinFailure,
                    new TimeoutException("Join failed, timeout waiting for configuration")));
            return start;
          } else {
            discoveredInstances.clear();
            // Someone else is supposed to create the cluster - restart the join discovery
            for (URI potentialClusterInstanceUri : context.getJoiningInstances()) {
              outgoing.offer(
                  to(ClusterMessage.configurationRequest, potentialClusterInstanceUri));
            }
            context.timeouts.setTimeout(
                "discovery",
                timeout(
                    ClusterMessage.configurationTimeout,
                    message,
                    new ClusterMessage.ConfigurationTimeoutState(4)));
          }
        } else {
          // Join failed
          outgoing.offer(
              internal(
                  ClusterMessage.joinFailure,
                  new TimeoutException("Join failed, timeout waiting for configuration")));
          return start;
        }
      }
      return this;
    }
    case configurationRequest: {
      // We're listening for existing clusters, but if all instances start up at the same time
      // and look for each other, this allows us to pick that up
      URI joiningInstanceUri = new URI(message.getHeader(Message.FROM));
      if (!discoveredInstances.contains(joiningInstanceUri)) {
        discoveredInstances.add(joiningInstanceUri);
      }
    }
  }
  return this;
}
@Override
public State<?, ?> handle(
    ClusterContext context, Message<ClusterMessage> message, MessageHolder outgoing)
    throws Throwable {
  // Entered state: we are a full cluster member; answer configuration
  // requests, apply membership changes, and initiate our own departure.
  switch (message.getMessageType()) {
    case addClusterListener: {
      context.addClusterListener(message.<ClusterListener>getPayload());
      break;
    }
    case removeClusterListener: {
      context.removeClusterListener(message.<ClusterListener>getPayload());
      break;
    }
    case configurationRequest: {
      // A prospective member asks who we are; answer with the current
      // configuration plus how far our learner has gotten.
      outgoing.offer(
          message.copyHeadersTo(
              respond(
                  ClusterMessage.configurationResponse,
                  message,
                  new ClusterMessage.ConfigurationResponseState(
                      context.getConfiguration().getRoles(),
                      context.getConfiguration().getMembers(),
                      new InstanceId(context.learnerContext.getLastDeliveredInstanceId()),
                      context.getConfiguration().getName()))));
      break;
    }
    case configurationChanged: {
      ClusterMessage.ConfigurationChangeState state = message.getPayload();
      state.apply(context);
      break;
    }
    case leave: {
      List<URI> nodeList = new ArrayList<URI>(context.getConfiguration().getMembers());
      if (nodeList.size() == 1) {
        // We are the last member - just shut the cluster down locally.
        context
            .getLogger(ClusterState.class)
            .info("Shutting down cluster: " + context.getConfiguration().getName());
        context.left();
        return start;
      } else {
        // Broadcast our departure and wait (bounded by the "leave" timeout)
        // for the resulting configuration change in the leaving state.
        context.getLogger(ClusterState.class).info("Leaving:" + nodeList);
        ClusterMessage.ConfigurationChangeState newState =
            new ClusterMessage.ConfigurationChangeState();
        newState.leave(context.me);
        outgoing.offer(internal(AtomicBroadcastMessage.broadcast, newState));
        context.timeouts.setTimeout("leave", timeout(ClusterMessage.leaveTimedout, message));
        return leaving;
      }
    }
  }
  return this;
}