private void runElection() { NodeID myNodeID = getLocalNodeID(); NodeID winner = ServerID.NULL_ID; int count = 0; // Only new L2 if the DB was empty (no previous state) and the current state is START (as in // before any elections // concluded) boolean isNew = state == START_STATE && clusterStatePersistor.getInitialState() == null; while (getActiveNodeID().isNull()) { if (++count > 1) { logger.info( "Rerunning election since node " + winner + " never declared itself as ACTIVE !"); } debugInfo("Running election - isNew: " + isNew); winner = electionMgr.runElection(myNodeID, isNew, weightsFactory); if (winner == myNodeID) { debugInfo("Won Election, moving to active state. myNodeID/winner=" + myNodeID); moveToActiveState(); } else { electionMgr.reset(null); // Election is lost, but we wait for the active node to declare itself as winner. If this // doesn't happen in a // finite time we restart the election. This is to prevent some weird cases where two nodes // might end up // thinking the other one is the winner. // @see MNK-518 debugInfo("Lost election, waiting for winner to declare as active, winner=" + winner); waitUntilActiveNodeIDNotNull(electionMgr.getElectionTime()); } } }
private void handleElectionAbort(L2StateMessage clusterMsg) { if (state == ACTIVE_COORDINATOR) { // Cant get Abort back to ACTIVE, if so then there is a split brain String error = state + " Received Abort Election Msg : Possible split brain detected "; logger.error(error); groupManager.zapNode( clusterMsg.messageFrom(), L2HAZapNodeRequestProcessor.SPLIT_BRAIN, error); } else { debugInfo("ElectionMgr handling election abort"); electionMgr.handleElectionAbort(clusterMsg); } }
private void handleStartElectionRequest(L2StateMessage msg) throws GroupException { if (state == ACTIVE_COORDINATOR) { // This is either a new L2 joining a cluster or a renegade L2. Force it to abort GroupMessage abortMsg = L2StateMessage.createAbortElectionMessage( msg, EnrollmentFactory.createTrumpEnrollment(getLocalNodeID(), weightsFactory)); info("Forcing Abort Election for " + msg + " with " + abortMsg); groupManager.sendTo(msg.messageFrom(), abortMsg); } else if (!electionMgr.handleStartElectionRequest(msg)) { // TODO::FIXME:: Commenting so that stage thread is not held up doing election. // startElectionIfNecessary(NodeID.NULL_ID); logger.warn("Not starting election as it was commented out"); } }
private synchronized void moveToActiveState() { if (state == START_STATE || state == PASSIVE_STANDBY) { // TODO :: If state == START_STATE publish cluster ID debugInfo("Moving to active state"); StateChangedEvent event = new StateChangedEvent(state, ACTIVE_COORDINATOR); state = ACTIVE_COORDINATOR; setActiveNodeID(getLocalNodeID()); info("Becoming " + state, true); fireStateChangedOperatorEvent(); electionMgr.declareWinner(this.activeNode); stateChangeSink.add(event); } else { throw new AssertionError("Cant move to " + ACTIVE_COORDINATOR + " from " + state); } }
private synchronized void handleElectionResultMessage(L2StateMessage msg) throws GroupException { if (activeNode.equals(msg.getEnrollment().getNodeID())) { Assert.assertFalse(ServerID.NULL_ID.equals(activeNode)); // This wouldn't normally happen, but we agree - so ack GroupMessage resultAgreed = L2StateMessage.createResultAgreedMessage(msg, msg.getEnrollment()); logger.info("Agreed with Election Result from " + msg.messageFrom() + " : " + resultAgreed); groupManager.sendTo(msg.messageFrom(), resultAgreed); } else if (state == ACTIVE_COORDINATOR || !activeNode.isNull() || (msg.getEnrollment().isANewCandidate() && state != START_STATE)) { // Condition 1 : // Obviously an issue. // Condition 2 : // This shouldn't happen normally, but is possible when there is some weird network error // where A sees B, // B sees A/C and C sees B and A is active and C is trying to run election // Force other node to rerun election so that we can abort // Condition 3 : // We don't want new L2s to win an election when there are old L2s in PASSIVE states. GroupMessage resultConflict = L2StateMessage.createResultConflictMessage( msg, EnrollmentFactory.createTrumpEnrollment(getLocalNodeID(), weightsFactory)); warn( "WARNING :: Active Node = " + activeNode + " , " + state + " received ELECTION_RESULT message from another node : " + msg + " : Forcing re-election " + resultConflict); groupManager.sendTo(msg.messageFrom(), resultConflict); } else { debugInfo("ElectionMgr handling election result msg: " + msg); electionMgr.handleElectionResultMessage(msg); } }
private synchronized void moveToPassiveState(Enrollment winningEnrollment) { electionMgr.reset(winningEnrollment); if (state == START_STATE) { state = PASSIVE_UNINITIALIZED; info("Moved to " + state, true); fireStateChangedOperatorEvent(); stateChangeSink.add(new StateChangedEvent(START_STATE, state)); } else if (state == ACTIVE_COORDINATOR) { // TODO:: Support this later throw new AssertionError( "Cant move to " + PASSIVE_UNINITIALIZED + " from " + ACTIVE_COORDINATOR + " at least for now"); } else { debugInfo( "Move to passive state ignored - state=" + state + ", winningEnrollment: " + winningEnrollment); } }