/**
 * Decorates a transition callback so that an unchecked failure kills the lifecycle.
 *
 * <p>The returned consumer delegates to {@code closure}. If the delegate throws a
 * {@link RuntimeException}, the exception is logged, the state machine is transitioned to
 * {@code State.DEAD}, and the same exception is rethrown to the caller.
 *
 * @param closure The callback to guard.
 * @return A consumer wrapping {@code closure} with the die-on-error behavior.
 */
private Consumer<Transition<State>> dieOnError(final Consumer<Transition<State>> closure) {
  return stateChange -> {
    try {
      closure.accept(stateChange);
    } catch (RuntimeException failure) {
      LOG.error("Caught unchecked exception: " + failure, failure);
      // Move to DEAD before propagating the failure to the caller.
      stateMachine.transition(State.DEAD);
      throw failure;
    }
  };
}
/**
 * Reacts to loss of leadership: logs the event at error level and then transitions the state
 * machine to {@code State.DEAD}.
 */
@Override
public void onDefeated() {
  LOG.error("Lost leadership, committing suicide.");
  stateMachine.transition(State.DEAD);
}
/**
 * Reacts to winning leadership.
 *
 * <p>The supplied control handle is stored first, and only then is the state machine moved to
 * {@code State.LEADER_AWAITING_REGISTRATION}.
 *
 * @param control Handle for the leadership that was just acquired.
 */
@Override
public void onLeading(LeaderControl control) {
  // Store the handle before transitioning; keep this order.
  leaderControl.set(control);
  stateMachine.transition(State.LEADER_AWAITING_REGISTRATION);
}
/**
 * Subscriber callback for {@link DriverRegistered} events; transitions the state machine to
 * {@code State.ACTIVE}.
 *
 * @param event The registration event. Its contents are not inspected.
 */
@Subscribe
public void registered(DriverRegistered event) {
  stateMachine.transition(State.ACTIVE);
}
/**
 * Readies this scheduler to stand as a leader candidate.
 *
 * <p>Calling this transitions the lifecycle to {@code State.PREPARING_STORAGE}. Once it returns,
 * the scheduler hosts a live log replica and syncs data from the current leader through the log
 * until it is itself called upon to lead.
 *
 * @return A listener that can be offered for leadership of a distributed election.
 */
public LeadershipListener prepare() {
  stateMachine.transition(State.PREPARING_STORAGE);
  return leadershipListener;
}
@VisibleForTesting SchedulerLifecycle( final NonVolatileStorage storage, final Lifecycle lifecycle, final Driver driver, final DelayedActions delayedActions, final ShutdownRegistry shutdownRegistry, StatsProvider statsProvider, final ServiceManagerIface schedulerActiveServiceManager) { requireNonNull(storage); requireNonNull(lifecycle); requireNonNull(driver); requireNonNull(delayedActions); requireNonNull(shutdownRegistry); statsProvider.makeGauge( REGISTERED_GAUGE, new Supplier<Integer>() { @Override public Integer get() { return registrationAcked.get() ? 1 : 0; } }); for (final State state : State.values()) { statsProvider.makeGauge( stateGaugeName(state), new Supplier<Integer>() { @Override public Integer get() { return (state == stateMachine.getState()) ? 1 : 0; } }); } shutdownRegistry.addAction( new ExceptionalCommand<TimeoutException>() { @Override public void execute() throws TimeoutException { stateMachine.transition(State.DEAD); schedulerActiveServiceManager.stopAsync(); schedulerActiveServiceManager.awaitStopped(5L, TimeUnit.SECONDS); } }); final Consumer<Transition<State>> prepareStorage = new Consumer<Transition<State>>() { @Override public void accept(Transition<State> transition) { storage.prepare(); stateMachine.transition(State.STORAGE_PREPARED); } }; final Consumer<Transition<State>> handleLeading = new Consumer<Transition<State>>() { @Override public void accept(Transition<State> transition) { LOG.info("Elected as leading scheduler!"); storage.start( stores -> { // If storage backfill operations are necessary, they can be done here. 
}); driver.startAsync().awaitRunning(); delayedActions.onRegistrationTimeout( () -> { if (!registrationAcked.get()) { LOG.error("Framework has not been registered within the tolerated delay."); stateMachine.transition(State.DEAD); } }); delayedActions.onAutoFailover( () -> { LOG.info("Triggering automatic failover."); stateMachine.transition(State.DEAD); }); } }; final Consumer<Transition<State>> handleRegistered = new Consumer<Transition<State>>() { @Override public void accept(Transition<State> transition) { registrationAcked.set(true); delayedActions.blockingDriverJoin( () -> { driver.blockUntilStopped(); LOG.info("Driver exited, terminating lifecycle."); stateMachine.transition(State.DEAD); }); // TODO(ksweeney): Extract leader advertisement to its own service. schedulerActiveServiceManager.startAsync().awaitHealthy(); try { leaderControl.get().advertise(); } catch (SingletonService.AdvertiseException | InterruptedException e) { LOG.error("Failed to advertise leader, shutting down."); throw new RuntimeException(e); } } }; final Consumer<Transition<State>> shutDown = new Consumer<Transition<State>>() { private final AtomicBoolean invoked = new AtomicBoolean(false); @Override public void accept(Transition<State> transition) { if (!invoked.compareAndSet(false, true)) { LOG.info("Shutdown already invoked, ignoring extra call."); return; } // TODO(wfarner): Consider using something like guava's Closer to abstractly tear down // resources here. try { LeaderControl control = leaderControl.get(); if (control != null) { try { control.leave(); } catch (SingletonService.LeaveException e) { LOG.warn("Failed to leave leadership: " + e, e); } } // TODO(wfarner): Re-evaluate tear-down ordering here. Should the top-level shutdown // be invoked first, or the underlying critical components? 
driver.stopAsync().awaitTerminated(); storage.stop(); } finally { lifecycle.shutdown(); } } }; stateMachine = StateMachine.<State>builder("SchedulerLifecycle") .initialState(State.IDLE) .logTransitions() .addState( dieOnError(Consumers.filter(NOT_DEAD, prepareStorage)), State.IDLE, State.PREPARING_STORAGE, State.DEAD) .addState(State.PREPARING_STORAGE, State.STORAGE_PREPARED, State.DEAD) .addState( dieOnError(Consumers.filter(NOT_DEAD, handleLeading)), State.STORAGE_PREPARED, State.LEADER_AWAITING_REGISTRATION, State.DEAD) .addState( dieOnError(Consumers.filter(NOT_DEAD, handleRegistered)), State.LEADER_AWAITING_REGISTRATION, State.ACTIVE, State.DEAD) .addState(State.ACTIVE, State.DEAD) .addState( State.DEAD, // Allow cycles in DEAD to prevent throwing and avoid the need for call-site // checking. State.DEAD) .onAnyTransition(Consumers.filter(IS_DEAD, shutDown)) .build(); this.leadershipListener = new SchedulerCandidateImpl(stateMachine, leaderControl); }