@Override public void start(final IJobUpdate update, final AuditData auditData) throws UpdateStateException { requireNonNull(update); requireNonNull(auditData); storage.write( (NoResult<UpdateStateException>) storeProvider -> { IJobUpdateSummary summary = update.getSummary(); IJobUpdateInstructions instructions = update.getInstructions(); IJobKey job = summary.getKey().getJob(); // Validate the update configuration by making sure we can create an updater for it. updateFactory.newUpdate(update.getInstructions(), true); if (instructions.getInitialState().isEmpty() && !instructions.isSetDesiredState()) { throw new IllegalArgumentException("Update instruction is a no-op."); } List<IJobUpdateSummary> activeJobUpdates = storeProvider.getJobUpdateStore().fetchJobUpdateSummaries(queryActiveByJob(job)); if (!activeJobUpdates.isEmpty()) { throw new UpdateStateException( "An active update already exists for this job, " + "please terminate it before starting another. " + "Active updates are those in states " + Updates.ACTIVE_JOB_UPDATE_STATES); } LOG.info("Starting update for job " + job); ILock lock; try { lock = lockManager.acquireLock( ILockKey.build(LockKey.job(job.newBuilder())), auditData.getUser()); } catch (LockException e) { throw new UpdateStateException(e.getMessage(), e); } storeProvider .getJobUpdateStore() .saveJobUpdate(update, Optional.of(requireNonNull(lock.getToken()))); JobUpdateStatus status = ROLLING_FORWARD; if (isCoordinatedUpdate(instructions)) { status = ROLL_FORWARD_AWAITING_PULSE; pulseHandler.initializePulseState(update, status); } recordAndChangeJobUpdateStatus( storeProvider, summary.getKey(), addAuditData(newEvent(status), auditData)); }); }
/**
 * Walks every update matched by {@code ACTIVE_QUERY} inside a single storage write operation
 * and restores its runtime state.
 *
 * <p>For each update: if it is coordinated, its pulse state is re-initialized with the update's
 * current status; if its current status is one of {@code AUTO_RESUME_STATES}, the status change
 * is re-applied with that same status.
 *
 * <p>An {@link UpdateStateException} raised while resuming is rethrown via
 * {@code Throwables.propagate}, since the storage work used here does not declare a checked
 * exception.
 */
@Override
public void systemResume() {
  storage.write(
      (NoResult.Quiet) storeProvider -> {
        for (IJobUpdateDetails details :
            storeProvider.getJobUpdateStore().fetchJobUpdateDetails(ACTIVE_QUERY)) {

          final IJobUpdate activeUpdate = details.getUpdate();
          final IJobUpdateSummary summary = activeUpdate.getSummary();
          final IJobUpdateKey key = summary.getKey();
          final JobUpdateStatus status = summary.getState().getStatus();

          if (isCoordinatedUpdate(activeUpdate.getInstructions())) {
            LOG.info("Automatically restoring pulse state for " + key);
            pulseHandler.initializePulseState(activeUpdate, status);
          }

          if (!AUTO_RESUME_STATES.contains(status)) {
            // Not a status that resumes on its own; leave the update untouched.
            continue;
          }

          LOG.info("Automatically resuming update " + key);
          try {
            // NOTE(review): the trailing 'false' presumably means "do not record a new event"
            // for this re-applied status -- confirm against changeJobUpdateStatus.
            changeJobUpdateStatus(storeProvider, key, newEvent(status), false);
          } catch (UpdateStateException e) {
            throw Throwables.propagate(e);
          }
        }
      });
}
/**
 * Transitions an update to the status carried by {@code event}, doing nothing when the update
 * is already in that status.
 *
 * <p>When the statuses differ, the transition is first checked with
 * {@code assertTransitionAllowed} and then applied through
 * {@code recordAndChangeJobUpdateStatus}.
 *
 * @param storeProvider storage handle forwarded to the status-change helper.
 * @param updateSummary summary supplying the update's key and current status.
 * @param event event whose status is the requested target status.
 * @throws UpdateStateException declared by this method; propagated from the helpers it calls.
 */
private void changeUpdateStatus(
    MutableStoreProvider storeProvider,
    IJobUpdateSummary updateSummary,
    JobUpdateEvent event) throws UpdateStateException {

  final JobUpdateStatus current = updateSummary.getState().getStatus();
  final JobUpdateStatus requested = event.getStatus();

  if (current != requested) {
    assertTransitionAllowed(current, requested);
    recordAndChangeJobUpdateStatus(storeProvider, updateSummary.getKey(), event);
  }
}
/**
 * Runs one evaluation pass of an update's updater and applies the outcome.
 *
 * <p>The pass proceeds as follows:
 * <ol>
 *   <li>If the store holds no lock token for the update, the update is moved to {@code ERROR}
 *       with {@code LOST_LOCK_MESSAGE} and nothing else happens.</li>
 *   <li>If {@code isCoordinatedAndPulseExpired} reports true, the update is moved to the
 *       blocked counterpart of its current status with {@code PULSE_TIMEOUT_MESSAGE} and
 *       nothing else happens.</li>
 *   <li>Otherwise the updater is evaluated, and each resulting instance status change is
 *       persisted as an instance event unless it is a no-op on an instance with no prior
 *       events, or its action has already been recorded for that instance.</li>
 *   <li>If the result status is {@code SUCCEEDED} or {@code FAILED}, the update is moved to
 *       the corresponding success or failure status; on failure the message is derived from
 *       the first side effect that carries a failure. Otherwise, each instance action handler
 *       is consulted and a deferred re-evaluation is scheduled when it returns a delay.</li>
 * </ol>
 *
 * @param storeProvider storage handle used for the update store, the task store and for
 *     status changes.
 * @param update supplies the updater plus the success and failure statuses.
 * @param summary summary of the update being evaluated (key and current status).
 * @param changedInstance instance states handed to the updater's {@code evaluate} call.
 * @throws UpdateStateException declared by this method; propagated from the status-change
 *     helpers it calls.
 * @throws IllegalArgumentException if a terminal evaluation result still carries actions.
 */
private void evaluateUpdater(
    final MutableStoreProvider storeProvider,
    final UpdateFactory.Update update,
    IJobUpdateSummary summary,
    Map<Integer, Optional<IScheduledTask>> changedInstance) throws UpdateStateException {

  final IJobUpdateKey key = summary.getKey();
  final JobUpdateStatus updaterStatus = summary.getState().getStatus();
  final JobUpdateStore.Mutable updateStore = storeProvider.getJobUpdateStore();

  // Guard 1: without a lock token the update cannot proceed.
  boolean lockHeld = updateStore.getLockToken(key).isPresent();
  if (!lockHeld) {
    recordAndChangeJobUpdateStatus(
        storeProvider, key, newEvent(ERROR).setMessage(LOST_LOCK_MESSAGE));
    return;
  }

  // Guard 2: a coordinated update whose pulse has expired is parked in its blocked state.
  final IJobUpdateInstructions instructions = updateStore.fetchJobUpdateInstructions(key).get();
  if (isCoordinatedAndPulseExpired(key, instructions)) {
    JobUpdateEvent timeoutEvent =
        newEvent(getBlockedState(updaterStatus)).setMessage(PULSE_TIMEOUT_MESSAGE);
    changeUpdateStatus(storeProvider, summary, timeoutEvent);
    return;
  }

  InstanceStateProvider<Integer, Optional<IScheduledTask>> stateProvider =
      id -> getActiveInstance(storeProvider.getTaskStore(), key.getJob(), id);

  final EvaluationResult<Integer> result =
      update.getUpdater().evaluate(changedInstance, stateProvider);
  LOG.info(key + " evaluation result: " + result);

  final Map<Integer, SideEffect> sideEffects = result.getSideEffects();

  // Persist the per-instance status changes produced by this evaluation.
  for (Map.Entry<Integer, SideEffect> entry : sideEffects.entrySet()) {
    int instanceId = entry.getKey();
    List<IJobInstanceUpdateEvent> priorEvents = updateStore.fetchInstanceEvents(key, instanceId);
    Iterable<InstanceUpdateStatus> statusChanges = entry.getValue().getStatusChanges();

    // An instance with no history that matches the no-op sequence is not worth persisting.
    if (statusChanges.equals(NOOP_INSTANCE_UPDATE) && priorEvents.isEmpty()) {
      LOG.info("Suppressing no-op update for instance " + instanceId);
      continue;
    }

    Set<JobUpdateAction> priorActions =
        FluentIterable.from(priorEvents).transform(EVENT_TO_ACTION).toSet();

    for (InstanceUpdateStatus statusChange : statusChanges) {
      JobUpdateAction action =
          requireNonNull(STATE_MAP.get(Pair.of(statusChange, updaterStatus)));

      // Each action is recorded at most once per instance; repeats (for example after a
      // pause/resume cycle) are dropped.
      if (priorActions.contains(action)) {
        LOG.info(
            String.format(
                "Suppressing duplicate update %s for instance %s.", action, instanceId));
        continue;
      }

      JobInstanceUpdateEvent builder =
          new JobInstanceUpdateEvent()
              .setInstanceId(instanceId)
              .setTimestampMs(clock.nowMillis())
              .setAction(action);
      updateStore.saveJobInstanceUpdateEvent(key, IJobInstanceUpdateEvent.build(builder));
    }
  }

  final OneWayStatus status = result.getStatus();
  final boolean succeeded = status == SUCCEEDED;

  if (!succeeded && status != OneWayStatus.FAILED) {
    // Non-terminal result: consult each instance's action handler and schedule follow-ups.
    LOG.info("Executing side-effects for update of " + key + ": " + sideEffects);
    for (Map.Entry<Integer, SideEffect> entry : sideEffects.entrySet()) {
      IInstanceKey instance = InstanceKeys.from(key.getJob(), entry.getKey());

      Optional<InstanceAction> action = entry.getValue().getAction();
      if (!action.isPresent()) {
        continue;
      }

      Optional<InstanceActionHandler> handler = action.get().getHandler();
      if (!handler.isPresent()) {
        continue;
      }

      // NOTE(review): this call receives the store and state manager, so it presumably also
      // carries out the instance action rather than only computing a delay -- confirm.
      Optional<Amount<Long, Time>> delay =
          handler
              .get()
              .getReevaluationDelay(
                  instance, instructions, storeProvider, stateManager, updaterStatus);
      if (delay.isPresent()) {
        executor.schedule(
            getDeferredEvaluator(instance, key),
            delay.get().getValue(),
            delay.get().getUnit().getTimeUnit());
      }
    }
    return;
  }

  // Terminal result from here on.
  if (SideEffect.hasActions(sideEffects.values())) {
    throw new IllegalArgumentException(
        "A terminal state should not specify actions: " + result);
  }

  JobUpdateEvent terminalEvent = new JobUpdateEvent();
  if (succeeded) {
    terminalEvent.setStatus(update.getSuccessStatus());
  } else {
    terminalEvent.setStatus(update.getFailureStatus());

    // Use the first side effect that reports a failure as the transition message. This is a
    // deliberately rough diagnostic; a full per-instance summary is not assembled.
    for (Map.Entry<Integer, SideEffect> entry : sideEffects.entrySet()) {
      Optional<Failure> failure = entry.getValue().getFailure();
      if (failure.isPresent()) {
        terminalEvent.setMessage(failureMessage(entry.getKey(), failure.get()));
        break;
      }
    }
  }

  changeUpdateStatus(storeProvider, summary, terminalEvent);
}