/**
 * Resumes the update identified by {@code key}.
 *
 * <p>The stored update is looked up inside a storage write. Its current status is mapped to a new
 * status with {@code GET_BLOCKED_RESUME_STATE} when {@code isCoordinatedAndPulseExpired} returns
 * true for the stored update, and with {@code GET_ACTIVE_RESUME_STATE} otherwise. The resulting
 * event is annotated with {@code auditData} before the status change is applied.
 *
 * @param key Identifier of the update to resume; must not be null.
 * @param auditData Audit details attached to the status-change event; must not be null.
 * @throws UpdateStateException If no update is stored under {@code key}, or if the status change
 *     itself raises it.
 */
@Override
public void resume(final IJobUpdateKey key, final AuditData auditData)
    throws UpdateStateException {

  requireNonNull(key);
  requireNonNull(auditData);
  LOG.info("Attempting to resume update " + key);

  storage.write(
      (NoResult<UpdateStateException>) storeProvider -> {
        // A null default turns "no match" into a null result instead of an exception.
        IJobUpdateDetails match =
            Iterables.getOnlyElement(
                storeProvider.getJobUpdateStore().fetchJobUpdateDetails(queryByUpdate(key)),
                null);
        if (match == null) {
          throw new UpdateStateException("Update does not exist: " + key);
        }

        IJobUpdate stored = match.getUpdate();
        IJobUpdateKey storedKey = stored.getSummary().getKey();

        Function<JobUpdateStatus, JobUpdateStatus> transition;
        if (isCoordinatedAndPulseExpired(storedKey, stored.getInstructions())) {
          transition = GET_BLOCKED_RESUME_STATE;
        } else {
          transition = GET_ACTIVE_RESUME_STATE;
        }

        JobUpdateStatus resumedStatus =
            transition.apply(stored.getSummary().getState().getStatus());
        changeUpdateStatus(
            storeProvider,
            stored.getSummary(),
            addAuditData(newEvent(resumedStatus), auditData));
      });
}
@Override public void validateIfLocked(final ILockKey context, Optional<ILock> heldLock) throws LockException { Optional<ILock> stored = storage.read(storeProvider -> storeProvider.getLockStore().fetchLock(context)); // The implementation below assumes the following use cases: // +-----------+-----------------+----------+ // | eq | held | not held | // +-----------+-----------------+----------+ // |stored |(stored == held)?| invalid | // +-----------+-----------------+----------+ // |not stored | invalid | valid | // +-----------+-----------------+----------+ if (!stored.equals(heldLock)) { if (stored.isPresent()) { throw new LockException( String.format( "Unable to perform operation for: %s. Use override/cancel option.", formatLockKey(context))); } else if (heldLock.isPresent()) { throw new LockException( String.format("Invalid operation context: %s", formatLockKey(context))); } } }
/**
 * Moves the active tasks of the requested shards of a job to {@code RESTARTING}.
 *
 * @param jobKey Job whose shards should be restarted.
 * @param shards Shard (instance) ids to restart; must not be empty.
 * @param requestingUser User name recorded in the state-change message.
 * @throws ScheduleException If the job key is invalid, no shards were given, or the number of
 *     active tasks found differs from the number of requested shards.
 */
@Override
public void restartShards(
    IJobKey jobKey, final Set<Integer> shards, final String requestingUser)
    throws ScheduleException {

  if (!JobKeys.isValid(jobKey)) {
    throw new ScheduleException("Invalid job key: " + jobKey);
  }
  if (shards.isEmpty()) {
    throw new ScheduleException("At least one shard must be specified.");
  }

  final Query.Builder query = Query.instanceScoped(jobKey, shards).active();

  storage.write(
      new MutateWork.NoResult<ScheduleException>() {
        @Override
        protected void execute(MutableStoreProvider storeProvider) throws ScheduleException {
          Set<IScheduledTask> activeTasks = storeProvider.getTaskStore().fetchTasks(query);
          // Every requested shard must currently have an active task.
          if (activeTasks.size() != shards.size()) {
            throw new ScheduleException("Not all requested shards are active.");
          }

          LOG.info("Restarting shards matching " + query);
          // The audit message is identical for every task, so build it once.
          Optional<String> auditMessage = Optional.of("Restarted by " + requestingUser);
          for (String id : Tasks.ids(activeTasks)) {
            stateManager.changeState(
                id, Optional.<ScheduleStatus>absent(), RESTARTING, auditMessage);
          }
        }
      });
}
private Runnable getDeferredEvaluator(final IInstanceKey instance, final IJobUpdateKey key) { return () -> storage.write( (NoResult.Quiet) storeProvider -> { IJobUpdateSummary summary = getOnlyMatch(storeProvider.getJobUpdateStore(), queryByUpdate(key)); JobUpdateStatus status = summary.getState().getStatus(); // Suppress this evaluation if the updater is not currently active. if (JobUpdateStateMachine.isActive(status)) { UpdateFactory.Update update = updates.get(instance.getJobKey()); try { evaluateUpdater( storeProvider, update, summary, ImmutableMap.of( instance.getInstanceId(), getActiveInstance( storeProvider.getTaskStore(), instance.getJobKey(), instance.getInstanceId()))); } catch (UpdateStateException e) { throw Throwables.propagate(e); } } }); }
/**
 * Creates and stores a new lock for {@code lockKey} on behalf of {@code user}.
 *
 * @param lockKey Key to lock.
 * @param user User recorded as the lock owner.
 * @return The newly stored lock.
 * @throws LockException If a lock is already stored for {@code lockKey}.
 */
@Override
public ILock acquireLock(final ILockKey lockKey, final String user) throws LockException {
  return storage.write(
      storeProvider -> {
        LockStore.Mutable locks = storeProvider.getLockStore();

        Optional<ILock> current = locks.fetchLock(lockKey);
        if (current.isPresent()) {
          ILock held = current.get();
          throw new LockException(
              String.format(
                  "Operation for: %s is already in progress. Started at: %s. Current owner: %s.",
                  formatLockKey(lockKey),
                  new Date(held.getTimestampMs()),
                  held.getUser()));
        }

        Lock builder =
            new Lock()
                .setKey(lockKey.newBuilder())
                .setToken(tokenGenerator.createNew().toString())
                .setTimestampMs(clock.nowMillis())
                .setUser(user);
        ILock created = ILock.build(builder);
        locks.saveLock(created);
        return created;
      });
}
/**
 * Adds new pending instances to a job, all sharing the same task configuration.
 *
 * @param jobKey Job to add instances to.
 * @param instanceIds Ids of the instances to create.
 * @param config Task configuration used for every new instance.
 * @throws ScheduleException If any requested id is already used by an active task of the job,
 *     or if {@code validateTaskLimits} rejects the request.
 */
@Override
public void addInstances(
    final IJobKey jobKey, final ImmutableSet<Integer> instanceIds, final ITaskConfig config)
    throws ScheduleException {

  storage.write(
      new MutateWork.NoResult<ScheduleException>() {
        @Override
        protected void execute(MutableStoreProvider storeProvider) throws ScheduleException {
          validateTaskLimits(config, instanceIds.size());

          ImmutableSet<IScheduledTask> activeTasks =
              storeProvider.getTaskStore().fetchTasks(Query.jobScoped(jobKey).active());
          Set<Integer> idsInUse =
              FluentIterable.from(activeTasks).transform(Tasks.SCHEDULED_TO_INSTANCE_ID).toSet();

          // Refuse the whole request if even one id is already taken.
          Set<Integer> collisions = Sets.intersection(idsInUse, instanceIds);
          if (!collisions.isEmpty()) {
            throw new ScheduleException("Instance ID collision detected.");
          }

          stateManager.insertPendingTasks(Maps.asMap(instanceIds, Functions.constant(config)));
        }
      });
}
@Override public synchronized void createJob(final SanitizedConfiguration sanitizedConfiguration) throws ScheduleException { storage.write( new MutateWork.NoResult<ScheduleException>() { @Override protected void execute(MutableStoreProvider storeProvider) throws ScheduleException { final IJobConfiguration job = sanitizedConfiguration.getJobConfig(); if (hasActiveJob(job)) { throw new ScheduleException("Job already exists: " + JobKeys.toPath(job)); } validateTaskLimits(job.getTaskConfig(), job.getInstanceCount()); boolean accepted = false; // TODO(wfarner): Remove the JobManager abstraction, and directly invoke addInstances // here for non-cron jobs. for (final JobManager manager : jobManagers) { if (manager.receiveJob(sanitizedConfiguration)) { LOG.info("Job accepted by manager: " + manager.getUniqueKey()); accepted = true; break; } } if (!accepted) { LOG.severe("Job was not accepted by any of the configured schedulers, discarding."); LOG.severe("Discarded job: " + job); throw new ScheduleException("Job not accepted, discarding."); } } }); }
/**
 * Forwards a task state change to the updater registered for the instance's job, if there is
 * one and the instance belongs to it.
 *
 * @param instance Instance whose state changed.
 * @param state New state of the instance, passed through to the updater evaluation.
 */
private void instanceChanged(final IInstanceKey instance, final Optional<IScheduledTask> state) {
  storage.write(
      (NoResult.Quiet) storeProvider -> {
        IJobKey job = instance.getJobKey();
        UpdateFactory.Update update = updates.get(job);
        if (update == null) {
          // No updater is registered for this job; nothing to forward.
          return;
        }

        if (!update.getUpdater().containsInstance(instance.getInstanceId())) {
          LOG.info(
              "Instance "
                  + instance
                  + " is not part of active update for "
                  + JobKeys.canonicalString(job));
          return;
        }

        LOG.info("Forwarding task change for " + InstanceKeys.toString(instance));
        try {
          evaluateUpdater(
              storeProvider,
              update,
              getOnlyMatch(storeProvider.getJobUpdateStore(), queryActiveByJob(job)),
              ImmutableMap.of(instance.getInstanceId(), state));
        } catch (UpdateStateException e) {
          // The quiet work type cannot declare the checked exception; rethrow unchecked.
          throw Throwables.propagate(e);
        }
      });
}
/**
 * Walks every update matched by {@code ACTIVE_QUERY}, restoring pulse state for coordinated
 * updates and re-applying the current status of updates whose status is in
 * {@code AUTO_RESUME_STATES}.
 */
@Override
public void systemResume() {
  storage.write(
      (NoResult.Quiet) storeProvider -> {
        for (IJobUpdateDetails active :
            storeProvider.getJobUpdateStore().fetchJobUpdateDetails(ACTIVE_QUERY)) {

          IJobUpdateSummary summary = active.getUpdate().getSummary();
          IJobUpdateInstructions instructions = active.getUpdate().getInstructions();
          IJobUpdateKey updateKey = summary.getKey();
          JobUpdateStatus currentStatus = summary.getState().getStatus();

          if (isCoordinatedUpdate(instructions)) {
            LOG.info("Automatically restoring pulse state for " + updateKey);
            pulseHandler.initializePulseState(active.getUpdate(), currentStatus);
          }

          if (!AUTO_RESUME_STATES.contains(currentStatus)) {
            continue;
          }

          LOG.info("Automatically resuming update " + updateKey);
          try {
            // Re-applies the status the update already has.
            changeJobUpdateStatus(storeProvider, updateKey, newEvent(currentStatus), false);
          } catch (UpdateStateException e) {
            throw Throwables.propagate(e);
          }
        }
      });
}
/** Removes every task from the task store inside a single storage write. */
protected void deleteTasks() {
  storage.write(
      (Storage.MutateWork.NoResult.Quiet)
          storeProvider -> storeProvider.getUnsafeTaskStore().deleteAllTasks());
}
/**
 * Builds tasks with {@code Tasks.Builder} and saves them to the task store.
 *
 * @param size Value passed to {@code Tasks.Builder#build}; presumably the number of tasks to
 *     create (confirm against the builder).
 */
protected void createTasks(int size) {
  storage.write(
      (Storage.MutateWork.NoResult.Quiet) storeProvider -> {
        Set<IScheduledTask> generated = new Tasks.Builder().build(size);
        storeProvider.getUnsafeTaskStore().saveTasks(generated);
      });
}
/**
 * Deletes the given tasks through the state manager inside a storage write.
 *
 * @param taskIds Ids of the tasks to delete.
 */
private void deleteTasks(final Set<String> taskIds) {
  LOG.info("Pruning inactive tasks " + taskIds);

  Storage.MutateWork.NoResult.Quiet pruneWork =
      new Storage.MutateWork.NoResult.Quiet() {
        @Override
        public void execute(Storage.MutableStoreProvider storeProvider) {
          stateManager.deleteTasks(storeProvider, taskIds);
        }
      };
  storage.write(pruneWork);
}
/**
 * Sums every quota held in the quota store into a single metric.
 *
 * @return A metric that has accumulated each stored quota.
 * @throws StorageException if there was a problem fetching quotas from storage.
 */
public Metric computeQuotaAllocationTotals() throws StorageException {
  return storage.read(
      storeProvider -> {
        Metric total = new Metric();
        // Only the quota values are accumulated; the map keys are not used.
        for (IResourceAggregate stored : storeProvider.getQuotaStore().fetchQuotas().values()) {
          total.accumulate(stored);
        }
        return total;
      });
}
@Override public void start(final IJobUpdate update, final AuditData auditData) throws UpdateStateException { requireNonNull(update); requireNonNull(auditData); storage.write( (NoResult<UpdateStateException>) storeProvider -> { IJobUpdateSummary summary = update.getSummary(); IJobUpdateInstructions instructions = update.getInstructions(); IJobKey job = summary.getKey().getJob(); // Validate the update configuration by making sure we can create an updater for it. updateFactory.newUpdate(update.getInstructions(), true); if (instructions.getInitialState().isEmpty() && !instructions.isSetDesiredState()) { throw new IllegalArgumentException("Update instruction is a no-op."); } List<IJobUpdateSummary> activeJobUpdates = storeProvider.getJobUpdateStore().fetchJobUpdateSummaries(queryActiveByJob(job)); if (!activeJobUpdates.isEmpty()) { throw new UpdateStateException( "An active update already exists for this job, " + "please terminate it before starting another. " + "Active updates are those in states " + Updates.ACTIVE_JOB_UPDATE_STATES); } LOG.info("Starting update for job " + job); ILock lock; try { lock = lockManager.acquireLock( ILockKey.build(LockKey.job(job.newBuilder())), auditData.getUser()); } catch (LockException e) { throw new UpdateStateException(e.getMessage(), e); } storeProvider .getJobUpdateStore() .saveJobUpdate(update, Optional.of(requireNonNull(lock.getToken()))); JobUpdateStatus status = ROLLING_FORWARD; if (isCoordinatedUpdate(instructions)) { status = ROLL_FORWARD_AWAITING_PULSE; pulseHandler.initializePulseState(update, status); } recordAndChangeJobUpdateStatus( storeProvider, summary.getKey(), addAuditData(newEvent(status), auditData)); }); }
@Override public synchronized void killTasks(Query.Builder query, final String user) throws ScheduleException { checkNotNull(query); LOG.info("Killing tasks matching " + query); boolean jobDeleted = false; if (Query.isOnlyJobScoped(query)) { // If this looks like a query for all tasks in a job, instruct the scheduler modules to // delete the job. IJobKey jobKey = JobKeys.from(query).get(); for (JobManager manager : jobManagers) { if (manager.deleteJob(jobKey)) { jobDeleted = true; } } } // Unless statuses were specifically supplied, only attempt to kill active tasks. final Query.Builder taskQuery = query.get().isSetStatuses() ? query.byStatus(ACTIVE_STATES) : query; int tasksAffected = storage.write( new MutateWork.Quiet<Integer>() { @Override public Integer apply(MutableStoreProvider storeProvider) { int total = 0; for (String taskId : Tasks.ids(storeProvider.getTaskStore().fetchTasks(taskQuery))) { boolean changed = stateManager.changeState( taskId, Optional.<ScheduleStatus>absent(), KILLING, Optional.of("Killed by " + user)); if (changed) { total++; } } return total; } }); if (!jobDeleted && (tasksAffected == 0)) { throw new ScheduleException("No jobs to kill"); } }
/**
 * Changes the state of an update, without the 'scope' of an update ID. This should only be used
 * when responding to outside inputs that are inherently un-scoped, such as a user action or task
 * state change.
 *
 * @param key Update identifier.
 * @param stateChange Computes the event to apply from the update's current status.
 * @throws UpdateStateException If no update is stored under {@code key}, or if the status change
 *     itself raises it.
 */
private void unscopedChangeUpdateStatus(
    final IJobUpdateKey key,
    final Function<? super JobUpdateStatus, JobUpdateEvent> stateChange)
    throws UpdateStateException {

  storage.write(
      (NoResult<UpdateStateException>) storeProvider -> {
        // A null default turns "no match" into a null result instead of an exception.
        IJobUpdateSummary summary =
            Iterables.getOnlyElement(
                storeProvider.getJobUpdateStore().fetchJobUpdateSummaries(queryByUpdate(key)),
                null);
        if (summary == null) {
          throw new UpdateStateException("Update does not exist " + key);
        }

        JobUpdateEvent event = stateChange.apply(summary.getState().getStatus());
        changeUpdateStatus(storeProvider, summary, event);
      });
}
/**
 * Exercises the backup/recovery round trip: create a snapshot backup, stage it, query the
 * staged contents, commit, and finally run the storage write captured during the commit.
 */
@Test
public void testRecover() throws Exception {
  // Expectations, recorded before the mocks are switched to replay.
  expect(snapshotStore.createSnapshot()).andReturn(SNAPSHOT1);
  Capture<MutateWork<Object, Exception>> capturedWork = createCapture();
  expect(primaryStorage.write(capture(capturedWork))).andReturn(null);
  distributedStore.persist(SNAPSHOT1);
  shutDownNow.execute();

  control.replay();

  // No backups exist until a snapshot has been taken.
  assertEquals(ImmutableSet.<String>of(), recovery.listBackups());

  clock.advance(INTERVAL);
  storageBackup.createSnapshot();
  String backupName = storageBackup.createBackupName();
  assertEquals(ImmutableSet.of(backupName), recovery.listBackups());

  // Staging makes the backup's tasks visible to queries.
  recovery.stage(backupName);
  assertEquals(
      IScheduledTask.setFromBuilders(SNAPSHOT1.getTasks()), recovery.query(Query.unscoped()));

  recovery.commit();
  // Run the write that commit() handed to the mocked primary storage.
  capturedWork.getValue().apply(storeProvider);
}
/**
 * Removes the stored lock that has the same key as {@code lock}.
 *
 * @param lock Lock to release; only its key is used.
 */
@Override
public void releaseLock(final ILock lock) {
  storage.write(
      (NoResult.Quiet) storeProvider -> {
        storeProvider.getLockStore().removeLock(lock.getKey());
      });
}
/**
 * Reads all locks from the lock store.
 *
 * @return Every lock returned by the lock store.
 */
@Override
public Iterable<ILock> getLocks() {
  return storage.read(
      storeProvider -> {
        return storeProvider.getLockStore().fetchLocks();
      });
}