/**
 * Computes how many instances of a request still need to be launched.
 *
 * <p>A one-off request queued with {@link PendingType#ONEOFF} always yields 1. Every other case
 * yields the request's instance count minus the number of matching tasks, which is negative when
 * more tasks match than the request asks for.
 *
 * @param matchingTaskIds ids of the tasks counted against the request
 * @param request request whose one-off flag and instance count are read
 * @param pendingRequest pending request whose pending type is inspected
 * @return the number of missing instances; may be zero or negative
 */
private int getNumMissingInstances(
    List<SingularityTaskId> matchingTaskIds,
    SingularityRequest request,
    SingularityPendingRequest pendingRequest) {
  final boolean isOneOffRun =
      request.isOneOff() && pendingRequest.getPendingType() == PendingType.ONEOFF;
  return isOneOffRun ? 1 : request.getInstancesSafe() - matchingTaskIds.size();
}
/**
 * Collects the active task ids that count towards the given request.
 *
 * <p>For a long-running request the selection is delegated to {@code
 * SingularityTaskId.matchingAndNotIn}, passing the request id, the pending request's deploy id
 * and the cache's cleaning tasks. For any other request, every active task id accepted by {@code
 * SingularityTaskId.matchingRequest} for the request id is copied into a new list.
 *
 * @param stateCache source of the active task ids and cleaning tasks
 * @param request request to match tasks against
 * @param pendingRequest pending request supplying the deploy id
 * @return the matching task ids
 */
private List<SingularityTaskId> getMatchingTaskIds(
    SingularitySchedulerStateCache stateCache,
    SingularityRequest request,
    SingularityPendingRequest pendingRequest) {
  if (!request.isLongRunning()) {
    return Lists.newArrayList(
        Iterables.filter(
            stateCache.getActiveTaskIds(), SingularityTaskId.matchingRequest(request.getId())));
  }

  return SingularityTaskId.matchingAndNotIn(
      stateCache.getActiveTaskIds(),
      request.getId(),
      pendingRequest.getDeployId(),
      stateCache.getCleaningTasks());
}
/**
 * Queues an {@link PendingType#UPDATED_REQUEST} pending request when an update to an existing
 * request calls for rescheduling.
 *
 * <p>Nothing is queued when there is no previous request, when {@code shouldReschedule} returns
 * false, or when the deploy manager reports no in-use deploy id for the request.
 *
 * @param newRequest the request as it looks after the update
 * @param maybeOldRequest the request as it looked before the update, if any
 * @param timestamp timestamp stored on the queued pending request
 */
private void checkReschedule(
    SingularityRequest newRequest,
    Optional<SingularityRequest> maybeOldRequest,
    long timestamp) {
  if (!maybeOldRequest.isPresent() || !shouldReschedule(newRequest, maybeOldRequest.get())) {
    return;
  }

  final Optional<String> inUseDeployId = deployManager.getInUseDeployId(newRequest.getId());
  if (!inUseDeployId.isPresent()) {
    return;
  }

  requestManager.addToPendingQueue(
      new SingularityPendingRequest(
          newRequest.getId(), inUseDeployId.get(), timestamp, PendingType.UPDATED_REQUEST));
}
/**
 * Scales an existing request to a new number of instances.
 *
 * <p>The body is checked for presence and for an id equal to the path id, and the request must
 * not have a pending cleanup. The stored request is then rebuilt with the new instance count,
 * validated, persisted and handed to {@code checkReschedule}.
 *
 * @param requestId id of the request to scale, taken from the path
 * @param user optional user name recorded with the update
 * @param newInstances body carrying the request id and the new instance count
 * @return the updated request
 */
@PUT
@Path("/request/{requestId}/instances")
@ApiOperation(
    value = "Scale the number of instances up or down for a specific Request",
    response = SingularityRequest.class)
@ApiResponses({
  @ApiResponse(code = 400, message = "Posted object did not match Request ID"),
  @ApiResponse(code = 404, message = "No Request with that ID"),
  @ApiResponse(code = 409, message = "Request object is being cleaned. Try again shortly"),
})
public SingularityRequest updateInstances(
    @ApiParam("The Request ID to scale") @PathParam("requestId") String requestId,
    @ApiParam("Username of the person requesting the scale") @QueryParam("user")
        Optional<String> user,
    @ApiParam("Object to hold number of instances to request")
        SingularityRequestInstances newInstances) {
  // A call without a body may hand us a null entity; reject it as a client error rather than
  // failing with a NullPointerException on the dereferences below.
  checkNotNullBadRequest(
      newInstances, "Update for request instance must include a request body");

  checkBadRequest(
      requestId != null
          && newInstances.getId() != null
          && requestId.equals(newInstances.getId()),
      "Update for request instance must pass a matching non-null requestId in path (%s) and object (%s)",
      requestId,
      newInstances.getId());

  checkConflict(
      !requestManager.cleanupRequestExists(requestId),
      "Request %s is currently cleaning. Try again after a few moments",
      requestId);

  SingularityRequest oldRequest = fetchRequest(requestId);
  Optional<SingularityRequest> maybeOldRequest = Optional.of(oldRequest);

  SingularityRequestDeployHolder deployHolder = getDeployHolder(newInstances.getId());
  SingularityRequest newRequest =
      oldRequest.toBuilder().setInstances(newInstances.getInstances()).build();

  validator.checkSingularityRequest(
      newRequest,
      maybeOldRequest,
      deployHolder.getActiveDeploy(),
      deployHolder.getPendingDeploy());

  // One timestamp is shared by the stored update and the reschedule request.
  final long now = System.currentTimeMillis();
  requestManager.update(newRequest, now, user);
  checkReschedule(newRequest, maybeOldRequest, now);

  return newRequest;
}
private void checkActiveRequest( SingularityRequestWithState requestWithState, Map<SingularityDeployKey, SingularityPendingTaskId> deployKeyToPendingTaskId, final long timestamp) { final SingularityRequest request = requestWithState.getRequest(); if (request.getRequestType() == RequestType.ON_DEMAND || request.getRequestType() == RequestType.RUN_ONCE) { return; // There's no situation where we'd want to schedule an On Demand or Run Once request // at startup, so don't even bother with them. } Optional<SingularityRequestDeployState> requestDeployState = deployManager.getRequestDeployState(request.getId()); if (!requestDeployState.isPresent() || !requestDeployState.get().getActiveDeploy().isPresent()) { LOG.debug("No active deploy for {} - not scheduling on startup", request.getId()); return; } final String activeDeployId = requestDeployState.get().getActiveDeploy().get().getDeployId(); if (request.isScheduled()) { SingularityDeployKey deployKey = new SingularityDeployKey(request.getId(), activeDeployId); SingularityPendingTaskId pendingTaskId = deployKeyToPendingTaskId.get(deployKey); if (pendingTaskId != null && pendingTaskId.getCreatedAt() >= requestWithState.getTimestamp()) { LOG.info( "Not rescheduling {} because {} is newer than {}", request.getId(), pendingTaskId, requestWithState.getTimestamp()); return; } } requestManager.addToPendingQueue( new SingularityPendingRequest( request.getId(), activeDeployId, timestamp, Optional.<String>absent(), PendingType.STARTUP, Optional.<Boolean>absent(), Optional.<String>absent())); }
/**
 * Creates a new request or updates an existing one.
 *
 * <p>The body must be present and carry an id, and the request must not have a pending cleanup.
 * The submitted request is validated against the previous version (if any) and the current
 * deploys, activated with history type UPDATED or CREATED, and handed to {@code
 * checkReschedule}.
 *
 * @param request the request to create or update
 * @param user optional user name recorded with the change
 * @return the stored request with its state, as built by {@code fillEntireRequest}
 */
@POST
@Consumes({MediaType.APPLICATION_JSON})
@ApiOperation(
    value = "Create or update a Singularity Request",
    response = SingularityRequestParent.class)
@ApiResponses({
  @ApiResponse(code = 400, message = "Request object is invalid"),
  @ApiResponse(code = 409, message = "Request object is being cleaned. Try again shortly"),
})
public SingularityRequestParent submit(
    @ApiParam("The Singularity request to create or update") SingularityRequest request,
    @ApiParam("Username of the person requesting to create or update") @QueryParam("user")
        Optional<String> user) {
  // A call without a body may hand us a null entity; reject it as a client error rather than
  // failing with a NullPointerException on request.getId().
  checkNotNullBadRequest(request, "Request must be provided in the body");
  checkNotNullBadRequest(request.getId(), "Request must have an id");

  checkConflict(
      !requestManager.cleanupRequestExists(request.getId()),
      "Request %s is currently cleaning. Try again after a few moments",
      request.getId());

  Optional<SingularityRequestWithState> maybeOldRequestWithState =
      requestManager.getRequest(request.getId());
  Optional<SingularityRequest> maybeOldRequest =
      maybeOldRequestWithState.isPresent()
          ? Optional.of(maybeOldRequestWithState.get().getRequest())
          : Optional.<SingularityRequest>absent();

  SingularityRequestDeployHolder deployHolder = getDeployHolder(request.getId());

  SingularityRequest newRequest =
      validator.checkSingularityRequest(
          request,
          maybeOldRequest,
          deployHolder.getActiveDeploy(),
          deployHolder.getPendingDeploy());

  // Second conflict check after validation: it only fails when no previous request exists and
  // a cleanup record is present for this id.
  checkConflict(
      maybeOldRequest.isPresent() || !requestManager.cleanupRequestExists(request.getId()),
      "Request %s is currently cleaning. Try again after a few moments",
      request.getId());

  // One timestamp is shared by the activation and the reschedule request.
  final long now = System.currentTimeMillis();

  requestManager.activate(
      newRequest,
      maybeOldRequest.isPresent() ? RequestHistoryType.UPDATED : RequestHistoryType.CREATED,
      now,
      user);

  checkReschedule(newRequest, maybeOldRequest, now);

  return fillEntireRequest(fetchRequestWithState(request.getId()));
}
/**
 * Decides whether a failed task of the request should be retried right away.
 *
 * <p>Returns false when the request does not define a number of retries on failure. Otherwise a
 * retry is allowed only while the deploy's count of sequential retries is still below that
 * number. The decision is logged at debug level either way.
 *
 * @param request request whose retry limit is read
 * @param deployStatistics statistics supplying the number of sequential retries
 * @return true when another immediate retry is allowed
 */
private boolean shouldRetryImmediately(
    SingularityRequest request, SingularityDeployStatistics deployStatistics) {
  if (!request.getNumRetriesOnFailure().isPresent()) {
    return false;
  }

  final int sequentialRetries = deployStatistics.getNumSequentialRetries();
  final boolean retryAllowed = sequentialRetries < request.getNumRetriesOnFailure().get();

  if (retryAllowed) {
    LOG.debug(
        "Request {} had {} retries in a row - retrying again (num retries on failure: {})",
        request.getId(),
        sequentialRetries,
        request.getNumRetriesOnFailure());
  } else {
    LOG.debug(
        "Request {} had {} retries in a row, not retrying again (num retries on failure: {})",
        request.getId(),
        sequentialRetries,
        request.getNumRetriesOnFailure());
  }

  return retryAllowed;
}
/**
 * Builds the pending tasks needed to fill the missing instances of a request.
 *
 * <p>Returns an empty list when {@code getNextRunAt} yields no run time. Otherwise each new task
 * receives the lowest instance number, counting up from 1, that is not used by a matching task
 * and has not already been handed out here.
 *
 * @param numMissingInstances number of pending tasks to create
 * @param matchingTaskIds existing tasks whose instance numbers must not be reused
 * @param request request the tasks belong to
 * @param state request state passed on to {@code getNextRunAt}
 * @param deployStatistics deploy statistics passed on to {@code getNextRunAt}
 * @param deployId deploy id stored on each pending task id
 * @param pendingRequest source of the pending type, timestamp, command line args and user
 * @return the new pending tasks, or an empty list when there is no next run time
 */
private List<SingularityPendingTask> getScheduledTaskIds(
    int numMissingInstances,
    List<SingularityTaskId> matchingTaskIds,
    SingularityRequest request,
    RequestState state,
    SingularityDeployStatistics deployStatistics,
    String deployId,
    SingularityPendingRequest pendingRequest) {
  final Optional<Long> nextRunAt =
      getNextRunAt(request, state, deployStatistics, pendingRequest.getPendingType());

  if (!nextRunAt.isPresent()) {
    return Collections.emptyList();
  }

  final Set<Integer> takenInstanceNumbers =
      Sets.newHashSetWithExpectedSize(matchingTaskIds.size());
  for (SingularityTaskId existingTaskId : matchingTaskIds) {
    takenInstanceNumbers.add(existingTaskId.getInstanceNo());
  }

  final List<SingularityPendingTask> pendingTasks =
      Lists.newArrayListWithCapacity(numMissingInstances);

  // Walk candidate instance numbers upwards, skipping the ones already in use.
  for (int candidate = 1; pendingTasks.size() < numMissingInstances; candidate++) {
    if (takenInstanceNumbers.contains(candidate)) {
      continue;
    }

    final SingularityPendingTaskId pendingTaskId =
        new SingularityPendingTaskId(
            request.getId(),
            deployId,
            nextRunAt.get(),
            candidate,
            pendingRequest.getPendingType(),
            pendingRequest.getTimestamp());
    pendingTasks.add(
        new SingularityPendingTask(
            pendingTaskId, pendingRequest.getCmdLineArgsList(), pendingRequest.getUser()));
  }

  return pendingTasks;
}
/**
 * Tells whether the change from {@code oldRequest} to {@code newRequest} requires rescheduling.
 *
 * <p>That is the case when the instance count differs, or when both versions are scheduled and
 * their quartz schedules are not equal.
 *
 * @param newRequest the request after the update
 * @param oldRequest the request before the update
 * @return true when the request should be rescheduled
 */
private boolean shouldReschedule(SingularityRequest newRequest, SingularityRequest oldRequest) {
  final boolean instanceCountChanged =
      newRequest.getInstancesSafe() != oldRequest.getInstancesSafe();
  if (instanceCountChanged) {
    return true;
  }

  final boolean bothScheduled = newRequest.isScheduled() && oldRequest.isScheduled();
  return bothScheduled
      && !newRequest.getQuartzScheduleSafe().equals(oldRequest.getQuartzScheduleSafe());
}
private Optional<Long> getNextRunAt( SingularityRequest request, RequestState state, SingularityDeployStatistics deployStatistics, PendingType pendingType) { final long now = System.currentTimeMillis(); long nextRunAt = now; if (request.isScheduled()) { if (pendingType == PendingType.IMMEDIATE || pendingType == PendingType.RETRY) { LOG.info("Scheduling requested immediate run of {}", request.getId()); } else { try { Date scheduleFrom = new Date(now); CronExpression cronExpression = new CronExpression(request.getQuartzScheduleSafe()); final Date nextRunAtDate = cronExpression.getNextValidTimeAfter(scheduleFrom); if (nextRunAtDate == null) { return Optional.absent(); } LOG.trace( "Calculating nextRunAtDate for {} (schedule: {}): {} (from: {})", request.getId(), request.getSchedule(), nextRunAtDate, scheduleFrom); nextRunAt = Math.max( nextRunAtDate.getTime(), now); // don't create a schedule that is overdue as this is used to indicate that // singularity is not fulfilling requests. LOG.trace( "Scheduling next run of {} (schedule: {}) at {} (from: {})", request.getId(), request.getSchedule(), nextRunAtDate, scheduleFrom); } catch (ParseException pe) { throw Throwables.propagate(pe); } } } if (pendingType == PendingType.TASK_DONE && request.getWaitAtLeastMillisAfterTaskFinishesForReschedule().or(0L) > 0) { nextRunAt = Math.max( nextRunAt, now + request.getWaitAtLeastMillisAfterTaskFinishesForReschedule().get()); LOG.trace( "Adjusted next run of {} to {} (by {}) due to waitAtLeastMillisAfterTaskFinishesForReschedule", request.getId(), nextRunAt, JavaUtils.durationFromMillis( request.getWaitAtLeastMillisAfterTaskFinishesForReschedule().get())); } if (state == RequestState.SYSTEM_COOLDOWN && pendingType != PendingType.NEW_DEPLOY) { final long prevNextRunAt = nextRunAt; nextRunAt = Math.max( nextRunAt, now + TimeUnit.SECONDS.toMillis(configuration.getCooldownMinScheduleSeconds())); LOG.trace( "Adjusted next run of {} to {} (from: {}) due to cooldown", request.getId(), 
nextRunAt, prevNextRunAt); } return Optional.of(nextRunAt); }
/**
 * Reacts to a completed task: sends notifications, moves the request into or out of cooldown,
 * and schedules follow-up tasks when appropriate.
 *
 * <p>The steps run in a fixed order, and the local {@code requestState} is updated along the way
 * so that later steps (including the final {@code scheduleTasks} call) see the new state.
 *
 * @param task the completed task, if available; passed on to the mailer
 * @param taskId id of the completed task
 * @param timestamp timestamp passed to the cooldown check
 * @param state final state of the task
 * @param deployStatistics statistics used by the cooldown and retry decisions and by scheduling
 * @param taskHistoryUpdateCreateResult result of storing the task history update; mail and the
 *     cooldown check only happen when this is CREATED
 * @param stateCache scheduler state used to find matching tasks and to schedule
 * @return the pending type that was used for scheduling, or absent when nothing was scheduled
 */
private Optional<PendingType> handleCompletedTaskWithStatistics(
    Optional<SingularityTask> task,
    SingularityTaskId taskId,
    long timestamp,
    ExtendedTaskState state,
    SingularityDeployStatistics deployStatistics,
    SingularityCreateResult taskHistoryUpdateCreateResult,
    SingularitySchedulerStateCache stateCache) {
  final Optional<SingularityRequestWithState> maybeRequestWithState =
      requestManager.getRequest(taskId.getRequestId());

  // Nothing to schedule when isRequestActive rejects the request.
  if (!isRequestActive(maybeRequestWithState)) {
    LOG.warn(
        "Not scheduling a new task, {} is {}",
        taskId.getRequestId(),
        SingularityRequestWithState.getRequestState(maybeRequestWithState));
    return Optional.absent();
  }

  // Not final: updated below when the request enters or leaves cooldown.
  RequestState requestState = maybeRequestWithState.get().getState();

  final SingularityRequest request = maybeRequestWithState.get().getRequest();

  final Optional<SingularityRequestDeployState> requestDeployState =
      deployManager.getRequestDeployState(request.getId());

  // Ignore tasks whose deploy is not accepted by isDeployInUse.
  if (!isDeployInUse(requestDeployState, taskId.getDeployId(), true)) {
    LOG.debug(
        "Task {} completed, but it didn't match active deploy state {} - ignoring",
        taskId.getId(),
        requestDeployState);
    return Optional.absent();
  }

  // Task-completed mail goes out only for a newly created history update and only while the
  // request is not in cooldown; the other branches just log why no mail is sent.
  if (taskHistoryUpdateCreateResult == SingularityCreateResult.CREATED
      && requestState != RequestState.SYSTEM_COOLDOWN) {
    mailer.sendTaskCompletedMail(task, taskId, request, state);
  } else if (requestState == RequestState.SYSTEM_COOLDOWN) {
    LOG.debug("Not sending a task completed email because task {} is in SYSTEM_COOLDOWN", taskId);
  } else {
    LOG.debug(
        "Not sending a task completed email for task {} because Singularity already processed this update",
        taskId);
  }

  // A newly recorded failure may push the request into cooldown.
  if (!state.isSuccess()
      && taskHistoryUpdateCreateResult == SingularityCreateResult.CREATED
      && cooldown.shouldEnterCooldown(
          request, taskId, requestState, deployStatistics, timestamp)) {
    LOG.info("Request {} is entering cooldown due to task {}", request.getId(), taskId);
    requestState = RequestState.SYSTEM_COOLDOWN;
    requestManager.cooldown(request, System.currentTimeMillis());
    mailer.sendRequestInCooldownMail(request);
  }

  PendingType pendingType = PendingType.TASK_DONE;

  // A failed task that may be retried is scheduled as RETRY. Otherwise only requests reporting
  // isAlwaysRunning() get a follow-up task.
  if (!state.isSuccess() && shouldRetryImmediately(request, deployStatistics)) {
    LOG.debug("Retrying {} because {}", request.getId(), state);
    pendingType = PendingType.RETRY;
  } else if (!request.isAlwaysRunning()) {
    return Optional.absent();
  }

  // A successful task takes the request out of cooldown.
  if (state.isSuccess() && requestState == RequestState.SYSTEM_COOLDOWN) {
    // TODO send not cooldown anymore email
    LOG.info("Request {} succeeded a task, removing from cooldown", request.getId());
    requestState = RequestState.ACTIVE;
    requestManager.exitCooldown(request, System.currentTimeMillis());
  }

  // The follow-up is always created against the active deploy of the request.
  // NOTE(review): this assumes an active deploy is present whenever isDeployInUse(...) passed -
  // confirm against isDeployInUse.
  SingularityPendingRequest pendingRequest =
      new SingularityPendingRequest(
          request.getId(),
          requestDeployState.get().getActiveDeploy().get().getDeployId(),
          System.currentTimeMillis(),
          pendingType);

  scheduleTasks(
      stateCache,
      request,
      requestState,
      deployStatistics,
      pendingRequest,
      getMatchingTaskIds(stateCache, request, pendingRequest));

  return Optional.of(pendingType);
}
private int scheduleTasks( SingularitySchedulerStateCache stateCache, SingularityRequest request, RequestState state, SingularityDeployStatistics deployStatistics, SingularityPendingRequest pendingRequest, List<SingularityTaskId> matchingTaskIds) { deleteScheduledTasks(stateCache.getScheduledTasks(), pendingRequest); final int numMissingInstances = getNumMissingInstances(matchingTaskIds, request, pendingRequest); LOG.debug( "Missing {} instances of request {} (matching tasks: {}), pending request: {}", numMissingInstances, request.getId(), matchingTaskIds, pendingRequest); if (numMissingInstances > 0) { final List<SingularityPendingTask> scheduledTasks = getScheduledTaskIds( numMissingInstances, matchingTaskIds, request, state, deployStatistics, pendingRequest.getDeployId(), pendingRequest); if (!scheduledTasks.isEmpty()) { LOG.trace("Scheduling tasks: {}", scheduledTasks); for (SingularityPendingTask scheduledTask : scheduledTasks) { taskManager.savePendingTask(scheduledTask); } } else { LOG.info( "No new scheduled tasks found for {}, setting state to {}", request.getId(), RequestState.FINISHED); requestManager.finish(request, System.currentTimeMillis()); } } else if (numMissingInstances < 0) { final long now = System.currentTimeMillis(); Collections.sort( matchingTaskIds, Collections.reverseOrder( SingularityTaskId.INSTANCE_NO_COMPARATOR)); // clean the highest numbers for (int i = 0; i < Math.abs(numMissingInstances); i++) { final SingularityTaskId toCleanup = matchingTaskIds.get(i); LOG.info( "Cleaning up task {} due to new request {} - scaling down to {} instances", toCleanup.getId(), request.getId(), request.getInstancesSafe()); taskManager.createTaskCleanup( new SingularityTaskCleanup( pendingRequest.getUser(), TaskCleanupType.SCALING_DOWN, now, toCleanup, Optional.<String>absent())); } } return numMissingInstances; }