private void updateDeployStatistics( SingularityDeployStatistics deployStatistics, SingularityTaskId taskId, long timestamp, ExtendedTaskState state, Optional<PendingType> scheduleResult) { SingularityDeployStatisticsBuilder bldr = deployStatistics.toBuilder(); if (bldr.getAverageRuntimeMillis().isPresent()) { long newAvgRuntimeMillis = (bldr.getAverageRuntimeMillis().get() * bldr.getNumTasks() + (timestamp - taskId.getStartedAt())) / (bldr.getNumTasks() + 1); bldr.setAverageRuntimeMillis(Optional.of(newAvgRuntimeMillis)); } else { bldr.setAverageRuntimeMillis(Optional.of(timestamp - taskId.getStartedAt())); } bldr.setNumTasks(bldr.getNumTasks() + 1); if (!bldr.getLastFinishAt().isPresent() || timestamp > bldr.getLastFinishAt().get()) { bldr.setLastFinishAt(Optional.of(timestamp)); bldr.setLastTaskState(Optional.of(state)); } final ListMultimap<Integer, Long> instanceSequentialFailureTimestamps = bldr.getInstanceSequentialFailureTimestamps(); final List<Long> sequentialFailureTimestamps = instanceSequentialFailureTimestamps.get(taskId.getInstanceNo()); if (!state.isSuccess()) { if (SingularityTaskHistoryUpdate.getUpdate( taskManager.getTaskHistoryUpdates(taskId), ExtendedTaskState.TASK_CLEANING) .isPresent()) { LOG.debug("{} failed with {} after cleaning - ignoring it for cooldown", taskId, state); } else { if (sequentialFailureTimestamps.size() < configuration.getCooldownAfterFailures()) { sequentialFailureTimestamps.add(timestamp); } else if (timestamp > sequentialFailureTimestamps.get(0)) { sequentialFailureTimestamps.set(0, timestamp); } Collections.sort(sequentialFailureTimestamps); } } else { bldr.setNumSuccess(bldr.getNumSuccess() + 1); sequentialFailureTimestamps.clear(); } if (scheduleResult.isPresent() && scheduleResult.get() == PendingType.RETRY) { bldr.setNumSequentialRetries(bldr.getNumSequentialRetries() + 1); } else { bldr.setNumSequentialRetries(0); } final SingularityDeployStatistics newStatistics = bldr.build(); LOG.trace("Saving new deploy statistics {}", newStatistics); deployManager.saveDeployStatistics(newStatistics); }
private Optional<PendingType> handleCompletedTaskWithStatistics( Optional<SingularityTask> task, SingularityTaskId taskId, long timestamp, ExtendedTaskState state, SingularityDeployStatistics deployStatistics, SingularityCreateResult taskHistoryUpdateCreateResult, SingularitySchedulerStateCache stateCache) { final Optional<SingularityRequestWithState> maybeRequestWithState = requestManager.getRequest(taskId.getRequestId()); if (!isRequestActive(maybeRequestWithState)) { LOG.warn( "Not scheduling a new task, {} is {}", taskId.getRequestId(), SingularityRequestWithState.getRequestState(maybeRequestWithState)); return Optional.absent(); } RequestState requestState = maybeRequestWithState.get().getState(); final SingularityRequest request = maybeRequestWithState.get().getRequest(); final Optional<SingularityRequestDeployState> requestDeployState = deployManager.getRequestDeployState(request.getId()); if (!isDeployInUse(requestDeployState, taskId.getDeployId(), true)) { LOG.debug( "Task {} completed, but it didn't match active deploy state {} - ignoring", taskId.getId(), requestDeployState); return Optional.absent(); } if (taskHistoryUpdateCreateResult == SingularityCreateResult.CREATED && requestState != RequestState.SYSTEM_COOLDOWN) { mailer.sendTaskCompletedMail(task, taskId, request, state); } else if (requestState == RequestState.SYSTEM_COOLDOWN) { LOG.debug("Not sending a task completed email because task {} is in SYSTEM_COOLDOWN", taskId); } else { LOG.debug( "Not sending a task completed email for task {} because Singularity already processed this update", taskId); } if (!state.isSuccess() && taskHistoryUpdateCreateResult == SingularityCreateResult.CREATED && cooldown.shouldEnterCooldown( request, taskId, requestState, deployStatistics, timestamp)) { LOG.info("Request {} is entering cooldown due to task {}", request.getId(), taskId); requestState = RequestState.SYSTEM_COOLDOWN; requestManager.cooldown(request, System.currentTimeMillis()); mailer.sendRequestInCooldownMail(request); } PendingType pendingType = PendingType.TASK_DONE; if (!state.isSuccess() && shouldRetryImmediately(request, deployStatistics)) { LOG.debug("Retrying {} because {}", request.getId(), state); pendingType = PendingType.RETRY; } else if (!request.isAlwaysRunning()) { return Optional.absent(); } if (state.isSuccess() && requestState == RequestState.SYSTEM_COOLDOWN) { // TODO send not cooldown anymore email LOG.info("Request {} succeeded a task, removing from cooldown", request.getId()); requestState = RequestState.ACTIVE; requestManager.exitCooldown(request, System.currentTimeMillis()); } SingularityPendingRequest pendingRequest = new SingularityPendingRequest( request.getId(), requestDeployState.get().getActiveDeploy().get().getDeployId(), System.currentTimeMillis(), pendingType); scheduleTasks( stateCache, request, requestState, deployStatistics, pendingRequest, getMatchingTaskIds(stateCache, request, pendingRequest)); return Optional.of(pendingType); }