/**
 * Decides whether a request may be retried again, based on its configured retry limit.
 *
 * <p>A request without {@code numRetriesOnFailure} is never retried. Otherwise a retry is
 * allowed only while the deploy's count of sequential retries is still below that limit.
 * The decision is logged at debug level in both cases.
 *
 * @param request request whose {@code numRetriesOnFailure} setting is consulted
 * @param deployStatistics statistics supplying the current number of sequential retries
 * @return {@code true} if another retry is allowed, {@code false} otherwise
 */
private boolean shouldRetryImmediately(
  SingularityRequest request,
  SingularityDeployStatistics deployStatistics
) {
  if (request.getNumRetriesOnFailure().isPresent()) {
    final int sequentialRetries = deployStatistics.getNumSequentialRetries();
    final boolean retriesRemaining =
      sequentialRetries < request.getNumRetriesOnFailure().get();

    if (retriesRemaining) {
      LOG.debug(
        "Request {} had {} retries in a row - retrying again (num retries on failure: {})",
        request.getId(),
        sequentialRetries,
        request.getNumRetriesOnFailure()
      );
    } else {
      LOG.debug(
        "Request {} had {} retries in a row, not retrying again (num retries on failure: {})",
        request.getId(),
        sequentialRetries,
        request.getNumRetriesOnFailure()
      );
    }

    return retriesRemaining;
  }

  // No retry limit configured on the request: never retry.
  return false;
}
private void updateDeployStatistics( SingularityDeployStatistics deployStatistics, SingularityTaskId taskId, long timestamp, ExtendedTaskState state, Optional<PendingType> scheduleResult) { SingularityDeployStatisticsBuilder bldr = deployStatistics.toBuilder(); if (bldr.getAverageRuntimeMillis().isPresent()) { long newAvgRuntimeMillis = (bldr.getAverageRuntimeMillis().get() * bldr.getNumTasks() + (timestamp - taskId.getStartedAt())) / (bldr.getNumTasks() + 1); bldr.setAverageRuntimeMillis(Optional.of(newAvgRuntimeMillis)); } else { bldr.setAverageRuntimeMillis(Optional.of(timestamp - taskId.getStartedAt())); } bldr.setNumTasks(bldr.getNumTasks() + 1); if (!bldr.getLastFinishAt().isPresent() || timestamp > bldr.getLastFinishAt().get()) { bldr.setLastFinishAt(Optional.of(timestamp)); bldr.setLastTaskState(Optional.of(state)); } final ListMultimap<Integer, Long> instanceSequentialFailureTimestamps = bldr.getInstanceSequentialFailureTimestamps(); final List<Long> sequentialFailureTimestamps = instanceSequentialFailureTimestamps.get(taskId.getInstanceNo()); if (!state.isSuccess()) { if (SingularityTaskHistoryUpdate.getUpdate( taskManager.getTaskHistoryUpdates(taskId), ExtendedTaskState.TASK_CLEANING) .isPresent()) { LOG.debug("{} failed with {} after cleaning - ignoring it for cooldown", taskId, state); } else { if (sequentialFailureTimestamps.size() < configuration.getCooldownAfterFailures()) { sequentialFailureTimestamps.add(timestamp); } else if (timestamp > sequentialFailureTimestamps.get(0)) { sequentialFailureTimestamps.set(0, timestamp); } Collections.sort(sequentialFailureTimestamps); } } else { bldr.setNumSuccess(bldr.getNumSuccess() + 1); sequentialFailureTimestamps.clear(); } if (scheduleResult.isPresent() && scheduleResult.get() == PendingType.RETRY) { bldr.setNumSequentialRetries(bldr.getNumSequentialRetries() + 1); } else { bldr.setNumSequentialRetries(0); } final SingularityDeployStatistics newStatistics = bldr.build(); LOG.trace("Saving new deploy 
statistics {}", newStatistics); deployManager.saveDeployStatistics(newStatistics); }