private String getDefaultPath(String taskId, String qPath) {
  if (!Strings.isNullOrEmpty(qPath)) {
    return qPath;
  }

  if (configuration.isSandboxDefaultsToTaskId()) {
    return taskId;
  }

  return "";
}
@POST @Path("/abort") @ApiOperation("Abort the Mesos scheduler driver.") public void abort() { checkForbidden( configuration.isAllowTestResourceCalls(), "Test resource calls are disabled (set isAllowTestResourceCalls to true in configuration)"); abort.abort(AbortReason.TEST_ABORT, Optional.<Throwable>absent()); }
@POST @Path("/stop") @ApiOperation("Stop the Mesos scheduler driver.") public void stop() throws Exception { checkForbidden( configuration.isAllowTestResourceCalls(), "Test resource calls are disabled (set isAllowTestResourceCalls to true in configuration)"); managed.stop(); }
@POST @Path("/notleader") @ApiOperation("Make this instanceo of Singularity believe it's lost leadership.") public void setNotLeader() { checkForbidden( configuration.isAllowTestResourceCalls(), "Test resource calls are disabled (set isAllowTestResourceCalls to true in configuration)"); managed.notLeader(); }
@POST @Path("/scheduler/statusUpdate/{taskId}/{taskState}") @ApiOperation("Force an update for a specific task.") public void statusUpdate( @PathParam("taskId") String taskId, @PathParam("taskState") String taskState) { checkForbidden( configuration.isAllowTestResourceCalls(), "Test resource calls are disabled (set isAllowTestResourceCalls to true in configuration)"); driver .getScheduler() .statusUpdate( null, TaskStatus.newBuilder() .setTaskId(TaskID.newBuilder().setValue(taskId)) .setState(TaskState.valueOf(taskState)) .build()); }
public SingularityHealthcheckAsyncHandler(
    SingularityExceptionNotifier exceptionNotifier,
    SingularityConfiguration configuration,
    SingularityHealthchecker healthchecker,
    SingularityNewTaskChecker newTaskChecker,
    TaskManager taskManager,
    SingularityAbort abort,
    SingularityTask task) {
  this.exceptionNotifier = exceptionNotifier;
  this.taskManager = taskManager;
  this.newTaskChecker = newTaskChecker;
  this.healthchecker = healthchecker;
  this.abort = abort;
  this.task = task;
  this.maxHealthcheckResponseBodyBytes = configuration.getMaxHealthcheckResponseBodyBytes();

  startTime = System.currentTimeMillis();
}
@SuppressWarnings("deprecation") public Optional<MesosFileChunkObject> read( String slaveHostname, String fullPath, Optional<Long> offset, Optional<Long> length) throws SlaveNotFoundException { try { final AsyncHttpClient.BoundRequestBuilder builder = asyncHttpClient .prepareGet(String.format("http://%s:5051/files/read.json", slaveHostname)) .addQueryParameter("path", fullPath); PerRequestConfig timeoutConfig = new PerRequestConfig(); timeoutConfig.setRequestTimeoutInMs((int) configuration.getSandboxHttpTimeoutMillis()); builder.setPerRequestConfig(timeoutConfig); if (offset.isPresent()) { builder.addQueryParameter("offset", offset.get().toString()); } if (length.isPresent()) { builder.addQueryParameter("length", length.get().toString()); } final Response response = builder.execute().get(); if (response.getStatusCode() == 404) { return Optional.absent(); } if (response.getStatusCode() != 200) { throw new RuntimeException( String.format("Got HTTP %s from Mesos slave", response.getStatusCode())); } return Optional.of( objectMapper.readValue(response.getResponseBodyAsStream(), MesosFileChunkObject.class)); } catch (ConnectException ce) { throw new SlaveNotFoundException(ce); } catch (Exception e) { throw Throwables.propagate(e); } }
private Optional<Long> getNextRunAt(
    SingularityRequest request,
    RequestState state,
    SingularityDeployStatistics deployStatistics,
    PendingType pendingType) {
  final long now = System.currentTimeMillis();

  long nextRunAt = now;

  if (request.isScheduled()) {
    if (pendingType == PendingType.IMMEDIATE || pendingType == PendingType.RETRY) {
      LOG.info("Scheduling requested immediate run of {}", request.getId());
    } else {
      try {
        Date scheduleFrom = new Date(now);

        CronExpression cronExpression = new CronExpression(request.getQuartzScheduleSafe());

        final Date nextRunAtDate = cronExpression.getNextValidTimeAfter(scheduleFrom);

        if (nextRunAtDate == null) {
          return Optional.absent();
        }

        LOG.trace(
            "Calculating nextRunAtDate for {} (schedule: {}): {} (from: {})",
            request.getId(),
            request.getSchedule(),
            nextRunAtDate,
            scheduleFrom);

        // don't create a schedule that is overdue, as this is used to indicate that
        // Singularity is not fulfilling requests.
        nextRunAt = Math.max(nextRunAtDate.getTime(), now);

        LOG.trace(
            "Scheduling next run of {} (schedule: {}) at {} (from: {})",
            request.getId(),
            request.getSchedule(),
            nextRunAtDate,
            scheduleFrom);
      } catch (ParseException pe) {
        throw Throwables.propagate(pe);
      }
    }
  }

  if (pendingType == PendingType.TASK_DONE
      && request.getWaitAtLeastMillisAfterTaskFinishesForReschedule().or(0L) > 0) {
    nextRunAt =
        Math.max(
            nextRunAt, now + request.getWaitAtLeastMillisAfterTaskFinishesForReschedule().get());

    LOG.trace(
        "Adjusted next run of {} to {} (by {}) due to waitAtLeastMillisAfterTaskFinishesForReschedule",
        request.getId(),
        nextRunAt,
        JavaUtils.durationFromMillis(
            request.getWaitAtLeastMillisAfterTaskFinishesForReschedule().get()));
  }

  if (state == RequestState.SYSTEM_COOLDOWN && pendingType != PendingType.NEW_DEPLOY) {
    final long prevNextRunAt = nextRunAt;
    nextRunAt =
        Math.max(
            nextRunAt,
            now + TimeUnit.SECONDS.toMillis(configuration.getCooldownMinScheduleSeconds()));
    LOG.trace(
        "Adjusted next run of {} to {} (from: {}) due to cooldown",
        request.getId(),
        nextRunAt,
        prevNextRunAt);
  }

  return Optional.of(nextRunAt);
}
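// Illustrative sketch (hypothetical helper, not part of the original scheduler code): the core of
// the cron branch above shown in isolation. The "0 0 12 * * ?" quartz expression (daily at noon)
// is an assumed example value.
private long exampleCronNextRunAt(long now) throws ParseException {
  CronExpression cron = new CronExpression("0 0 12 * * ?");
  Date next = cron.getNextValidTimeAfter(new Date(now));
  if (next == null) {
    return now;
  }
  // As above, clamp to "now" so the computed run time is never already overdue.
  return Math.max(next.getTime(), now);
}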
private void updateDeployStatistics(
    SingularityDeployStatistics deployStatistics,
    SingularityTaskId taskId,
    long timestamp,
    ExtendedTaskState state,
    Optional<PendingType> scheduleResult) {
  SingularityDeployStatisticsBuilder bldr = deployStatistics.toBuilder();

  if (bldr.getAverageRuntimeMillis().isPresent()) {
    long newAvgRuntimeMillis =
        (bldr.getAverageRuntimeMillis().get() * bldr.getNumTasks()
                + (timestamp - taskId.getStartedAt()))
            / (bldr.getNumTasks() + 1);

    bldr.setAverageRuntimeMillis(Optional.of(newAvgRuntimeMillis));
  } else {
    bldr.setAverageRuntimeMillis(Optional.of(timestamp - taskId.getStartedAt()));
  }

  bldr.setNumTasks(bldr.getNumTasks() + 1);

  if (!bldr.getLastFinishAt().isPresent() || timestamp > bldr.getLastFinishAt().get()) {
    bldr.setLastFinishAt(Optional.of(timestamp));
    bldr.setLastTaskState(Optional.of(state));
  }

  final ListMultimap<Integer, Long> instanceSequentialFailureTimestamps =
      bldr.getInstanceSequentialFailureTimestamps();
  final List<Long> sequentialFailureTimestamps =
      instanceSequentialFailureTimestamps.get(taskId.getInstanceNo());

  if (!state.isSuccess()) {
    if (SingularityTaskHistoryUpdate.getUpdate(
            taskManager.getTaskHistoryUpdates(taskId), ExtendedTaskState.TASK_CLEANING)
        .isPresent()) {
      LOG.debug("{} failed with {} after cleaning - ignoring it for cooldown", taskId, state);
    } else {
      if (sequentialFailureTimestamps.size() < configuration.getCooldownAfterFailures()) {
        sequentialFailureTimestamps.add(timestamp);
      } else if (timestamp > sequentialFailureTimestamps.get(0)) {
        sequentialFailureTimestamps.set(0, timestamp);
      }

      Collections.sort(sequentialFailureTimestamps);
    }
  } else {
    bldr.setNumSuccess(bldr.getNumSuccess() + 1);
    sequentialFailureTimestamps.clear();
  }

  if (scheduleResult.isPresent() && scheduleResult.get() == PendingType.RETRY) {
    bldr.setNumSequentialRetries(bldr.getNumSequentialRetries() + 1);
  } else {
    bldr.setNumSequentialRetries(0);
  }

  final SingularityDeployStatistics newStatistics = bldr.build();

  LOG.trace("Saving new deploy statistics {}", newStatistics);

  deployManager.saveDeployStatistics(newStatistics);
}
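// Worked example of the running-average update above (illustrative numbers): with an existing
// average of 100ms over 4 finished tasks and a new run of 200ms, the new average is
// (100 * 4 + 200) / (4 + 1) = 120ms, the same (avg * numTasks + runtime) / (numTasks + 1) form
// applied on the builder.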
public SlaveMatchState doesOfferMatch(
    Protos.Offer offer,
    SingularityTaskRequest taskRequest,
    SingularitySchedulerStateCache stateCache) {
  final String host = offer.getHostname();
  final String rackId = slaveAndRackHelper.getRackIdOrDefault(offer);
  final String slaveId = offer.getSlaveId().getValue();

  final MachineState currentSlaveState =
      stateCache.getSlave(slaveId).get().getCurrentState().getState();

  if (currentSlaveState == MachineState.FROZEN) {
    return SlaveMatchState.SLAVE_FROZEN;
  }

  if (currentSlaveState.isDecommissioning()) {
    return SlaveMatchState.SLAVE_DECOMMISSIONING;
  }

  final MachineState currentRackState =
      stateCache.getRack(rackId).get().getCurrentState().getState();

  if (currentRackState == MachineState.FROZEN) {
    return SlaveMatchState.RACK_FROZEN;
  }

  if (currentRackState.isDecommissioning()) {
    return SlaveMatchState.RACK_DECOMMISSIONING;
  }

  if (!taskRequest.getRequest().getRackAffinity().or(Collections.<String>emptyList()).isEmpty()) {
    if (!taskRequest.getRequest().getRackAffinity().get().contains(rackId)) {
      LOG.trace(
          "Task {} requires a rack in {} (current rack {})",
          taskRequest.getPendingTask().getPendingTaskId(),
          taskRequest.getRequest().getRackAffinity().get(),
          rackId);
      return SlaveMatchState.RACK_AFFINITY_NOT_MATCHING;
    }
  }

  final SlavePlacement slavePlacement =
      taskRequest.getRequest().getSlavePlacement().or(configuration.getDefaultSlavePlacement());

  if (!taskRequest.getRequest().isRackSensitive() && slavePlacement == SlavePlacement.GREEDY) {
    return SlaveMatchState.NOT_RACK_OR_SLAVE_PARTICULAR;
  }

  final int numDesiredInstances = taskRequest.getRequest().getInstancesSafe();
  double numOnRack = 0;
  double numOnSlave = 0;
  double numCleaningOnSlave = 0;
  double numOtherDeploysOnSlave = 0;

  final String sanitizedHost = JavaUtils.getReplaceHyphensWithUnderscores(host);
  final String sanitizedRackId = JavaUtils.getReplaceHyphensWithUnderscores(rackId);
  Collection<SingularityTaskId> cleaningTasks = stateCache.getCleaningTasks();

  for (SingularityTaskId taskId :
      SingularityTaskId.matchingAndNotIn(
          stateCache.getActiveTaskIds(),
          taskRequest.getRequest().getId(),
          Collections.<SingularityTaskId>emptyList())) {
    // TODO consider using executorIds
    if (taskId.getSanitizedHost().equals(sanitizedHost)) {
      if (taskRequest.getDeploy().getId().equals(taskId.getDeployId())) {
        if (cleaningTasks.contains(taskId)) {
          numCleaningOnSlave++;
        } else {
          numOnSlave++;
        }
      } else {
        numOtherDeploysOnSlave++;
      }
    }
    if (taskId.getSanitizedRackId().equals(sanitizedRackId)
        && !cleaningTasks.contains(taskId)
        && taskRequest.getDeploy().getId().equals(taskId.getDeployId())) {
      numOnRack++;
    }
  }

  if (taskRequest.getRequest().isRackSensitive()) {
    final double numPerRack = numDesiredInstances / (double) stateCache.getNumActiveRacks();
    final boolean isRackOk = numOnRack < numPerRack;

    if (!isRackOk) {
      LOG.trace(
          "Rejecting RackSensitive task {} from slave {} ({}) due to numOnRack {} and cleaningOnSlave {}",
          taskRequest.getRequest().getId(),
          slaveId,
          host,
          numOnRack,
          numCleaningOnSlave);
      return SlaveMatchState.RACK_SATURATED;
    }
  }

  switch (slavePlacement) {
    case SEPARATE:
    case SEPARATE_BY_DEPLOY:
      if (numOnSlave > 0 || numCleaningOnSlave > 0) {
        LOG.trace(
            "Rejecting SEPARATE task {} from slave {} ({}) due to numOnSlave {} numCleaningOnSlave {}",
            taskRequest.getRequest().getId(),
            slaveId,
            host,
            numOnSlave,
            numCleaningOnSlave);
        return SlaveMatchState.SLAVE_SATURATED;
      }
      break;
    case SEPARATE_BY_REQUEST:
      if (numOnSlave > 0 || numCleaningOnSlave > 0 || numOtherDeploysOnSlave > 0) {
        LOG.trace(
            "Rejecting SEPARATE task {} from slave {} ({}) due to numOnSlave {} numCleaningOnSlave {} numOtherDeploysOnSlave {}",
            taskRequest.getRequest().getId(),
            slaveId,
            host,
            numOnSlave,
            numCleaningOnSlave,
            numOtherDeploysOnSlave);
        return SlaveMatchState.SLAVE_SATURATED;
      }
      break;
    case OPTIMISTIC:
      final double numPerSlave = numDesiredInstances / (double) stateCache.getNumActiveSlaves();
      final boolean isSlaveOk = numOnSlave < numPerSlave;

      if (!isSlaveOk) {
        LOG.trace(
            "Rejecting OPTIMISTIC task {} from slave {} ({}) due to numOnSlave {}",
            taskRequest.getRequest().getId(),
            slaveId,
            host,
            numOnSlave);
        return SlaveMatchState.SLAVE_SATURATED;
      }
      break;
    case GREEDY:
  }

  return SlaveMatchState.OK;
}
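// Worked example of the placement math above (illustrative numbers): with OPTIMISTIC placement,
// numDesiredInstances = 6 and 3 active slaves, numPerSlave = 6 / 3.0 = 2.0. An offer from a slave
// already running 2 instances of this deploy (numOnSlave = 2) fails the numOnSlave < numPerSlave
// check and is rejected as SLAVE_SATURATED, while a slave running 1 instance is accepted.
// Rack-sensitive requests apply the same style of check per rack via numPerRack.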