private String getDefaultPath(String taskId, String qPath) {
   if (!Strings.isNullOrEmpty(qPath)) {
     return qPath;
   }
   if (configuration.isSandboxDefaultsToTaskId()) {
     return taskId;
   }
   return "";
 }
  @POST
  @Path("/abort")
  @ApiOperation("Abort the Mesos scheduler driver.")
  public void abort() {
    checkForbidden(
        configuration.isAllowTestResourceCalls(),
        "Test resource calls are disabled (set isAllowTestResourceCalls to true in configuration)");

    abort.abort(AbortReason.TEST_ABORT, Optional.<Throwable>absent());
  }
  @POST
  @Path("/stop")
  @ApiOperation("Stop the Mesos scheduler driver.")
  public void stop() throws Exception {
    checkForbidden(
        configuration.isAllowTestResourceCalls(),
        "Test resource calls are disabled (set isAllowTestResourceCalls to true in configuration)");

    managed.stop();
  }
  @POST
  @Path("/notleader")
  @ApiOperation("Make this instanceo of Singularity believe it's lost leadership.")
  public void setNotLeader() {
    checkForbidden(
        configuration.isAllowTestResourceCalls(),
        "Test resource calls are disabled (set isAllowTestResourceCalls to true in configuration)");

    managed.notLeader();
  }
  @POST
  @Path("/scheduler/statusUpdate/{taskId}/{taskState}")
  @ApiOperation("Force an update for a specific task.")
  public void statusUpdate(
      @PathParam("taskId") String taskId, @PathParam("taskState") String taskState) {
    checkForbidden(
        configuration.isAllowTestResourceCalls(),
        "Test resource calls are disabled (set isAllowTestResourceCalls to true in configuration)");

    driver
        .getScheduler()
        .statusUpdate(
            null,
            TaskStatus.newBuilder()
                .setTaskId(TaskID.newBuilder().setValue(taskId))
                .setState(TaskState.valueOf(taskState))
                .build());
  }
  public SingularityHealthcheckAsyncHandler(
      SingularityExceptionNotifier exceptionNotifier,
      SingularityConfiguration configuration,
      SingularityHealthchecker healthchecker,
      SingularityNewTaskChecker newTaskChecker,
      TaskManager taskManager,
      SingularityAbort abort,
      SingularityTask task) {
    this.exceptionNotifier = exceptionNotifier;
    this.taskManager = taskManager;
    this.newTaskChecker = newTaskChecker;
    this.healthchecker = healthchecker;
    this.abort = abort;
    this.task = task;
    this.maxHealthcheckResponseBodyBytes = configuration.getMaxHealthcheckResponseBodyBytes();

    startTime = System.currentTimeMillis();
  }
Example #7
0
  @SuppressWarnings("deprecation")
  public Optional<MesosFileChunkObject> read(
      String slaveHostname, String fullPath, Optional<Long> offset, Optional<Long> length)
      throws SlaveNotFoundException {
    try {
      final AsyncHttpClient.BoundRequestBuilder builder =
          asyncHttpClient
              .prepareGet(String.format("http://%s:5051/files/read.json", slaveHostname))
              .addQueryParameter("path", fullPath);

      PerRequestConfig timeoutConfig = new PerRequestConfig();
      timeoutConfig.setRequestTimeoutInMs((int) configuration.getSandboxHttpTimeoutMillis());
      builder.setPerRequestConfig(timeoutConfig);

      if (offset.isPresent()) {
        builder.addQueryParameter("offset", offset.get().toString());
      }

      if (length.isPresent()) {
        builder.addQueryParameter("length", length.get().toString());
      }

      final Response response = builder.execute().get();

      if (response.getStatusCode() == 404) {
        return Optional.absent();
      }

      if (response.getStatusCode() != 200) {
        throw new RuntimeException(
            String.format("Got HTTP %s from Mesos slave", response.getStatusCode()));
      }

      return Optional.of(
          objectMapper.readValue(response.getResponseBodyAsStream(), MesosFileChunkObject.class));
    } catch (ConnectException ce) {
      throw new SlaveNotFoundException(ce);
    } catch (Exception e) {
      throw Throwables.propagate(e);
    }
  }
  private Optional<Long> getNextRunAt(
      SingularityRequest request,
      RequestState state,
      SingularityDeployStatistics deployStatistics,
      PendingType pendingType) {
    final long now = System.currentTimeMillis();

    long nextRunAt = now;

    if (request.isScheduled()) {
      if (pendingType == PendingType.IMMEDIATE || pendingType == PendingType.RETRY) {
        LOG.info("Scheduling requested immediate run of {}", request.getId());
      } else {
        try {
          Date scheduleFrom = new Date(now);

          CronExpression cronExpression = new CronExpression(request.getQuartzScheduleSafe());

          final Date nextRunAtDate = cronExpression.getNextValidTimeAfter(scheduleFrom);

          if (nextRunAtDate == null) {
            return Optional.absent();
          }

          LOG.trace(
              "Calculating nextRunAtDate for {} (schedule: {}): {} (from: {})",
              request.getId(),
              request.getSchedule(),
              nextRunAtDate,
              scheduleFrom);

          nextRunAt =
              Math.max(
                  nextRunAtDate.getTime(),
                  now); // don't create a schedule that is overdue as this is used to indicate that
                        // singularity is not fulfilling requests.

          LOG.trace(
              "Scheduling next run of {} (schedule: {}) at {} (from: {})",
              request.getId(),
              request.getSchedule(),
              nextRunAtDate,
              scheduleFrom);
        } catch (ParseException pe) {
          throw Throwables.propagate(pe);
        }
      }
    }

    if (pendingType == PendingType.TASK_DONE
        && request.getWaitAtLeastMillisAfterTaskFinishesForReschedule().or(0L) > 0) {
      nextRunAt =
          Math.max(
              nextRunAt, now + request.getWaitAtLeastMillisAfterTaskFinishesForReschedule().get());

      LOG.trace(
          "Adjusted next run of {} to {} (by {}) due to waitAtLeastMillisAfterTaskFinishesForReschedule",
          request.getId(),
          nextRunAt,
          JavaUtils.durationFromMillis(
              request.getWaitAtLeastMillisAfterTaskFinishesForReschedule().get()));
    }

    if (state == RequestState.SYSTEM_COOLDOWN && pendingType != PendingType.NEW_DEPLOY) {
      final long prevNextRunAt = nextRunAt;
      nextRunAt =
          Math.max(
              nextRunAt,
              now + TimeUnit.SECONDS.toMillis(configuration.getCooldownMinScheduleSeconds()));
      LOG.trace(
          "Adjusted next run of {} to {} (from: {}) due to cooldown",
          request.getId(),
          nextRunAt,
          prevNextRunAt);
    }

    return Optional.of(nextRunAt);
  }
  private void updateDeployStatistics(
      SingularityDeployStatistics deployStatistics,
      SingularityTaskId taskId,
      long timestamp,
      ExtendedTaskState state,
      Optional<PendingType> scheduleResult) {
    SingularityDeployStatisticsBuilder bldr = deployStatistics.toBuilder();

    if (bldr.getAverageRuntimeMillis().isPresent()) {
      long newAvgRuntimeMillis =
          (bldr.getAverageRuntimeMillis().get() * bldr.getNumTasks()
                  + (timestamp - taskId.getStartedAt()))
              / (bldr.getNumTasks() + 1);

      bldr.setAverageRuntimeMillis(Optional.of(newAvgRuntimeMillis));
    } else {
      bldr.setAverageRuntimeMillis(Optional.of(timestamp - taskId.getStartedAt()));
    }

    bldr.setNumTasks(bldr.getNumTasks() + 1);

    if (!bldr.getLastFinishAt().isPresent() || timestamp > bldr.getLastFinishAt().get()) {
      bldr.setLastFinishAt(Optional.of(timestamp));
      bldr.setLastTaskState(Optional.of(state));
    }

    final ListMultimap<Integer, Long> instanceSequentialFailureTimestamps =
        bldr.getInstanceSequentialFailureTimestamps();
    final List<Long> sequentialFailureTimestamps =
        instanceSequentialFailureTimestamps.get(taskId.getInstanceNo());

    if (!state.isSuccess()) {
      if (SingularityTaskHistoryUpdate.getUpdate(
              taskManager.getTaskHistoryUpdates(taskId), ExtendedTaskState.TASK_CLEANING)
          .isPresent()) {
        LOG.debug("{} failed with {} after cleaning - ignoring it for cooldown", taskId, state);
      } else {

        if (sequentialFailureTimestamps.size() < configuration.getCooldownAfterFailures()) {
          sequentialFailureTimestamps.add(timestamp);
        } else if (timestamp > sequentialFailureTimestamps.get(0)) {
          sequentialFailureTimestamps.set(0, timestamp);
        }

        Collections.sort(sequentialFailureTimestamps);
      }
    } else {
      bldr.setNumSuccess(bldr.getNumSuccess() + 1);
      sequentialFailureTimestamps.clear();
    }

    if (scheduleResult.isPresent() && scheduleResult.get() == PendingType.RETRY) {
      bldr.setNumSequentialRetries(bldr.getNumSequentialRetries() + 1);
    } else {
      bldr.setNumSequentialRetries(0);
    }

    final SingularityDeployStatistics newStatistics = bldr.build();

    LOG.trace("Saving new deploy statistics {}", newStatistics);

    deployManager.saveDeployStatistics(newStatistics);
  }
  public SlaveMatchState doesOfferMatch(
      Protos.Offer offer,
      SingularityTaskRequest taskRequest,
      SingularitySchedulerStateCache stateCache) {
    final String host = offer.getHostname();
    final String rackId = slaveAndRackHelper.getRackIdOrDefault(offer);
    final String slaveId = offer.getSlaveId().getValue();

    final MachineState currentSlaveState =
        stateCache.getSlave(slaveId).get().getCurrentState().getState();

    if (currentSlaveState == MachineState.FROZEN) {
      return SlaveMatchState.SLAVE_FROZEN;
    }

    if (currentSlaveState.isDecommissioning()) {
      return SlaveMatchState.SLAVE_DECOMMISSIONING;
    }

    final MachineState currentRackState =
        stateCache.getRack(rackId).get().getCurrentState().getState();

    if (currentRackState == MachineState.FROZEN) {
      return SlaveMatchState.RACK_FROZEN;
    }

    if (currentRackState.isDecommissioning()) {
      return SlaveMatchState.RACK_DECOMMISSIONING;
    }

    if (!taskRequest.getRequest().getRackAffinity().or(Collections.<String>emptyList()).isEmpty()) {
      if (!taskRequest.getRequest().getRackAffinity().get().contains(rackId)) {
        LOG.trace(
            "Task {} requires a rack in {} (current rack {})",
            taskRequest.getPendingTask().getPendingTaskId(),
            taskRequest.getRequest().getRackAffinity().get(),
            rackId);
        return SlaveMatchState.RACK_AFFINITY_NOT_MATCHING;
      }
    }

    final SlavePlacement slavePlacement =
        taskRequest.getRequest().getSlavePlacement().or(configuration.getDefaultSlavePlacement());

    if (!taskRequest.getRequest().isRackSensitive() && slavePlacement == SlavePlacement.GREEDY) {
      return SlaveMatchState.NOT_RACK_OR_SLAVE_PARTICULAR;
    }

    final int numDesiredInstances = taskRequest.getRequest().getInstancesSafe();
    double numOnRack = 0;
    double numOnSlave = 0;
    double numCleaningOnSlave = 0;
    double numOtherDeploysOnSlave = 0;

    final String sanitizedHost = JavaUtils.getReplaceHyphensWithUnderscores(host);
    final String sanitizedRackId = JavaUtils.getReplaceHyphensWithUnderscores(rackId);
    Collection<SingularityTaskId> cleaningTasks = stateCache.getCleaningTasks();

    for (SingularityTaskId taskId :
        SingularityTaskId.matchingAndNotIn(
            stateCache.getActiveTaskIds(),
            taskRequest.getRequest().getId(),
            Collections.<SingularityTaskId>emptyList())) {
      // TODO consider using executorIds
      if (taskId.getSanitizedHost().equals(sanitizedHost)) {
        if (taskRequest.getDeploy().getId().equals(taskId.getDeployId())) {
          if (cleaningTasks.contains(taskId)) {
            numCleaningOnSlave++;
          } else {
            numOnSlave++;
          }
        } else {
          numOtherDeploysOnSlave++;
        }
      }
      if (taskId.getSanitizedRackId().equals(sanitizedRackId)
          && !cleaningTasks.contains(taskId)
          && taskRequest.getDeploy().getId().equals(taskId.getDeployId())) {
        numOnRack++;
      }
    }

    if (taskRequest.getRequest().isRackSensitive()) {
      final double numPerRack = numDesiredInstances / (double) stateCache.getNumActiveRacks();

      final boolean isRackOk = numOnRack < numPerRack;

      if (!isRackOk) {
        LOG.trace(
            "Rejecting RackSensitive task {} from slave {} ({}) due to numOnRack {} and cleaningOnSlave {}",
            taskRequest.getRequest().getId(),
            slaveId,
            host,
            numOnRack,
            numCleaningOnSlave);
        return SlaveMatchState.RACK_SATURATED;
      }
    }

    switch (slavePlacement) {
      case SEPARATE:
      case SEPARATE_BY_DEPLOY:
        if (numOnSlave > 0 || numCleaningOnSlave > 0) {
          LOG.trace(
              "Rejecting SEPARATE task {} from slave {} ({}) due to numOnSlave {} numCleaningOnSlave {}",
              taskRequest.getRequest().getId(),
              slaveId,
              host,
              numOnSlave,
              numCleaningOnSlave);
          return SlaveMatchState.SLAVE_SATURATED;
        }
        break;
      case SEPARATE_BY_REQUEST:
        if (numOnSlave > 0 || numCleaningOnSlave > 0 || numOtherDeploysOnSlave > 0) {
          LOG.trace(
              "Rejecting SEPARATE task {} from slave {} ({}) due to numOnSlave {} numCleaningOnSlave {} numOtherDeploysOnSlave {}",
              taskRequest.getRequest().getId(),
              slaveId,
              host,
              numOnSlave,
              numCleaningOnSlave,
              numOtherDeploysOnSlave);
          return SlaveMatchState.SLAVE_SATURATED;
        }
        break;
      case OPTIMISTIC:
        final double numPerSlave = numDesiredInstances / (double) stateCache.getNumActiveSlaves();

        final boolean isSlaveOk = numOnSlave < numPerSlave;

        if (!isSlaveOk) {
          LOG.trace(
              "Rejecting OPTIMISTIC task {} from slave {} ({}) due to numOnSlave {}",
              taskRequest.getRequest().getId(),
              slaveId,
              host,
              numOnSlave);
          return SlaveMatchState.SLAVE_SATURATED;
        }
        break;
      case GREEDY:
    }

    return SlaveMatchState.OK;
  }