Code Example #1
  private void checkRackAfterSlaveLoss(SingularitySlave lostSlave) {
    List<SingularitySlave> slaves = slaveManager.getObjectsFiltered(MachineState.ACTIVE);

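    // Count how many active slaves remain in the same rack as the lost slave.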
    int numInRack = 0;

    for (SingularitySlave slave : slaves) {
      if (slave.getRackId().equals(lostSlave.getRackId())) {
        numInRack++;
      }
    }

    LOG.info("Found {} slaves left in rack {}", numInRack, lostSlave.getRackId());

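    // With no active slaves left in the rack, the rack itself is considered dead.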
    if (numInRack == 0) {
      rackManager.changeState(lostSlave.getRackId(), MachineState.DEAD, Optional.<String>absent());
    }
  }
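
Code Example #1 only makes sense once a slave has actually been lost. The sketch below shows one plausible call site; it is an illustration only, and the slaveLost(...) handler, the getId() accessor, and the slaveManager.getObject(...) lookup are assumptions, not part of the listing above.

  // Hypothetical caller (sketch): react to a lost slave by marking it DEAD,
  // then check whether its rack still has any active slaves.
  public void slaveLost(SlaveID slaveIdObj) {
    final String slaveId = slaveIdObj.getValue();

    // Assumed lookup from the Mesos slave id back to the SingularitySlave record.
    Optional<SingularitySlave> maybeSlave = slaveManager.getObject(slaveId);

    if (maybeSlave.isPresent()) {
      slaveManager.changeState(maybeSlave.get().getId(), MachineState.DEAD, Optional.<String>absent());
      checkRackAfterSlaveLoss(maybeSlave.get());
    } else {
      LOG.warn("Lost slave {}, but it was not known to the slave manager", slaveId);
    }
  }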
Code Example #2
  public void checkForDecomissions(SingularitySchedulerStateCache stateCache) {
    final long start = System.currentTimeMillis();

    final Set<String> requestIdsToReschedule = Sets.newHashSet();
    final Set<SingularityTaskId> matchingTaskIds = Sets.newHashSet();

    final Collection<SingularityTaskId> activeTaskIds = stateCache.getActiveTaskIds();

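    // Slaves that have started decommissioning; the map value is the state each one should move to.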
    final Map<SingularitySlave, MachineState> slaves =
        getDefaultMap(slaveManager.getObjectsFiltered(MachineState.STARTING_DECOMMISSION));

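    // Clean up every active task still running on a decommissioning slave; a slave
    // with nothing left running on it can be marked fully decommissioned.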
    for (SingularitySlave slave : slaves.keySet()) {
      boolean foundTask = false;

      for (SingularityTask activeTask : taskManager.getTasksOnSlave(activeTaskIds, slave)) {
        cleanupTaskDueToDecomission(requestIdsToReschedule, matchingTaskIds, activeTask, slave);
        foundTask = true;
      }

      if (!foundTask) {
        slaves.put(slave, MachineState.DECOMMISSIONED);
      }
    }

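    // Repeat the same check for racks that have started decommissioning, skipping
    // tasks that were already cleaned up above.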
    final Map<SingularityRack, MachineState> racks =
        getDefaultMap(rackManager.getObjectsFiltered(MachineState.STARTING_DECOMMISSION));

    for (SingularityRack rack : racks.keySet()) {
      boolean foundTask = false;

      for (SingularityTaskId activeTaskId : activeTaskIds) {
        if (rack.getId().equals(activeTaskId.getRackId())) {
          foundTask = true;
        }

        if (matchingTaskIds.contains(activeTaskId)) {
          continue;
        }

        if (rack.getId().equals(activeTaskId.getRackId())) {
          Optional<SingularityTask> maybeTask = taskManager.getTask(activeTaskId);
          cleanupTaskDueToDecomission(
              requestIdsToReschedule, matchingTaskIds, maybeTask.get(), rack);
        }
      }

      if (!foundTask) {
        racks.put(rack, MachineState.DECOMMISSIONED);
      }
    }

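    // Re-queue each request whose tasks were cleaned up, against its in-use deploy.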
    for (String requestId : requestIdsToReschedule) {
      LOG.trace("Rescheduling request {} due to decomissions", requestId);

      Optional<String> maybeDeployId = deployManager.getInUseDeployId(requestId);

      if (maybeDeployId.isPresent()) {
        requestManager.addToPendingQueue(
            new SingularityPendingRequest(
                requestId, maybeDeployId.get(), start, PendingType.DECOMISSIONED_SLAVE_OR_RACK));
      } else {
        LOG.warn("Not rescheduling a request ({}) because of no active deploy", requestId);
      }
    }

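    // Apply the collected target states through the slave and rack managers.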
    changeState(slaves, slaveManager);
    changeState(racks, rackManager);

    if (slaves.isEmpty()
        && racks.isEmpty()
        && requestIdsToReschedule.isEmpty()
        && matchingTaskIds.isEmpty()) {
      LOG.trace("Decomission check found nothing");
    } else {
      LOG.info(
          "Found {} decomissioning slaves, {} decomissioning racks, rescheduling {} requests and scheduling {} tasks for cleanup in {}",
          slaves.size(),
          racks.size(),
          requestIdsToReschedule.size(),
          matchingTaskIds.size(),
          JavaUtils.duration(start));
    }
  }
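
Code Example #2 relies on two helpers that are not shown in the listing: getDefaultMap(...) and the two-argument changeState(...). A minimal sketch of both follows; the generic signatures, the SingularityMachineAbstraction and AbstractMachineManager types, and the DECOMMISSIONING default are assumptions inferred from how the helpers are called above.

  // Sketch: wrap each machine that has started decommissioning in a map whose value
  // is the target state, defaulting to DECOMMISSIONING until the caller decides otherwise.
  private <T extends SingularityMachineAbstraction> Map<T, MachineState> getDefaultMap(List<T> objects) {
    Map<T, MachineState> map = Maps.newHashMapWithExpectedSize(objects.size());
    for (T object : objects) {
      map.put(object, MachineState.DECOMMISSIONING);
    }
    return map;
  }

  // Sketch: push each machine's target state through its manager, mirroring the
  // single changeState(...) call in Code Example #1.
  private <T extends SingularityMachineAbstraction> void changeState(
      Map<T, MachineState> map, AbstractMachineManager<T> manager) {
    for (Map.Entry<T, MachineState> entry : map.entrySet()) {
      manager.changeState(entry.getKey().getId(), entry.getValue(), Optional.<String>absent());
    }
  }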