/**
 * Marks the rack of a lost slave as dead once no active slave is left in it.
 *
 * <p>Walks the slaves currently in the {@code ACTIVE} state, counts those whose rack id equals
 * the lost slave's rack id, logs that count, and changes the rack's state to {@code DEAD} when
 * the count is zero.
 *
 * @param lostSlave the slave that was lost; only its rack id is read here
 */
private void checkRackAfterSlaveLoss(SingularitySlave lostSlave) {
  int remainingInRack = 0;

  for (SingularitySlave activeSlave : slaveManager.getObjectsFiltered(MachineState.ACTIVE)) {
    final boolean sharesRack = activeSlave.getRackId().equals(lostSlave.getRackId());
    if (sharesRack) {
      remainingInRack += 1;
    }
  }

  LOG.info("Found {} slaves left in rack {}", remainingInRack, lostSlave.getRackId());

  // At least one active slave still lives in this rack: leave the rack's state alone.
  if (remainingInRack > 0) {
    return;
  }

  rackManager.changeState(lostSlave.getRackId(), MachineState.DEAD, Optional.<String>absent());
}
/**
 * Processes slaves and racks that are in the {@code STARTING_DECOMMISSION} state.
 *
 * <p>The pass runs in four steps:
 * <ol>
 *   <li>For every decommissioning slave, each active task on that slave is handed to
 *       {@code cleanupTaskDueToDecomission}. A slave with no active tasks is recorded as
 *       {@code DECOMMISSIONED}.</li>
 *   <li>For every decommissioning rack, each active task whose rack id matches is handed to
 *       {@code cleanupTaskDueToDecomission}, unless its id is already in the matched set.
 *       A rack with no active tasks is recorded as {@code DECOMMISSIONED}.</li>
 *   <li>Every request id collected for rescheduling is added to the pending queue, provided
 *       the request has an in-use deploy id; otherwise a warning is logged.</li>
 *   <li>The recorded states are applied through {@code changeState} and a summary is logged.</li>
 * </ol>
 *
 * <p>If an active task id on a decommissioning rack cannot be resolved to a task, the task is
 * skipped with a warning instead of aborting the whole pass. The rack is still considered busy,
 * so it is not marked {@code DECOMMISSIONED} in this pass.
 *
 * @param stateCache source of the currently active task ids
 */
public void checkForDecomissions(SingularitySchedulerStateCache stateCache) {
  final long start = System.currentTimeMillis();

  // NOTE(review): both sets are passed to cleanupTaskDueToDecomission, which presumably fills
  // them in; confirm against that helper.
  final Set<String> requestIdsToReschedule = Sets.newHashSet();
  final Set<SingularityTaskId> matchingTaskIds = Sets.newHashSet();

  final Collection<SingularityTaskId> activeTaskIds = stateCache.getActiveTaskIds();

  final Map<SingularitySlave, MachineState> slaves =
      getDefaultMap(slaveManager.getObjectsFiltered(MachineState.STARTING_DECOMMISSION));

  for (SingularitySlave slave : slaves.keySet()) {
    boolean foundTask = false;

    for (SingularityTask activeTask : taskManager.getTasksOnSlave(activeTaskIds, slave)) {
      cleanupTaskDueToDecomission(requestIdsToReschedule, matchingTaskIds, activeTask, slave);
      foundTask = true;
    }

    if (!foundTask) {
      // Overwrites the value of a key that is already present while iterating keySet().
      // NOTE(review): assumes getDefaultMap returns a map (e.g. HashMap) for which this is not
      // a structural modification; confirm.
      slaves.put(slave, MachineState.DECOMMISSIONED);
    }
  }

  final Map<SingularityRack, MachineState> racks =
      getDefaultMap(rackManager.getObjectsFiltered(MachineState.STARTING_DECOMMISSION));

  for (SingularityRack rack : racks.keySet()) {
    boolean foundTask = false;

    for (SingularityTaskId activeTaskId : activeTaskIds) {
      if (!rack.getId().equals(activeTaskId.getRackId())) {
        continue;
      }

      // Any active task in the rack keeps it from being marked DECOMMISSIONED, even when the
      // task is already in the matched set.
      foundTask = true;

      if (matchingTaskIds.contains(activeTaskId)) {
        continue;
      }

      final Optional<SingularityTask> maybeTask = taskManager.getTask(activeTaskId);

      if (!maybeTask.isPresent()) {
        // The id is listed as active but the task could not be loaded. Skip it rather than
        // letting Optional.get() throw and abort the rest of the pass.
        LOG.warn(
            "Couldn't find task {} while decommissioning rack {}, skipping cleanup for it",
            activeTaskId,
            rack.getId());
        continue;
      }

      cleanupTaskDueToDecomission(
          requestIdsToReschedule, matchingTaskIds, maybeTask.get(), rack);
    }

    if (!foundTask) {
      racks.put(rack, MachineState.DECOMMISSIONED);
    }
  }

  for (String requestId : requestIdsToReschedule) {
    LOG.trace("Rescheduling request {} due to decomissions", requestId);

    final Optional<String> maybeDeployId = deployManager.getInUseDeployId(requestId);

    if (maybeDeployId.isPresent()) {
      requestManager.addToPendingQueue(
          new SingularityPendingRequest(
              requestId,
              maybeDeployId.get(),
              start,
              PendingType.DECOMISSIONED_SLAVE_OR_RACK));
    } else {
      LOG.warn("Not rescheduling a request ({}) because of no active deploy", requestId);
    }
  }

  changeState(slaves, slaveManager);
  changeState(racks, rackManager);

  if (slaves.isEmpty()
      && racks.isEmpty()
      && requestIdsToReschedule.isEmpty()
      && matchingTaskIds.isEmpty()) {
    LOG.trace("Decomission check found nothing");
  } else {
    LOG.info(
        "Found {} decomissioning slaves, {} decomissioning racks, rescheduling {} requests and scheduling {} tasks for cleanup in {}",
        slaves.size(),
        racks.size(),
        requestIdsToReschedule.size(),
        matchingTaskIds.size(),
        JavaUtils.duration(start));
  }
}