private boolean isHostAlreadyRunningTask(Protos.Offer offer) {
  boolean result = false;
  List<Protos.TaskInfo> stateList = clusterState.getTaskList();
  // Check whether any known task is already assigned to the slave that made this offer.
  for (Protos.TaskInfo t : stateList) {
    if (t.getSlaveId().equals(offer.getSlaveId())) {
      result = true;
      break;
    }
  }
  return result;
}
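A minimal sketch of how a scheduler might consume this helper inside resourceOffers, declining offers from hosts that already run one of its tasks; the surrounding loop and the task-building step are assumptions for illustration, not part of the snippet above.

@Override
public void resourceOffers(SchedulerDriver driver, List<Protos.Offer> offers) {
  for (Protos.Offer offer : offers) {
    if (isHostAlreadyRunningTask(offer)) {
      // Skip hosts that already run one of our tasks; declineOffer returns the
      // offered resources to the Mesos allocator.
      driver.declineOffer(offer.getId());
      continue;
    }
    // ... build Protos.TaskInfo objects and call driver.launchTasks(offer.getId(), tasks) ...
  }
}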
@Override
public void resourceOffers(SchedulerDriver schedulerDriver, List<Protos.Offer> list) {
  double CPUS_PER_TASK = 1;
  double MEM_PER_TASK = 128;
  for (Protos.Offer offer : list) {
    List<Protos.TaskInfo> taskInfoList = new ArrayList<Protos.TaskInfo>();
    double offerCpus = 0;
    double offerMem = 0;
    // Sum the scalar cpu and memory resources contained in this offer.
    for (Protos.Resource resource : offer.getResourcesList()) {
      if (resource.getName().equals("cpus")) {
        offerCpus += resource.getScalar().getValue();
      } else if (resource.getName().equals("mem")) {
        offerMem += resource.getScalar().getValue();
      }
    }
    LOGGER.info("Received Offer : " + offer.getId().getValue()
        + " with cpus = " + offerCpus + " and mem = " + offerMem);

    double remainingCpus = offerCpus;
    double remainingMem = offerMem;
    if (launchedTasks < totalTasks
        && remainingCpus >= CPUS_PER_TASK
        && remainingMem >= MEM_PER_TASK) {
      Protos.TaskID taskID =
          Protos.TaskID.newBuilder().setValue(Integer.toString(launchedTasks++)).build();
      LOGGER.info("Launching task :" + taskID.getValue()
          + " using the offer : " + offer.getId().getValue());

      Protos.TaskInfo piTaskInfo = Protos.TaskInfo.newBuilder()
          .setName("task " + taskID.getValue())
          .setTaskId(taskID)
          .setSlaveId(offer.getSlaveId())
          .addResources(Protos.Resource.newBuilder()
              .setName("cpus")
              .setType(Protos.Value.Type.SCALAR)
              .setScalar(Protos.Value.Scalar.newBuilder().setValue(CPUS_PER_TASK)))
          .addResources(Protos.Resource.newBuilder()
              .setName("mem")
              .setType(Protos.Value.Type.SCALAR)
              .setScalar(Protos.Value.Scalar.newBuilder().setValue(MEM_PER_TASK)))
          .setExecutor(Protos.ExecutorInfo.newBuilder(piExecutor))
          .build();
      taskInfoList.add(piTaskInfo);
    }
    // When taskInfoList is empty this call effectively declines the offer.
    schedulerDriver.launchTasks(offer.getId(), taskInfoList);
  }
}
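For context, a hedged sketch of the statusUpdate callback such a scheduler typically pairs with the launchedTasks/totalTasks counters above, stopping the driver once every launched task has finished; the finishedTasks field is an assumption introduced here for illustration.

private int finishedTasks = 0; // assumption: tracked alongside launchedTasks/totalTasks

@Override
public void statusUpdate(SchedulerDriver schedulerDriver, Protos.TaskStatus status) {
  LOGGER.info("Status update: task " + status.getTaskId().getValue()
      + " is in state " + status.getState());
  if (status.getState() == Protos.TaskState.TASK_FINISHED) {
    finishedTasks++;
    // Once all requested tasks have completed, shut the framework down.
    if (finishedTasks == totalTasks) {
      schedulerDriver.stop();
    }
  }
}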
public SlaveMatchState doesOfferMatch(Protos.Offer offer, SingularityTaskRequest taskRequest,
    SingularitySchedulerStateCache stateCache) {
  final String host = offer.getHostname();
  final String rackId = slaveAndRackHelper.getRackIdOrDefault(offer);
  final String slaveId = offer.getSlaveId().getValue();

  // Reject frozen or decommissioning slaves and racks up front.
  final MachineState currentSlaveState =
      stateCache.getSlave(slaveId).get().getCurrentState().getState();
  if (currentSlaveState == MachineState.FROZEN) {
    return SlaveMatchState.SLAVE_FROZEN;
  }
  if (currentSlaveState.isDecommissioning()) {
    return SlaveMatchState.SLAVE_DECOMMISSIONING;
  }

  final MachineState currentRackState =
      stateCache.getRack(rackId).get().getCurrentState().getState();
  if (currentRackState == MachineState.FROZEN) {
    return SlaveMatchState.RACK_FROZEN;
  }
  if (currentRackState.isDecommissioning()) {
    return SlaveMatchState.RACK_DECOMMISSIONING;
  }

  // Honor an explicit rack-affinity list if the request has one.
  if (!taskRequest.getRequest().getRackAffinity().or(Collections.<String>emptyList()).isEmpty()) {
    if (!taskRequest.getRequest().getRackAffinity().get().contains(rackId)) {
      LOG.trace("Task {} requires a rack in {} (current rack {})",
          taskRequest.getPendingTask().getPendingTaskId(),
          taskRequest.getRequest().getRackAffinity().get(), rackId);
      return SlaveMatchState.RACK_AFFINITY_NOT_MATCHING;
    }
  }

  final SlavePlacement slavePlacement =
      taskRequest.getRequest().getSlavePlacement().or(configuration.getDefaultSlavePlacement());

  if (!taskRequest.getRequest().isRackSensitive() && slavePlacement == SlavePlacement.GREEDY) {
    return SlaveMatchState.NOT_RACK_OR_SLAVE_PARTICULAR;
  }

  final int numDesiredInstances = taskRequest.getRequest().getInstancesSafe();
  double numOnRack = 0;
  double numOnSlave = 0;
  double numCleaningOnSlave = 0;
  double numOtherDeploysOnSlave = 0;

  final String sanitizedHost = JavaUtils.getReplaceHyphensWithUnderscores(host);
  final String sanitizedRackId = JavaUtils.getReplaceHyphensWithUnderscores(rackId);
  Collection<SingularityTaskId> cleaningTasks = stateCache.getCleaningTasks();

  // Count how many instances of this request already run on this slave and rack.
  for (SingularityTaskId taskId : SingularityTaskId.matchingAndNotIn(
      stateCache.getActiveTaskIds(), taskRequest.getRequest().getId(),
      Collections.<SingularityTaskId>emptyList())) {
    // TODO consider using executorIds
    if (taskId.getSanitizedHost().equals(sanitizedHost)) {
      if (taskRequest.getDeploy().getId().equals(taskId.getDeployId())) {
        if (cleaningTasks.contains(taskId)) {
          numCleaningOnSlave++;
        } else {
          numOnSlave++;
        }
      } else {
        numOtherDeploysOnSlave++;
      }
    }
    if (taskId.getSanitizedRackId().equals(sanitizedRackId)
        && !cleaningTasks.contains(taskId)
        && taskRequest.getDeploy().getId().equals(taskId.getDeployId())) {
      numOnRack++;
    }
  }

  if (taskRequest.getRequest().isRackSensitive()) {
    final double numPerRack = numDesiredInstances / (double) stateCache.getNumActiveRacks();
    final boolean isRackOk = numOnRack < numPerRack;

    if (!isRackOk) {
      LOG.trace("Rejecting RackSensitive task {} from slave {} ({}) due to numOnRack {} and cleaningOnSlave {}",
          taskRequest.getRequest().getId(), slaveId, host, numOnRack, numCleaningOnSlave);
      return SlaveMatchState.RACK_SATURATED;
    }
  }

  // Apply the per-slave placement strategy.
  switch (slavePlacement) {
    case SEPARATE:
    case SEPARATE_BY_DEPLOY:
      if (numOnSlave > 0 || numCleaningOnSlave > 0) {
        LOG.trace("Rejecting SEPARATE task {} from slave {} ({}) due to numOnSlave {} numCleaningOnSlave {}",
            taskRequest.getRequest().getId(), slaveId, host, numOnSlave, numCleaningOnSlave);
        return SlaveMatchState.SLAVE_SATURATED;
      }
      break;
    case SEPARATE_BY_REQUEST:
      if (numOnSlave > 0 || numCleaningOnSlave > 0 || numOtherDeploysOnSlave > 0) {
        LOG.trace("Rejecting SEPARATE task {} from slave {} ({}) due to numOnSlave {} numCleaningOnSlave {} numOtherDeploysOnSlave {}",
            taskRequest.getRequest().getId(), slaveId, host, numOnSlave, numCleaningOnSlave, numOtherDeploysOnSlave);
        return SlaveMatchState.SLAVE_SATURATED;
      }
      break;
    case OPTIMISTIC:
      final double numPerSlave = numDesiredInstances / (double) stateCache.getNumActiveSlaves();
      final boolean isSlaveOk = numOnSlave < numPerSlave;

      if (!isSlaveOk) {
        LOG.trace("Rejecting OPTIMISTIC task {} from slave {} ({}) due to numOnSlave {}",
            taskRequest.getRequest().getId(), slaveId, host, numOnSlave);
        return SlaveMatchState.SLAVE_SATURATED;
      }
      break;
    case GREEDY:
  }

  return SlaveMatchState.OK;
}
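A hypothetical caller, shown only to illustrate how the return value might be consumed: task requests whose match state is anything other than SlaveMatchState.OK are skipped for this offer. The acceptTask helper and the surrounding loop are assumptions for illustration, not Singularity's actual offer-matching loop.

for (SingularityTaskRequest taskRequest : pendingTaskRequests) {
  SlaveMatchState matchState = doesOfferMatch(offer, taskRequest, stateCache);
  if (matchState != SlaveMatchState.OK) {
    // The enum value records why the offer was rejected (frozen slave, saturated
    // rack, placement constraint, ...), which is useful for tracing decisions.
    LOG.trace("Offer {} did not match request {}: {}",
        offer.getId().getValue(), taskRequest.getRequest().getId(), matchState);
    continue;
  }
  acceptTask(offer, taskRequest); // hypothetical helper that builds and launches the task
}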
@Override
public void resourceOffers(SchedulerDriver driver, List<Protos.Offer> offers) {
  long masterCpu = Configuration.getInt(PropertyKey.INTEGRATION_MASTER_RESOURCE_CPU);
  long masterMem = Configuration.getBytes(PropertyKey.INTEGRATION_MASTER_RESOURCE_MEM) / Constants.MB;
  long workerCpu = Configuration.getInt(PropertyKey.INTEGRATION_WORKER_RESOURCE_CPU);
  long workerMem = Configuration.getBytes(PropertyKey.INTEGRATION_WORKER_RESOURCE_MEM) / Constants.MB;

  LOG.info("Master launched {}, master count {}, "
      + "requested master cpu {} mem {} MB and required master hostname {}",
      mMasterLaunched, mMasterCount, masterCpu, masterMem, mRequiredMasterHostname);

  for (Protos.Offer offer : offers) {
    Protos.Offer.Operation.Launch.Builder launch = Protos.Offer.Operation.Launch.newBuilder();
    double offerCpu = 0;
    double offerMem = 0;
    for (Protos.Resource resource : offer.getResourcesList()) {
      if (resource.getName().equals(Constants.MESOS_RESOURCE_CPUS)) {
        offerCpu += resource.getScalar().getValue();
      } else if (resource.getName().equals(Constants.MESOS_RESOURCE_MEM)) {
        offerMem += resource.getScalar().getValue();
      } else {
        // Other resources are currently ignored.
      }
    }

    LOG.info("Received offer {} on host {} with cpus {} and mem {} MB and hasMasterPorts {}",
        offer.getId().getValue(), offer.getHostname(), offerCpu, offerMem,
        OfferUtils.hasAvailableMasterPorts(offer));

    Protos.ExecutorInfo.Builder executorBuilder = Protos.ExecutorInfo.newBuilder();
    List<Protos.Resource> resources;
    if (!mMasterLaunched && offerCpu >= masterCpu && offerMem >= masterMem
        && mMasterCount < Configuration.getInt(PropertyKey.INTEGRATION_MESOS_ALLUXIO_MASTER_NODE_COUNT)
        && OfferUtils.hasAvailableMasterPorts(offer)
        && (mRequiredMasterHostname == null || mRequiredMasterHostname.equals(offer.getHostname()))) {
      LOG.debug("Creating Alluxio Master executor");
      executorBuilder
          .setName("Alluxio Master Executor")
          .setSource("master")
          .setExecutorId(Protos.ExecutorID.newBuilder().setValue("master"))
          .addAllResources(getExecutorResources())
          .setCommand(Protos.CommandInfo.newBuilder()
              .setValue(createStartAlluxioCommand("alluxio-master-mesos.sh"))
              .addAllUris(getExecutorDependencyURIList())
              .setEnvironment(Protos.Environment.newBuilder()
                  .addVariables(Protos.Environment.Variable.newBuilder()
                      .setName("ALLUXIO_UNDERFS_ADDRESS")
                      .setValue(Configuration.get(PropertyKey.UNDERFS_ADDRESS))
                      .build())
                  .build()));
      // pre-build resource list here, then use it to build Protos.Task later.
      resources = getMasterRequiredResources(masterCpu, masterMem);
      mMasterHostname = offer.getHostname();
      mTaskName = Configuration.get(PropertyKey.INTEGRATION_MESOS_ALLUXIO_MASTER_NAME);
      mMasterCount++;
      mMasterTaskId = mLaunchedTasks;
    } else if (mMasterLaunched && !mWorkers.contains(offer.getHostname())
        && offerCpu >= workerCpu && offerMem >= workerMem
        && OfferUtils.hasAvailableWorkerPorts(offer)) {
      LOG.debug("Creating Alluxio Worker executor");
      final String memSize = FormatUtils.getSizeFromBytes((long) workerMem * Constants.MB);
      executorBuilder
          .setName("Alluxio Worker Executor")
          .setSource("worker")
          .setExecutorId(Protos.ExecutorID.newBuilder().setValue("worker"))
          .addAllResources(getExecutorResources())
          .setCommand(Protos.CommandInfo.newBuilder()
              .setValue(createStartAlluxioCommand("alluxio-worker-mesos.sh"))
              .addAllUris(getExecutorDependencyURIList())
              .setEnvironment(Protos.Environment.newBuilder()
                  .addVariables(Protos.Environment.Variable.newBuilder()
                      .setName("ALLUXIO_MASTER_HOSTNAME")
                      .setValue(mMasterHostname)
                      .build())
                  .addVariables(Protos.Environment.Variable.newBuilder()
                      .setName("ALLUXIO_WORKER_MEMORY_SIZE")
                      .setValue(memSize)
                      .build())
                  .addVariables(Protos.Environment.Variable.newBuilder()
                      .setName("ALLUXIO_UNDERFS_ADDRESS")
                      .setValue(Configuration.get(PropertyKey.UNDERFS_ADDRESS))
                      .build())
                  .build()));
      // pre-build resource list here, then use it to build Protos.Task later.
      resources = getWorkerRequiredResources(workerCpu, workerMem);
      mWorkers.add(offer.getHostname());
      mTaskName = Configuration.get(PropertyKey.INTEGRATION_MESOS_ALLUXIO_WORKER_NAME);
    } else {
      // The resource offer cannot be used to start either master or a worker.
      LOG.info("Declining offer {}", offer.getId().getValue());
      driver.declineOffer(offer.getId());
      continue;
    }

    Protos.TaskID taskId =
        Protos.TaskID.newBuilder().setValue(String.valueOf(mLaunchedTasks)).build();

    LOG.info("Launching task {} using offer {}", taskId.getValue(), offer.getId().getValue());

    Protos.TaskInfo task = Protos.TaskInfo.newBuilder()
        .setName(mTaskName)
        .setTaskId(taskId)
        .setSlaveId(offer.getSlaveId())
        .addAllResources(resources)
        .setExecutor(executorBuilder)
        .build();
    launch.addTaskInfos(Protos.TaskInfo.newBuilder(task));
    mLaunchedTasks++;

    // NOTE: We use the new API `acceptOffers` here to launch tasks.
    // The 'launchTasks' API will be deprecated.
    List<Protos.OfferID> offerIds = new ArrayList<Protos.OfferID>();
    offerIds.add(offer.getId());
    List<Protos.Offer.Operation> operations = new ArrayList<Protos.Offer.Operation>();
    Protos.Offer.Operation operation = Protos.Offer.Operation.newBuilder()
        .setType(Protos.Offer.Operation.Type.LAUNCH)
        .setLaunch(launch)
        .build();
    operations.add(operation);
    Protos.Filters filters = Protos.Filters.newBuilder().setRefuseSeconds(1).build();
    driver.acceptOffers(offerIds, operations, filters);
  }
}
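The port checks above are delegated to OfferUtils; as a rough sketch of what such a check can look like against the Mesos "ports" ranges resource (an assumption for illustration, not Alluxio's actual OfferUtils implementation):

// Returns true if every port in requiredPorts falls inside one of the offer's "ports" ranges.
static boolean offerContainsPorts(Protos.Offer offer, long... requiredPorts) {
  for (long port : requiredPorts) {
    boolean found = false;
    for (Protos.Resource resource : offer.getResourcesList()) {
      if (!resource.getName().equals("ports")) {
        continue;
      }
      for (Protos.Value.Range range : resource.getRanges().getRangeList()) {
        if (port >= range.getBegin() && port <= range.getEnd()) {
          found = true;
          break;
        }
      }
    }
    if (!found) {
      return false;
    }
  }
  return true;
}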