/** * Return the level at which we are allowed to schedule containers. Given the thresholds * indicating how much time passed before relaxing scheduling constraints. */ public synchronized NodeType getAllowedLocalityLevelByTime( Priority priority, long nodeLocalityDelayMs, long rackLocalityDelayMs, long currentTimeMs) { // if not being used, can schedule anywhere if (nodeLocalityDelayMs < 0 || rackLocalityDelayMs < 0) { return NodeType.OFF_SWITCH; } // default level is NODE_LOCAL if (!allowedLocalityLevel.containsKey(priority)) { allowedLocalityLevel.put(priority, NodeType.NODE_LOCAL); return NodeType.NODE_LOCAL; } NodeType allowed = allowedLocalityLevel.get(priority); // if level is already most liberal, we're done if (allowed.equals(NodeType.OFF_SWITCH)) { return NodeType.OFF_SWITCH; } // check waiting time long waitTime = currentTimeMs; if (lastScheduledContainer.containsKey(priority)) { waitTime -= lastScheduledContainer.get(priority); } else { waitTime -= getStartTime(); } long thresholdTime = allowed.equals(NodeType.NODE_LOCAL) ? nodeLocalityDelayMs : rackLocalityDelayMs; if (waitTime > thresholdTime) { if (allowed.equals(NodeType.NODE_LOCAL)) { allowedLocalityLevel.put(priority, NodeType.RACK_LOCAL); resetSchedulingOpportunities(priority, currentTimeMs); } else if (allowed.equals(NodeType.RACK_LOCAL)) { allowedLocalityLevel.put(priority, NodeType.OFF_SWITCH); resetSchedulingOpportunities(priority, currentTimeMs); } } return allowedLocalityLevel.get(priority); }
/** * Return the level at which we are allowed to schedule containers, given the current size of the * cluster and thresholds indicating how many nodes to fail at (as a fraction of cluster size) * before relaxing scheduling constraints. */ public synchronized NodeType getAllowedLocalityLevel( Priority priority, int numNodes, double nodeLocalityThreshold, double rackLocalityThreshold) { // upper limit on threshold if (nodeLocalityThreshold > 1.0) { nodeLocalityThreshold = 1.0; } if (rackLocalityThreshold > 1.0) { rackLocalityThreshold = 1.0; } // If delay scheduling is not being used, can schedule anywhere if (nodeLocalityThreshold < 0.0 || rackLocalityThreshold < 0.0) { return NodeType.OFF_SWITCH; } // Default level is NODE_LOCAL if (!allowedLocalityLevel.containsKey(priority)) { allowedLocalityLevel.put(priority, NodeType.NODE_LOCAL); return NodeType.NODE_LOCAL; } NodeType allowed = allowedLocalityLevel.get(priority); // If level is already most liberal, we're done if (allowed.equals(NodeType.OFF_SWITCH)) return NodeType.OFF_SWITCH; double threshold = allowed.equals(NodeType.NODE_LOCAL) ? nodeLocalityThreshold : rackLocalityThreshold; // Relax locality constraints once we've surpassed threshold. if (getSchedulingOpportunities(priority) > (numNodes * threshold)) { if (allowed.equals(NodeType.NODE_LOCAL)) { allowedLocalityLevel.put(priority, NodeType.RACK_LOCAL); resetSchedulingOpportunities(priority); } else if (allowed.equals(NodeType.RACK_LOCAL)) { allowedLocalityLevel.put(priority, NodeType.OFF_SWITCH); resetSchedulingOpportunities(priority); } } return allowedLocalityLevel.get(priority); }
private Resource assignContainer(FSSchedulerNode node, boolean reserved) { if (LOG.isDebugEnabled()) { LOG.debug("Node offered to app: " + getName() + " reserved: " + reserved); } Collection<Priority> prioritiesToTry = (reserved) ? Arrays.asList(node.getReservedContainer().getReservedPriority()) : getPriorities(); // For each priority, see if we can schedule a node local, rack local // or off-switch request. Rack of off-switch requests may be delayed // (not scheduled) in order to promote better locality. synchronized (this) { for (Priority priority : prioritiesToTry) { if (getTotalRequiredResources(priority) <= 0 || !hasContainerForNode(priority, node)) { continue; } addSchedulingOpportunity(priority); // Check the AM resource usage for the leaf queue if (getLiveContainers().size() == 0 && !getUnmanagedAM()) { if (!getQueue().canRunAppAM(getAMResource())) { return Resources.none(); } } ResourceRequest rackLocalRequest = getResourceRequest(priority, node.getRackName()); ResourceRequest localRequest = getResourceRequest(priority, node.getNodeName()); if (localRequest != null && !localRequest.getRelaxLocality()) { LOG.warn("Relax locality off is not supported on local request: " + localRequest); } NodeType allowedLocality; if (scheduler.isContinuousSchedulingEnabled()) { allowedLocality = getAllowedLocalityLevelByTime( priority, scheduler.getNodeLocalityDelayMs(), scheduler.getRackLocalityDelayMs(), scheduler.getClock().getTime()); } else { allowedLocality = getAllowedLocalityLevel( priority, scheduler.getNumClusterNodes(), scheduler.getNodeLocalityThreshold(), scheduler.getRackLocalityThreshold()); } if (rackLocalRequest != null && rackLocalRequest.getNumContainers() != 0 && localRequest != null && localRequest.getNumContainers() != 0) { return assignContainer(node, localRequest, NodeType.NODE_LOCAL, reserved); } if (rackLocalRequest != null && !rackLocalRequest.getRelaxLocality()) { continue; } if (rackLocalRequest != null && rackLocalRequest.getNumContainers() != 0 && (allowedLocality.equals(NodeType.RACK_LOCAL) || allowedLocality.equals(NodeType.OFF_SWITCH))) { return assignContainer(node, rackLocalRequest, NodeType.RACK_LOCAL, reserved); } ResourceRequest offSwitchRequest = getResourceRequest(priority, ResourceRequest.ANY); if (offSwitchRequest != null && !offSwitchRequest.getRelaxLocality()) { continue; } if (offSwitchRequest != null && offSwitchRequest.getNumContainers() != 0 && allowedLocality.equals(NodeType.OFF_SWITCH)) { return assignContainer(node, offSwitchRequest, NodeType.OFF_SWITCH, reserved); } } } return Resources.none(); }
public synchronized RMContainer allocate( NodeType type, FSSchedulerNode node, Priority priority, ResourceRequest request, Container container) { // Update allowed locality level NodeType allowed = allowedLocalityLevel.get(priority); if (allowed != null) { if (allowed.equals(NodeType.OFF_SWITCH) && (type.equals(NodeType.NODE_LOCAL) || type.equals(NodeType.RACK_LOCAL))) { this.resetAllowedLocalityLevel(priority, type); } else if (allowed.equals(NodeType.RACK_LOCAL) && type.equals(NodeType.NODE_LOCAL)) { this.resetAllowedLocalityLevel(priority, type); } } // Required sanity check - AM can call 'allocate' to update resource // request without locking the scheduler, hence we need to check if (getTotalRequiredResources(priority) <= 0) { return null; } // Create RMContainer RMContainer rmContainer = new RMContainerImpl( container, getApplicationAttemptId(), node.getNodeID(), appSchedulingInfo.getUser(), rmContext); // Add it to allContainers list. newlyAllocatedContainers.add(rmContainer); liveContainers.put(container.getId(), rmContainer); // Update consumption and track allocations List<ResourceRequest> resourceRequestList = appSchedulingInfo.allocate(type, node, priority, request, container); Resources.addTo(currentConsumption, container.getResource()); // Update resource requests related to "request" and store in RMContainer ((RMContainerImpl) rmContainer).setResourceRequests(resourceRequestList); // Inform the container rmContainer.handle(new RMContainerEvent(container.getId(), RMContainerEventType.START)); if (LOG.isDebugEnabled()) { LOG.debug( "allocate: applicationAttemptId=" + container.getId().getApplicationAttemptId() + " container=" + container.getId() + " host=" + container.getNodeId().getHost() + " type=" + type); } RMAuditLogger.logSuccess( getUser(), AuditConstants.ALLOC_CONTAINER, "SchedulerApp", getApplicationId(), container.getId()); return rmContainer; }
private Resource assignContainer( FSSchedulerNode node, boolean reserved, TransactionState transactionState) { if (LOG.isDebugEnabled()) { LOG.debug("Node offered to app: " + getName() + " reserved: " + reserved); } if (reserved) { RMContainer rmContainer = node.getReservedContainer(); Priority priority = rmContainer.getReservedPriority(); // Make sure the application still needs requests at this priority if (app.getTotalRequiredResources(priority) == 0) { unreserve(priority, node); return Resources.none(); } } Collection<Priority> prioritiesToTry = (reserved) ? Arrays.asList(node.getReservedContainer().getReservedPriority()) : app.getPriorities(); // For each priority, see if we can schedule a node local, rack local // or off-switch request. Rack of off-switch requests may be delayed // (not scheduled) in order to promote better locality. synchronized (app) { for (Priority priority : prioritiesToTry) { if (app.getTotalRequiredResources(priority) <= 0 || !hasContainerForNode(priority, node)) { continue; } app.addSchedulingOpportunity(priority); ResourceRequest rackLocalRequest = app.getResourceRequest(priority, node.getRackName()); ResourceRequest localRequest = app.getResourceRequest(priority, node.getNodeName()); if (localRequest != null && !localRequest.getRelaxLocality()) { LOG.warn("Relax locality off is not supported on local request: " + localRequest); } NodeType allowedLocality; if (scheduler.isContinuousSchedulingEnabled()) { allowedLocality = app.getAllowedLocalityLevelByTime( priority, scheduler.getNodeLocalityDelayMs(), scheduler.getRackLocalityDelayMs(), scheduler.getClock().getTime()); } else { allowedLocality = app.getAllowedLocalityLevel( priority, scheduler.getNumClusterNodes(), scheduler.getNodeLocalityThreshold(), scheduler.getRackLocalityThreshold()); } if (rackLocalRequest != null && rackLocalRequest.getNumContainers() != 0 && localRequest != null && localRequest.getNumContainers() != 0) { return assignContainer( node, priority, localRequest, NodeType.NODE_LOCAL, reserved, transactionState); } if (rackLocalRequest != null && !rackLocalRequest.getRelaxLocality()) { continue; } if (rackLocalRequest != null && rackLocalRequest.getNumContainers() != 0 && (allowedLocality.equals(NodeType.RACK_LOCAL) || allowedLocality.equals(NodeType.OFF_SWITCH))) { return assignContainer( node, priority, rackLocalRequest, NodeType.RACK_LOCAL, reserved, transactionState); } ResourceRequest offSwitchRequest = app.getResourceRequest(priority, ResourceRequest.ANY); if (offSwitchRequest != null && !offSwitchRequest.getRelaxLocality()) { continue; } if (offSwitchRequest != null && offSwitchRequest.getNumContainers() != 0 && allowedLocality.equals(NodeType.OFF_SWITCH)) { return assignContainer( node, priority, offSwitchRequest, NodeType.OFF_SWITCH, reserved, transactionState); } } } return Resources.none(); }