/** * Whether this app has containers requests that could be satisfied on the given node, if the node * had full space. */ public boolean hasContainerForNode(Priority prio, FSSchedulerNode node) { ResourceRequest anyRequest = getResourceRequest(prio, ResourceRequest.ANY); ResourceRequest rackRequest = getResourceRequest(prio, node.getRackName()); ResourceRequest nodeRequest = getResourceRequest(prio, node.getNodeName()); return // There must be outstanding requests at the given priority: anyRequest != null && anyRequest.getNumContainers() > 0 && // If locality relaxation is turned off at *-level, there must be a // non-zero request for the node's rack: (anyRequest.getRelaxLocality() || (rackRequest != null && rackRequest.getNumContainers() > 0)) && // If locality relaxation is turned off at rack-level, there must be a // non-zero request at the node: (rackRequest == null || rackRequest.getRelaxLocality() || (nodeRequest != null && nodeRequest.getNumContainers() > 0)) && // The requested container must be able to fit on the node: Resources.lessThanOrEqual( RESOURCE_CALCULATOR, null, anyRequest.getCapability(), node.getRMNode().getTotalCapability()); }
/** * Create and return a container object reflecting an allocation for the given appliction on the * given node with the given capability and priority. */ public Container createContainer(FSSchedulerNode node, Resource capability, Priority priority) { NodeId nodeId = node.getRMNode().getNodeID(); ContainerId containerId = BuilderUtils.newContainerId(getApplicationAttemptId(), getNewContainerId()); // Create the container Container container = BuilderUtils.newContainer( containerId, nodeId, node.getRMNode().getHttpAddress(), capability, priority, null); return container; }
/**
 * Assign a container to this node to facilitate {@code request}. If the requested capability
 * does not fit in the node's available resource, create (or refresh) a reservation instead. This
 * is called once we are sure the particular request should be facilitated by this node.
 *
 * @param node The node to try placing the container on.
 * @param request The ResourceRequest we're trying to satisfy.
 * @param type The locality of the assignment.
 * @param reserved Whether there's already a container reserved for this app on the node.
 * @return If an assignment was made, returns the resources allocated to the container. If a
 *     reservation was made, returns FairScheduler.CONTAINER_RESERVED. If no assignment or
 *     reservation was made, returns an empty resource.
 */
private Resource assignContainer(
    FSSchedulerNode node, ResourceRequest request, NodeType type, boolean reserved) {
  // What the request needs versus what the node currently has free.
  Resource capability = request.getCapability();
  Resource available = node.getAvailableResource();

  // Reuse the reserved container when there is one; otherwise build a new one.
  Container container;
  if (reserved) {
    container = node.getReservedContainer().getContainer();
  } else {
    container = createContainer(node, capability, request.getPriority());
  }

  if (!Resources.fitsIn(capability, available)) {
    // The desired container won't fit here, so reserve.
    reserve(request.getPriority(), node, container, reserved);
    return FairScheduler.CONTAINER_RESERVED;
  }

  // Inform the application of the new container for this request.
  RMContainer allocatedContainer =
      allocate(type, node, request.getPriority(), request, container);

  // A previous reservation is dropped whether or not the allocation went through.
  if (reserved) {
    unreserve(request.getPriority(), node);
  }

  if (allocatedContainer == null) {
    // The application did not need this resource after all.
    return Resources.none();
  }

  // Inform the node.
  node.allocateContainer(allocatedContainer);

  // If this container is used to run the AM, update the leaf queue's AM usage.
  if (getLiveContainers().size() == 1 && !getUnmanagedAM()) {
    getQueue().addAMResourceUsage(container.getResource());
    setAmRunning(true);
  }

  return container.getResource();
}
/**
 * Reserve a spot for {@code container} on this {@code node}. If the container is {@code
 * alreadyReserved} on the node, simply update relevant bookkeeping. This dispatches to the
 * relevant handlers in {@link FSSchedulerNode}.
 *
 * @param priority the priority the reservation is made at
 * @param node the node to reserve on
 * @param container the container being reserved
 * @param alreadyReserved whether the node already holds a reservation for this app
 */
private void reserve(
    Priority priority, FSSchedulerNode node, Container container, boolean alreadyReserved) {
  LOG.info("Making reservation: node=" + node.getNodeName() + " app_id=" + getApplicationId());

  final RMContainer reservation;
  if (alreadyReserved) {
    // Re-reservation: reuse the node's existing reserved container; metrics are not touched.
    reservation = node.getReservedContainer();
    super.reserve(node, priority, reservation, container);
  } else {
    // First reservation: account for it in the metrics, then let the superclass create it.
    getMetrics().reserveResource(getUser(), container.getResource());
    reservation = super.reserve(node, priority, null, container);
  }

  node.reserveResource(this, priority, reservation);
}
/** * Assign a container to this node to facilitate {@code request}. If node does not have enough * memory, create a reservation. This is called once we are sure the particular request should be * facilitated by this node. */ private Resource assignContainer( FSSchedulerNode node, Priority priority, ResourceRequest request, NodeType type, boolean reserved, TransactionState transactionState) { // How much does this request need? Resource capability = request.getCapability(); // How much does the node have? Resource available = node.getAvailableResource(); Container container = null; if (reserved) { container = node.getReservedContainer().getContainer(); } else { container = createContainer(app, node, capability, priority, transactionState); } // Can we allocate a container on this node? if (Resources.fitsIn(capability, available)) { // Inform the application of the new container for this request RMContainer allocatedContainer = app.allocate(type, node, priority, request, container, transactionState); if (allocatedContainer == null) { // Did the application need this resource? if (reserved) { unreserve(priority, node); } return Resources.none(); } // If we had previously made a reservation, delete it if (reserved) { unreserve(priority, node); } // Inform the node node.allocateContainer(app.getApplicationId(), allocatedContainer); return container.getResource(); } else { // The desired container won't fit here, so reserve reserve(priority, node, container, reserved, transactionState); return FairScheduler.CONTAINER_RESERVED; } }
/**
 * Offers {@code node} to the runnable apps of this queue, in the order given by the scheduling
 * policy's comparator, and stops at the first app that yields a non-empty result.
 *
 * @param node the node being offered
 * @return the first result that is not equal to {@code Resources.none()}; otherwise an empty
 *     resource (also when the pre-check fails)
 */
@Override
public Resource assignContainer(FSSchedulerNode node) {
  Resource assigned = Resources.none();
  if (LOG.isDebugEnabled()) {
    LOG.debug("Node " + node.getNodeName() + " offered to queue: " + getName());
  }

  if (!assignContainerPreCheck(node)) {
    return assigned;
  }

  // Order the apps according to the queue's policy before offering the node.
  Comparator<Schedulable> appOrder = policy.getComparator();
  Collections.sort(runnableApps, appOrder);

  for (FSAppAttempt candidate : runnableApps) {
    if (!SchedulerAppUtils.isBlacklisted(candidate, node, LOG)) {
      assigned = candidate.assignContainer(node);
      if (!assigned.equals(Resources.none())) {
        return assigned;
      }
    }
  }
  return assigned;
}
private synchronized void unreserveInternal(Priority priority, FSSchedulerNode node) { Map<NodeId, RMContainer> reservedContainers = this.reservedContainers.get(priority); RMContainer reservedContainer = reservedContainers.remove(node.getNodeID()); if (reservedContainers.isEmpty()) { this.reservedContainers.remove(priority); } // Reset the re-reservation count resetReReservations(priority); Resource resource = reservedContainer.getContainer().getResource(); Resources.subtractFrom(currentReservation, resource); LOG.info( "Application " + getApplicationId() + " unreserved " + " on node " + node + ", currently has " + reservedContainers.size() + " at priority " + priority + "; currentReservation " + currentReservation); }
/** * Called when this application already has an existing reservation on the given node. Sees * whether we can turn the reservation into an allocation. Also checks whether the application * needs the reservation anymore, and releases it if not. * * @param node Node that the application has an existing reservation on */ public Resource assignReservedContainer(FSSchedulerNode node) { RMContainer rmContainer = node.getReservedContainer(); Priority priority = rmContainer.getReservedPriority(); // Make sure the application still needs requests at this priority if (getTotalRequiredResources(priority) == 0) { unreserve(priority, node); return Resources.none(); } // Fail early if the reserved container won't fit. // Note that we have an assumption here that there's only one container size // per priority. if (!Resources.fitsIn( node.getReservedContainer().getReservedResource(), node.getAvailableResource())) { return Resources.none(); } return assignContainer(node, true); }
private Resource assignContainer(FSSchedulerNode node, boolean reserved) { if (LOG.isDebugEnabled()) { LOG.debug("Node offered to app: " + getName() + " reserved: " + reserved); } Collection<Priority> prioritiesToTry = (reserved) ? Arrays.asList(node.getReservedContainer().getReservedPriority()) : getPriorities(); // For each priority, see if we can schedule a node local, rack local // or off-switch request. Rack of off-switch requests may be delayed // (not scheduled) in order to promote better locality. synchronized (this) { for (Priority priority : prioritiesToTry) { if (getTotalRequiredResources(priority) <= 0 || !hasContainerForNode(priority, node)) { continue; } addSchedulingOpportunity(priority); // Check the AM resource usage for the leaf queue if (getLiveContainers().size() == 0 && !getUnmanagedAM()) { if (!getQueue().canRunAppAM(getAMResource())) { return Resources.none(); } } ResourceRequest rackLocalRequest = getResourceRequest(priority, node.getRackName()); ResourceRequest localRequest = getResourceRequest(priority, node.getNodeName()); if (localRequest != null && !localRequest.getRelaxLocality()) { LOG.warn("Relax locality off is not supported on local request: " + localRequest); } NodeType allowedLocality; if (scheduler.isContinuousSchedulingEnabled()) { allowedLocality = getAllowedLocalityLevelByTime( priority, scheduler.getNodeLocalityDelayMs(), scheduler.getRackLocalityDelayMs(), scheduler.getClock().getTime()); } else { allowedLocality = getAllowedLocalityLevel( priority, scheduler.getNumClusterNodes(), scheduler.getNodeLocalityThreshold(), scheduler.getRackLocalityThreshold()); } if (rackLocalRequest != null && rackLocalRequest.getNumContainers() != 0 && localRequest != null && localRequest.getNumContainers() != 0) { return assignContainer(node, localRequest, NodeType.NODE_LOCAL, reserved); } if (rackLocalRequest != null && !rackLocalRequest.getRelaxLocality()) { continue; } if (rackLocalRequest != null && rackLocalRequest.getNumContainers() != 
0 && (allowedLocality.equals(NodeType.RACK_LOCAL) || allowedLocality.equals(NodeType.OFF_SWITCH))) { return assignContainer(node, rackLocalRequest, NodeType.RACK_LOCAL, reserved); } ResourceRequest offSwitchRequest = getResourceRequest(priority, ResourceRequest.ANY); if (offSwitchRequest != null && !offSwitchRequest.getRelaxLocality()) { continue; } if (offSwitchRequest != null && offSwitchRequest.getNumContainers() != 0 && allowedLocality.equals(NodeType.OFF_SWITCH)) { return assignContainer(node, offSwitchRequest, NodeType.OFF_SWITCH, reserved); } } } return Resources.none(); }
/**
 * Remove the reservation on {@code node} at the given {@link Priority}. This dispatches
 * SchedulerNode handlers as well.
 *
 * @param priority the priority the reservation was made at
 * @param node the node holding the reservation
 */
public void unreserve(Priority priority, FSSchedulerNode node) {
  // Capture the reserved container before the unreserve calls below run; it is still needed
  // for the metrics update at the end.
  RMContainer previouslyReserved = node.getReservedContainer();

  unreserveInternal(priority, node);
  node.unreserveResource(this);

  Resource freed = previouslyReserved.getContainer().getResource();
  getMetrics().unreserveResource(getUser(), freed);
}
public synchronized RMContainer allocate( NodeType type, FSSchedulerNode node, Priority priority, ResourceRequest request, Container container) { // Update allowed locality level NodeType allowed = allowedLocalityLevel.get(priority); if (allowed != null) { if (allowed.equals(NodeType.OFF_SWITCH) && (type.equals(NodeType.NODE_LOCAL) || type.equals(NodeType.RACK_LOCAL))) { this.resetAllowedLocalityLevel(priority, type); } else if (allowed.equals(NodeType.RACK_LOCAL) && type.equals(NodeType.NODE_LOCAL)) { this.resetAllowedLocalityLevel(priority, type); } } // Required sanity check - AM can call 'allocate' to update resource // request without locking the scheduler, hence we need to check if (getTotalRequiredResources(priority) <= 0) { return null; } // Create RMContainer RMContainer rmContainer = new RMContainerImpl( container, getApplicationAttemptId(), node.getNodeID(), appSchedulingInfo.getUser(), rmContext); // Add it to allContainers list. newlyAllocatedContainers.add(rmContainer); liveContainers.put(container.getId(), rmContainer); // Update consumption and track allocations List<ResourceRequest> resourceRequestList = appSchedulingInfo.allocate(type, node, priority, request, container); Resources.addTo(currentConsumption, container.getResource()); // Update resource requests related to "request" and store in RMContainer ((RMContainerImpl) rmContainer).setResourceRequests(resourceRequestList); // Inform the container rmContainer.handle(new RMContainerEvent(container.getId(), RMContainerEventType.START)); if (LOG.isDebugEnabled()) { LOG.debug( "allocate: applicationAttemptId=" + container.getId().getApplicationAttemptId() + " container=" + container.getId() + " host=" + container.getNodeId().getHost() + " type=" + type); } RMAuditLogger.logSuccess( getUser(), AuditConstants.ALLOC_CONTAINER, "SchedulerApp", getApplicationId(), container.getId()); return rmContainer; }
private Resource assignContainer( FSSchedulerNode node, boolean reserved, TransactionState transactionState) { if (LOG.isDebugEnabled()) { LOG.debug("Node offered to app: " + getName() + " reserved: " + reserved); } if (reserved) { RMContainer rmContainer = node.getReservedContainer(); Priority priority = rmContainer.getReservedPriority(); // Make sure the application still needs requests at this priority if (app.getTotalRequiredResources(priority) == 0) { unreserve(priority, node); return Resources.none(); } } Collection<Priority> prioritiesToTry = (reserved) ? Arrays.asList(node.getReservedContainer().getReservedPriority()) : app.getPriorities(); // For each priority, see if we can schedule a node local, rack local // or off-switch request. Rack of off-switch requests may be delayed // (not scheduled) in order to promote better locality. synchronized (app) { for (Priority priority : prioritiesToTry) { if (app.getTotalRequiredResources(priority) <= 0 || !hasContainerForNode(priority, node)) { continue; } app.addSchedulingOpportunity(priority); ResourceRequest rackLocalRequest = app.getResourceRequest(priority, node.getRackName()); ResourceRequest localRequest = app.getResourceRequest(priority, node.getNodeName()); if (localRequest != null && !localRequest.getRelaxLocality()) { LOG.warn("Relax locality off is not supported on local request: " + localRequest); } NodeType allowedLocality; if (scheduler.isContinuousSchedulingEnabled()) { allowedLocality = app.getAllowedLocalityLevelByTime( priority, scheduler.getNodeLocalityDelayMs(), scheduler.getRackLocalityDelayMs(), scheduler.getClock().getTime()); } else { allowedLocality = app.getAllowedLocalityLevel( priority, scheduler.getNumClusterNodes(), scheduler.getNodeLocalityThreshold(), scheduler.getRackLocalityThreshold()); } if (rackLocalRequest != null && rackLocalRequest.getNumContainers() != 0 && localRequest != null && localRequest.getNumContainers() != 0) { return assignContainer( node, priority, localRequest, 
NodeType.NODE_LOCAL, reserved, transactionState); } if (rackLocalRequest != null && !rackLocalRequest.getRelaxLocality()) { continue; } if (rackLocalRequest != null && rackLocalRequest.getNumContainers() != 0 && (allowedLocality.equals(NodeType.RACK_LOCAL) || allowedLocality.equals(NodeType.OFF_SWITCH))) { return assignContainer( node, priority, rackLocalRequest, NodeType.RACK_LOCAL, reserved, transactionState); } ResourceRequest offSwitchRequest = app.getResourceRequest(priority, ResourceRequest.ANY); if (offSwitchRequest != null && !offSwitchRequest.getRelaxLocality()) { continue; } if (offSwitchRequest != null && offSwitchRequest.getNumContainers() != 0 && allowedLocality.equals(NodeType.OFF_SWITCH)) { return assignContainer( node, priority, offSwitchRequest, NodeType.OFF_SWITCH, reserved, transactionState); } } } return Resources.none(); }
/**
 * Remove the reservation on {@code node} at the given {@link Priority}. This dispatches to the
 * SchedulerApp and SchedulerNode handlers for an unreservation.
 *
 * @param priority the priority the reservation was made at
 * @param node the node holding the reservation
 */
public void unreserve(Priority priority, FSSchedulerNode node) {
  // Capture the reserved container before the unreserve calls below run; it is still needed
  // for the metrics update at the end.
  RMContainer previouslyReserved = node.getReservedContainer();

  app.unreserve(node, priority);
  node.unreserveResource(app);

  Resource freed = previouslyReserved.getContainer().getResource();
  getMetrics().unreserveResource(app.getUser(), freed);
}