public static ResourceRequest newResourceRequest( Priority priority, String hostName, Resource capability, int numContainers) { ResourceRequest request = recordFactory.newRecordInstance(ResourceRequest.class); request.setPriority(priority); request.setResourceName(hostName); request.setCapability(capability); request.setNumContainers(numContainers); return request; }
/** * Changed the return type to AllocateResponse which use to hold a reference to AMResponse. * * <p>AMResponse seems to have disappeared in CDH 4.6 * * @param requestedContainers * @param releasedContainers * @return * @throws YarnRemoteException */ public AllocateResponse allocateRequest( List<ResourceRequest> requestedContainers, List<ContainerId> releasedContainers) throws YarnRemoteException { if (amResourceManager == null) throw new IllegalStateException( "Cannot send allocation request before connecting to the resource manager!"); LOG.info( "Sending allocation request" + ", requestedSize=" + requestedContainers.size() + ", releasedSize=" + releasedContainers.size()); for (ResourceRequest req : requestedContainers) LOG.info( "Requesting container, host=" + req.getHostName() + ", amount=" + req.getNumContainers() + ", memory=" + req.getCapability().getMemory() + ", priority=" + req.getPriority().getPriority()); for (ContainerId rel : releasedContainers) LOG.info("Releasing container: " + rel.getId()); AllocateRequest request = Records.newRecord(AllocateRequest.class); request.setResponseId(rmRequestId.incrementAndGet()); request.setApplicationAttemptId(appAttemptId); request.addAllAsks(requestedContainers); request.addAllReleases(releasedContainers); AllocateResponse response = amResourceManager.allocate(request); // response.getAllocatedContainers() LOG.debug( "Got an allocation response, " + ", responseId=" + response.getResponseId() + ", numClusterNodes=" + response.getNumClusterNodes() + ", headroom=" + response.getAvailableResources().getMemory() + ", allocatedSize=" + response.getAllocatedContainers().size() + ", updatedNodes=" + response.getUpdatedNodes().size() + ", reboot=" + response.getReboot() + ", completedSize=" + response.getCompletedContainersStatuses().size()); return response; }
/** * Whether this app has containers requests that could be satisfied on the given node, if the node * had full space. */ public boolean hasContainerForNode(Priority prio, FSSchedulerNode node) { ResourceRequest anyRequest = getResourceRequest(prio, ResourceRequest.ANY); ResourceRequest rackRequest = getResourceRequest(prio, node.getRackName()); ResourceRequest nodeRequest = getResourceRequest(prio, node.getNodeName()); return // There must be outstanding requests at the given priority: anyRequest != null && anyRequest.getNumContainers() > 0 && // If locality relaxation is turned off at *-level, there must be a // non-zero request for the node's rack: (anyRequest.getRelaxLocality() || (rackRequest != null && rackRequest.getNumContainers() > 0)) && // If locality relaxation is turned off at rack-level, there must be a // non-zero request at the node: (rackRequest == null || rackRequest.getRelaxLocality() || (nodeRequest != null && nodeRequest.getNumContainers() > 0)) && // The requested container must be able to fit on the node: Resources.lessThanOrEqual( RESOURCE_CALCULATOR, null, anyRequest.getCapability(), node.getRMNode().getTotalCapability()); }
private ResourceRequest validateAndCreateResourceRequest( ApplicationSubmissionContext submissionContext, boolean isRecovery) throws InvalidResourceRequestException { // Validation of the ApplicationSubmissionContext needs to be completed // here. Only those fields that are dependent on RM's configuration are // checked here as they have to be validated whether they are part of new // submission or just being recovered. // Check whether AM resource requirements are within required limits if (!submissionContext.getUnmanagedAM()) { ResourceRequest amReq = submissionContext.getAMContainerResourceRequest(); if (amReq == null) { amReq = BuilderUtils.newResourceRequest( RMAppAttemptImpl.AM_CONTAINER_PRIORITY, ResourceRequest.ANY, submissionContext.getResource(), 1); } // set label expression for AM container if (null == amReq.getNodeLabelExpression()) { amReq.setNodeLabelExpression(submissionContext.getNodeLabelExpression()); } try { SchedulerUtils.normalizeAndValidateRequest( amReq, scheduler.getMaximumResourceCapability(), submissionContext.getQueue(), scheduler, isRecovery, rmContext); } catch (InvalidResourceRequestException e) { LOG.warn( "RM app submission failed in validating AM resource request" + " for application " + submissionContext.getApplicationId(), e); throw e; } SchedulerUtils.normalizeRequest( amReq, scheduler.getResourceCalculator(), scheduler.getClusterResource(), scheduler.getMinimumResourceCapability(), scheduler.getMaximumResourceCapability(), scheduler.getMinimumResourceCapability()); return amReq; } return null; }
public static ResourceRequest newResourceRequest(ResourceRequest r) { ResourceRequest request = recordFactory.newRecordInstance(ResourceRequest.class); request.setPriority(r.getPriority()); request.setResourceName(r.getResourceName()); request.setCapability(r.getCapability()); request.setNumContainers(r.getNumContainers()); return request; }
/** * Setup a container request on specified node * * @param node the specified node * @return ResourceRequest sent to RM */ private ResourceRequest setupAContainerAskForRM(String node) { ResourceRequest request = Records.newRecord(ResourceRequest.class); request.setResourceName(node); request.setNumContainers(1); // important Priority priority = Records.newRecord(Priority.class); priority.setPriority(requestPriority); request.setPriority(priority); Resource capability = Records.newRecord(Resource.class); capability.setMemory(containerMemory); request.setCapability(capability); return request; }
@Override public void updateDemand() { demand = Resources.createResource(0); // Demand is current consumption plus outstanding requests Resources.addTo(demand, getCurrentConsumption()); // Add up outstanding resource requests synchronized (this) { for (Priority p : getPriorities()) { for (ResourceRequest r : getResourceRequests(p).values()) { Resource total = Resources.multiply(r.getCapability(), r.getNumContainers()); Resources.addTo(demand, total); } } } }
@Override protected AllocateResponse doContainerRequest() { List<ResourceRequest> requestedContainers = null; if (allocationDirty.getAndSet(false)) { requestedContainers = createRequests(); } else { requestedContainers = EMPTY; } // add pending containers to be released List<ContainerId> release = new ArrayList<ContainerId>(); ContainerId element = null; while ((element = releaseContainers.poll()) != null) { release.add(element); } if (log.isDebugEnabled()) { log.debug("Requesting containers using " + requestedContainers.size() + " requests."); for (ResourceRequest resourceRequest : requestedContainers) { log.debug( "ResourceRequest: " + resourceRequest + " with count=" + resourceRequest.getNumContainers() + " with hostName=" + resourceRequest.getResourceName()); } log.debug("Releasing containers " + release.size()); for (ContainerId cid : release) { log.debug("Release container=" + cid); } log.debug("Request id will be: " + requestId.get()); } // build the allocation request AllocateRequest request = Records.newRecord(AllocateRequest.class); request.setResponseId(requestId.get()); request.setAskList(requestedContainers); request.setReleaseList(release); request.setProgress(applicationProgress); // do request and return response AllocateResponse allocate = getRmTemplate().allocate(request); requestId.set(allocate.getResponseId()); return allocate; }
@Test public void testMove() { final String user = "******"; Queue parentQueue = createQueue("parent", null); Queue oldQueue = createQueue("old", parentQueue); Queue newQueue = createQueue("new", parentQueue); QueueMetrics parentMetrics = parentQueue.getMetrics(); QueueMetrics oldMetrics = oldQueue.getMetrics(); QueueMetrics newMetrics = newQueue.getMetrics(); ApplicationAttemptId appAttId = createAppAttemptId(0, 0); SchedulerApplicationAttempt app = new SchedulerApplicationAttempt( appAttId, user, oldQueue, oldQueue.getActiveUsersManager(), null); oldMetrics.submitApp(user); // Resource request Resource requestedResource = Resource.newInstance(1536, 2); Priority requestedPriority = Priority.newInstance(2); ResourceRequest request = ResourceRequest.newInstance(requestedPriority, ResourceRequest.ANY, requestedResource, 3); app.updateResourceRequests(Arrays.asList(request)); // Allocated container RMContainer container1 = createRMContainer(appAttId, 1, requestedResource); app.liveContainers.put(container1.getContainerId(), container1); SchedulerNode node = createNode(); app.appSchedulingInfo.allocate( NodeType.OFF_SWITCH, node, requestedPriority, request, container1.getContainer()); // Reserved container Priority prio1 = Priority.newInstance(1); Resource reservedResource = Resource.newInstance(2048, 3); RMContainer container2 = createReservedRMContainer(appAttId, 1, reservedResource, node.getNodeID(), prio1); Map<NodeId, RMContainer> reservations = new HashMap<NodeId, RMContainer>(); reservations.put(node.getNodeID(), container2); app.reservedContainers.put(prio1, reservations); oldMetrics.reserveResource(user, reservedResource); checkQueueMetrics(oldMetrics, 1, 1, 1536, 2, 2048, 3, 3072, 4); checkQueueMetrics(newMetrics, 0, 0, 0, 0, 0, 0, 0, 0); checkQueueMetrics(parentMetrics, 1, 1, 1536, 2, 2048, 3, 3072, 4); app.move(newQueue); checkQueueMetrics(oldMetrics, 0, 0, 0, 0, 0, 0, 0, 0); checkQueueMetrics(newMetrics, 1, 1, 1536, 2, 2048, 3, 3072, 4); checkQueueMetrics(parentMetrics, 1, 1, 1536, 2, 2048, 3, 3072, 4); }
/** * Assign a container to this node to facilitate {@code request}. If node does not have enough * memory, create a reservation. This is called once we are sure the particular request should be * facilitated by this node. */ private Resource assignContainer( FSSchedulerNode node, Priority priority, ResourceRequest request, NodeType type, boolean reserved, TransactionState transactionState) { // How much does this request need? Resource capability = request.getCapability(); // How much does the node have? Resource available = node.getAvailableResource(); Container container = null; if (reserved) { container = node.getReservedContainer().getContainer(); } else { container = createContainer(app, node, capability, priority, transactionState); } // Can we allocate a container on this node? if (Resources.fitsIn(capability, available)) { // Inform the application of the new container for this request RMContainer allocatedContainer = app.allocate(type, node, priority, request, container, transactionState); if (allocatedContainer == null) { // Did the application need this resource? if (reserved) { unreserve(priority, node); } return Resources.none(); } // If we had previously made a reservation, delete it if (reserved) { unreserve(priority, node); } // Inform the node node.allocateContainer(app.getApplicationId(), allocatedContainer); return container.getResource(); } else { // The desired container won't fit here, so reserve reserve(priority, node, container, reserved, transactionState); return FairScheduler.CONTAINER_RESERVED; } }
private void addResourceRequest(Priority priority, String resourceName, Resource capability) { Map<String, Map<Resource, ResourceRequest>> remoteRequests = this.remoteRequestsTable.get(priority); if (remoteRequests == null) { remoteRequests = new HashMap<String, Map<Resource, ResourceRequest>>(); this.remoteRequestsTable.put(priority, remoteRequests); if (LOG.isDebugEnabled()) { LOG.debug("Added priority=" + priority); } } Map<Resource, ResourceRequest> reqMap = remoteRequests.get(resourceName); if (reqMap == null) { reqMap = new HashMap<Resource, ResourceRequest>(); remoteRequests.put(resourceName, reqMap); } ResourceRequest remoteRequest = reqMap.get(capability); if (remoteRequest == null) { remoteRequest = recordFactory.newRecordInstance(ResourceRequest.class); remoteRequest.setPriority(priority); remoteRequest.setResourceName(resourceName); remoteRequest.setCapability(capability); remoteRequest.setNumContainers(0); reqMap.put(capability, remoteRequest); } remoteRequest.setNumContainers(remoteRequest.getNumContainers() + 1); // Note this down for next interaction with ResourceManager addResourceRequestToAsk(remoteRequest); if (LOG.isDebugEnabled()) { LOG.debug( "addResourceRequest:" + " applicationId=" + applicationId.getId() + " priority=" + priority.getPriority() + " resourceName=" + resourceName + " numContainers=" + remoteRequest.getNumContainers() + " #asks=" + ask.size()); } }
/** * Assign a container to this node to facilitate {@code request}. If node does not have enough * memory, create a reservation. This is called once we are sure the particular request should be * facilitated by this node. * * @param node The node to try placing the container on. * @param request The ResourceRequest we're trying to satisfy. * @param type The locality of the assignment. * @param reserved Whether there's already a container reserved for this app on the node. * @return If an assignment was made, returns the resources allocated to the container. If a * reservation was made, returns FairScheduler.CONTAINER_RESERVED. If no assignment or * reservation was made, returns an empty resource. */ private Resource assignContainer( FSSchedulerNode node, ResourceRequest request, NodeType type, boolean reserved) { // How much does this request need? Resource capability = request.getCapability(); // How much does the node have? Resource available = node.getAvailableResource(); Container container = null; if (reserved) { container = node.getReservedContainer().getContainer(); } else { container = createContainer(node, capability, request.getPriority()); } // Can we allocate a container on this node? if (Resources.fitsIn(capability, available)) { // Inform the application of the new container for this request RMContainer allocatedContainer = allocate(type, node, request.getPriority(), request, container); if (allocatedContainer == null) { // Did the application need this resource? if (reserved) { unreserve(request.getPriority(), node); } return Resources.none(); } // If we had previously made a reservation, delete it if (reserved) { unreserve(request.getPriority(), node); } // Inform the node node.allocateContainer(allocatedContainer); // If this container is used to run AM, update the leaf queue's AM usage if (getLiveContainers().size() == 1 && !getUnmanagedAM()) { getQueue().addAMResourceUsage(container.getResource()); setAmRunning(true); } return container.getResource(); } else { // The desired container won't fit here, so reserve reserve(request.getPriority(), node, container, reserved); return FairScheduler.CONTAINER_RESERVED; } }
/** * Utility method creating a {@link ResourceRequest}. * * @param numContainers number of containers to request * @return request to be sent to resource manager */ private ResourceRequest getContainerResourceRequest( int numContainers, String hostName, boolean relaxLocality) { ResourceRequest request = Records.newRecord(ResourceRequest.class); request.setRelaxLocality(relaxLocality); request.setResourceName(hostName); request.setNumContainers(numContainers); Priority pri = Records.newRecord(Priority.class); pri.setPriority(priority); request.setPriority(pri); Resource capability = Records.newRecord(Resource.class); capability.setMemory(memory); ResourceCompat.setVirtualCores(capability, virtualcores); request.setCapability(capability); return request; }
private Resource assignContainer(FSSchedulerNode node, boolean reserved) { if (LOG.isDebugEnabled()) { LOG.debug("Node offered to app: " + getName() + " reserved: " + reserved); } Collection<Priority> prioritiesToTry = (reserved) ? Arrays.asList(node.getReservedContainer().getReservedPriority()) : getPriorities(); // For each priority, see if we can schedule a node local, rack local // or off-switch request. Rack of off-switch requests may be delayed // (not scheduled) in order to promote better locality. synchronized (this) { for (Priority priority : prioritiesToTry) { if (getTotalRequiredResources(priority) <= 0 || !hasContainerForNode(priority, node)) { continue; } addSchedulingOpportunity(priority); // Check the AM resource usage for the leaf queue if (getLiveContainers().size() == 0 && !getUnmanagedAM()) { if (!getQueue().canRunAppAM(getAMResource())) { return Resources.none(); } } ResourceRequest rackLocalRequest = getResourceRequest(priority, node.getRackName()); ResourceRequest localRequest = getResourceRequest(priority, node.getNodeName()); if (localRequest != null && !localRequest.getRelaxLocality()) { LOG.warn("Relax locality off is not supported on local request: " + localRequest); } NodeType allowedLocality; if (scheduler.isContinuousSchedulingEnabled()) { allowedLocality = getAllowedLocalityLevelByTime( priority, scheduler.getNodeLocalityDelayMs(), scheduler.getRackLocalityDelayMs(), scheduler.getClock().getTime()); } else { allowedLocality = getAllowedLocalityLevel( priority, scheduler.getNumClusterNodes(), scheduler.getNodeLocalityThreshold(), scheduler.getRackLocalityThreshold()); } if (rackLocalRequest != null && rackLocalRequest.getNumContainers() != 0 && localRequest != null && localRequest.getNumContainers() != 0) { return assignContainer(node, localRequest, NodeType.NODE_LOCAL, reserved); } if (rackLocalRequest != null && !rackLocalRequest.getRelaxLocality()) { continue; } if (rackLocalRequest != null && rackLocalRequest.getNumContainers() != 0 && (allowedLocality.equals(NodeType.RACK_LOCAL) || allowedLocality.equals(NodeType.OFF_SWITCH))) { return assignContainer(node, rackLocalRequest, NodeType.RACK_LOCAL, reserved); } ResourceRequest offSwitchRequest = getResourceRequest(priority, ResourceRequest.ANY); if (offSwitchRequest != null && !offSwitchRequest.getRelaxLocality()) { continue; } if (offSwitchRequest != null && offSwitchRequest.getNumContainers() != 0 && allowedLocality.equals(NodeType.OFF_SWITCH)) { return assignContainer(node, offSwitchRequest, NodeType.OFF_SWITCH, reserved); } } } return Resources.none(); }
private void decResourceRequest(Priority priority, String resourceName, Resource capability) { Map<String, Map<Resource, ResourceRequest>> remoteRequests = this.remoteRequestsTable.get(priority); Map<Resource, ResourceRequest> reqMap = remoteRequests.get(resourceName); if (reqMap == null) { // as we modify the resource requests by filtering out blacklisted hosts // when they are added, this value may be null when being // decremented if (LOG.isDebugEnabled()) { LOG.debug( "Not decrementing resource as " + resourceName + " is not present in request table"); } return; } ResourceRequest remoteRequest = reqMap.get(capability); if (LOG.isDebugEnabled()) { LOG.debug( "BEFORE decResourceRequest:" + " applicationId=" + applicationId.getId() + " priority=" + priority.getPriority() + " resourceName=" + resourceName + " numContainers=" + remoteRequest.getNumContainers() + " #asks=" + ask.size()); } if (remoteRequest.getNumContainers() > 0) { // based on blacklisting comments above we can end up decrementing more // than requested. so guard for that. remoteRequest.setNumContainers(remoteRequest.getNumContainers() - 1); } if (remoteRequest.getNumContainers() == 0) { reqMap.remove(capability); if (reqMap.size() == 0) { remoteRequests.remove(resourceName); } if (remoteRequests.size() == 0) { remoteRequestsTable.remove(priority); } } // send the updated resource request to RM // send 0 container count requests also to cancel previous requests addResourceRequestToAsk(remoteRequest); if (LOG.isDebugEnabled()) { LOG.info( "AFTER decResourceRequest:" + " applicationId=" + applicationId.getId() + " priority=" + priority.getPriority() + " resourceName=" + resourceName + " numContainers=" + remoteRequest.getNumContainers() + " #asks=" + ask.size()); } }
private void updateQueueWithAllocateRequest( Allocation allocation, ApplicationAttemptId attemptId, List<ResourceRequest> resourceRequests, List<ContainerId> containerIds) throws IOException { // update queue information Resource pendingResource = Resources.createResource(0, 0); Resource allocatedResource = Resources.createResource(0, 0); String queueName = appQueueMap.get(attemptId.getApplicationId()); // container requested for (ResourceRequest request : resourceRequests) { if (request.getResourceName().equals(ResourceRequest.ANY)) { Resources.addTo( pendingResource, Resources.multiply(request.getCapability(), request.getNumContainers())); } } // container allocated for (Container container : allocation.getContainers()) { Resources.addTo(allocatedResource, container.getResource()); Resources.subtractFrom(pendingResource, container.getResource()); } // container released from AM SchedulerAppReport report = scheduler.getSchedulerAppInfo(attemptId); for (ContainerId containerId : containerIds) { Container container = null; for (RMContainer c : report.getLiveContainers()) { if (c.getContainerId().equals(containerId)) { container = c.getContainer(); break; } } if (container != null) { // released allocated containers Resources.subtractFrom(allocatedResource, container.getResource()); } else { for (RMContainer c : report.getReservedContainers()) { if (c.getContainerId().equals(containerId)) { container = c.getContainer(); break; } } if (container != null) { // released reserved containers Resources.subtractFrom(pendingResource, container.getResource()); } } } // containers released/preemption from scheduler Set<ContainerId> preemptionContainers = new HashSet<ContainerId>(); if (allocation.getContainerPreemptions() != null) { preemptionContainers.addAll(allocation.getContainerPreemptions()); } if (allocation.getStrictContainerPreemptions() != null) { preemptionContainers.addAll(allocation.getStrictContainerPreemptions()); } if (!preemptionContainers.isEmpty()) { for (ContainerId containerId : preemptionContainers) { if (!preemptionContainerMap.containsKey(containerId)) { Container container = null; for (RMContainer c : report.getLiveContainers()) { if (c.getContainerId().equals(containerId)) { container = c.getContainer(); break; } } if (container != null) { preemptionContainerMap.put(containerId, container.getResource()); } } } } // update metrics SortedMap<String, Counter> counterMap = metrics.getCounters(); String names[] = new String[] { "counter.queue." + queueName + ".pending.memory", "counter.queue." + queueName + ".pending.cores", "counter.queue." + queueName + ".allocated.memory", "counter.queue." + queueName + ".allocated.cores" }; int values[] = new int[] { pendingResource.getMemory(), pendingResource.getVirtualCores(), allocatedResource.getMemory(), allocatedResource.getVirtualCores() }; for (int i = names.length - 1; i >= 0; i--) { if (!counterMap.containsKey(names[i])) { metrics.counter(names[i]); counterMap = metrics.getCounters(); } counterMap.get(names[i]).inc(values[i]); } queueLock.lock(); try { if (!schedulerMetrics.isTracked(queueName)) { schedulerMetrics.trackQueue(queueName); } } finally { queueLock.unlock(); } }
@Override public AllocateResponse allocate(float progressIndicator) throws YarnException, IOException { Preconditions.checkArgument( progressIndicator >= 0, "Progress indicator should not be negative"); AllocateResponse allocateResponse = null; List<ResourceRequest> askList = null; List<ContainerId> releaseList = null; AllocateRequest allocateRequest = null; List<String> blacklistToAdd = new ArrayList<String>(); List<String> blacklistToRemove = new ArrayList<String>(); try { synchronized (this) { askList = new ArrayList<ResourceRequest>(ask.size()); for (ResourceRequest r : ask) { // create a copy of ResourceRequest as we might change it while the // RPC layer is using it to send info across askList.add( ResourceRequest.newInstance( r.getPriority(), r.getResourceName(), r.getCapability(), r.getNumContainers(), r.getRelaxLocality(), r.getNodeLabelExpression())); } releaseList = new ArrayList<ContainerId>(release); // optimistically clear this collection assuming no RPC failure ask.clear(); release.clear(); blacklistToAdd.addAll(blacklistAdditions); blacklistToRemove.addAll(blacklistRemovals); ResourceBlacklistRequest blacklistRequest = (blacklistToAdd != null) || (blacklistToRemove != null) ? ResourceBlacklistRequest.newInstance(blacklistToAdd, blacklistToRemove) : null; allocateRequest = AllocateRequest.newInstance( lastResponseId, progressIndicator, askList, releaseList, blacklistRequest); // clear blacklistAdditions and blacklistRemovals before // unsynchronized part blacklistAdditions.clear(); blacklistRemovals.clear(); } try { allocateResponse = rmClient.allocate(allocateRequest); } catch (ApplicationMasterNotRegisteredException e) { LOG.warn("ApplicationMaster is out of sync with ResourceManager," + " hence resyncing."); synchronized (this) { release.addAll(this.pendingRelease); blacklistAdditions.addAll(this.blacklistedNodes); for (Map<String, TreeMap<Resource, ResourceRequestInfo>> rr : remoteRequestsTable.values()) { for (Map<Resource, ResourceRequestInfo> capabalities : rr.values()) { for (ResourceRequestInfo request : capabalities.values()) { addResourceRequestToAsk(request.remoteRequest); } } } } // re register with RM registerApplicationMaster(); allocateResponse = allocate(progressIndicator); return allocateResponse; } synchronized (this) { // update these on successful RPC clusterNodeCount = allocateResponse.getNumClusterNodes(); lastResponseId = allocateResponse.getResponseId(); clusterAvailableResources = allocateResponse.getAvailableResources(); if (!allocateResponse.getNMTokens().isEmpty()) { populateNMTokens(allocateResponse.getNMTokens()); } if (allocateResponse.getAMRMToken() != null) { updateAMRMToken(allocateResponse.getAMRMToken()); } if (!pendingRelease.isEmpty() && !allocateResponse.getCompletedContainersStatuses().isEmpty()) { removePendingReleaseRequests(allocateResponse.getCompletedContainersStatuses()); } } } finally { // TODO how to differentiate remote yarn exception vs error in rpc if (allocateResponse == null) { // we hit an exception in allocate() // preserve ask and release for next call to allocate() synchronized (this) { release.addAll(releaseList); // requests could have been added or deleted during call to allocate // If requests were added/removed then there is nothing to do since // the ResourceRequest object in ask would have the actual new value. // If ask does not have this ResourceRequest then it was unchanged and // so we can add the value back safely. // This assumes that there will no concurrent calls to allocate() and // so we dont have to worry about ask being changed in the // synchronized block at the beginning of this method. for (ResourceRequest oldAsk : askList) { if (!ask.contains(oldAsk)) { ask.add(oldAsk); } } blacklistAdditions.addAll(blacklistToAdd); blacklistRemovals.addAll(blacklistToRemove); } } } return allocateResponse; }
private Resource assignContainer( FSSchedulerNode node, boolean reserved, TransactionState transactionState) { if (LOG.isDebugEnabled()) { LOG.debug("Node offered to app: " + getName() + " reserved: " + reserved); } if (reserved) { RMContainer rmContainer = node.getReservedContainer(); Priority priority = rmContainer.getReservedPriority(); // Make sure the application still needs requests at this priority if (app.getTotalRequiredResources(priority) == 0) { unreserve(priority, node); return Resources.none(); } } Collection<Priority> prioritiesToTry = (reserved) ? Arrays.asList(node.getReservedContainer().getReservedPriority()) : app.getPriorities(); // For each priority, see if we can schedule a node local, rack local // or off-switch request. Rack of off-switch requests may be delayed // (not scheduled) in order to promote better locality. synchronized (app) { for (Priority priority : prioritiesToTry) { if (app.getTotalRequiredResources(priority) <= 0 || !hasContainerForNode(priority, node)) { continue; } app.addSchedulingOpportunity(priority); ResourceRequest rackLocalRequest = app.getResourceRequest(priority, node.getRackName()); ResourceRequest localRequest = app.getResourceRequest(priority, node.getNodeName()); if (localRequest != null && !localRequest.getRelaxLocality()) { LOG.warn("Relax locality off is not supported on local request: " + localRequest); } NodeType allowedLocality; if (scheduler.isContinuousSchedulingEnabled()) { allowedLocality = app.getAllowedLocalityLevelByTime( priority, scheduler.getNodeLocalityDelayMs(), scheduler.getRackLocalityDelayMs(), scheduler.getClock().getTime()); } else { allowedLocality = app.getAllowedLocalityLevel( priority, scheduler.getNumClusterNodes(), scheduler.getNodeLocalityThreshold(), scheduler.getRackLocalityThreshold()); } if (rackLocalRequest != null && rackLocalRequest.getNumContainers() != 0 && localRequest != null && localRequest.getNumContainers() != 0) { return assignContainer( node, priority, localRequest, NodeType.NODE_LOCAL, reserved, transactionState); } if (rackLocalRequest != null && !rackLocalRequest.getRelaxLocality()) { continue; } if (rackLocalRequest != null && rackLocalRequest.getNumContainers() != 0 && (allowedLocality.equals(NodeType.RACK_LOCAL) || allowedLocality.equals(NodeType.OFF_SWITCH))) { return assignContainer( node, priority, rackLocalRequest, NodeType.RACK_LOCAL, reserved, transactionState); } ResourceRequest offSwitchRequest = app.getResourceRequest(priority, ResourceRequest.ANY); if (offSwitchRequest != null && !offSwitchRequest.getRelaxLocality()) { continue; } if (offSwitchRequest != null && offSwitchRequest.getNumContainers() != 0 && allowedLocality.equals(NodeType.OFF_SWITCH)) { return assignContainer( node, priority, offSwitchRequest, NodeType.OFF_SWITCH, reserved, transactionState); } } } return Resources.none(); }
ResourceRequestInfo( Priority priority, String resourceName, Resource capability, boolean relaxLocality) { remoteRequest = ResourceRequest.newInstance(priority, resourceName, capability, 0); remoteRequest.setRelaxLocality(relaxLocality); containerRequests = new LinkedHashSet<T>(); }
protected void containerFailedOnHost(String hostName) { if (!nodeBlacklistingEnabled) { return; } if (blacklistedNodes.contains(hostName)) { if (LOG.isDebugEnabled()) { LOG.debug("Host " + hostName + " is already blacklisted."); } return; // already blacklisted } Integer failures = nodeFailures.remove(hostName); failures = failures == null ? Integer.valueOf(0) : failures; failures++; LOG.info(failures + " failures on node " + hostName); if (failures >= maxTaskFailuresPerNode) { blacklistedNodes.add(hostName); // Even if blacklisting is ignored, continue to remove the host from // the request table. The RM may have additional nodes it can allocate on. LOG.info("Blacklisted host " + hostName); // remove all the requests corresponding to this hostname for (Map<String, Map<Resource, ResourceRequest>> remoteRequests : remoteRequestsTable.values()) { // remove from host if no pending allocations boolean foundAll = true; Map<Resource, ResourceRequest> reqMap = remoteRequests.get(hostName); if (reqMap != null) { for (ResourceRequest req : reqMap.values()) { if (!ask.remove(req)) { foundAll = false; // if ask already sent to RM, we can try and overwrite it if possible. // send a new ask to RM with numContainers // specified for the blacklisted host to be 0. ResourceRequest zeroedRequest = ResourceRequest.newInstance( req.getPriority(), req.getResourceName(), req.getCapability(), req.getNumContainers(), req.getRelaxLocality()); zeroedRequest.setNumContainers(0); // to be sent to RM on next heartbeat addResourceRequestToAsk(zeroedRequest); } } // if all requests were still in ask queue // we can remove this request if (foundAll) { remoteRequests.remove(hostName); } } // TODO handling of rack blacklisting // Removing from rack should be dependent on no. of failures within the rack // Blacklisting a rack on the basis of a single node's blacklisting // may be overly aggressive. // Node failures could be co-related with other failures on the same rack // but we probably need a better approach at trying to decide how and when // to blacklist a rack } } else { nodeFailures.put(hostName, failures); } }