/**
   * Assign a container to this node to facilitate {@code request}. If node does not have enough
   * memory, create a reservation. This is called once we are sure the particular request should be
   * facilitated by this node.
   *
   * @param node The node to try placing the container on.
   * @param request The ResourceRequest we're trying to satisfy.
   * @param type The locality of the assignment.
   * @param reserved Whether there's already a container reserved for this app on the node.
   * @return If an assignment was made, returns the resources allocated to the container. If a
   *     reservation was made, returns FairScheduler.CONTAINER_RESERVED. If no assignment or
   *     reservation was made, returns an empty resource.
   */
  private Resource assignContainer(
      FSSchedulerNode node, ResourceRequest request, NodeType type, boolean reserved) {

    // How much does this request need?
    Resource capability = request.getCapability();

    // How much does the node have?
    Resource available = node.getAvailableResource();

    Container container = null;
    if (reserved) {
      container = node.getReservedContainer().getContainer();
    } else {
      container = createContainer(node, capability, request.getPriority());
    }

    // Can we allocate a container on this node?
    if (Resources.fitsIn(capability, available)) {
      // Inform the application of the new container for this request
      RMContainer allocatedContainer =
          allocate(type, node, request.getPriority(), request, container);
      if (allocatedContainer == null) {
        // Did the application need this resource?
        if (reserved) {
          unreserve(request.getPriority(), node);
        }
        return Resources.none();
      }

      // If we had previously made a reservation, delete it
      if (reserved) {
        unreserve(request.getPriority(), node);
      }

      // Inform the node
      node.allocateContainer(allocatedContainer);

      // If this container is used to run AM, update the leaf queue's AM usage
      if (getLiveContainers().size() == 1 && !getUnmanagedAM()) {
        getQueue().addAMResourceUsage(container.getResource());
        setAmRunning(true);
      }

      return container.getResource();
    } else {
      // The desired container won't fit here, so reserve
      reserve(request.getPriority(), node, container, reserved);

      return FairScheduler.CONTAINER_RESERVED;
    }
  }
Beispiel #2
0
 public static ResourceRequest newResourceRequest(ResourceRequest r) {
   ResourceRequest request = recordFactory.newRecordInstance(ResourceRequest.class);
   request.setPriority(r.getPriority());
   request.setResourceName(r.getResourceName());
   request.setCapability(r.getCapability());
   request.setNumContainers(r.getNumContainers());
   return request;
 }
  /**
   * Changed the return type to AllocateResponse which use to hold a reference to AMResponse.
   *
   * <p>AMResponse seems to have disappeared in CDH 4.6
   *
   * @param requestedContainers
   * @param releasedContainers
   * @return
   * @throws YarnRemoteException
   */
  public AllocateResponse allocateRequest(
      List<ResourceRequest> requestedContainers, List<ContainerId> releasedContainers)
      throws YarnRemoteException {

    if (amResourceManager == null)
      throw new IllegalStateException(
          "Cannot send allocation request before connecting to the resource manager!");

    LOG.info(
        "Sending allocation request"
            + ", requestedSize="
            + requestedContainers.size()
            + ", releasedSize="
            + releasedContainers.size());

    for (ResourceRequest req : requestedContainers)
      LOG.info(
          "Requesting container, host="
              + req.getHostName()
              + ", amount="
              + req.getNumContainers()
              + ", memory="
              + req.getCapability().getMemory()
              + ", priority="
              + req.getPriority().getPriority());

    for (ContainerId rel : releasedContainers) LOG.info("Releasing container: " + rel.getId());

    AllocateRequest request = Records.newRecord(AllocateRequest.class);
    request.setResponseId(rmRequestId.incrementAndGet());
    request.setApplicationAttemptId(appAttemptId);
    request.addAllAsks(requestedContainers);
    request.addAllReleases(releasedContainers);

    AllocateResponse response = amResourceManager.allocate(request);

    // response.getAllocatedContainers()

    LOG.debug(
        "Got an allocation response, "
            + ", responseId="
            + response.getResponseId()
            + ", numClusterNodes="
            + response.getNumClusterNodes()
            + ", headroom="
            + response.getAvailableResources().getMemory()
            + ", allocatedSize="
            + response.getAllocatedContainers().size()
            + ", updatedNodes="
            + response.getUpdatedNodes().size()
            + ", reboot="
            + response.getReboot()
            + ", completedSize="
            + response.getCompletedContainersStatuses().size());

    return response;
  }
  @Override
  public AllocateResponse allocate(float progressIndicator) throws YarnException, IOException {
    Preconditions.checkArgument(
        progressIndicator >= 0, "Progress indicator should not be negative");
    AllocateResponse allocateResponse = null;
    List<ResourceRequest> askList = null;
    List<ContainerId> releaseList = null;
    AllocateRequest allocateRequest = null;
    List<String> blacklistToAdd = new ArrayList<String>();
    List<String> blacklistToRemove = new ArrayList<String>();

    try {
      synchronized (this) {
        askList = new ArrayList<ResourceRequest>(ask.size());
        for (ResourceRequest r : ask) {
          // create a copy of ResourceRequest as we might change it while the
          // RPC layer is using it to send info across
          askList.add(
              ResourceRequest.newInstance(
                  r.getPriority(),
                  r.getResourceName(),
                  r.getCapability(),
                  r.getNumContainers(),
                  r.getRelaxLocality(),
                  r.getNodeLabelExpression()));
        }
        releaseList = new ArrayList<ContainerId>(release);
        // optimistically clear this collection assuming no RPC failure
        ask.clear();
        release.clear();

        blacklistToAdd.addAll(blacklistAdditions);
        blacklistToRemove.addAll(blacklistRemovals);

        ResourceBlacklistRequest blacklistRequest =
            (blacklistToAdd != null) || (blacklistToRemove != null)
                ? ResourceBlacklistRequest.newInstance(blacklistToAdd, blacklistToRemove)
                : null;

        allocateRequest =
            AllocateRequest.newInstance(
                lastResponseId, progressIndicator, askList, releaseList, blacklistRequest);
        // clear blacklistAdditions and blacklistRemovals before
        // unsynchronized part
        blacklistAdditions.clear();
        blacklistRemovals.clear();
      }

      try {
        allocateResponse = rmClient.allocate(allocateRequest);
      } catch (ApplicationMasterNotRegisteredException e) {
        LOG.warn("ApplicationMaster is out of sync with ResourceManager," + " hence resyncing.");
        synchronized (this) {
          release.addAll(this.pendingRelease);
          blacklistAdditions.addAll(this.blacklistedNodes);
          for (Map<String, TreeMap<Resource, ResourceRequestInfo>> rr :
              remoteRequestsTable.values()) {
            for (Map<Resource, ResourceRequestInfo> capabalities : rr.values()) {
              for (ResourceRequestInfo request : capabalities.values()) {
                addResourceRequestToAsk(request.remoteRequest);
              }
            }
          }
        }
        // re register with RM
        registerApplicationMaster();
        allocateResponse = allocate(progressIndicator);
        return allocateResponse;
      }

      synchronized (this) {
        // update these on successful RPC
        clusterNodeCount = allocateResponse.getNumClusterNodes();
        lastResponseId = allocateResponse.getResponseId();
        clusterAvailableResources = allocateResponse.getAvailableResources();
        if (!allocateResponse.getNMTokens().isEmpty()) {
          populateNMTokens(allocateResponse.getNMTokens());
        }
        if (allocateResponse.getAMRMToken() != null) {
          updateAMRMToken(allocateResponse.getAMRMToken());
        }
        if (!pendingRelease.isEmpty()
            && !allocateResponse.getCompletedContainersStatuses().isEmpty()) {
          removePendingReleaseRequests(allocateResponse.getCompletedContainersStatuses());
        }
      }
    } finally {
      // TODO how to differentiate remote yarn exception vs error in rpc
      if (allocateResponse == null) {
        // we hit an exception in allocate()
        // preserve ask and release for next call to allocate()
        synchronized (this) {
          release.addAll(releaseList);
          // requests could have been added or deleted during call to allocate
          // If requests were added/removed then there is nothing to do since
          // the ResourceRequest object in ask would have the actual new value.
          // If ask does not have this ResourceRequest then it was unchanged and
          // so we can add the value back safely.
          // This assumes that there will no concurrent calls to allocate() and
          // so we dont have to worry about ask being changed in the
          // synchronized block at the beginning of this method.
          for (ResourceRequest oldAsk : askList) {
            if (!ask.contains(oldAsk)) {
              ask.add(oldAsk);
            }
          }

          blacklistAdditions.addAll(blacklistToAdd);
          blacklistRemovals.addAll(blacklistToRemove);
        }
      }
    }
    return allocateResponse;
  }
  protected void containerFailedOnHost(String hostName) {
    if (!nodeBlacklistingEnabled) {
      return;
    }
    if (blacklistedNodes.contains(hostName)) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Host " + hostName + " is already blacklisted.");
      }
      return; // already blacklisted
    }
    Integer failures = nodeFailures.remove(hostName);
    failures = failures == null ? Integer.valueOf(0) : failures;
    failures++;
    LOG.info(failures + " failures on node " + hostName);
    if (failures >= maxTaskFailuresPerNode) {
      blacklistedNodes.add(hostName);
      // Even if blacklisting is ignored, continue to remove the host from
      // the request table. The RM may have additional nodes it can allocate on.
      LOG.info("Blacklisted host " + hostName);

      // remove all the requests corresponding to this hostname
      for (Map<String, Map<Resource, ResourceRequest>> remoteRequests :
          remoteRequestsTable.values()) {
        // remove from host if no pending allocations
        boolean foundAll = true;
        Map<Resource, ResourceRequest> reqMap = remoteRequests.get(hostName);
        if (reqMap != null) {
          for (ResourceRequest req : reqMap.values()) {
            if (!ask.remove(req)) {
              foundAll = false;
              // if ask already sent to RM, we can try and overwrite it if possible.
              // send a new ask to RM with numContainers
              // specified for the blacklisted host to be 0.
              ResourceRequest zeroedRequest =
                  ResourceRequest.newInstance(
                      req.getPriority(),
                      req.getResourceName(),
                      req.getCapability(),
                      req.getNumContainers(),
                      req.getRelaxLocality());

              zeroedRequest.setNumContainers(0);
              // to be sent to RM on next heartbeat
              addResourceRequestToAsk(zeroedRequest);
            }
          }
          // if all requests were still in ask queue
          // we can remove this request
          if (foundAll) {
            remoteRequests.remove(hostName);
          }
        }
        // TODO handling of rack blacklisting
        // Removing from rack should be dependent on no. of failures within the rack
        // Blacklisting a rack on the basis of a single node's blacklisting
        // may be overly aggressive.
        // Node failures could be co-related with other failures on the same rack
        // but we probably need a better approach at trying to decide how and when
        // to blacklist a rack
      }
    } else {
      nodeFailures.put(hostName, failures);
    }
  }