/**
   * Reports whether this app has an outstanding request at {@code prio} that the given node could
   * host, judged against the node's total capability rather than what is currently free.
   *
   * @param prio the priority whose requests are examined
   * @param node the candidate node
   * @return {@code true} only if there is a non-empty ANY request at this priority, the locality
   *     relaxation rules allow this node, and the requested capability is no larger than the
   *     node's total capability
   */
  public boolean hasContainerForNode(Priority prio, FSSchedulerNode node) {
    ResourceRequest offSwitch = getResourceRequest(prio, ResourceRequest.ANY);
    ResourceRequest onRack = getResourceRequest(prio, node.getRackName());
    ResourceRequest onNode = getResourceRequest(prio, node.getNodeName());

    // Without an outstanding ANY request there is nothing to place at this priority.
    if (offSwitch == null || offSwitch.getNumContainers() <= 0) {
      return false;
    }

    // Locality relaxation off at *-level: the node's rack must itself be asked for.
    if (!offSwitch.getRelaxLocality() && (onRack == null || onRack.getNumContainers() <= 0)) {
      return false;
    }

    // Locality relaxation off at rack-level: the node must itself be asked for.
    if (onRack != null
        && !onRack.getRelaxLocality()
        && (onNode == null || onNode.getNumContainers() <= 0)) {
      return false;
    }

    // Finally, the requested container has to fit within the node's total capability.
    return Resources.lessThanOrEqual(
        RESOURCE_CALCULATOR,
        null,
        offSwitch.getCapability(),
        node.getRMNode().getTotalCapability());
  }
// Example #2
// 0
 /**
  * Builds a fresh {@link ResourceRequest} record carrying over selected fields of {@code r}.
  *
  * <p>Only the priority, resource name, capability and number of containers are transferred;
  * no other attribute of {@code r} is read. The values returned by the getters are handed to
  * the setters as-is (no cloning is done here).
  *
  * @param r the request to copy from
  * @return a new record instance populated from {@code r}
  */
 public static ResourceRequest newResourceRequest(ResourceRequest r) {
   final ResourceRequest copy = recordFactory.newRecordInstance(ResourceRequest.class);

   // Where and how urgently.
   copy.setPriority(r.getPriority());
   copy.setResourceName(r.getResourceName());

   // What and how many.
   copy.setCapability(r.getCapability());
   copy.setNumContainers(r.getNumContainers());

   return copy;
 }
  /**
   * Sends one allocate call to the resource manager containing the given container requests and
   * container releases, and returns the raw response.
   *
   * <p>The return type was changed to AllocateResponse, which used to hold a reference to
   * AMResponse. AMResponse seems to have disappeared in CDH 4.6.
   *
   * @param requestedContainers the resource requests ("asks") to add to the allocate request
   * @param releasedContainers the ids of containers to release
   * @return the response returned by the resource manager's allocate call
   * @throws IllegalStateException if no connection to the resource manager has been set up yet
   * @throws YarnRemoteException declared by this method; presumably propagated from the
   *     allocate call to the resource manager
   */
  public AllocateResponse allocateRequest(
      List<ResourceRequest> requestedContainers, List<ContainerId> releasedContainers)
      throws YarnRemoteException {

    if (amResourceManager == null) {
      throw new IllegalStateException(
          "Cannot send allocation request before connecting to the resource manager!");
    }

    LOG.info(
        "Sending allocation request"
            + ", requestedSize="
            + requestedContainers.size()
            + ", releasedSize="
            + releasedContainers.size());

    for (ResourceRequest req : requestedContainers) {
      LOG.info(
          "Requesting container, host="
              + req.getHostName()
              + ", amount="
              + req.getNumContainers()
              + ", memory="
              + req.getCapability().getMemory()
              + ", priority="
              + req.getPriority().getPriority());
    }

    for (ContainerId rel : releasedContainers) {
      LOG.info("Releasing container: " + rel.getId());
    }

    AllocateRequest request = Records.newRecord(AllocateRequest.class);
    // Each call carries the next value of the request-id counter.
    request.setResponseId(rmRequestId.incrementAndGet());
    request.setApplicationAttemptId(appAttemptId);
    request.addAllAsks(requestedContainers);
    request.addAllReleases(releasedContainers);

    AllocateResponse response = amResourceManager.allocate(request);

    // Only assemble the (fairly expensive) summary when it will actually be written.
    if (LOG.isDebugEnabled()) {
      LOG.debug(
          "Got an allocation response"
              + ", responseId="
              + response.getResponseId()
              + ", numClusterNodes="
              + response.getNumClusterNodes()
              + ", headroom="
              + response.getAvailableResources().getMemory()
              + ", allocatedSize="
              + response.getAllocatedContainers().size()
              + ", updatedNodes="
              + response.getUpdatedNodes().size()
              + ", reboot="
              + response.getReboot()
              + ", completedSize="
              + response.getCompletedContainersStatuses().size());
    }

    return response;
  }
  /**
   * Places a container for {@code request} on {@code node}, or reserves one when the request does
   * not fit into the node's currently available resources. Callers invoke this only after
   * deciding that this node should serve the request.
   *
   * @param node The node to try placing the container on.
   * @param request The ResourceRequest we're trying to satisfy.
   * @param type The locality of the assignment.
   * @param reserved Whether there's already a container reserved for this app on the node.
   * @return The allocated container's resource when an assignment was made;
   *     FairScheduler.CONTAINER_RESERVED when a reservation was made; an empty resource when
   *     neither happened.
   */
  private Resource assignContainer(
      FSSchedulerNode node, ResourceRequest request, NodeType type, boolean reserved) {

    // What the request asks for versus what the node can currently offer.
    Resource needed = request.getCapability();
    Resource free = node.getAvailableResource();

    // Reuse the reserved container if there is one, otherwise mint a new one.
    Container container =
        reserved
            ? node.getReservedContainer().getContainer()
            : createContainer(node, needed, request.getPriority());

    // Not enough room right now: hold (or keep holding) a reservation instead.
    if (!Resources.fitsIn(needed, free)) {
      reserve(request.getPriority(), node, container, reserved);
      return FairScheduler.CONTAINER_RESERVED;
    }

    // Tell the application about the new container for this request.
    RMContainer allocated = allocate(type, node, request.getPriority(), request, container);

    // A prior reservation is dropped whether or not the application accepted the container.
    if (reserved) {
      unreserve(request.getPriority(), node);
    }

    // The application turned out not to need this resource.
    if (allocated == null) {
      return Resources.none();
    }

    // Tell the node about the container it now hosts.
    node.allocateContainer(allocated);

    // The sole live container of a managed-AM app is taken to be the AM: account for it on the
    // leaf queue and mark the AM as running.
    if (getLiveContainers().size() == 1 && !getUnmanagedAM()) {
      getQueue().addAMResourceUsage(container.getResource());
      setAmRunning(true);
    }

    return container.getResource();
  }
// Example #5
// 0
  /**
   * Places a container for {@code request} on {@code node}, or reserves one when the request does
   * not fit into the node's currently available resources. Callers invoke this only after
   * deciding that this node should serve the request.
   *
   * @param node the node to try placing the container on
   * @param priority the priority used for container creation, allocation and (un)reservation
   * @param request the resource request being satisfied
   * @param type the locality of the assignment
   * @param reserved whether a container is already reserved for this app on the node
   * @param transactionState passed through to container creation, allocation and reservation
   * @return the allocated container's resource when an assignment was made;
   *     FairScheduler.CONTAINER_RESERVED when a reservation was made; an empty resource when
   *     the application did not take the container
   */
  private Resource assignContainer(
      FSSchedulerNode node,
      Priority priority,
      ResourceRequest request,
      NodeType type,
      boolean reserved,
      TransactionState transactionState) {

    // What the request asks for versus what the node can currently offer.
    Resource needed = request.getCapability();
    Resource free = node.getAvailableResource();

    // Reuse the reserved container if there is one, otherwise mint a new one.
    Container container =
        reserved
            ? node.getReservedContainer().getContainer()
            : createContainer(app, node, needed, priority, transactionState);

    // Not enough room right now: hold (or keep holding) a reservation instead.
    if (!Resources.fitsIn(needed, free)) {
      reserve(priority, node, container, reserved, transactionState);
      return FairScheduler.CONTAINER_RESERVED;
    }

    // Tell the application about the new container for this request.
    RMContainer allocated =
        app.allocate(type, node, priority, request, container, transactionState);

    // A prior reservation is dropped whether or not the application accepted the container.
    if (reserved) {
      unreserve(priority, node);
    }

    // The application turned out not to need this resource.
    if (allocated == null) {
      return Resources.none();
    }

    // Tell the node about the container it now hosts.
    node.allocateContainer(app.getApplicationId(), allocated);

    return container.getResource();
  }
  /**
   * Recomputes {@code demand} as the current consumption plus, for every outstanding resource
   * request at every priority, the request's capability multiplied by its container count.
   */
  @Override
  public void updateDemand() {
    // Start from zero and fold in what is already being consumed.
    demand = Resources.createResource(0);
    Resources.addTo(demand, getCurrentConsumption());

    // The walk over the outstanding requests is done while holding this object's monitor.
    synchronized (this) {
      for (Priority priority : getPriorities()) {
        for (ResourceRequest pending : getResourceRequests(priority).values()) {
          Resources.addTo(
              demand, Resources.multiply(pending.getCapability(), pending.getNumContainers()));
        }
      }
    }
  }
  /**
   * Sends the accumulated asks, releases and blacklist changes to the RM in a single allocate
   * call and applies the response to local bookkeeping.
   *
   * <p>The pending collections are snapshotted and cleared up front on the assumption that the
   * RPC succeeds. If no response was obtained, the {@code finally} block puts the snapshotted
   * entries back so the next call sends them again. If the RM reports that the application
   * master is not registered, pending releases, blacklisted nodes and all remote requests are
   * re-queued, the AM re-registers, and the allocate call is retried.
   *
   * @param progressIndicator progress value forwarded to the RM; must not be negative
   * @return the response from the RM
   * @throws IllegalArgumentException if {@code progressIndicator} is negative
   * @throws YarnException declared by this method; presumably propagated from the RM call
   * @throws IOException declared by this method; presumably propagated from the RM call
   */
  @Override
  public AllocateResponse allocate(float progressIndicator) throws YarnException, IOException {
    Preconditions.checkArgument(
        progressIndicator >= 0, "Progress indicator should not be negative");
    AllocateResponse allocateResponse = null;
    List<ResourceRequest> askList = null;
    List<ContainerId> releaseList = null;
    AllocateRequest allocateRequest = null;
    List<String> blacklistToAdd = new ArrayList<String>();
    List<String> blacklistToRemove = new ArrayList<String>();

    try {
      synchronized (this) {
        askList = new ArrayList<ResourceRequest>(ask.size());
        for (ResourceRequest r : ask) {
          // create a copy of ResourceRequest as we might change it while the
          // RPC layer is using it to send info across
          askList.add(
              ResourceRequest.newInstance(
                  r.getPriority(),
                  r.getResourceName(),
                  r.getCapability(),
                  r.getNumContainers(),
                  r.getRelaxLocality(),
                  r.getNodeLabelExpression()));
        }
        releaseList = new ArrayList<ContainerId>(release);
        // optimistically clear this collection assuming no RPC failure
        ask.clear();
        release.clear();

        blacklistToAdd.addAll(blacklistAdditions);
        blacklistToRemove.addAll(blacklistRemovals);

        // Both lists are always non-null at this point, so the blacklist request is built
        // unconditionally (the former null check could never select the null branch).
        ResourceBlacklistRequest blacklistRequest =
            ResourceBlacklistRequest.newInstance(blacklistToAdd, blacklistToRemove);

        allocateRequest =
            AllocateRequest.newInstance(
                lastResponseId, progressIndicator, askList, releaseList, blacklistRequest);
        // clear blacklistAdditions and blacklistRemovals before
        // unsynchronized part
        blacklistAdditions.clear();
        blacklistRemovals.clear();
      }

      try {
        allocateResponse = rmClient.allocate(allocateRequest);
      } catch (ApplicationMasterNotRegisteredException e) {
        LOG.warn("ApplicationMaster is out of sync with ResourceManager, hence resyncing.");
        synchronized (this) {
          // Re-queue everything the RM needs to hear again after re-registration.
          release.addAll(this.pendingRelease);
          blacklistAdditions.addAll(this.blacklistedNodes);
          for (Map<String, TreeMap<Resource, ResourceRequestInfo>> rr :
              remoteRequestsTable.values()) {
            for (Map<Resource, ResourceRequestInfo> capabilities : rr.values()) {
              for (ResourceRequestInfo request : capabilities.values()) {
                addResourceRequestToAsk(request.remoteRequest);
              }
            }
          }
        }
        // re register with RM
        registerApplicationMaster();
        allocateResponse = allocate(progressIndicator);
        return allocateResponse;
      }

      synchronized (this) {
        // update these on successful RPC
        clusterNodeCount = allocateResponse.getNumClusterNodes();
        lastResponseId = allocateResponse.getResponseId();
        clusterAvailableResources = allocateResponse.getAvailableResources();
        if (!allocateResponse.getNMTokens().isEmpty()) {
          populateNMTokens(allocateResponse.getNMTokens());
        }
        if (allocateResponse.getAMRMToken() != null) {
          updateAMRMToken(allocateResponse.getAMRMToken());
        }
        if (!pendingRelease.isEmpty()
            && !allocateResponse.getCompletedContainersStatuses().isEmpty()) {
          removePendingReleaseRequests(allocateResponse.getCompletedContainersStatuses());
        }
      }
    } finally {
      // TODO how to differentiate remote yarn exception vs error in rpc
      if (allocateResponse == null) {
        // we hit an exception in allocate()
        // preserve ask and release for next call to allocate()
        synchronized (this) {
          // The snapshots are still null if the failure happened before they were taken;
          // skipping them keeps this block from masking the original exception with an NPE.
          if (releaseList != null) {
            release.addAll(releaseList);
          }
          // requests could have been added or deleted during call to allocate
          // If requests were added/removed then there is nothing to do since
          // the ResourceRequest object in ask would have the actual new value.
          // If ask does not have this ResourceRequest then it was unchanged and
          // so we can add the value back safely.
          // This assumes that there will no concurrent calls to allocate() and
          // so we dont have to worry about ask being changed in the
          // synchronized block at the beginning of this method.
          if (askList != null) {
            for (ResourceRequest oldAsk : askList) {
              if (!ask.contains(oldAsk)) {
                ask.add(oldAsk);
              }
            }
          }

          blacklistAdditions.addAll(blacklistToAdd);
          blacklistRemovals.addAll(blacklistToRemove);
        }
      }
    }
    return allocateResponse;
  }
  /**
   * Records a container failure on {@code hostName} and blacklists the host once its failure
   * count reaches {@code maxTaskFailuresPerNode}.
   *
   * <p>Does nothing when node blacklisting is disabled or the host is already blacklisted. On
   * blacklisting, every request registered for the host is withdrawn: requests still waiting in
   * {@code ask} are removed from it, and for the others a copy with zero containers is queued so
   * the RM can overwrite what it was sent earlier.
   *
   * @param hostName the host on which a container failed
   */
  protected void containerFailedOnHost(String hostName) {
    if (!nodeBlacklistingEnabled) {
      return;
    }
    if (blacklistedNodes.contains(hostName)) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Host " + hostName + " is already blacklisted.");
      }
      return; // already blacklisted
    }

    // Take the old count out of the table; it is only written back while below the threshold.
    Integer previous = nodeFailures.remove(hostName);
    int failures = (previous == null) ? 1 : previous + 1;
    LOG.info(failures + " failures on node " + hostName);

    if (failures < maxTaskFailuresPerNode) {
      nodeFailures.put(hostName, failures);
      return;
    }

    blacklistedNodes.add(hostName);
    // Even if blacklisting is ignored, continue to remove the host from
    // the request table. The RM may have additional nodes it can allocate on.
    LOG.info("Blacklisted host " + hostName);

    // Withdraw every request that names this host.
    for (Map<String, Map<Resource, ResourceRequest>> perPriority :
        remoteRequestsTable.values()) {
      Map<Resource, ResourceRequest> hostRequests = perPriority.get(hostName);
      if (hostRequests == null) {
        continue;
      }

      // Stays true only if every request for the host was still waiting in ask.
      boolean allStillQueued = true;
      for (ResourceRequest pending : hostRequests.values()) {
        if (ask.remove(pending)) {
          continue;
        }
        allStillQueued = false;
        // The ask was already sent to the RM, so try to overwrite it: queue a copy for the
        // blacklisted host whose numContainers is 0.
        ResourceRequest zeroed =
            ResourceRequest.newInstance(
                pending.getPriority(),
                pending.getResourceName(),
                pending.getCapability(),
                pending.getNumContainers(),
                pending.getRelaxLocality());
        zeroed.setNumContainers(0);
        // to be sent to RM on next heartbeat
        addResourceRequestToAsk(zeroed);
      }

      // Nothing had been sent yet, so the host entry can simply be dropped.
      if (allStillQueued) {
        perPriority.remove(hostName);
      }

      // TODO handling of rack blacklisting
      // Removing from rack should be dependent on no. of failures within the rack
      // Blacklisting a rack on the basis of a single node's blacklisting
      // may be overly aggressive.
      // Node failures could be co-related with other failures on the same rack
      // but we probably need a better approach at trying to decide how and when
      // to blacklist a rack
    }
  }
  /**
   * Updates the per-queue pending/allocated counters for the queue that the given attempt's
   * application is mapped to, based on one allocate exchange.
   *
   * <p>Pending grows by the ANY-level requests and shrinks by newly allocated and released
   * reserved containers; allocated grows by newly allocated containers and shrinks by released
   * live ones. Containers named for preemption that are still live are remembered in
   * {@code preemptionContainerMap}. Finally the queue is registered with the scheduler metrics
   * if it is not tracked yet.
   *
   * @param allocation the scheduler's answer (allocated containers and preemption sets)
   * @param attemptId the application attempt the exchange belongs to
   * @param resourceRequests the resource requests sent by the AM
   * @param containerIds the containers the AM asked to release
   * @throws IOException declared by this method
   */
  private void updateQueueWithAllocateRequest(
      Allocation allocation,
      ApplicationAttemptId attemptId,
      List<ResourceRequest> resourceRequests,
      List<ContainerId> containerIds)
      throws IOException {
    Resource pendingResource = Resources.createResource(0, 0);
    Resource allocatedResource = Resources.createResource(0, 0);
    String queueName = appQueueMap.get(attemptId.getApplicationId());

    // Requested: only ANY-level requests are counted towards pending.
    for (ResourceRequest ask : resourceRequests) {
      if (!ask.getResourceName().equals(ResourceRequest.ANY)) {
        continue;
      }
      Resources.addTo(
          pendingResource, Resources.multiply(ask.getCapability(), ask.getNumContainers()));
    }

    // Allocated: moves from pending to allocated.
    for (Container granted : allocation.getContainers()) {
      Resources.addTo(allocatedResource, granted.getResource());
      Resources.subtractFrom(pendingResource, granted.getResource());
    }

    // Released by the AM: a live container reduces allocated, a reserved one reduces pending.
    SchedulerAppReport report = scheduler.getSchedulerAppInfo(attemptId);
    for (ContainerId releasedId : containerIds) {
      Container live = lookupContainerById(report.getLiveContainers(), releasedId);
      if (live != null) {
        Resources.subtractFrom(allocatedResource, live.getResource());
        continue;
      }
      Container held = lookupContainerById(report.getReservedContainers(), releasedId);
      if (held != null) {
        Resources.subtractFrom(pendingResource, held.getResource());
      }
    }

    // Released/preempted by the scheduler: remember the resource of each newly named container.
    Set<ContainerId> preemptionContainers = new HashSet<ContainerId>();
    if (allocation.getContainerPreemptions() != null) {
      preemptionContainers.addAll(allocation.getContainerPreemptions());
    }
    if (allocation.getStrictContainerPreemptions() != null) {
      preemptionContainers.addAll(allocation.getStrictContainerPreemptions());
    }
    for (ContainerId preemptedId : preemptionContainers) {
      if (preemptionContainerMap.containsKey(preemptedId)) {
        continue;
      }
      Container victim = lookupContainerById(report.getLiveContainers(), preemptedId);
      if (victim != null) {
        preemptionContainerMap.put(preemptedId, victim.getResource());
      }
    }

    // update metrics
    SortedMap<String, Counter> counterMap = metrics.getCounters();
    String prefix = "counter.queue." + queueName;
    String[] names = {
      prefix + ".pending.memory",
      prefix + ".pending.cores",
      prefix + ".allocated.memory",
      prefix + ".allocated.cores"
    };
    int[] values = {
      pendingResource.getMemory(),
      pendingResource.getVirtualCores(),
      allocatedResource.getMemory(),
      allocatedResource.getVirtualCores()
    };
    for (int i = names.length - 1; i >= 0; i--) {
      String counterName = names[i];
      if (!counterMap.containsKey(counterName)) {
        // Register the missing counter, then re-read the counter map so it is visible below.
        metrics.counter(counterName);
        counterMap = metrics.getCounters();
      }
      counterMap.get(counterName).inc(values[i]);
    }

    queueLock.lock();
    try {
      if (!schedulerMetrics.isTracked(queueName)) {
        schedulerMetrics.trackQueue(queueName);
      }
    } finally {
      queueLock.unlock();
    }
  }

  /** Returns the container of the first entry whose id equals {@code wanted}, or null. */
  private Container lookupContainerById(Iterable<RMContainer> candidates, ContainerId wanted) {
    for (RMContainer candidate : candidates) {
      if (candidate.getContainerId().equals(wanted)) {
        return candidate.getContainer();
      }
    }
    return null;
  }