Esempio n. 1
0
  /**
   * Whether this app has containers requests that could be satisfied on the given node, if the node
   * had full space.
   */
  public boolean hasContainerForNode(Priority prio, FSSchedulerNode node) {
    ResourceRequest anyRequest = getResourceRequest(prio, ResourceRequest.ANY);
    ResourceRequest rackRequest = getResourceRequest(prio, node.getRackName());
    ResourceRequest nodeRequest = getResourceRequest(prio, node.getNodeName());

    return
    // There must be outstanding requests at the given priority:
    anyRequest != null
        && anyRequest.getNumContainers() > 0
        &&
        // If locality relaxation is turned off at *-level, there must be a
        // non-zero request for the node's rack:
        (anyRequest.getRelaxLocality()
            || (rackRequest != null && rackRequest.getNumContainers() > 0))
        &&
        // If locality relaxation is turned off at rack-level, there must be a
        // non-zero request at the node:
        (rackRequest == null
            || rackRequest.getRelaxLocality()
            || (nodeRequest != null && nodeRequest.getNumContainers() > 0))
        &&
        // The requested container must be able to fit on the node:
        Resources.lessThanOrEqual(
            RESOURCE_CALCULATOR,
            null,
            anyRequest.getCapability(),
            node.getRMNode().getTotalCapability());
  }
  @Override
  public AllocateResponse allocate(float progressIndicator) throws YarnException, IOException {
    Preconditions.checkArgument(
        progressIndicator >= 0, "Progress indicator should not be negative");
    AllocateResponse allocateResponse = null;
    List<ResourceRequest> askList = null;
    List<ContainerId> releaseList = null;
    AllocateRequest allocateRequest = null;
    List<String> blacklistToAdd = new ArrayList<String>();
    List<String> blacklistToRemove = new ArrayList<String>();

    try {
      synchronized (this) {
        askList = new ArrayList<ResourceRequest>(ask.size());
        for (ResourceRequest r : ask) {
          // create a copy of ResourceRequest as we might change it while the
          // RPC layer is using it to send info across
          askList.add(
              ResourceRequest.newInstance(
                  r.getPriority(),
                  r.getResourceName(),
                  r.getCapability(),
                  r.getNumContainers(),
                  r.getRelaxLocality(),
                  r.getNodeLabelExpression()));
        }
        releaseList = new ArrayList<ContainerId>(release);
        // optimistically clear this collection assuming no RPC failure
        ask.clear();
        release.clear();

        blacklistToAdd.addAll(blacklistAdditions);
        blacklistToRemove.addAll(blacklistRemovals);

        ResourceBlacklistRequest blacklistRequest =
            (blacklistToAdd != null) || (blacklistToRemove != null)
                ? ResourceBlacklistRequest.newInstance(blacklistToAdd, blacklistToRemove)
                : null;

        allocateRequest =
            AllocateRequest.newInstance(
                lastResponseId, progressIndicator, askList, releaseList, blacklistRequest);
        // clear blacklistAdditions and blacklistRemovals before
        // unsynchronized part
        blacklistAdditions.clear();
        blacklistRemovals.clear();
      }

      try {
        allocateResponse = rmClient.allocate(allocateRequest);
      } catch (ApplicationMasterNotRegisteredException e) {
        LOG.warn("ApplicationMaster is out of sync with ResourceManager," + " hence resyncing.");
        synchronized (this) {
          release.addAll(this.pendingRelease);
          blacklistAdditions.addAll(this.blacklistedNodes);
          for (Map<String, TreeMap<Resource, ResourceRequestInfo>> rr :
              remoteRequestsTable.values()) {
            for (Map<Resource, ResourceRequestInfo> capabalities : rr.values()) {
              for (ResourceRequestInfo request : capabalities.values()) {
                addResourceRequestToAsk(request.remoteRequest);
              }
            }
          }
        }
        // re register with RM
        registerApplicationMaster();
        allocateResponse = allocate(progressIndicator);
        return allocateResponse;
      }

      synchronized (this) {
        // update these on successful RPC
        clusterNodeCount = allocateResponse.getNumClusterNodes();
        lastResponseId = allocateResponse.getResponseId();
        clusterAvailableResources = allocateResponse.getAvailableResources();
        if (!allocateResponse.getNMTokens().isEmpty()) {
          populateNMTokens(allocateResponse.getNMTokens());
        }
        if (allocateResponse.getAMRMToken() != null) {
          updateAMRMToken(allocateResponse.getAMRMToken());
        }
        if (!pendingRelease.isEmpty()
            && !allocateResponse.getCompletedContainersStatuses().isEmpty()) {
          removePendingReleaseRequests(allocateResponse.getCompletedContainersStatuses());
        }
      }
    } finally {
      // TODO how to differentiate remote yarn exception vs error in rpc
      if (allocateResponse == null) {
        // we hit an exception in allocate()
        // preserve ask and release for next call to allocate()
        synchronized (this) {
          release.addAll(releaseList);
          // requests could have been added or deleted during call to allocate
          // If requests were added/removed then there is nothing to do since
          // the ResourceRequest object in ask would have the actual new value.
          // If ask does not have this ResourceRequest then it was unchanged and
          // so we can add the value back safely.
          // This assumes that there will no concurrent calls to allocate() and
          // so we dont have to worry about ask being changed in the
          // synchronized block at the beginning of this method.
          for (ResourceRequest oldAsk : askList) {
            if (!ask.contains(oldAsk)) {
              ask.add(oldAsk);
            }
          }

          blacklistAdditions.addAll(blacklistToAdd);
          blacklistRemovals.addAll(blacklistToRemove);
        }
      }
    }
    return allocateResponse;
  }
  protected void containerFailedOnHost(String hostName) {
    if (!nodeBlacklistingEnabled) {
      return;
    }
    if (blacklistedNodes.contains(hostName)) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Host " + hostName + " is already blacklisted.");
      }
      return; // already blacklisted
    }
    Integer failures = nodeFailures.remove(hostName);
    failures = failures == null ? Integer.valueOf(0) : failures;
    failures++;
    LOG.info(failures + " failures on node " + hostName);
    if (failures >= maxTaskFailuresPerNode) {
      blacklistedNodes.add(hostName);
      // Even if blacklisting is ignored, continue to remove the host from
      // the request table. The RM may have additional nodes it can allocate on.
      LOG.info("Blacklisted host " + hostName);

      // remove all the requests corresponding to this hostname
      for (Map<String, Map<Resource, ResourceRequest>> remoteRequests :
          remoteRequestsTable.values()) {
        // remove from host if no pending allocations
        boolean foundAll = true;
        Map<Resource, ResourceRequest> reqMap = remoteRequests.get(hostName);
        if (reqMap != null) {
          for (ResourceRequest req : reqMap.values()) {
            if (!ask.remove(req)) {
              foundAll = false;
              // if ask already sent to RM, we can try and overwrite it if possible.
              // send a new ask to RM with numContainers
              // specified for the blacklisted host to be 0.
              ResourceRequest zeroedRequest =
                  ResourceRequest.newInstance(
                      req.getPriority(),
                      req.getResourceName(),
                      req.getCapability(),
                      req.getNumContainers(),
                      req.getRelaxLocality());

              zeroedRequest.setNumContainers(0);
              // to be sent to RM on next heartbeat
              addResourceRequestToAsk(zeroedRequest);
            }
          }
          // if all requests were still in ask queue
          // we can remove this request
          if (foundAll) {
            remoteRequests.remove(hostName);
          }
        }
        // TODO handling of rack blacklisting
        // Removing from rack should be dependent on no. of failures within the rack
        // Blacklisting a rack on the basis of a single node's blacklisting
        // may be overly aggressive.
        // Node failures could be co-related with other failures on the same rack
        // but we probably need a better approach at trying to decide how and when
        // to blacklist a rack
      }
    } else {
      nodeFailures.put(hostName, failures);
    }
  }
Esempio n. 4
0
  private Resource assignContainer(FSSchedulerNode node, boolean reserved) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Node offered to app: " + getName() + " reserved: " + reserved);
    }

    Collection<Priority> prioritiesToTry =
        (reserved)
            ? Arrays.asList(node.getReservedContainer().getReservedPriority())
            : getPriorities();

    // For each priority, see if we can schedule a node local, rack local
    // or off-switch request. Rack of off-switch requests may be delayed
    // (not scheduled) in order to promote better locality.
    synchronized (this) {
      for (Priority priority : prioritiesToTry) {
        if (getTotalRequiredResources(priority) <= 0 || !hasContainerForNode(priority, node)) {
          continue;
        }

        addSchedulingOpportunity(priority);

        // Check the AM resource usage for the leaf queue
        if (getLiveContainers().size() == 0 && !getUnmanagedAM()) {
          if (!getQueue().canRunAppAM(getAMResource())) {
            return Resources.none();
          }
        }

        ResourceRequest rackLocalRequest = getResourceRequest(priority, node.getRackName());
        ResourceRequest localRequest = getResourceRequest(priority, node.getNodeName());

        if (localRequest != null && !localRequest.getRelaxLocality()) {
          LOG.warn("Relax locality off is not supported on local request: " + localRequest);
        }

        NodeType allowedLocality;
        if (scheduler.isContinuousSchedulingEnabled()) {
          allowedLocality =
              getAllowedLocalityLevelByTime(
                  priority,
                  scheduler.getNodeLocalityDelayMs(),
                  scheduler.getRackLocalityDelayMs(),
                  scheduler.getClock().getTime());
        } else {
          allowedLocality =
              getAllowedLocalityLevel(
                  priority,
                  scheduler.getNumClusterNodes(),
                  scheduler.getNodeLocalityThreshold(),
                  scheduler.getRackLocalityThreshold());
        }

        if (rackLocalRequest != null
            && rackLocalRequest.getNumContainers() != 0
            && localRequest != null
            && localRequest.getNumContainers() != 0) {
          return assignContainer(node, localRequest, NodeType.NODE_LOCAL, reserved);
        }

        if (rackLocalRequest != null && !rackLocalRequest.getRelaxLocality()) {
          continue;
        }

        if (rackLocalRequest != null
            && rackLocalRequest.getNumContainers() != 0
            && (allowedLocality.equals(NodeType.RACK_LOCAL)
                || allowedLocality.equals(NodeType.OFF_SWITCH))) {
          return assignContainer(node, rackLocalRequest, NodeType.RACK_LOCAL, reserved);
        }

        ResourceRequest offSwitchRequest = getResourceRequest(priority, ResourceRequest.ANY);
        if (offSwitchRequest != null && !offSwitchRequest.getRelaxLocality()) {
          continue;
        }

        if (offSwitchRequest != null
            && offSwitchRequest.getNumContainers() != 0
            && allowedLocality.equals(NodeType.OFF_SWITCH)) {
          return assignContainer(node, offSwitchRequest, NodeType.OFF_SWITCH, reserved);
        }
      }
    }
    return Resources.none();
  }
Esempio n. 5
0
  private Resource assignContainer(
      FSSchedulerNode node, boolean reserved, TransactionState transactionState) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Node offered to app: " + getName() + " reserved: " + reserved);
    }

    if (reserved) {
      RMContainer rmContainer = node.getReservedContainer();
      Priority priority = rmContainer.getReservedPriority();

      // Make sure the application still needs requests at this priority
      if (app.getTotalRequiredResources(priority) == 0) {
        unreserve(priority, node);
        return Resources.none();
      }
    }

    Collection<Priority> prioritiesToTry =
        (reserved)
            ? Arrays.asList(node.getReservedContainer().getReservedPriority())
            : app.getPriorities();

    // For each priority, see if we can schedule a node local, rack local
    // or off-switch request. Rack of off-switch requests may be delayed
    // (not scheduled) in order to promote better locality.
    synchronized (app) {
      for (Priority priority : prioritiesToTry) {
        if (app.getTotalRequiredResources(priority) <= 0 || !hasContainerForNode(priority, node)) {
          continue;
        }

        app.addSchedulingOpportunity(priority);

        ResourceRequest rackLocalRequest = app.getResourceRequest(priority, node.getRackName());
        ResourceRequest localRequest = app.getResourceRequest(priority, node.getNodeName());

        if (localRequest != null && !localRequest.getRelaxLocality()) {
          LOG.warn("Relax locality off is not supported on local request: " + localRequest);
        }

        NodeType allowedLocality;
        if (scheduler.isContinuousSchedulingEnabled()) {
          allowedLocality =
              app.getAllowedLocalityLevelByTime(
                  priority,
                  scheduler.getNodeLocalityDelayMs(),
                  scheduler.getRackLocalityDelayMs(),
                  scheduler.getClock().getTime());
        } else {
          allowedLocality =
              app.getAllowedLocalityLevel(
                  priority,
                  scheduler.getNumClusterNodes(),
                  scheduler.getNodeLocalityThreshold(),
                  scheduler.getRackLocalityThreshold());
        }

        if (rackLocalRequest != null
            && rackLocalRequest.getNumContainers() != 0
            && localRequest != null
            && localRequest.getNumContainers() != 0) {
          return assignContainer(
              node, priority, localRequest, NodeType.NODE_LOCAL, reserved, transactionState);
        }

        if (rackLocalRequest != null && !rackLocalRequest.getRelaxLocality()) {
          continue;
        }

        if (rackLocalRequest != null
            && rackLocalRequest.getNumContainers() != 0
            && (allowedLocality.equals(NodeType.RACK_LOCAL)
                || allowedLocality.equals(NodeType.OFF_SWITCH))) {
          return assignContainer(
              node, priority, rackLocalRequest, NodeType.RACK_LOCAL, reserved, transactionState);
        }

        ResourceRequest offSwitchRequest = app.getResourceRequest(priority, ResourceRequest.ANY);
        if (offSwitchRequest != null && !offSwitchRequest.getRelaxLocality()) {
          continue;
        }

        if (offSwitchRequest != null
            && offSwitchRequest.getNumContainers() != 0
            && allowedLocality.equals(NodeType.OFF_SWITCH)) {
          return assignContainer(
              node, priority, offSwitchRequest, NodeType.OFF_SWITCH, reserved, transactionState);
        }
      }
    }
    return Resources.none();
  }