/**
   * Return the level at which we are allowed to schedule containers. Given the thresholds
   * indicating how much time passed before relaxing scheduling constraints.
   */
  public synchronized NodeType getAllowedLocalityLevelByTime(
      Priority priority, long nodeLocalityDelayMs, long rackLocalityDelayMs, long currentTimeMs) {

    // if not being used, can schedule anywhere
    if (nodeLocalityDelayMs < 0 || rackLocalityDelayMs < 0) {
      return NodeType.OFF_SWITCH;
    }

    // default level is NODE_LOCAL
    if (!allowedLocalityLevel.containsKey(priority)) {
      allowedLocalityLevel.put(priority, NodeType.NODE_LOCAL);
      return NodeType.NODE_LOCAL;
    }

    NodeType allowed = allowedLocalityLevel.get(priority);

    // if level is already most liberal, we're done
    if (allowed.equals(NodeType.OFF_SWITCH)) {
      return NodeType.OFF_SWITCH;
    }

    // check waiting time
    long waitTime = currentTimeMs;
    if (lastScheduledContainer.containsKey(priority)) {
      waitTime -= lastScheduledContainer.get(priority);
    } else {
      waitTime -= getStartTime();
    }

    long thresholdTime =
        allowed.equals(NodeType.NODE_LOCAL) ? nodeLocalityDelayMs : rackLocalityDelayMs;

    if (waitTime > thresholdTime) {
      if (allowed.equals(NodeType.NODE_LOCAL)) {
        allowedLocalityLevel.put(priority, NodeType.RACK_LOCAL);
        resetSchedulingOpportunities(priority, currentTimeMs);
      } else if (allowed.equals(NodeType.RACK_LOCAL)) {
        allowedLocalityLevel.put(priority, NodeType.OFF_SWITCH);
        resetSchedulingOpportunities(priority, currentTimeMs);
      }
    }
    return allowedLocalityLevel.get(priority);
  }
  /**
   * Return the level at which we are allowed to schedule containers, given the current size of the
   * cluster and thresholds indicating how many nodes to fail at (as a fraction of cluster size)
   * before relaxing scheduling constraints.
   */
  public synchronized NodeType getAllowedLocalityLevel(
      Priority priority, int numNodes, double nodeLocalityThreshold, double rackLocalityThreshold) {
    // upper limit on threshold
    if (nodeLocalityThreshold > 1.0) {
      nodeLocalityThreshold = 1.0;
    }
    if (rackLocalityThreshold > 1.0) {
      rackLocalityThreshold = 1.0;
    }

    // If delay scheduling is not being used, can schedule anywhere
    if (nodeLocalityThreshold < 0.0 || rackLocalityThreshold < 0.0) {
      return NodeType.OFF_SWITCH;
    }

    // Default level is NODE_LOCAL
    if (!allowedLocalityLevel.containsKey(priority)) {
      allowedLocalityLevel.put(priority, NodeType.NODE_LOCAL);
      return NodeType.NODE_LOCAL;
    }

    NodeType allowed = allowedLocalityLevel.get(priority);

    // If level is already most liberal, we're done
    if (allowed.equals(NodeType.OFF_SWITCH)) return NodeType.OFF_SWITCH;

    double threshold =
        allowed.equals(NodeType.NODE_LOCAL) ? nodeLocalityThreshold : rackLocalityThreshold;

    // Relax locality constraints once we've surpassed threshold.
    if (getSchedulingOpportunities(priority) > (numNodes * threshold)) {
      if (allowed.equals(NodeType.NODE_LOCAL)) {
        allowedLocalityLevel.put(priority, NodeType.RACK_LOCAL);
        resetSchedulingOpportunities(priority);
      } else if (allowed.equals(NodeType.RACK_LOCAL)) {
        allowedLocalityLevel.put(priority, NodeType.OFF_SWITCH);
        resetSchedulingOpportunities(priority);
      }
    }
    return allowedLocalityLevel.get(priority);
  }
  private Resource assignContainer(FSSchedulerNode node, boolean reserved) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Node offered to app: " + getName() + " reserved: " + reserved);
    }

    Collection<Priority> prioritiesToTry =
        (reserved)
            ? Arrays.asList(node.getReservedContainer().getReservedPriority())
            : getPriorities();

    // For each priority, see if we can schedule a node local, rack local
    // or off-switch request. Rack of off-switch requests may be delayed
    // (not scheduled) in order to promote better locality.
    synchronized (this) {
      for (Priority priority : prioritiesToTry) {
        if (getTotalRequiredResources(priority) <= 0 || !hasContainerForNode(priority, node)) {
          continue;
        }

        addSchedulingOpportunity(priority);

        // Check the AM resource usage for the leaf queue
        if (getLiveContainers().size() == 0 && !getUnmanagedAM()) {
          if (!getQueue().canRunAppAM(getAMResource())) {
            return Resources.none();
          }
        }

        ResourceRequest rackLocalRequest = getResourceRequest(priority, node.getRackName());
        ResourceRequest localRequest = getResourceRequest(priority, node.getNodeName());

        if (localRequest != null && !localRequest.getRelaxLocality()) {
          LOG.warn("Relax locality off is not supported on local request: " + localRequest);
        }

        NodeType allowedLocality;
        if (scheduler.isContinuousSchedulingEnabled()) {
          allowedLocality =
              getAllowedLocalityLevelByTime(
                  priority,
                  scheduler.getNodeLocalityDelayMs(),
                  scheduler.getRackLocalityDelayMs(),
                  scheduler.getClock().getTime());
        } else {
          allowedLocality =
              getAllowedLocalityLevel(
                  priority,
                  scheduler.getNumClusterNodes(),
                  scheduler.getNodeLocalityThreshold(),
                  scheduler.getRackLocalityThreshold());
        }

        if (rackLocalRequest != null
            && rackLocalRequest.getNumContainers() != 0
            && localRequest != null
            && localRequest.getNumContainers() != 0) {
          return assignContainer(node, localRequest, NodeType.NODE_LOCAL, reserved);
        }

        if (rackLocalRequest != null && !rackLocalRequest.getRelaxLocality()) {
          continue;
        }

        if (rackLocalRequest != null
            && rackLocalRequest.getNumContainers() != 0
            && (allowedLocality.equals(NodeType.RACK_LOCAL)
                || allowedLocality.equals(NodeType.OFF_SWITCH))) {
          return assignContainer(node, rackLocalRequest, NodeType.RACK_LOCAL, reserved);
        }

        ResourceRequest offSwitchRequest = getResourceRequest(priority, ResourceRequest.ANY);
        if (offSwitchRequest != null && !offSwitchRequest.getRelaxLocality()) {
          continue;
        }

        if (offSwitchRequest != null
            && offSwitchRequest.getNumContainers() != 0
            && allowedLocality.equals(NodeType.OFF_SWITCH)) {
          return assignContainer(node, offSwitchRequest, NodeType.OFF_SWITCH, reserved);
        }
      }
    }
    return Resources.none();
  }
  public synchronized RMContainer allocate(
      NodeType type,
      FSSchedulerNode node,
      Priority priority,
      ResourceRequest request,
      Container container) {
    // Update allowed locality level
    NodeType allowed = allowedLocalityLevel.get(priority);
    if (allowed != null) {
      if (allowed.equals(NodeType.OFF_SWITCH)
          && (type.equals(NodeType.NODE_LOCAL) || type.equals(NodeType.RACK_LOCAL))) {
        this.resetAllowedLocalityLevel(priority, type);
      } else if (allowed.equals(NodeType.RACK_LOCAL) && type.equals(NodeType.NODE_LOCAL)) {
        this.resetAllowedLocalityLevel(priority, type);
      }
    }

    // Required sanity check - AM can call 'allocate' to update resource
    // request without locking the scheduler, hence we need to check
    if (getTotalRequiredResources(priority) <= 0) {
      return null;
    }

    // Create RMContainer
    RMContainer rmContainer =
        new RMContainerImpl(
            container,
            getApplicationAttemptId(),
            node.getNodeID(),
            appSchedulingInfo.getUser(),
            rmContext);

    // Add it to allContainers list.
    newlyAllocatedContainers.add(rmContainer);
    liveContainers.put(container.getId(), rmContainer);

    // Update consumption and track allocations
    List<ResourceRequest> resourceRequestList =
        appSchedulingInfo.allocate(type, node, priority, request, container);
    Resources.addTo(currentConsumption, container.getResource());

    // Update resource requests related to "request" and store in RMContainer
    ((RMContainerImpl) rmContainer).setResourceRequests(resourceRequestList);

    // Inform the container
    rmContainer.handle(new RMContainerEvent(container.getId(), RMContainerEventType.START));

    if (LOG.isDebugEnabled()) {
      LOG.debug(
          "allocate: applicationAttemptId="
              + container.getId().getApplicationAttemptId()
              + " container="
              + container.getId()
              + " host="
              + container.getNodeId().getHost()
              + " type="
              + type);
    }
    RMAuditLogger.logSuccess(
        getUser(),
        AuditConstants.ALLOC_CONTAINER,
        "SchedulerApp",
        getApplicationId(),
        container.getId());

    return rmContainer;
  }
Beispiel #5
0
  private Resource assignContainer(
      FSSchedulerNode node, boolean reserved, TransactionState transactionState) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Node offered to app: " + getName() + " reserved: " + reserved);
    }

    if (reserved) {
      RMContainer rmContainer = node.getReservedContainer();
      Priority priority = rmContainer.getReservedPriority();

      // Make sure the application still needs requests at this priority
      if (app.getTotalRequiredResources(priority) == 0) {
        unreserve(priority, node);
        return Resources.none();
      }
    }

    Collection<Priority> prioritiesToTry =
        (reserved)
            ? Arrays.asList(node.getReservedContainer().getReservedPriority())
            : app.getPriorities();

    // For each priority, see if we can schedule a node local, rack local
    // or off-switch request. Rack of off-switch requests may be delayed
    // (not scheduled) in order to promote better locality.
    synchronized (app) {
      for (Priority priority : prioritiesToTry) {
        if (app.getTotalRequiredResources(priority) <= 0 || !hasContainerForNode(priority, node)) {
          continue;
        }

        app.addSchedulingOpportunity(priority);

        ResourceRequest rackLocalRequest = app.getResourceRequest(priority, node.getRackName());
        ResourceRequest localRequest = app.getResourceRequest(priority, node.getNodeName());

        if (localRequest != null && !localRequest.getRelaxLocality()) {
          LOG.warn("Relax locality off is not supported on local request: " + localRequest);
        }

        NodeType allowedLocality;
        if (scheduler.isContinuousSchedulingEnabled()) {
          allowedLocality =
              app.getAllowedLocalityLevelByTime(
                  priority,
                  scheduler.getNodeLocalityDelayMs(),
                  scheduler.getRackLocalityDelayMs(),
                  scheduler.getClock().getTime());
        } else {
          allowedLocality =
              app.getAllowedLocalityLevel(
                  priority,
                  scheduler.getNumClusterNodes(),
                  scheduler.getNodeLocalityThreshold(),
                  scheduler.getRackLocalityThreshold());
        }

        if (rackLocalRequest != null
            && rackLocalRequest.getNumContainers() != 0
            && localRequest != null
            && localRequest.getNumContainers() != 0) {
          return assignContainer(
              node, priority, localRequest, NodeType.NODE_LOCAL, reserved, transactionState);
        }

        if (rackLocalRequest != null && !rackLocalRequest.getRelaxLocality()) {
          continue;
        }

        if (rackLocalRequest != null
            && rackLocalRequest.getNumContainers() != 0
            && (allowedLocality.equals(NodeType.RACK_LOCAL)
                || allowedLocality.equals(NodeType.OFF_SWITCH))) {
          return assignContainer(
              node, priority, rackLocalRequest, NodeType.RACK_LOCAL, reserved, transactionState);
        }

        ResourceRequest offSwitchRequest = app.getResourceRequest(priority, ResourceRequest.ANY);
        if (offSwitchRequest != null && !offSwitchRequest.getRelaxLocality()) {
          continue;
        }

        if (offSwitchRequest != null
            && offSwitchRequest.getNumContainers() != 0
            && allowedLocality.equals(NodeType.OFF_SWITCH)) {
          return assignContainer(
              node, priority, offSwitchRequest, NodeType.OFF_SWITCH, reserved, transactionState);
        }
      }
    }
    return Resources.none();
  }