Example #1
0
 public static ResourceRequest newResourceRequest(
     Priority priority, String hostName, Resource capability, int numContainers) {
   ResourceRequest request = recordFactory.newRecordInstance(ResourceRequest.class);
   request.setPriority(priority);
   request.setResourceName(hostName);
   request.setCapability(capability);
   request.setNumContainers(numContainers);
   return request;
 }
  /**
   * Changed the return type to AllocateResponse which use to hold a reference to AMResponse.
   *
   * <p>AMResponse seems to have disappeared in CDH 4.6
   *
   * @param requestedContainers
   * @param releasedContainers
   * @return
   * @throws YarnRemoteException
   */
  public AllocateResponse allocateRequest(
      List<ResourceRequest> requestedContainers, List<ContainerId> releasedContainers)
      throws YarnRemoteException {

    if (amResourceManager == null)
      throw new IllegalStateException(
          "Cannot send allocation request before connecting to the resource manager!");

    LOG.info(
        "Sending allocation request"
            + ", requestedSize="
            + requestedContainers.size()
            + ", releasedSize="
            + releasedContainers.size());

    for (ResourceRequest req : requestedContainers)
      LOG.info(
          "Requesting container, host="
              + req.getHostName()
              + ", amount="
              + req.getNumContainers()
              + ", memory="
              + req.getCapability().getMemory()
              + ", priority="
              + req.getPriority().getPriority());

    for (ContainerId rel : releasedContainers) LOG.info("Releasing container: " + rel.getId());

    AllocateRequest request = Records.newRecord(AllocateRequest.class);
    request.setResponseId(rmRequestId.incrementAndGet());
    request.setApplicationAttemptId(appAttemptId);
    request.addAllAsks(requestedContainers);
    request.addAllReleases(releasedContainers);

    AllocateResponse response = amResourceManager.allocate(request);

    // response.getAllocatedContainers()

    LOG.debug(
        "Got an allocation response, "
            + ", responseId="
            + response.getResponseId()
            + ", numClusterNodes="
            + response.getNumClusterNodes()
            + ", headroom="
            + response.getAvailableResources().getMemory()
            + ", allocatedSize="
            + response.getAllocatedContainers().size()
            + ", updatedNodes="
            + response.getUpdatedNodes().size()
            + ", reboot="
            + response.getReboot()
            + ", completedSize="
            + response.getCompletedContainersStatuses().size());

    return response;
  }
  /**
   * Whether this app has containers requests that could be satisfied on the given node, if the node
   * had full space.
   */
  public boolean hasContainerForNode(Priority prio, FSSchedulerNode node) {
    ResourceRequest anyRequest = getResourceRequest(prio, ResourceRequest.ANY);
    ResourceRequest rackRequest = getResourceRequest(prio, node.getRackName());
    ResourceRequest nodeRequest = getResourceRequest(prio, node.getNodeName());

    return
    // There must be outstanding requests at the given priority:
    anyRequest != null
        && anyRequest.getNumContainers() > 0
        &&
        // If locality relaxation is turned off at *-level, there must be a
        // non-zero request for the node's rack:
        (anyRequest.getRelaxLocality()
            || (rackRequest != null && rackRequest.getNumContainers() > 0))
        &&
        // If locality relaxation is turned off at rack-level, there must be a
        // non-zero request at the node:
        (rackRequest == null
            || rackRequest.getRelaxLocality()
            || (nodeRequest != null && nodeRequest.getNumContainers() > 0))
        &&
        // The requested container must be able to fit on the node:
        Resources.lessThanOrEqual(
            RESOURCE_CALCULATOR,
            null,
            anyRequest.getCapability(),
            node.getRMNode().getTotalCapability());
  }
Example #4
0
  private ResourceRequest validateAndCreateResourceRequest(
      ApplicationSubmissionContext submissionContext, boolean isRecovery)
      throws InvalidResourceRequestException {
    // Validation of the ApplicationSubmissionContext needs to be completed
    // here. Only those fields that are dependent on RM's configuration are
    // checked here as they have to be validated whether they are part of new
    // submission or just being recovered.

    // Check whether AM resource requirements are within required limits
    if (!submissionContext.getUnmanagedAM()) {
      ResourceRequest amReq = submissionContext.getAMContainerResourceRequest();
      if (amReq == null) {
        amReq =
            BuilderUtils.newResourceRequest(
                RMAppAttemptImpl.AM_CONTAINER_PRIORITY,
                ResourceRequest.ANY,
                submissionContext.getResource(),
                1);
      }

      // set label expression for AM container
      if (null == amReq.getNodeLabelExpression()) {
        amReq.setNodeLabelExpression(submissionContext.getNodeLabelExpression());
      }

      try {
        SchedulerUtils.normalizeAndValidateRequest(
            amReq,
            scheduler.getMaximumResourceCapability(),
            submissionContext.getQueue(),
            scheduler,
            isRecovery,
            rmContext);
      } catch (InvalidResourceRequestException e) {
        LOG.warn(
            "RM app submission failed in validating AM resource request"
                + " for application "
                + submissionContext.getApplicationId(),
            e);
        throw e;
      }

      SchedulerUtils.normalizeRequest(
          amReq,
          scheduler.getResourceCalculator(),
          scheduler.getClusterResource(),
          scheduler.getMinimumResourceCapability(),
          scheduler.getMaximumResourceCapability(),
          scheduler.getMinimumResourceCapability());
      return amReq;
    }

    return null;
  }
Example #5
0
 public static ResourceRequest newResourceRequest(ResourceRequest r) {
   ResourceRequest request = recordFactory.newRecordInstance(ResourceRequest.class);
   request.setPriority(r.getPriority());
   request.setResourceName(r.getResourceName());
   request.setCapability(r.getCapability());
   request.setNumContainers(r.getNumContainers());
   return request;
 }
  /**
   * Setup a container request on specified node
   *
   * @param node the specified node
   * @return ResourceRequest sent to RM
   */
  private ResourceRequest setupAContainerAskForRM(String node) {
    ResourceRequest request = Records.newRecord(ResourceRequest.class);
    request.setResourceName(node);
    request.setNumContainers(1); // important

    Priority priority = Records.newRecord(Priority.class);
    priority.setPriority(requestPriority);
    request.setPriority(priority);

    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(containerMemory);
    request.setCapability(capability);

    return request;
  }
  @Override
  public void updateDemand() {
    demand = Resources.createResource(0);
    // Demand is current consumption plus outstanding requests
    Resources.addTo(demand, getCurrentConsumption());

    // Add up outstanding resource requests
    synchronized (this) {
      for (Priority p : getPriorities()) {
        for (ResourceRequest r : getResourceRequests(p).values()) {
          Resource total = Resources.multiply(r.getCapability(), r.getNumContainers());
          Resources.addTo(demand, total);
        }
      }
    }
  }
  @Override
  protected AllocateResponse doContainerRequest() {
    List<ResourceRequest> requestedContainers = null;

    if (allocationDirty.getAndSet(false)) {
      requestedContainers = createRequests();
    } else {
      requestedContainers = EMPTY;
    }

    // add pending containers to be released
    List<ContainerId> release = new ArrayList<ContainerId>();
    ContainerId element = null;
    while ((element = releaseContainers.poll()) != null) {
      release.add(element);
    }

    if (log.isDebugEnabled()) {
      log.debug("Requesting containers using " + requestedContainers.size() + " requests.");
      for (ResourceRequest resourceRequest : requestedContainers) {
        log.debug(
            "ResourceRequest: "
                + resourceRequest
                + " with count="
                + resourceRequest.getNumContainers()
                + " with hostName="
                + resourceRequest.getResourceName());
      }
      log.debug("Releasing containers " + release.size());
      for (ContainerId cid : release) {
        log.debug("Release container=" + cid);
      }
      log.debug("Request id will be: " + requestId.get());
    }

    // build the allocation request
    AllocateRequest request = Records.newRecord(AllocateRequest.class);
    request.setResponseId(requestId.get());
    request.setAskList(requestedContainers);
    request.setReleaseList(release);
    request.setProgress(applicationProgress);

    // do request and return response
    AllocateResponse allocate = getRmTemplate().allocate(request);
    requestId.set(allocate.getResponseId());
    return allocate;
  }
  @Test
  public void testMove() {
    final String user = "******";
    Queue parentQueue = createQueue("parent", null);
    Queue oldQueue = createQueue("old", parentQueue);
    Queue newQueue = createQueue("new", parentQueue);
    QueueMetrics parentMetrics = parentQueue.getMetrics();
    QueueMetrics oldMetrics = oldQueue.getMetrics();
    QueueMetrics newMetrics = newQueue.getMetrics();

    ApplicationAttemptId appAttId = createAppAttemptId(0, 0);
    SchedulerApplicationAttempt app =
        new SchedulerApplicationAttempt(
            appAttId, user, oldQueue, oldQueue.getActiveUsersManager(), null);
    oldMetrics.submitApp(user);

    // Resource request
    Resource requestedResource = Resource.newInstance(1536, 2);
    Priority requestedPriority = Priority.newInstance(2);
    ResourceRequest request =
        ResourceRequest.newInstance(requestedPriority, ResourceRequest.ANY, requestedResource, 3);
    app.updateResourceRequests(Arrays.asList(request));

    // Allocated container
    RMContainer container1 = createRMContainer(appAttId, 1, requestedResource);
    app.liveContainers.put(container1.getContainerId(), container1);
    SchedulerNode node = createNode();
    app.appSchedulingInfo.allocate(
        NodeType.OFF_SWITCH, node, requestedPriority, request, container1.getContainer());

    // Reserved container
    Priority prio1 = Priority.newInstance(1);
    Resource reservedResource = Resource.newInstance(2048, 3);
    RMContainer container2 =
        createReservedRMContainer(appAttId, 1, reservedResource, node.getNodeID(), prio1);
    Map<NodeId, RMContainer> reservations = new HashMap<NodeId, RMContainer>();
    reservations.put(node.getNodeID(), container2);
    app.reservedContainers.put(prio1, reservations);
    oldMetrics.reserveResource(user, reservedResource);

    checkQueueMetrics(oldMetrics, 1, 1, 1536, 2, 2048, 3, 3072, 4);
    checkQueueMetrics(newMetrics, 0, 0, 0, 0, 0, 0, 0, 0);
    checkQueueMetrics(parentMetrics, 1, 1, 1536, 2, 2048, 3, 3072, 4);

    app.move(newQueue);

    checkQueueMetrics(oldMetrics, 0, 0, 0, 0, 0, 0, 0, 0);
    checkQueueMetrics(newMetrics, 1, 1, 1536, 2, 2048, 3, 3072, 4);
    checkQueueMetrics(parentMetrics, 1, 1, 1536, 2, 2048, 3, 3072, 4);
  }
Example #10
0
  /**
   * Assign a container to this node to facilitate {@code request}. If node does not have enough
   * memory, create a reservation. This is called once we are sure the particular request should be
   * facilitated by this node.
   */
  private Resource assignContainer(
      FSSchedulerNode node,
      Priority priority,
      ResourceRequest request,
      NodeType type,
      boolean reserved,
      TransactionState transactionState) {

    // How much does this request need?
    Resource capability = request.getCapability();

    // How much does the node have?
    Resource available = node.getAvailableResource();

    Container container = null;
    if (reserved) {
      container = node.getReservedContainer().getContainer();
    } else {
      container = createContainer(app, node, capability, priority, transactionState);
    }

    // Can we allocate a container on this node?
    if (Resources.fitsIn(capability, available)) {
      // Inform the application of the new container for this request
      RMContainer allocatedContainer =
          app.allocate(type, node, priority, request, container, transactionState);
      if (allocatedContainer == null) {
        // Did the application need this resource?
        if (reserved) {
          unreserve(priority, node);
        }
        return Resources.none();
      }

      // If we had previously made a reservation, delete it
      if (reserved) {
        unreserve(priority, node);
      }

      // Inform the node
      node.allocateContainer(app.getApplicationId(), allocatedContainer);

      return container.getResource();
    } else {
      // The desired container won't fit here, so reserve
      reserve(priority, node, container, reserved, transactionState);

      return FairScheduler.CONTAINER_RESERVED;
    }
  }
  private void addResourceRequest(Priority priority, String resourceName, Resource capability) {
    Map<String, Map<Resource, ResourceRequest>> remoteRequests =
        this.remoteRequestsTable.get(priority);
    if (remoteRequests == null) {
      remoteRequests = new HashMap<String, Map<Resource, ResourceRequest>>();
      this.remoteRequestsTable.put(priority, remoteRequests);
      if (LOG.isDebugEnabled()) {
        LOG.debug("Added priority=" + priority);
      }
    }
    Map<Resource, ResourceRequest> reqMap = remoteRequests.get(resourceName);
    if (reqMap == null) {
      reqMap = new HashMap<Resource, ResourceRequest>();
      remoteRequests.put(resourceName, reqMap);
    }
    ResourceRequest remoteRequest = reqMap.get(capability);
    if (remoteRequest == null) {
      remoteRequest = recordFactory.newRecordInstance(ResourceRequest.class);
      remoteRequest.setPriority(priority);
      remoteRequest.setResourceName(resourceName);
      remoteRequest.setCapability(capability);
      remoteRequest.setNumContainers(0);
      reqMap.put(capability, remoteRequest);
    }
    remoteRequest.setNumContainers(remoteRequest.getNumContainers() + 1);

    // Note this down for next interaction with ResourceManager
    addResourceRequestToAsk(remoteRequest);
    if (LOG.isDebugEnabled()) {
      LOG.debug(
          "addResourceRequest:"
              + " applicationId="
              + applicationId.getId()
              + " priority="
              + priority.getPriority()
              + " resourceName="
              + resourceName
              + " numContainers="
              + remoteRequest.getNumContainers()
              + " #asks="
              + ask.size());
    }
  }
Example #12
0
  /**
   * Assign a container to this node to facilitate {@code request}. If node does not have enough
   * memory, create a reservation. This is called once we are sure the particular request should be
   * facilitated by this node.
   *
   * @param node The node to try placing the container on.
   * @param request The ResourceRequest we're trying to satisfy.
   * @param type The locality of the assignment.
   * @param reserved Whether there's already a container reserved for this app on the node.
   * @return If an assignment was made, returns the resources allocated to the container. If a
   *     reservation was made, returns FairScheduler.CONTAINER_RESERVED. If no assignment or
   *     reservation was made, returns an empty resource.
   */
  private Resource assignContainer(
      FSSchedulerNode node, ResourceRequest request, NodeType type, boolean reserved) {

    // How much does this request need?
    Resource capability = request.getCapability();

    // How much does the node have?
    Resource available = node.getAvailableResource();

    Container container = null;
    if (reserved) {
      container = node.getReservedContainer().getContainer();
    } else {
      container = createContainer(node, capability, request.getPriority());
    }

    // Can we allocate a container on this node?
    if (Resources.fitsIn(capability, available)) {
      // Inform the application of the new container for this request
      RMContainer allocatedContainer =
          allocate(type, node, request.getPriority(), request, container);
      if (allocatedContainer == null) {
        // Did the application need this resource?
        if (reserved) {
          unreserve(request.getPriority(), node);
        }
        return Resources.none();
      }

      // If we had previously made a reservation, delete it
      if (reserved) {
        unreserve(request.getPriority(), node);
      }

      // Inform the node
      node.allocateContainer(allocatedContainer);

      // If this container is used to run AM, update the leaf queue's AM usage
      if (getLiveContainers().size() == 1 && !getUnmanagedAM()) {
        getQueue().addAMResourceUsage(container.getResource());
        setAmRunning(true);
      }

      return container.getResource();
    } else {
      // The desired container won't fit here, so reserve
      reserve(request.getPriority(), node, container, reserved);

      return FairScheduler.CONTAINER_RESERVED;
    }
  }
 /**
  * Utility method creating a {@link ResourceRequest}.
  *
  * @param numContainers number of containers to request
  * @return request to be sent to resource manager
  */
 private ResourceRequest getContainerResourceRequest(
     int numContainers, String hostName, boolean relaxLocality) {
   ResourceRequest request = Records.newRecord(ResourceRequest.class);
   request.setRelaxLocality(relaxLocality);
   request.setResourceName(hostName);
   request.setNumContainers(numContainers);
   Priority pri = Records.newRecord(Priority.class);
   pri.setPriority(priority);
   request.setPriority(pri);
   Resource capability = Records.newRecord(Resource.class);
   capability.setMemory(memory);
   ResourceCompat.setVirtualCores(capability, virtualcores);
   request.setCapability(capability);
   return request;
 }
Example #14
0
  private Resource assignContainer(FSSchedulerNode node, boolean reserved) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Node offered to app: " + getName() + " reserved: " + reserved);
    }

    Collection<Priority> prioritiesToTry =
        (reserved)
            ? Arrays.asList(node.getReservedContainer().getReservedPriority())
            : getPriorities();

    // For each priority, see if we can schedule a node local, rack local
    // or off-switch request. Rack of off-switch requests may be delayed
    // (not scheduled) in order to promote better locality.
    synchronized (this) {
      for (Priority priority : prioritiesToTry) {
        if (getTotalRequiredResources(priority) <= 0 || !hasContainerForNode(priority, node)) {
          continue;
        }

        addSchedulingOpportunity(priority);

        // Check the AM resource usage for the leaf queue
        if (getLiveContainers().size() == 0 && !getUnmanagedAM()) {
          if (!getQueue().canRunAppAM(getAMResource())) {
            return Resources.none();
          }
        }

        ResourceRequest rackLocalRequest = getResourceRequest(priority, node.getRackName());
        ResourceRequest localRequest = getResourceRequest(priority, node.getNodeName());

        if (localRequest != null && !localRequest.getRelaxLocality()) {
          LOG.warn("Relax locality off is not supported on local request: " + localRequest);
        }

        NodeType allowedLocality;
        if (scheduler.isContinuousSchedulingEnabled()) {
          allowedLocality =
              getAllowedLocalityLevelByTime(
                  priority,
                  scheduler.getNodeLocalityDelayMs(),
                  scheduler.getRackLocalityDelayMs(),
                  scheduler.getClock().getTime());
        } else {
          allowedLocality =
              getAllowedLocalityLevel(
                  priority,
                  scheduler.getNumClusterNodes(),
                  scheduler.getNodeLocalityThreshold(),
                  scheduler.getRackLocalityThreshold());
        }

        if (rackLocalRequest != null
            && rackLocalRequest.getNumContainers() != 0
            && localRequest != null
            && localRequest.getNumContainers() != 0) {
          return assignContainer(node, localRequest, NodeType.NODE_LOCAL, reserved);
        }

        if (rackLocalRequest != null && !rackLocalRequest.getRelaxLocality()) {
          continue;
        }

        if (rackLocalRequest != null
            && rackLocalRequest.getNumContainers() != 0
            && (allowedLocality.equals(NodeType.RACK_LOCAL)
                || allowedLocality.equals(NodeType.OFF_SWITCH))) {
          return assignContainer(node, rackLocalRequest, NodeType.RACK_LOCAL, reserved);
        }

        ResourceRequest offSwitchRequest = getResourceRequest(priority, ResourceRequest.ANY);
        if (offSwitchRequest != null && !offSwitchRequest.getRelaxLocality()) {
          continue;
        }

        if (offSwitchRequest != null
            && offSwitchRequest.getNumContainers() != 0
            && allowedLocality.equals(NodeType.OFF_SWITCH)) {
          return assignContainer(node, offSwitchRequest, NodeType.OFF_SWITCH, reserved);
        }
      }
    }
    return Resources.none();
  }
  private void decResourceRequest(Priority priority, String resourceName, Resource capability) {
    Map<String, Map<Resource, ResourceRequest>> remoteRequests =
        this.remoteRequestsTable.get(priority);
    Map<Resource, ResourceRequest> reqMap = remoteRequests.get(resourceName);
    if (reqMap == null) {
      // as we modify the resource requests by filtering out blacklisted hosts
      // when they are added, this value may be null when being
      // decremented
      if (LOG.isDebugEnabled()) {
        LOG.debug(
            "Not decrementing resource as " + resourceName + " is not present in request table");
      }
      return;
    }
    ResourceRequest remoteRequest = reqMap.get(capability);

    if (LOG.isDebugEnabled()) {
      LOG.debug(
          "BEFORE decResourceRequest:"
              + " applicationId="
              + applicationId.getId()
              + " priority="
              + priority.getPriority()
              + " resourceName="
              + resourceName
              + " numContainers="
              + remoteRequest.getNumContainers()
              + " #asks="
              + ask.size());
    }

    if (remoteRequest.getNumContainers() > 0) {
      // based on blacklisting comments above we can end up decrementing more
      // than requested. so guard for that.
      remoteRequest.setNumContainers(remoteRequest.getNumContainers() - 1);
    }

    if (remoteRequest.getNumContainers() == 0) {
      reqMap.remove(capability);
      if (reqMap.size() == 0) {
        remoteRequests.remove(resourceName);
      }
      if (remoteRequests.size() == 0) {
        remoteRequestsTable.remove(priority);
      }
    }

    // send the updated resource request to RM
    // send 0 container count requests also to cancel previous requests
    addResourceRequestToAsk(remoteRequest);

    if (LOG.isDebugEnabled()) {
      LOG.info(
          "AFTER decResourceRequest:"
              + " applicationId="
              + applicationId.getId()
              + " priority="
              + priority.getPriority()
              + " resourceName="
              + resourceName
              + " numContainers="
              + remoteRequest.getNumContainers()
              + " #asks="
              + ask.size());
    }
  }
  private void updateQueueWithAllocateRequest(
      Allocation allocation,
      ApplicationAttemptId attemptId,
      List<ResourceRequest> resourceRequests,
      List<ContainerId> containerIds)
      throws IOException {
    // update queue information
    Resource pendingResource = Resources.createResource(0, 0);
    Resource allocatedResource = Resources.createResource(0, 0);
    String queueName = appQueueMap.get(attemptId.getApplicationId());
    // container requested
    for (ResourceRequest request : resourceRequests) {
      if (request.getResourceName().equals(ResourceRequest.ANY)) {
        Resources.addTo(
            pendingResource,
            Resources.multiply(request.getCapability(), request.getNumContainers()));
      }
    }
    // container allocated
    for (Container container : allocation.getContainers()) {
      Resources.addTo(allocatedResource, container.getResource());
      Resources.subtractFrom(pendingResource, container.getResource());
    }
    // container released from AM
    SchedulerAppReport report = scheduler.getSchedulerAppInfo(attemptId);
    for (ContainerId containerId : containerIds) {
      Container container = null;
      for (RMContainer c : report.getLiveContainers()) {
        if (c.getContainerId().equals(containerId)) {
          container = c.getContainer();
          break;
        }
      }
      if (container != null) {
        // released allocated containers
        Resources.subtractFrom(allocatedResource, container.getResource());
      } else {
        for (RMContainer c : report.getReservedContainers()) {
          if (c.getContainerId().equals(containerId)) {
            container = c.getContainer();
            break;
          }
        }
        if (container != null) {
          // released reserved containers
          Resources.subtractFrom(pendingResource, container.getResource());
        }
      }
    }
    // containers released/preemption from scheduler
    Set<ContainerId> preemptionContainers = new HashSet<ContainerId>();
    if (allocation.getContainerPreemptions() != null) {
      preemptionContainers.addAll(allocation.getContainerPreemptions());
    }
    if (allocation.getStrictContainerPreemptions() != null) {
      preemptionContainers.addAll(allocation.getStrictContainerPreemptions());
    }
    if (!preemptionContainers.isEmpty()) {
      for (ContainerId containerId : preemptionContainers) {
        if (!preemptionContainerMap.containsKey(containerId)) {
          Container container = null;
          for (RMContainer c : report.getLiveContainers()) {
            if (c.getContainerId().equals(containerId)) {
              container = c.getContainer();
              break;
            }
          }
          if (container != null) {
            preemptionContainerMap.put(containerId, container.getResource());
          }
        }
      }
    }

    // update metrics
    SortedMap<String, Counter> counterMap = metrics.getCounters();
    String names[] =
        new String[] {
          "counter.queue." + queueName + ".pending.memory",
          "counter.queue." + queueName + ".pending.cores",
          "counter.queue." + queueName + ".allocated.memory",
          "counter.queue." + queueName + ".allocated.cores"
        };
    int values[] =
        new int[] {
          pendingResource.getMemory(),
          pendingResource.getVirtualCores(),
          allocatedResource.getMemory(),
          allocatedResource.getVirtualCores()
        };
    for (int i = names.length - 1; i >= 0; i--) {
      if (!counterMap.containsKey(names[i])) {
        metrics.counter(names[i]);
        counterMap = metrics.getCounters();
      }
      counterMap.get(names[i]).inc(values[i]);
    }

    queueLock.lock();
    try {
      if (!schedulerMetrics.isTracked(queueName)) {
        schedulerMetrics.trackQueue(queueName);
      }
    } finally {
      queueLock.unlock();
    }
  }
  @Override
  public AllocateResponse allocate(float progressIndicator) throws YarnException, IOException {
    Preconditions.checkArgument(
        progressIndicator >= 0, "Progress indicator should not be negative");
    AllocateResponse allocateResponse = null;
    List<ResourceRequest> askList = null;
    List<ContainerId> releaseList = null;
    AllocateRequest allocateRequest = null;
    List<String> blacklistToAdd = new ArrayList<String>();
    List<String> blacklistToRemove = new ArrayList<String>();

    try {
      synchronized (this) {
        askList = new ArrayList<ResourceRequest>(ask.size());
        for (ResourceRequest r : ask) {
          // create a copy of ResourceRequest as we might change it while the
          // RPC layer is using it to send info across
          askList.add(
              ResourceRequest.newInstance(
                  r.getPriority(),
                  r.getResourceName(),
                  r.getCapability(),
                  r.getNumContainers(),
                  r.getRelaxLocality(),
                  r.getNodeLabelExpression()));
        }
        releaseList = new ArrayList<ContainerId>(release);
        // optimistically clear this collection assuming no RPC failure
        ask.clear();
        release.clear();

        blacklistToAdd.addAll(blacklistAdditions);
        blacklistToRemove.addAll(blacklistRemovals);

        ResourceBlacklistRequest blacklistRequest =
            (blacklistToAdd != null) || (blacklistToRemove != null)
                ? ResourceBlacklistRequest.newInstance(blacklistToAdd, blacklistToRemove)
                : null;

        allocateRequest =
            AllocateRequest.newInstance(
                lastResponseId, progressIndicator, askList, releaseList, blacklistRequest);
        // clear blacklistAdditions and blacklistRemovals before
        // unsynchronized part
        blacklistAdditions.clear();
        blacklistRemovals.clear();
      }

      try {
        allocateResponse = rmClient.allocate(allocateRequest);
      } catch (ApplicationMasterNotRegisteredException e) {
        LOG.warn("ApplicationMaster is out of sync with ResourceManager," + " hence resyncing.");
        synchronized (this) {
          release.addAll(this.pendingRelease);
          blacklistAdditions.addAll(this.blacklistedNodes);
          for (Map<String, TreeMap<Resource, ResourceRequestInfo>> rr :
              remoteRequestsTable.values()) {
            for (Map<Resource, ResourceRequestInfo> capabalities : rr.values()) {
              for (ResourceRequestInfo request : capabalities.values()) {
                addResourceRequestToAsk(request.remoteRequest);
              }
            }
          }
        }
        // re register with RM
        registerApplicationMaster();
        allocateResponse = allocate(progressIndicator);
        return allocateResponse;
      }

      synchronized (this) {
        // update these on successful RPC
        clusterNodeCount = allocateResponse.getNumClusterNodes();
        lastResponseId = allocateResponse.getResponseId();
        clusterAvailableResources = allocateResponse.getAvailableResources();
        if (!allocateResponse.getNMTokens().isEmpty()) {
          populateNMTokens(allocateResponse.getNMTokens());
        }
        if (allocateResponse.getAMRMToken() != null) {
          updateAMRMToken(allocateResponse.getAMRMToken());
        }
        if (!pendingRelease.isEmpty()
            && !allocateResponse.getCompletedContainersStatuses().isEmpty()) {
          removePendingReleaseRequests(allocateResponse.getCompletedContainersStatuses());
        }
      }
    } finally {
      // TODO how to differentiate remote yarn exception vs error in rpc
      if (allocateResponse == null) {
        // we hit an exception in allocate()
        // preserve ask and release for next call to allocate()
        synchronized (this) {
          release.addAll(releaseList);
          // requests could have been added or deleted during call to allocate
          // If requests were added/removed then there is nothing to do since
          // the ResourceRequest object in ask would have the actual new value.
          // If ask does not have this ResourceRequest then it was unchanged and
          // so we can add the value back safely.
          // This assumes that there will no concurrent calls to allocate() and
          // so we dont have to worry about ask being changed in the
          // synchronized block at the beginning of this method.
          for (ResourceRequest oldAsk : askList) {
            if (!ask.contains(oldAsk)) {
              ask.add(oldAsk);
            }
          }

          blacklistAdditions.addAll(blacklistToAdd);
          blacklistRemovals.addAll(blacklistToRemove);
        }
      }
    }
    return allocateResponse;
  }
Example #18
0
  private Resource assignContainer(
      FSSchedulerNode node, boolean reserved, TransactionState transactionState) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Node offered to app: " + getName() + " reserved: " + reserved);
    }

    if (reserved) {
      RMContainer rmContainer = node.getReservedContainer();
      Priority priority = rmContainer.getReservedPriority();

      // Make sure the application still needs requests at this priority
      if (app.getTotalRequiredResources(priority) == 0) {
        unreserve(priority, node);
        return Resources.none();
      }
    }

    Collection<Priority> prioritiesToTry =
        (reserved)
            ? Arrays.asList(node.getReservedContainer().getReservedPriority())
            : app.getPriorities();

    // For each priority, see if we can schedule a node local, rack local
    // or off-switch request. Rack of off-switch requests may be delayed
    // (not scheduled) in order to promote better locality.
    synchronized (app) {
      for (Priority priority : prioritiesToTry) {
        if (app.getTotalRequiredResources(priority) <= 0 || !hasContainerForNode(priority, node)) {
          continue;
        }

        app.addSchedulingOpportunity(priority);

        ResourceRequest rackLocalRequest = app.getResourceRequest(priority, node.getRackName());
        ResourceRequest localRequest = app.getResourceRequest(priority, node.getNodeName());

        if (localRequest != null && !localRequest.getRelaxLocality()) {
          LOG.warn("Relax locality off is not supported on local request: " + localRequest);
        }

        NodeType allowedLocality;
        if (scheduler.isContinuousSchedulingEnabled()) {
          allowedLocality =
              app.getAllowedLocalityLevelByTime(
                  priority,
                  scheduler.getNodeLocalityDelayMs(),
                  scheduler.getRackLocalityDelayMs(),
                  scheduler.getClock().getTime());
        } else {
          allowedLocality =
              app.getAllowedLocalityLevel(
                  priority,
                  scheduler.getNumClusterNodes(),
                  scheduler.getNodeLocalityThreshold(),
                  scheduler.getRackLocalityThreshold());
        }

        if (rackLocalRequest != null
            && rackLocalRequest.getNumContainers() != 0
            && localRequest != null
            && localRequest.getNumContainers() != 0) {
          return assignContainer(
              node, priority, localRequest, NodeType.NODE_LOCAL, reserved, transactionState);
        }

        if (rackLocalRequest != null && !rackLocalRequest.getRelaxLocality()) {
          continue;
        }

        if (rackLocalRequest != null
            && rackLocalRequest.getNumContainers() != 0
            && (allowedLocality.equals(NodeType.RACK_LOCAL)
                || allowedLocality.equals(NodeType.OFF_SWITCH))) {
          return assignContainer(
              node, priority, rackLocalRequest, NodeType.RACK_LOCAL, reserved, transactionState);
        }

        ResourceRequest offSwitchRequest = app.getResourceRequest(priority, ResourceRequest.ANY);
        if (offSwitchRequest != null && !offSwitchRequest.getRelaxLocality()) {
          continue;
        }

        if (offSwitchRequest != null
            && offSwitchRequest.getNumContainers() != 0
            && allowedLocality.equals(NodeType.OFF_SWITCH)) {
          return assignContainer(
              node, priority, offSwitchRequest, NodeType.OFF_SWITCH, reserved, transactionState);
        }
      }
    }
    return Resources.none();
  }
 ResourceRequestInfo(
     Priority priority, String resourceName, Resource capability, boolean relaxLocality) {
   remoteRequest = ResourceRequest.newInstance(priority, resourceName, capability, 0);
   remoteRequest.setRelaxLocality(relaxLocality);
   containerRequests = new LinkedHashSet<T>();
 }
  protected void containerFailedOnHost(String hostName) {
    if (!nodeBlacklistingEnabled) {
      return;
    }
    if (blacklistedNodes.contains(hostName)) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Host " + hostName + " is already blacklisted.");
      }
      return; // already blacklisted
    }
    Integer failures = nodeFailures.remove(hostName);
    failures = failures == null ? Integer.valueOf(0) : failures;
    failures++;
    LOG.info(failures + " failures on node " + hostName);
    if (failures >= maxTaskFailuresPerNode) {
      blacklistedNodes.add(hostName);
      // Even if blacklisting is ignored, continue to remove the host from
      // the request table. The RM may have additional nodes it can allocate on.
      LOG.info("Blacklisted host " + hostName);

      // remove all the requests corresponding to this hostname
      for (Map<String, Map<Resource, ResourceRequest>> remoteRequests :
          remoteRequestsTable.values()) {
        // remove from host if no pending allocations
        boolean foundAll = true;
        Map<Resource, ResourceRequest> reqMap = remoteRequests.get(hostName);
        if (reqMap != null) {
          for (ResourceRequest req : reqMap.values()) {
            if (!ask.remove(req)) {
              foundAll = false;
              // if ask already sent to RM, we can try and overwrite it if possible.
              // send a new ask to RM with numContainers
              // specified for the blacklisted host to be 0.
              ResourceRequest zeroedRequest =
                  ResourceRequest.newInstance(
                      req.getPriority(),
                      req.getResourceName(),
                      req.getCapability(),
                      req.getNumContainers(),
                      req.getRelaxLocality());

              zeroedRequest.setNumContainers(0);
              // to be sent to RM on next heartbeat
              addResourceRequestToAsk(zeroedRequest);
            }
          }
          // if all requests were still in ask queue
          // we can remove this request
          if (foundAll) {
            remoteRequests.remove(hostName);
          }
        }
        // TODO handling of rack blacklisting
        // Removing from rack should be dependent on no. of failures within the rack
        // Blacklisting a rack on the basis of a single node's blacklisting
        // may be overly aggressive.
        // Node failures could be co-related with other failures on the same rack
        // but we probably need a better approach at trying to decide how and when
        // to blacklist a rack
      }
    } else {
      nodeFailures.put(hostName, failures);
    }
  }