Exemplo n.º 1
0
 /**
  * Builds the scheduler-side view of a node. Both the available and the total
  * resource fields are initialised from separate clones of the capability
  * reported by the given RM node.
  *
  * @param node the RM node this scheduler node wraps
  * @param usePortForNodeName if true the node name is {@code host:port},
  *     otherwise it is the bare host name
  */
 public SchedulerNode(RMNode node, boolean usePortForNodeName) {
   this.rmNode = node;
   // Each field gets its own clone of the reported capability.
   this.availableResource = Resources.clone(node.getTotalCapability());
   this.totalResourceCapability = Resources.clone(node.getTotalCapability());
   nodeName =
       usePortForNodeName
           ? node.getHostName() + ":" + node.getNodeID().getPort()
           : node.getHostName();
 }
  /**
   * Releases a container that was allocated on this node.
   *
   * <p>A container rejected by {@code isValidContainer} is logged as an error and
   * nothing else happens. Otherwise the container is removed from the launched
   * set, and {@code updateResource} is invoked only when it was actually present.
   *
   * @param container container to be released
   */
  public synchronized void releaseContainer(Container container) {
    if (!isValidContainer(container)) {
      LOG.error("Invalid container released " + container);
      return;
    }

    // Stop tracking the container; only adjust accounting if it was tracked.
    boolean wasLaunched = launchedContainers.remove(container.getId()) != null;
    if (wasLaunched) {
      updateResource(container);
    }

    StringBuilder message = new StringBuilder("Released container ");
    message.append(container.getId());
    message.append(" of capacity ").append(container.getResource());
    message.append(" on host ").append(rmNode.getNodeAddress());
    message.append(", which currently has ").append(numContainers);
    message.append(" containers, ").append(getUsedResource());
    message.append(" used and ").append(getAvailableResource());
    message.append(" available");
    message.append(", release resources=").append(true);
    LOG.info(message.toString());
  }
 /**
  * Subtracts the given resource from this node's available resource and adds it
  * to the used resource. A null argument is logged as an error and leaves both
  * totals untouched.
  *
  * @param resource the amount to deduct; may be null
  */
 private synchronized void deductAvailableResource(Resource resource) {
   if (resource != null) {
     Resources.subtractFrom(availableResource, resource);
     Resources.addTo(usedResource, resource);
     return;
   }
   LOG.error("Invalid deduction of null resource for " + rmNode.getNodeAddress());
 }
Exemplo n.º 4
0
  /**
   * Applies the resource options carried by the request to the matching active nodes.
   *
   * <p>The update is done in two passes. The first pass rejects the whole request
   * if any node id is not among the active RM nodes, so that valid nodes are not
   * partially updated. The second pass applies each option; a node that is no
   * longer active by then is logged and skipped rather than failing the request.
   *
   * @param request carries the map from node id to the requested resource option
   * @return a freshly created, empty response record
   * @throws YarnException if any requested node id is not an active node
   * @throws IOException declared by the overridden method; not thrown directly here
   */
  @Override
  public UpdateNodeResourceResponse updateNodeResource(UpdateNodeResourceRequest request)
      throws YarnException, IOException {
    Map<NodeId, ResourceOption> nodeResourceMap = request.getNodeResourceMap();

    // Pass 1: validate every node id up front so that a bad id aborts the
    // request before any node has been modified.
    for (NodeId candidate : nodeResourceMap.keySet()) {
      if (this.rmContext.getActiveRMNodes().get(candidate) == null) {
        String failure =
            "Resource update get failed on all nodes due to change resource "
                + "on an unrecognized node: "
                + candidate;
        LOG.error(failure);
        throw RPCUtil.getRemoteException(failure);
      }
    }

    // Pass 2: apply the updates. A node may still have disappeared since the
    // validation pass (e.g. decommissioned concurrently); log and skip it.
    for (Map.Entry<NodeId, ResourceOption> update : nodeResourceMap.entrySet()) {
      NodeId targetId = update.getKey();
      ResourceOption requested = update.getValue();
      RMNode target = this.rmContext.getActiveRMNodes().get(targetId);
      if (target == null) {
        LOG.warn("Resource update get failed on an unrecognized node: " + targetId);
        continue;
      }
      target.setResourceOption(requested);
      LOG.info(
          "Update resource successfully on node("
              + target.getNodeID()
              + ") with resource("
              + requested.toString()
              + ")");
    }

    return recordFactory.newRecordInstance(UpdateNodeResourceResponse.class);
  }
 /**
  * Returns a one-line summary of this node: its address, the number of
  * containers, and the memory component of the available and used resources.
  */
 @Override
 public String toString() {
   StringBuilder summary = new StringBuilder("host: ");
   summary.append(rmNode.getNodeAddress());
   summary.append(" #containers=").append(getNumContainers());
   summary.append(" available=").append(getAvailableResource().getMemory());
   summary.append(" used=").append(getUsedResource().getMemory());
   return summary.toString();
 }
 /**
  * Builds a scheduler node for the given RM node. The available resource is set
  * to the memory and virtual cores of the node's total capability, and a new
  * total-capability instance is created from the same two values.
  *
  * @param node the RM node this scheduler node wraps
  * @param usePortForNodeName if true the node name is {@code host:port},
  *     otherwise it is the bare host name
  */
 public FiCaSchedulerNode(RMNode node, boolean usePortForNodeName) {
   this.rmNode = node;

   // Everything the node reports is available to begin with.
   this.availableResource.setMemory(node.getTotalCapability().getMemory());
   this.availableResource.setVirtualCores(node.getTotalCapability().getVirtualCores());

   // Fresh instance built from the reported memory and virtual cores.
   totalResourceCapability =
       Resource.newInstance(
           node.getTotalCapability().getMemory(), node.getTotalCapability().getVirtualCores());

   nodeName =
       usePortForNodeName
           ? node.getHostName() + ":" + node.getNodeID().getPort()
           : node.getHostName();
 }
  /**
   * Records that the scheduler has allocated a container on this node: the
   * container's resource is deducted from what is available, the container count
   * is incremented, and the container is tracked as launched.
   *
   * @param applicationId application the container belongs to (not read by this method body)
   * @param rmContainer allocated container
   */
  public synchronized void allocateContainer(ApplicationId applicationId, RMContainer rmContainer) {
    Container allocated = rmContainer.getContainer();

    deductAvailableResource(allocated.getResource());
    numContainers++;
    launchedContainers.put(allocated.getId(), rmContainer);

    // The message reflects the node's state after the allocation was recorded.
    String summary =
        "Assigned container " + allocated.getId()
            + " of capacity " + allocated.getResource()
            + " on host " + rmNode.getNodeAddress()
            + ", which currently has " + numContainers
            + " containers, " + getUsedResource()
            + " used and " + getAvailableResource()
            + " available";
    LOG.info(summary);
  }
  /**
   * Exercises node re-registration ("reconnect") against a MockRM and checks, after
   * each step, the active-NM count, the unhealthy-NM count and the root queue's
   * available memory.
   *
   * <p>Scenarios, in order: reconnect of a healthy node, reconnect of an unhealthy
   * node, an unhealthy node becoming healthy, reconnect with a changed capability,
   * reconnect with a changed capability plus running applications, and reconnect
   * with a changed HTTP port.
   */
  @Test
  public void testReconnectNode() throws Exception {
    final DrainDispatcher dispatcher = new DrainDispatcher();
    rm =
        new MockRM() {
          // Scheduler events are passed straight to scheduler.handle(); presumably this
          // keeps scheduler processing in step with dispatcher.await() -- TODO confirm.
          @Override
          protected EventHandler<SchedulerEvent> createSchedulerEventDispatcher() {
            return new SchedulerEventDispatcher(this.scheduler) {
              @Override
              public void handle(SchedulerEvent event) {
                scheduler.handle(event);
              }
            };
          }

          // Use the DrainDispatcher created above so the test can await() on it.
          @Override
          protected Dispatcher createDispatcher() {
            return dispatcher;
          }
        };
    rm.start();

    // Two nodes registered with 5120 each; nm1 heartbeats with true and nm2 with false
    // (presumably the health flag, consistent with the unhealthy count of 1 checked below).
    MockNM nm1 = rm.registerNode("host1:1234", 5120);
    MockNM nm2 = rm.registerNode("host2:5678", 5120);
    nm1.nodeHeartbeat(true);
    nm2.nodeHeartbeat(false);
    dispatcher.await();
    checkUnealthyNMCount(rm, nm2, true, 1);
    // Baseline active-NM count that the reconnect steps below must not change.
    final int expectedNMs = ClusterMetrics.getMetrics().getNumActiveNMs();
    QueueMetrics metrics = rm.getResourceScheduler().getRootQueueMetrics();
    // TODO Metrics incorrect in case of the FifoScheduler
    // Only one node's 5120 is expected to be counted as available at this point.
    Assert.assertEquals(5120, metrics.getAvailableMB());

    // reconnect of healthy node
    nm1 = rm.registerNode("host1:1234", 5120);
    NodeHeartbeatResponse response = nm1.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.NORMAL.equals(response.getNodeAction()));
    dispatcher.await();
    Assert.assertEquals(expectedNMs, ClusterMetrics.getMetrics().getNumActiveNMs());
    checkUnealthyNMCount(rm, nm2, true, 1);

    // reconnect of unhealthy node
    nm2 = rm.registerNode("host2:5678", 5120);
    response = nm2.nodeHeartbeat(false);
    Assert.assertTrue(NodeAction.NORMAL.equals(response.getNodeAction()));
    dispatcher.await();
    Assert.assertEquals(expectedNMs, ClusterMetrics.getMetrics().getNumActiveNMs());
    checkUnealthyNMCount(rm, nm2, true, 1);

    // unhealthy node changed back to healthy
    nm2 = rm.registerNode("host2:5678", 5120);
    dispatcher.await();
    // Two consecutive heartbeats with true are sent before awaiting the dispatcher.
    response = nm2.nodeHeartbeat(true);
    response = nm2.nodeHeartbeat(true);
    dispatcher.await();
    // Both nodes' memory is now expected to be available.
    Assert.assertEquals(5120 + 5120, metrics.getAvailableMB());

    // reconnect of node with changed capability
    // NOTE(review): nm1 is reassigned to a MockNM for host2:5678 from here on; the
    // variable name no longer matches the host -- confirm this is intentional.
    nm1 = rm.registerNode("host2:5678", 10240);
    dispatcher.await();
    response = nm1.nodeHeartbeat(true);
    dispatcher.await();
    Assert.assertTrue(NodeAction.NORMAL.equals(response.getNodeAction()));
    // host1 contributes 5120 and host2 its new 10240.
    Assert.assertEquals(5120 + 10240, metrics.getAvailableMB());

    // reconnect of node with changed capability and running applications
    List<ApplicationId> runningApps = new ArrayList<ApplicationId>();
    runningApps.add(ApplicationId.newInstance(1, 0));
    nm1 = rm.registerNode("host2:5678", 15360, 2, runningApps);
    dispatcher.await();
    response = nm1.nodeHeartbeat(true);
    dispatcher.await();
    Assert.assertTrue(NodeAction.NORMAL.equals(response.getNodeAction()));
    Assert.assertEquals(5120 + 15360, metrics.getAvailableMB());

    // reconnect healthy node changing http port
    // A MockNM is constructed directly here so the HTTP port can be set before registering.
    nm1 = new MockNM("host1:1234", 5120, rm.getResourceTrackerService());
    nm1.setHttpPort(3);
    nm1.registerNode();
    dispatcher.await();
    response = nm1.nodeHeartbeat(true);
    response = nm1.nodeHeartbeat(true);
    dispatcher.await();
    // The RM's node record must carry the new port; capability and available memory
    // are expected to be unchanged by this reconnect.
    RMNode rmNode = rm.getRMContext().getRMNodes().get(nm1.getNodeId());
    Assert.assertEquals(3, rmNode.getHttpPort());
    Assert.assertEquals(5120, rmNode.getTotalCapability().getMemory());
    Assert.assertEquals(5120 + 15360, metrics.getAvailableMB());
  }