/** Decommissioning using a post-configured exclude hosts file */
 @Test
 public void testAddNewExcludePathToConfiguration() throws Exception {
   Configuration conf = new Configuration();
   rm = new MockRM(conf);
   rm.start();
   MockNM nm1 = rm.registerNode("host1:1234", 5120);
   MockNM nm2 = rm.registerNode("host2:5678", 10240);
   ClusterMetrics metrics = ClusterMetrics.getMetrics();
   assert (metrics != null);
   int initialMetricCount = metrics.getNumDecommisionedNMs();
   NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true);
   Assert.assertEquals(NodeAction.NORMAL, nodeHeartbeat.getNodeAction());
   nodeHeartbeat = nm2.nodeHeartbeat(true);
   Assert.assertEquals(NodeAction.NORMAL, nodeHeartbeat.getNodeAction());
   writeToHostsFile("host2");
   conf.set(YarnConfiguration.RM_NODES_EXCLUDE_FILE_PATH, hostFile.getAbsolutePath());
   rm.getNodesListManager().refreshNodes(conf);
   checkDecommissionedNMCount(rm, ++initialMetricCount);
   nodeHeartbeat = nm1.nodeHeartbeat(true);
   Assert.assertEquals(
       "Node should not have been decomissioned.",
       NodeAction.NORMAL,
       nodeHeartbeat.getNodeAction());
   nodeHeartbeat = nm2.nodeHeartbeat(true);
   Assert.assertEquals(
       "Node should have been decomissioned but is in state" + nodeHeartbeat.getNodeAction(),
       NodeAction.SHUTDOWN,
       nodeHeartbeat.getNodeAction());
 }
  /** @throws Exception If failed. */
  public void testClusterNodeMetrics() throws Exception {
    final Ignite ignite0 = grid();
    final Ignite ignite1 = startGrid(1);

    GridTestUtils.waitForCondition(
        new GridAbsPredicate() {
          @Override
          public boolean apply() {
            return ignite0.cluster().nodes().size() == 2 && ignite1.cluster().nodes().size() == 2;
          }
        },
        3000L);

    ClusterMetrics metrics0 = ignite0.cluster().localNode().metrics();

    ClusterMetrics nodesMetrics =
        ignite0
            .cluster()
            .forNode(ignite0.cluster().localNode(), ignite1.cluster().localNode())
            .metrics();

    assertEquals(metrics0.getTotalCpus(), nodesMetrics.getTotalCpus());
    assertEquals(1, metrics0.getTotalNodes());
    assertEquals(2, nodesMetrics.getTotalNodes());

    assert metrics0.getHeapMemoryUsed() > 0;
    assert metrics0.getHeapMemoryTotal() > 0;
    assert metrics0.getNonHeapMemoryMaximum() > 0;
  }
  /** @throws Exception If failed. */
  public void testIoMetrics() throws Exception {
    Ignite ignite0 = grid();
    Ignite ignite1 = startGrid(1);

    Object msg = new TestMessage();

    int size = ignite0.configuration().getMarshaller().marshal(msg).length;

    assert size > MSG_SIZE;

    final CountDownLatch latch = new CountDownLatch(MSG_CNT);

    ignite0
        .message()
        .localListen(
            null,
            new MessagingListenActor<TestMessage>() {
              @Override
              protected void receive(UUID nodeId, TestMessage rcvMsg) throws Throwable {
                latch.countDown();
              }
            });

    ignite1
        .message()
        .localListen(
            null,
            new MessagingListenActor<TestMessage>() {
              @Override
              protected void receive(UUID nodeId, TestMessage rcvMsg) throws Throwable {
                respond(rcvMsg);
              }
            });

    for (int i = 0; i < MSG_CNT; i++) message(ignite0.cluster().forRemotes()).send(null, msg);

    latch.await();

    ClusterMetrics metrics = ignite0.cluster().localNode().metrics();

    info("Node 0 metrics: " + metrics);

    // Time sync messages are being sent.
    assert metrics.getSentMessagesCount() >= MSG_CNT;
    assert metrics.getSentBytesCount() > size * MSG_CNT;
    assert metrics.getReceivedMessagesCount() >= MSG_CNT;
    assert metrics.getReceivedBytesCount() > size * MSG_CNT;

    metrics = ignite1.cluster().localNode().metrics();

    info("Node 1 metrics: " + metrics);

    // Time sync messages are being sent.
    assert metrics.getSentMessagesCount() >= MSG_CNT;
    assert metrics.getSentBytesCount() > size * MSG_CNT;
    assert metrics.getReceivedMessagesCount() >= MSG_CNT;
    assert metrics.getReceivedBytesCount() > size * MSG_CNT;
  }
 private void checkShutdownNMCount(MockRM rm, int count) throws InterruptedException {
   int waitCount = 0;
   while (ClusterMetrics.getMetrics().getNumShutdownNMs() != count && waitCount++ < 20) {
     synchronized (this) {
       wait(100);
     }
   }
   Assert.assertEquals(
       "The shutdown metrics are not updated",
       count,
       ClusterMetrics.getMetrics().getNumShutdownNMs());
 }
  @Test
  public void testInvalidNMUnregistration() throws Exception {
    Configuration conf = new Configuration();
    rm = new MockRM(conf);
    rm.start();
    ResourceTrackerService resourceTrackerService = rm.getResourceTrackerService();
    int shutdownNMsCount = ClusterMetrics.getMetrics().getNumShutdownNMs();
    int decommisionedNMsCount = ClusterMetrics.getMetrics().getNumDecommisionedNMs();

    // Node not found for unregister
    UnRegisterNodeManagerRequest request = Records.newRecord(UnRegisterNodeManagerRequest.class);
    request.setNodeId(BuilderUtils.newNodeId("host", 1234));
    resourceTrackerService.unRegisterNodeManager(request);
    checkShutdownNMCount(rm, 0);
    checkDecommissionedNMCount(rm, 0);

    // 1. Register the Node Manager
    // 2. Exclude the same Node Manager host
    // 3. Give NM heartbeat to RM
    // 4. Unregister the Node Manager
    MockNM nm1 = new MockNM("host1:1234", 5120, resourceTrackerService);
    RegisterNodeManagerResponse response = nm1.registerNode();
    Assert.assertEquals(NodeAction.NORMAL, response.getNodeAction());
    writeToHostsFile("host2");
    conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile.getAbsolutePath());
    rm.getNodesListManager().refreshNodes(conf);
    NodeHeartbeatResponse heartbeatResponse = nm1.nodeHeartbeat(true);
    Assert.assertEquals(NodeAction.SHUTDOWN, heartbeatResponse.getNodeAction());
    checkShutdownNMCount(rm, shutdownNMsCount);
    checkDecommissionedNMCount(rm, ++decommisionedNMsCount);
    request.setNodeId(nm1.getNodeId());
    resourceTrackerService.unRegisterNodeManager(request);
    checkShutdownNMCount(rm, shutdownNMsCount);
    checkDecommissionedNMCount(rm, decommisionedNMsCount);

    // 1. Register the Node Manager
    // 2. Exclude the same Node Manager host
    // 3. Unregister the Node Manager
    MockNM nm2 = new MockNM("host2:1234", 5120, resourceTrackerService);
    RegisterNodeManagerResponse response2 = nm2.registerNode();
    Assert.assertEquals(NodeAction.NORMAL, response2.getNodeAction());
    writeToHostsFile("host1");
    conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile.getAbsolutePath());
    rm.getNodesListManager().refreshNodes(conf);
    request.setNodeId(nm2.getNodeId());
    resourceTrackerService.unRegisterNodeManager(request);
    checkShutdownNMCount(rm, shutdownNMsCount);
    checkDecommissionedNMCount(rm, ++decommisionedNMsCount);
  }
  /** decommissioning using a exclude hosts file */
  @Test
  public void testDecommissionWithExcludeHosts() throws Exception {
    Configuration conf = new Configuration();
    conf.set("yarn.resourcemanager.nodes.exclude-path", hostFile.getAbsolutePath());

    writeToHostsFile("");
    rm = new MockRM(conf);
    rm.start();

    MockNM nm1 = rm.registerNode("host1:1234", 5120);
    MockNM nm2 = rm.registerNode("host2:5678", 10240);

    int initialMetricCount = ClusterMetrics.getMetrics().getNumDecommisionedNMs();

    HeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));
    nodeHeartbeat = nm2.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));

    writeToHostsFile("host2");

    rm.getNodesListManager().refreshNodes();

    nodeHeartbeat = nm1.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));
    nodeHeartbeat = nm2.nodeHeartbeat(true);
    Assert.assertTrue(
        "The decommisioned metrics are not updated",
        NodeAction.SHUTDOWN.equals(nodeHeartbeat.getNodeAction()));
    checkDecommissionedNMCount(rm, ++initialMetricCount);
  }
  /** Decommissioning using a pre-configured include hosts file */
  @Test
  public void testDecommissionWithIncludeHosts() throws Exception {

    writeToHostsFile("localhost", "host1", "host2");
    Configuration conf = new Configuration();
    conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile.getAbsolutePath());

    rm = new MockRM(conf);
    rm.start();

    MockNM nm1 = rm.registerNode("host1:1234", 5120);
    MockNM nm2 = rm.registerNode("host2:5678", 10240);
    MockNM nm3 = rm.registerNode("localhost:4433", 1024);

    ClusterMetrics metrics = ClusterMetrics.getMetrics();
    assert (metrics != null);
    int metricCount = metrics.getNumDecommisionedNMs();

    NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));
    nodeHeartbeat = nm2.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));
    nodeHeartbeat = nm3.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));

    // To test that IPs also work
    String ip = NetUtils.normalizeHostName("localhost");
    writeToHostsFile("host1", ip);

    rm.getNodesListManager().refreshNodes(conf);

    checkDecommissionedNMCount(rm, ++metricCount);

    nodeHeartbeat = nm1.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));
    Assert.assertEquals(1, ClusterMetrics.getMetrics().getNumDecommisionedNMs());

    nodeHeartbeat = nm2.nodeHeartbeat(true);
    Assert.assertTrue(
        "Node is not decommisioned.", NodeAction.SHUTDOWN.equals(nodeHeartbeat.getNodeAction()));

    nodeHeartbeat = nm3.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));
    Assert.assertEquals(metricCount, ClusterMetrics.getMetrics().getNumDecommisionedNMs());
  }
  @Test
  public void testUnhealthyNMUnregistration() throws Exception {
    Configuration conf = new Configuration();
    rm = new MockRM(conf);
    rm.start();

    ResourceTrackerService resourceTrackerService = rm.getResourceTrackerService();
    MockNM nm1 = rm.registerNode("host1:1234", 5120);
    Assert.assertEquals(0, ClusterMetrics.getMetrics().getUnhealthyNMs());
    // node healthy
    nm1.nodeHeartbeat(true);
    int shutdownNMsCount = ClusterMetrics.getMetrics().getNumShutdownNMs();

    // node unhealthy
    nm1.nodeHeartbeat(false);
    checkUnealthyNMCount(rm, nm1, true, 1);
    UnRegisterNodeManagerRequest request = Records.newRecord(UnRegisterNodeManagerRequest.class);
    request.setNodeId(nm1.getNodeId());
    resourceTrackerService.unRegisterNodeManager(request);
    checkShutdownNMCount(rm, ++shutdownNMsCount);
  }
  @After
  public void tearDown() {
    if (hostFile != null && hostFile.exists()) {
      hostFile.delete();
    }

    ClusterMetrics.destroy();
    if (rm != null) {
      rm.stop();
    }

    MetricsSystem ms = DefaultMetricsSystem.instance();
    if (ms.getSource("ClusterMetrics") != null) {
      DefaultMetricsSystem.shutdown();
    }
  }
  private void checkUnealthyNMCount(MockRM rm, MockNM nm1, boolean health, int count)
      throws Exception {

    int waitCount = 0;
    while ((rm.getRMContext().getRMNodes().get(nm1.getNodeId()).getState() != NodeState.UNHEALTHY)
            == health
        && waitCount++ < 20) {
      synchronized (this) {
        wait(100);
      }
    }
    Assert.assertFalse(
        (rm.getRMContext().getRMNodes().get(nm1.getNodeId()).getState() != NodeState.UNHEALTHY)
            == health);
    Assert.assertEquals(
        "Unhealthy metrics not incremented", count, ClusterMetrics.getMetrics().getUnhealthyNMs());
  }
  @Test
  public void testUnhealthyNodeStatus() throws Exception {
    Configuration conf = new Configuration();
    conf.set(YarnConfiguration.RM_NODES_EXCLUDE_FILE_PATH, hostFile.getAbsolutePath());

    rm = new MockRM(conf);
    rm.start();

    MockNM nm1 = rm.registerNode("host1:1234", 5120);
    Assert.assertEquals(0, ClusterMetrics.getMetrics().getUnhealthyNMs());
    // node healthy
    nm1.nodeHeartbeat(true);

    // node unhealthy
    nm1.nodeHeartbeat(false);
    checkUnealthyNMCount(rm, nm1, true, 1);

    // node healthy again
    nm1.nodeHeartbeat(true);
    checkUnealthyNMCount(rm, nm1, false, 0);
  }
  @Test
  public void testReboot() throws Exception {
    Configuration conf = new Configuration();
    rm = new MockRM(conf);
    rm.start();

    MockNM nm1 = rm.registerNode("host1:1234", 5120);
    MockNM nm2 = rm.registerNode("host2:1234", 2048);

    int initialMetricCount = ClusterMetrics.getMetrics().getNumRebootedNMs();
    NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));

    nodeHeartbeat =
        nm2.nodeHeartbeat(new HashMap<ApplicationId, List<ContainerStatus>>(), true, -100);
    Assert.assertTrue(NodeAction.RESYNC.equals(nodeHeartbeat.getNodeAction()));
    Assert.assertEquals(
        "Too far behind rm response id:0 nm response id:-100",
        nodeHeartbeat.getDiagnosticsMessage());
    checkRebootedNMCount(rm, ++initialMetricCount);
  }
  @Test
  public void testUnhealthyNodeStatus() throws Exception {
    Configuration conf = new Configuration();
    conf.set("yarn.resourcemanager.nodes.exclude-path", hostFile.getAbsolutePath());

    MockRM rm = new MockRM(conf);
    rm.start();

    MockNM nm1 = rm.registerNode("host1:1234", 5120);
    Assert.assertEquals(0, ClusterMetrics.getMetrics().getUnhealthyNMs());
    // node healthy
    nm1.nodeHeartbeat(true);

    // node unhealthy
    nm1.nodeHeartbeat(false);
    checkUnealthyNMCount(rm, nm1, true, 1);

    // node healthy again
    nm1.nodeHeartbeat(true);
    checkUnealthyNMCount(rm, nm1, false, 0);
  }
  @Test
  public void testReboot() throws Exception {
    Configuration conf = new Configuration();
    rm = new MockRM(conf);
    rm.start();

    MockNM nm1 = rm.registerNode("host1:1234", 5120);
    MockNM nm2 = new MockNM("host2:1234", 2048, rm.getResourceTrackerService());

    int initialMetricCount = ClusterMetrics.getMetrics().getNumRebootedNMs();
    HeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));

    nodeHeartbeat =
        nm2.nodeHeartbeat(
            new HashMap<ApplicationId, List<ContainerStatus>>(),
            true,
            recordFactory.newRecordInstance(NodeId.class));
    Assert.assertTrue(NodeAction.REBOOT.equals(nodeHeartbeat.getNodeAction()));
    checkRebootedNMCount(rm, ++initialMetricCount);
  }
  @Test
  public void testNMUnregistration() throws Exception {
    Configuration conf = new Configuration();
    rm = new MockRM(conf);
    rm.start();

    ResourceTrackerService resourceTrackerService = rm.getResourceTrackerService();
    MockNM nm1 = rm.registerNode("host1:1234", 5120);

    int shutdownNMsCount = ClusterMetrics.getMetrics().getNumShutdownNMs();
    NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));

    UnRegisterNodeManagerRequest request = Records.newRecord(UnRegisterNodeManagerRequest.class);
    request.setNodeId(nm1.getNodeId());
    resourceTrackerService.unRegisterNodeManager(request);
    checkShutdownNMCount(rm, ++shutdownNMsCount);

    // The RM should remove the node after unregistration, hence send a reboot
    // command.
    nodeHeartbeat = nm1.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.RESYNC.equals(nodeHeartbeat.getNodeAction()));
  }
  /** @throws Exception If failed. */
  public void testSingleTaskMetrics() throws Exception {
    Ignite ignite = grid();

    ignite.compute().execute(new GridTestTask(), "testArg");

    // Let metrics update twice.
    final CountDownLatch latch = new CountDownLatch(2);

    ignite
        .events()
        .localListen(
            new IgnitePredicate<Event>() {
              @Override
              public boolean apply(Event evt) {
                assert evt.type() == EVT_NODE_METRICS_UPDATED;

                latch.countDown();

                return true;
              }
            },
            EVT_NODE_METRICS_UPDATED);

    // Wait for metrics update.
    latch.await();

    ClusterMetrics metrics = ignite.cluster().localNode().metrics();

    info("Node metrics: " + metrics);

    assert metrics.getAverageActiveJobs() > 0;
    assert metrics.getAverageCancelledJobs() == 0;
    assert metrics.getAverageJobExecuteTime() >= 0;
    assert metrics.getAverageJobWaitTime() >= 0;
    assert metrics.getAverageRejectedJobs() == 0;
    assert metrics.getAverageWaitingJobs() == 0;
    assert metrics.getCurrentActiveJobs() == 0;
    assert metrics.getCurrentCancelledJobs() == 0;
    assert metrics.getCurrentJobExecuteTime() == 0;
    assert metrics.getCurrentJobWaitTime() == 0;
    assert metrics.getCurrentWaitingJobs() == 0;
    assert metrics.getMaximumActiveJobs() == 1;
    assert metrics.getMaximumCancelledJobs() == 0;
    assert metrics.getMaximumJobExecuteTime() >= 0;
    assert metrics.getMaximumJobWaitTime() >= 0;
    assert metrics.getMaximumRejectedJobs() == 0;
    assert metrics.getMaximumWaitingJobs() == 0;
    assert metrics.getTotalCancelledJobs() == 0;
    assert metrics.getTotalExecutedJobs() == 1;
    assert metrics.getTotalRejectedJobs() == 0;
    assert metrics.getTotalExecutedTasks() == 1;

    assertTrue(
        "MaximumJobExecuteTime="
            + metrics.getMaximumJobExecuteTime()
            + " is less than AverageJobExecuteTime="
            + metrics.getAverageJobExecuteTime(),
        metrics.getMaximumJobExecuteTime() >= metrics.getAverageJobExecuteTime());
  }
  /** Decommissioning using a pre-configured exclude hosts file */
  @Test
  public void testDecommissionWithExcludeHosts() throws Exception {
    Configuration conf = new Configuration();
    conf.set(YarnConfiguration.RM_NODES_EXCLUDE_FILE_PATH, hostFile.getAbsolutePath());

    writeToHostsFile("");
    final DrainDispatcher dispatcher = new DrainDispatcher();
    rm =
        new MockRM(conf) {
          @Override
          protected Dispatcher createDispatcher() {
            return dispatcher;
          }
        };
    rm.start();

    MockNM nm1 = rm.registerNode("host1:1234", 5120);
    MockNM nm2 = rm.registerNode("host2:5678", 10240);
    MockNM nm3 = rm.registerNode("localhost:4433", 1024);

    dispatcher.await();

    int metricCount = ClusterMetrics.getMetrics().getNumDecommisionedNMs();
    NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));
    nodeHeartbeat = nm2.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));
    dispatcher.await();

    // To test that IPs also work
    String ip = NetUtils.normalizeHostName("localhost");
    writeToHostsFile("host2", ip);

    rm.getNodesListManager().refreshNodes(conf);

    checkDecommissionedNMCount(rm, metricCount + 2);

    nodeHeartbeat = nm1.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));
    nodeHeartbeat = nm2.nodeHeartbeat(true);
    Assert.assertTrue(
        "The decommisioned metrics are not updated",
        NodeAction.SHUTDOWN.equals(nodeHeartbeat.getNodeAction()));

    nodeHeartbeat = nm3.nodeHeartbeat(true);
    Assert.assertTrue(
        "The decommisioned metrics are not updated",
        NodeAction.SHUTDOWN.equals(nodeHeartbeat.getNodeAction()));
    dispatcher.await();

    writeToHostsFile("");
    rm.getNodesListManager().refreshNodes(conf);

    nm3 = rm.registerNode("localhost:4433", 1024);
    dispatcher.await();
    nodeHeartbeat = nm3.nodeHeartbeat(true);
    dispatcher.await();
    Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));
    // decommissined node is 1 since 1 node is rejoined after updating exclude
    // file
    checkDecommissionedNMCount(rm, metricCount + 1);
  }
 @Test(timeout = 60000)
 public void testReconnect() throws IOException {
   RMNodeImpl node = getRunningNode();
   ClusterMetrics cm = ClusterMetrics.getMetrics();
   int initialActive = cm.getNumActiveNMs();
   int initialLost = cm.getNumLostNMs();
   int initialUnhealthy = cm.getUnhealthyNMs();
   int initialDecommissioned = cm.getNumDecommisionedNMs();
   int initialRebooted = cm.getNumRebootedNMs();
   node.handle(
       new RMNodeReconnectEvent(
           node.getNodeID(), node, new TransactionStateImpl(TransactionType.RM)));
   Assert.assertEquals("Active Nodes", initialActive, cm.getNumActiveNMs());
   Assert.assertEquals("Lost Nodes", initialLost, cm.getNumLostNMs());
   Assert.assertEquals("Unhealthy Nodes", initialUnhealthy, cm.getUnhealthyNMs());
   Assert.assertEquals("Decommissioned Nodes", initialDecommissioned, cm.getNumDecommisionedNMs());
   Assert.assertEquals("Rebooted Nodes", initialRebooted, cm.getNumRebootedNMs());
   Assert.assertEquals(NodeState.RUNNING, node.getState());
   Assert.assertNotNull(nodesListManagerEvent);
   Assert.assertEquals(NodesListManagerEventType.NODE_USABLE, nodesListManagerEvent.getType());
 }
 @Test(timeout = 60000)
 public void testAdd() {
   RMNodeImpl node = getNewNode();
   ClusterMetrics cm = ClusterMetrics.getMetrics();
   int initialActive = cm.getNumActiveNMs();
   int initialLost = cm.getNumLostNMs();
   int initialUnhealthy = cm.getUnhealthyNMs();
   int initialDecommissioned = cm.getNumDecommisionedNMs();
   int initialRebooted = cm.getNumRebootedNMs();
   node.handle(new RMNodeEvent(node.getNodeID(), RMNodeEventType.STARTED, null));
   Assert.assertEquals("Active Nodes", initialActive + 1, cm.getNumActiveNMs());
   Assert.assertEquals("Lost Nodes", initialLost, cm.getNumLostNMs());
   Assert.assertEquals("Unhealthy Nodes", initialUnhealthy, cm.getUnhealthyNMs());
   Assert.assertEquals("Decommissioned Nodes", initialDecommissioned, cm.getNumDecommisionedNMs());
   Assert.assertEquals("Rebooted Nodes", initialRebooted, cm.getNumRebootedNMs());
   Assert.assertEquals(NodeState.RUNNING, node.getState());
   Assert.assertNotNull(nodesListManagerEvent);
   Assert.assertEquals(NodesListManagerEventType.NODE_USABLE, nodesListManagerEvent.getType());
 }
 @Test(timeout = 60000)
 public void testUnhealthyRebooting() throws IOException {
   RMNodeImpl node = getUnhealthyNode();
   ClusterMetrics cm = ClusterMetrics.getMetrics();
   int initialActive = cm.getNumActiveNMs();
   int initialLost = cm.getNumLostNMs();
   int initialUnhealthy = cm.getUnhealthyNMs();
   int initialDecommissioned = cm.getNumDecommisionedNMs();
   int initialRebooted = cm.getNumRebootedNMs();
   node.handle(
       new RMNodeEvent(
           node.getNodeID(),
           RMNodeEventType.REBOOTING,
           new TransactionStateImpl(TransactionType.RM)));
   Assert.assertEquals("Active Nodes", initialActive, cm.getNumActiveNMs());
   Assert.assertEquals("Lost Nodes", initialLost, cm.getNumLostNMs());
   Assert.assertEquals("Unhealthy Nodes", initialUnhealthy - 1, cm.getUnhealthyNMs());
   Assert.assertEquals("Decommissioned Nodes", initialDecommissioned, cm.getNumDecommisionedNMs());
   Assert.assertEquals("Rebooted Nodes", initialRebooted + 1, cm.getNumRebootedNMs());
   Assert.assertEquals(NodeState.REBOOTED, node.getState());
 }
  @Test
  public void testReconnectNode() throws Exception {
    final DrainDispatcher dispatcher = new DrainDispatcher();
    rm =
        new MockRM() {
          @Override
          protected EventHandler<SchedulerEvent> createSchedulerEventDispatcher() {
            return new SchedulerEventDispatcher(this.scheduler) {
              @Override
              public void handle(SchedulerEvent event) {
                scheduler.handle(event);
              }
            };
          }

          @Override
          protected Dispatcher createDispatcher() {
            return dispatcher;
          }
        };
    rm.start();

    MockNM nm1 = rm.registerNode("host1:1234", 5120);
    MockNM nm2 = rm.registerNode("host2:5678", 5120);
    nm1.nodeHeartbeat(true);
    nm2.nodeHeartbeat(false);
    dispatcher.await();
    checkUnealthyNMCount(rm, nm2, true, 1);
    final int expectedNMs = ClusterMetrics.getMetrics().getNumActiveNMs();
    QueueMetrics metrics = rm.getResourceScheduler().getRootQueueMetrics();
    // TODO Metrics incorrect in case of the FifoScheduler
    Assert.assertEquals(5120, metrics.getAvailableMB());

    // reconnect of healthy node
    nm1 = rm.registerNode("host1:1234", 5120);
    NodeHeartbeatResponse response = nm1.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.NORMAL.equals(response.getNodeAction()));
    dispatcher.await();
    Assert.assertEquals(expectedNMs, ClusterMetrics.getMetrics().getNumActiveNMs());
    checkUnealthyNMCount(rm, nm2, true, 1);

    // reconnect of unhealthy node
    nm2 = rm.registerNode("host2:5678", 5120);
    response = nm2.nodeHeartbeat(false);
    Assert.assertTrue(NodeAction.NORMAL.equals(response.getNodeAction()));
    dispatcher.await();
    Assert.assertEquals(expectedNMs, ClusterMetrics.getMetrics().getNumActiveNMs());
    checkUnealthyNMCount(rm, nm2, true, 1);

    // unhealthy node changed back to healthy
    nm2 = rm.registerNode("host2:5678", 5120);
    dispatcher.await();
    response = nm2.nodeHeartbeat(true);
    response = nm2.nodeHeartbeat(true);
    dispatcher.await();
    Assert.assertEquals(5120 + 5120, metrics.getAvailableMB());

    // reconnect of node with changed capability
    nm1 = rm.registerNode("host2:5678", 10240);
    dispatcher.await();
    response = nm1.nodeHeartbeat(true);
    dispatcher.await();
    Assert.assertTrue(NodeAction.NORMAL.equals(response.getNodeAction()));
    Assert.assertEquals(5120 + 10240, metrics.getAvailableMB());

    // reconnect of node with changed capability and running applications
    List<ApplicationId> runningApps = new ArrayList<ApplicationId>();
    runningApps.add(ApplicationId.newInstance(1, 0));
    nm1 = rm.registerNode("host2:5678", 15360, 2, runningApps);
    dispatcher.await();
    response = nm1.nodeHeartbeat(true);
    dispatcher.await();
    Assert.assertTrue(NodeAction.NORMAL.equals(response.getNodeAction()));
    Assert.assertEquals(5120 + 15360, metrics.getAvailableMB());

    // reconnect healthy node changing http port
    nm1 = new MockNM("host1:1234", 5120, rm.getResourceTrackerService());
    nm1.setHttpPort(3);
    nm1.registerNode();
    dispatcher.await();
    response = nm1.nodeHeartbeat(true);
    response = nm1.nodeHeartbeat(true);
    dispatcher.await();
    RMNode rmNode = rm.getRMContext().getRMNodes().get(nm1.getNodeId());
    Assert.assertEquals(3, rmNode.getHttpPort());
    Assert.assertEquals(5120, rmNode.getTotalCapability().getMemory());
    Assert.assertEquals(5120 + 15360, metrics.getAvailableMB());
  }