/**
 * Decommissioning using a post-configured exclude hosts file: the exclude-file path is set on
 * the configuration only after the ResourceManager has been started and both nodes have
 * heartbeated normally, and is then applied through {@code refreshNodes(conf)}.
 *
 * <p>Expects host2 (the only host written to the exclude file) to be told to shut down on its
 * next heartbeat, while host1 keeps receiving {@code NORMAL}.
 */
 @Test
 public void testAddNewExcludePathToConfiguration() throws Exception {
   Configuration conf = new Configuration();
   rm = new MockRM(conf);
   rm.start();
   MockNM nm1 = rm.registerNode("host1:1234", 5120);
   MockNM nm2 = rm.registerNode("host2:5678", 10240);
   ClusterMetrics metrics = ClusterMetrics.getMetrics();
   // A JUnit assertion is used instead of the `assert` keyword, which is skipped
   // entirely when the JVM runs without -ea.
   Assert.assertNotNull(metrics);
   // Capture the baseline so the check below is relative to the count at this point.
   int initialMetricCount = metrics.getNumDecommisionedNMs();
   NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true);
   Assert.assertEquals(NodeAction.NORMAL, nodeHeartbeat.getNodeAction());
   nodeHeartbeat = nm2.nodeHeartbeat(true);
   Assert.assertEquals(NodeAction.NORMAL, nodeHeartbeat.getNodeAction());
   // Exclude host2 and only now point the configuration at the exclude file.
   writeToHostsFile("host2");
   conf.set(YarnConfiguration.RM_NODES_EXCLUDE_FILE_PATH, hostFile.getAbsolutePath());
   rm.getNodesListManager().refreshNodes(conf);
   checkDecommissionedNMCount(rm, ++initialMetricCount);
   nodeHeartbeat = nm1.nodeHeartbeat(true);
   Assert.assertEquals(
       "Node should not have been decomissioned.",
       NodeAction.NORMAL,
       nodeHeartbeat.getNodeAction());
   nodeHeartbeat = nm2.nodeHeartbeat(true);
   Assert.assertEquals(
       "Node should have been decomissioned but is in state " + nodeHeartbeat.getNodeAction(),
       NodeAction.SHUTDOWN,
       nodeHeartbeat.getNodeAction());
 }
Exemplo n.º 2
0
 /**
  * Delivers a {@code STARTED} event to the node returned by {@code getNewNode()} and checks the
  * cluster metrics relative to their values before the event: the active count grows by one and
  * the lost, unhealthy, decommissioned and rebooted counts stay the same. Also checks that the
  * node ends up {@code RUNNING} and that the recorded nodes-list-manager event is
  * {@code NODE_USABLE}.
  */
 @Test(timeout = 60000)
 public void testAdd() {
   RMNodeImpl rmNode = getNewNode();
   ClusterMetrics clusterMetrics = ClusterMetrics.getMetrics();

   // Snapshot of every counter before the event is handled.
   int activeBefore = clusterMetrics.getNumActiveNMs();
   int lostBefore = clusterMetrics.getNumLostNMs();
   int unhealthyBefore = clusterMetrics.getUnhealthyNMs();
   int decommissionedBefore = clusterMetrics.getNumDecommisionedNMs();
   int rebootedBefore = clusterMetrics.getNumRebootedNMs();

   RMNodeEvent startedEvent = new RMNodeEvent(rmNode.getNodeID(), RMNodeEventType.STARTED, null);
   rmNode.handle(startedEvent);

   Assert.assertEquals("Active Nodes", activeBefore + 1, clusterMetrics.getNumActiveNMs());
   Assert.assertEquals("Lost Nodes", lostBefore, clusterMetrics.getNumLostNMs());
   Assert.assertEquals("Unhealthy Nodes", unhealthyBefore, clusterMetrics.getUnhealthyNMs());
   Assert.assertEquals(
       "Decommissioned Nodes", decommissionedBefore, clusterMetrics.getNumDecommisionedNMs());
   Assert.assertEquals("Rebooted Nodes", rebootedBefore, clusterMetrics.getNumRebootedNMs());

   Assert.assertEquals(NodeState.RUNNING, rmNode.getState());
   Assert.assertNotNull(nodesListManagerEvent);
   NodesListManagerEventType reportedType = nodesListManagerEvent.getType();
   Assert.assertEquals(NodesListManagerEventType.NODE_USABLE, reportedType);
 }
Exemplo n.º 3
0
 /**
  * Delivers a reconnect event to the node returned by {@code getRunningNode()} and checks that
  * none of the active, lost, unhealthy, decommissioned or rebooted counters change. Also checks
  * that the node is still {@code RUNNING} and that the recorded nodes-list-manager event is
  * {@code NODE_USABLE}.
  */
 @Test(timeout = 60000)
 public void testReconnect() throws IOException {
   RMNodeImpl rmNode = getRunningNode();
   ClusterMetrics clusterMetrics = ClusterMetrics.getMetrics();

   // Snapshot of every counter before the event is handled.
   int activeBefore = clusterMetrics.getNumActiveNMs();
   int lostBefore = clusterMetrics.getNumLostNMs();
   int unhealthyBefore = clusterMetrics.getUnhealthyNMs();
   int decommissionedBefore = clusterMetrics.getNumDecommisionedNMs();
   int rebootedBefore = clusterMetrics.getNumRebootedNMs();

   // The node itself is passed as the reconnecting node.
   RMNodeReconnectEvent reconnectEvent =
       new RMNodeReconnectEvent(
           rmNode.getNodeID(), rmNode, new TransactionStateImpl(TransactionType.RM));
   rmNode.handle(reconnectEvent);

   Assert.assertEquals("Active Nodes", activeBefore, clusterMetrics.getNumActiveNMs());
   Assert.assertEquals("Lost Nodes", lostBefore, clusterMetrics.getNumLostNMs());
   Assert.assertEquals("Unhealthy Nodes", unhealthyBefore, clusterMetrics.getUnhealthyNMs());
   Assert.assertEquals(
       "Decommissioned Nodes", decommissionedBefore, clusterMetrics.getNumDecommisionedNMs());
   Assert.assertEquals("Rebooted Nodes", rebootedBefore, clusterMetrics.getNumRebootedNMs());

   Assert.assertEquals(NodeState.RUNNING, rmNode.getState());
   Assert.assertNotNull(nodesListManagerEvent);
   NodesListManagerEventType reportedType = nodesListManagerEvent.getType();
   Assert.assertEquals(NodesListManagerEventType.NODE_USABLE, reportedType);
 }
Exemplo n.º 4
0
 /**
  * Delivers a {@code REBOOTING} event to the node returned by {@code getUnhealthyNode()} and
  * checks the cluster metrics relative to their values before the event: the unhealthy count
  * drops by one, the rebooted count grows by one, and the active, lost and decommissioned counts
  * stay the same. The node must end up in the {@code REBOOTED} state.
  */
 @Test(timeout = 60000)
 public void testUnhealthyRebooting() throws IOException {
   RMNodeImpl rmNode = getUnhealthyNode();
   ClusterMetrics clusterMetrics = ClusterMetrics.getMetrics();

   // Snapshot of every counter before the event is handled.
   int activeBefore = clusterMetrics.getNumActiveNMs();
   int lostBefore = clusterMetrics.getNumLostNMs();
   int unhealthyBefore = clusterMetrics.getUnhealthyNMs();
   int decommissionedBefore = clusterMetrics.getNumDecommisionedNMs();
   int rebootedBefore = clusterMetrics.getNumRebootedNMs();

   RMNodeEvent rebootingEvent =
       new RMNodeEvent(
           rmNode.getNodeID(),
           RMNodeEventType.REBOOTING,
           new TransactionStateImpl(TransactionType.RM));
   rmNode.handle(rebootingEvent);

   Assert.assertEquals("Active Nodes", activeBefore, clusterMetrics.getNumActiveNMs());
   Assert.assertEquals("Lost Nodes", lostBefore, clusterMetrics.getNumLostNMs());
   Assert.assertEquals("Unhealthy Nodes", unhealthyBefore - 1, clusterMetrics.getUnhealthyNMs());
   Assert.assertEquals(
       "Decommissioned Nodes", decommissionedBefore, clusterMetrics.getNumDecommisionedNMs());
   Assert.assertEquals("Rebooted Nodes", rebootedBefore + 1, clusterMetrics.getNumRebootedNMs());

   Assert.assertEquals(NodeState.REBOOTED, rmNode.getState());
 }
  /**
   * Decommissioning using a pre-configured include hosts file.
   *
   * <p>Starts the ResourceManager with an include file listing localhost, host1 and host2, then
   * rewrites the file to contain only host1 and the normalized form of "localhost" and refreshes
   * the node list. Expects host2 to be told to shut down while host1 and localhost keep receiving
   * {@code NORMAL}, and the decommissioned-node metric to grow by exactly one relative to its
   * value before the refresh.
   */
  @Test
  public void testDecommissionWithIncludeHosts() throws Exception {

    writeToHostsFile("localhost", "host1", "host2");
    Configuration conf = new Configuration();
    conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile.getAbsolutePath());

    rm = new MockRM(conf);
    rm.start();

    MockNM nm1 = rm.registerNode("host1:1234", 5120);
    MockNM nm2 = rm.registerNode("host2:5678", 10240);
    MockNM nm3 = rm.registerNode("localhost:4433", 1024);

    ClusterMetrics metrics = ClusterMetrics.getMetrics();
    // A JUnit assertion is used instead of the `assert` keyword, which is skipped
    // entirely when the JVM runs without -ea.
    Assert.assertNotNull(metrics);
    // Baseline: every later metric check is relative to this value.
    int metricCount = metrics.getNumDecommisionedNMs();

    NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true);
    Assert.assertEquals(NodeAction.NORMAL, nodeHeartbeat.getNodeAction());
    nodeHeartbeat = nm2.nodeHeartbeat(true);
    Assert.assertEquals(NodeAction.NORMAL, nodeHeartbeat.getNodeAction());
    nodeHeartbeat = nm3.nodeHeartbeat(true);
    Assert.assertEquals(NodeAction.NORMAL, nodeHeartbeat.getNodeAction());

    // To test that IPs also work
    String ip = NetUtils.normalizeHostName("localhost");
    writeToHostsFile("host1", ip);

    rm.getNodesListManager().refreshNodes(conf);

    checkDecommissionedNMCount(rm, ++metricCount);

    nodeHeartbeat = nm1.nodeHeartbeat(true);
    Assert.assertEquals(NodeAction.NORMAL, nodeHeartbeat.getNodeAction());
    // Compare against the tracked baseline rather than a literal 1, so the check
    // does not depend on the metric having started at zero.
    Assert.assertEquals(metricCount, ClusterMetrics.getMetrics().getNumDecommisionedNMs());

    nodeHeartbeat = nm2.nodeHeartbeat(true);
    Assert.assertEquals(
        "Node is not decommisioned.", NodeAction.SHUTDOWN, nodeHeartbeat.getNodeAction());

    nodeHeartbeat = nm3.nodeHeartbeat(true);
    Assert.assertEquals(NodeAction.NORMAL, nodeHeartbeat.getNodeAction());
    Assert.assertEquals(metricCount, ClusterMetrics.getMetrics().getNumDecommisionedNMs());
  }
  /**
   * Decommissioning using an include hosts file.
   *
   * <p>Starts the ResourceManager with an include file listing host1 and host2, then rewrites the
   * file to contain only host1 and refreshes the node list. Expects host1 to keep receiving
   * {@code NORMAL} and host2 to be told to shut down on its next heartbeat. The decommissioned
   * metric is expected to still equal its baseline right after host1's heartbeat and to be one
   * above the baseline at the end of the test.
   */
  @Test
  public void testDecommissionWithIncludeHosts() throws Exception {

    writeToHostsFile("host1", "host2");
    Configuration conf = new Configuration();
    conf.set("yarn.resourcemanager.nodes.include-path", hostFile.getAbsolutePath());

    rm = new MockRM(conf);
    rm.start();

    MockNM nm1 = rm.registerNode("host1:1234", 5120);
    MockNM nm2 = rm.registerNode("host2:5678", 10240);

    ClusterMetrics metrics = ClusterMetrics.getMetrics();
    // A JUnit assertion is used instead of the `assert` keyword, which is skipped
    // entirely when the JVM runs without -ea.
    Assert.assertNotNull(metrics);
    // Baseline: every later metric check is relative to this value.
    int initialMetricCount = metrics.getNumDecommisionedNMs();

    HeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true);
    Assert.assertEquals(NodeAction.NORMAL, nodeHeartbeat.getNodeAction());
    nodeHeartbeat = nm2.nodeHeartbeat(true);
    Assert.assertEquals(NodeAction.NORMAL, nodeHeartbeat.getNodeAction());

    // Drop host2 from the include list.
    writeToHostsFile("host1");

    rm.getNodesListManager().refreshNodes();

    nodeHeartbeat = nm1.nodeHeartbeat(true);
    Assert.assertEquals(NodeAction.NORMAL, nodeHeartbeat.getNodeAction());
    // Compare against the captured baseline rather than a literal 0, so the check
    // does not depend on the metric having started at zero.
    Assert.assertEquals(
        initialMetricCount, ClusterMetrics.getMetrics().getNumDecommisionedNMs());

    nodeHeartbeat = nm2.nodeHeartbeat(true);
    Assert.assertEquals(
        "Node is not decommisioned.", NodeAction.SHUTDOWN, nodeHeartbeat.getNodeAction());

    checkDecommissionedNMCount(rm, ++initialMetricCount);
  }