/**
 * Decommissioning using a post-configured exclude hosts file.
 *
 * <p>The RM is started from a configuration that has no exclude path set. After {@code host1}
 * and {@code host2} register and heartbeat normally, an exclude file listing {@code host2} is
 * written, its path is set on the configuration, and the node list is refreshed with that
 * configuration. The test then expects {@code host1} to keep receiving {@code NORMAL} and
 * {@code host2} to receive {@code SHUTDOWN}.
 *
 * @throws Exception if any RM or node-manager interaction fails
 */
@Test
public void testAddNewExcludePathToConfiguration() throws Exception {
  Configuration conf = new Configuration();
  rm = new MockRM(conf);
  rm.start();

  MockNM nm1 = rm.registerNode("host1:1234", 5120);
  MockNM nm2 = rm.registerNode("host2:5678", 10240);

  ClusterMetrics metrics = ClusterMetrics.getMetrics();
  // A JUnit assertion is used here because a bare Java `assert` is skipped
  // entirely when the JVM runs without -ea.
  Assert.assertNotNull(metrics);
  // Baseline: the decommissioned count is checked relative to this value.
  int initialMetricCount = metrics.getNumDecommisionedNMs();

  // Both nodes heartbeat normally before any exclude file is configured.
  NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true);
  Assert.assertEquals(NodeAction.NORMAL, nodeHeartbeat.getNodeAction());
  nodeHeartbeat = nm2.nodeHeartbeat(true);
  Assert.assertEquals(NodeAction.NORMAL, nodeHeartbeat.getNodeAction());

  // Exclude host2 and only now point the configuration at the hosts file.
  writeToHostsFile("host2");
  conf.set(YarnConfiguration.RM_NODES_EXCLUDE_FILE_PATH, hostFile.getAbsolutePath());
  rm.getNodesListManager().refreshNodes(conf);

  // Expect exactly one more decommissioned node than the baseline.
  checkDecommissionedNMCount(rm, ++initialMetricCount);

  nodeHeartbeat = nm1.nodeHeartbeat(true);
  Assert.assertEquals(
      "Node should not have been decomissioned.",
      NodeAction.NORMAL,
      nodeHeartbeat.getNodeAction());

  nodeHeartbeat = nm2.nodeHeartbeat(true);
  // Trailing space keeps the reported action from running into the word "state".
  Assert.assertEquals(
      "Node should have been decomissioned but is in state " + nodeHeartbeat.getNodeAction(),
      NodeAction.SHUTDOWN,
      nodeHeartbeat.getNodeAction());
}
/**
 * Delivers a {@code STARTED} event to a new node and checks the resulting metrics and state.
 *
 * <p>Expected outcome: the active-node count grows by one, the lost, unhealthy, decommissioned
 * and rebooted counts are unchanged, the node is {@code RUNNING}, and the recorded
 * {@code nodesListManagerEvent} is of type {@code NODE_USABLE}.
 */
@Test(timeout = 60000)
public void testAdd() {
  RMNodeImpl freshNode = getNewNode();
  ClusterMetrics clusterMetrics = ClusterMetrics.getMetrics();

  // Snapshot every counter before the event so the checks below are relative.
  int activeBefore = clusterMetrics.getNumActiveNMs();
  int lostBefore = clusterMetrics.getNumLostNMs();
  int unhealthyBefore = clusterMetrics.getUnhealthyNMs();
  int decommissionedBefore = clusterMetrics.getNumDecommisionedNMs();
  int rebootedBefore = clusterMetrics.getNumRebootedNMs();

  RMNodeEvent startedEvent =
      new RMNodeEvent(freshNode.getNodeID(), RMNodeEventType.STARTED, null);
  freshNode.handle(startedEvent);

  // Only the active counter is expected to move.
  int expectedActive = activeBefore + 1;
  Assert.assertEquals("Active Nodes", expectedActive, clusterMetrics.getNumActiveNMs());
  Assert.assertEquals("Lost Nodes", lostBefore, clusterMetrics.getNumLostNMs());
  Assert.assertEquals("Unhealthy Nodes", unhealthyBefore, clusterMetrics.getUnhealthyNMs());
  Assert.assertEquals(
      "Decommissioned Nodes", decommissionedBefore, clusterMetrics.getNumDecommisionedNMs());
  Assert.assertEquals("Rebooted Nodes", rebootedBefore, clusterMetrics.getNumRebootedNMs());

  Assert.assertEquals(NodeState.RUNNING, freshNode.getState());

  // nodesListManagerEvent is a field declared outside this method
  // (presumably filled in by a test event handler — confirm against the fixture).
  Assert.assertNotNull(nodesListManagerEvent);
  Assert.assertEquals(
      NodesListManagerEventType.NODE_USABLE, nodesListManagerEvent.getType());
}
/**
 * Delivers a reconnect event to a running node and checks that nothing observable changes.
 *
 * <p>Expected outcome: all five cluster counters (active, lost, unhealthy, decommissioned,
 * rebooted) keep their prior values, the node stays {@code RUNNING}, and the recorded
 * {@code nodesListManagerEvent} is of type {@code NODE_USABLE}.
 *
 * @throws IOException declared by the test signature
 */
@Test(timeout = 60000)
public void testReconnect() throws IOException {
  RMNodeImpl runningNode = getRunningNode();
  ClusterMetrics clusterMetrics = ClusterMetrics.getMetrics();

  // Snapshot every counter before the event so the checks below are relative.
  int activeBefore = clusterMetrics.getNumActiveNMs();
  int lostBefore = clusterMetrics.getNumLostNMs();
  int unhealthyBefore = clusterMetrics.getUnhealthyNMs();
  int decommissionedBefore = clusterMetrics.getNumDecommisionedNMs();
  int rebootedBefore = clusterMetrics.getNumRebootedNMs();

  // The node reconnects as itself: it is both the target and the payload of the event.
  RMNodeReconnectEvent reconnectEvent =
      new RMNodeReconnectEvent(
          runningNode.getNodeID(), runningNode, new TransactionStateImpl(TransactionType.RM));
  runningNode.handle(reconnectEvent);

  Assert.assertEquals("Active Nodes", activeBefore, clusterMetrics.getNumActiveNMs());
  Assert.assertEquals("Lost Nodes", lostBefore, clusterMetrics.getNumLostNMs());
  Assert.assertEquals("Unhealthy Nodes", unhealthyBefore, clusterMetrics.getUnhealthyNMs());
  Assert.assertEquals(
      "Decommissioned Nodes", decommissionedBefore, clusterMetrics.getNumDecommisionedNMs());
  Assert.assertEquals("Rebooted Nodes", rebootedBefore, clusterMetrics.getNumRebootedNMs());

  Assert.assertEquals(NodeState.RUNNING, runningNode.getState());

  // nodesListManagerEvent is a field declared outside this method
  // (presumably filled in by a test event handler — confirm against the fixture).
  Assert.assertNotNull(nodesListManagerEvent);
  Assert.assertEquals(
      NodesListManagerEventType.NODE_USABLE, nodesListManagerEvent.getType());
}
/**
 * Delivers a {@code REBOOTING} event to an unhealthy node and checks the metric transitions.
 *
 * <p>Expected outcome: the unhealthy count drops by one, the rebooted count rises by one, the
 * active, lost and decommissioned counts are unchanged, and the node ends in
 * {@code REBOOTED}.
 *
 * @throws IOException declared by the test signature
 */
@Test(timeout = 60000)
public void testUnhealthyRebooting() throws IOException {
  RMNodeImpl unhealthyNode = getUnhealthyNode();
  ClusterMetrics clusterMetrics = ClusterMetrics.getMetrics();

  // Snapshot every counter before the event so the checks below are relative.
  int activeBefore = clusterMetrics.getNumActiveNMs();
  int lostBefore = clusterMetrics.getNumLostNMs();
  int unhealthyBefore = clusterMetrics.getUnhealthyNMs();
  int decommissionedBefore = clusterMetrics.getNumDecommisionedNMs();
  int rebootedBefore = clusterMetrics.getNumRebootedNMs();

  RMNodeEvent rebootingEvent =
      new RMNodeEvent(
          unhealthyNode.getNodeID(),
          RMNodeEventType.REBOOTING,
          new TransactionStateImpl(TransactionType.RM));
  unhealthyNode.handle(rebootingEvent);

  // The node leaves the unhealthy bucket and enters the rebooted one.
  int expectedUnhealthy = unhealthyBefore - 1;
  int expectedRebooted = rebootedBefore + 1;

  Assert.assertEquals("Active Nodes", activeBefore, clusterMetrics.getNumActiveNMs());
  Assert.assertEquals("Lost Nodes", lostBefore, clusterMetrics.getNumLostNMs());
  Assert.assertEquals("Unhealthy Nodes", expectedUnhealthy, clusterMetrics.getUnhealthyNMs());
  Assert.assertEquals(
      "Decommissioned Nodes", decommissionedBefore, clusterMetrics.getNumDecommisionedNMs());
  Assert.assertEquals("Rebooted Nodes", expectedRebooted, clusterMetrics.getNumRebootedNMs());

  Assert.assertEquals(NodeState.REBOOTED, unhealthyNode.getState());
}
/** Decommissioning using a pre-configured include hosts file */ @Test public void testDecommissionWithIncludeHosts() throws Exception { writeToHostsFile("localhost", "host1", "host2"); Configuration conf = new Configuration(); conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile.getAbsolutePath()); rm = new MockRM(conf); rm.start(); MockNM nm1 = rm.registerNode("host1:1234", 5120); MockNM nm2 = rm.registerNode("host2:5678", 10240); MockNM nm3 = rm.registerNode("localhost:4433", 1024); ClusterMetrics metrics = ClusterMetrics.getMetrics(); assert (metrics != null); int metricCount = metrics.getNumDecommisionedNMs(); NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); nodeHeartbeat = nm2.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); nodeHeartbeat = nm3.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); // To test that IPs also work String ip = NetUtils.normalizeHostName("localhost"); writeToHostsFile("host1", ip); rm.getNodesListManager().refreshNodes(conf); checkDecommissionedNMCount(rm, ++metricCount); nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); Assert.assertEquals(1, ClusterMetrics.getMetrics().getNumDecommisionedNMs()); nodeHeartbeat = nm2.nodeHeartbeat(true); Assert.assertTrue( "Node is not decommisioned.", NodeAction.SHUTDOWN.equals(nodeHeartbeat.getNodeAction())); nodeHeartbeat = nm3.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); Assert.assertEquals(metricCount, ClusterMetrics.getMetrics().getNumDecommisionedNMs()); }
/**
 * Decommissioning using an include hosts file.
 *
 * <p>The include file initially lists {@code host1} and {@code host2}; both nodes register and
 * heartbeat normally. The file is then rewritten to list only {@code host1} and the node list
 * is refreshed. The test expects {@code host1} to keep receiving {@code NORMAL} with the
 * decommissioned count still at its starting value, and {@code host2} to receive
 * {@code SHUTDOWN}, after which the count is expected to be one above its starting value.
 *
 * @throws Exception if any RM or node-manager interaction fails
 */
@Test
public void testDecommissionWithIncludeHosts() throws Exception {
  writeToHostsFile("host1", "host2");
  Configuration conf = new Configuration();
  conf.set("yarn.resourcemanager.nodes.include-path", hostFile.getAbsolutePath());

  rm = new MockRM(conf);
  rm.start();

  MockNM nm1 = rm.registerNode("host1:1234", 5120);
  MockNM nm2 = rm.registerNode("host2:5678", 10240);

  ClusterMetrics metrics = ClusterMetrics.getMetrics();
  // A JUnit assertion is used here because a bare Java `assert` is skipped
  // entirely when the JVM runs without -ea.
  Assert.assertNotNull(metrics);
  // Baseline: every later count check is relative to this value.
  int initialMetricCount = metrics.getNumDecommisionedNMs();

  // assertEquals (rather than assertTrue(x.equals(y))) reports the actual
  // action when a check fails.
  HeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true);
  Assert.assertEquals(NodeAction.NORMAL, nodeHeartbeat.getNodeAction());
  nodeHeartbeat = nm2.nodeHeartbeat(true);
  Assert.assertEquals(NodeAction.NORMAL, nodeHeartbeat.getNodeAction());

  // host2 is dropped from the include list.
  writeToHostsFile("host1");

  rm.getNodesListManager().refreshNodes();

  nodeHeartbeat = nm1.nodeHeartbeat(true);
  Assert.assertEquals(NodeAction.NORMAL, nodeHeartbeat.getNodeAction());
  // Compare against the tracked baseline instead of a hard-coded 0, so the
  // check stays correct when the counter was already non-zero at the start.
  Assert.assertEquals(
      initialMetricCount, ClusterMetrics.getMetrics().getNumDecommisionedNMs());

  nodeHeartbeat = nm2.nodeHeartbeat(true);
  Assert.assertEquals(
      "Node is not decommisioned.", NodeAction.SHUTDOWN, nodeHeartbeat.getNodeAction());

  // After host2 has been told to shut down, expect one more decommissioned node.
  checkDecommissionedNMCount(rm, ++initialMetricCount);
}