public NodeHeartbeatResponse nodeHeartbeat( Map<ApplicationId, List<ContainerStatus>> conts, boolean isHealthy, int resId) throws Exception { NodeHeartbeatRequest req = Records.newRecord(NodeHeartbeatRequest.class); NodeStatus status = Records.newRecord(NodeStatus.class); status.setResponseId(resId); status.setNodeId(nodeId); for (Map.Entry<ApplicationId, List<ContainerStatus>> entry : conts.entrySet()) { Log.info("entry.getValue() " + entry.getValue()); status.setContainersStatuses(entry.getValue()); } NodeHealthStatus healthStatus = Records.newRecord(NodeHealthStatus.class); healthStatus.setHealthReport(""); healthStatus.setIsNodeHealthy(isHealthy); healthStatus.setLastHealthReportTime(1); status.setNodeHealthStatus(healthStatus); req.setNodeStatus(status); req.setLastKnownContainerTokenMasterKey(this.currentContainerTokenMasterKey); req.setLastKnownNMTokenMasterKey(this.currentNMTokenMasterKey); NodeHeartbeatResponse heartbeatResponse = resourceTracker.nodeHeartbeat(req); MasterKey masterKeyFromRM = heartbeatResponse.getContainerTokenMasterKey(); if (masterKeyFromRM != null && masterKeyFromRM.getKeyId() != this.currentContainerTokenMasterKey.getKeyId()) { this.currentContainerTokenMasterKey = masterKeyFromRM; } masterKeyFromRM = heartbeatResponse.getNMTokenMasterKey(); if (masterKeyFromRM != null && masterKeyFromRM.getKeyId() != this.currentNMTokenMasterKey.getKeyId()) { this.currentNMTokenMasterKey = masterKeyFromRM; } return heartbeatResponse; }
@Test public void testNodeHeartBeatWithInvalidLabels() throws Exception { writeToHostsFile("host2"); Configuration conf = new Configuration(); conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile.getAbsolutePath()); conf.set( YarnConfiguration.NODELABEL_CONFIGURATION_TYPE, YarnConfiguration.DISTRIBUTED_NODELABEL_CONFIGURATION_TYPE); final RMNodeLabelsManager nodeLabelsMgr = new NullRMNodeLabelsManager(); rm = new MockRM(conf) { @Override protected RMNodeLabelsManager createNodeLabelManager() { return nodeLabelsMgr; } }; rm.start(); try { nodeLabelsMgr.addToCluserNodeLabelsWithDefaultExclusivity(toSet("A", "B", "C")); } catch (IOException e) { Assert.fail("Caught Exception while intializing"); e.printStackTrace(); } ResourceTrackerService resourceTrackerService = rm.getResourceTrackerService(); RegisterNodeManagerRequest registerReq = Records.newRecord(RegisterNodeManagerRequest.class); NodeId nodeId = NodeId.newInstance("host2", 1234); Resource capability = BuilderUtils.newResource(1024, 1); registerReq.setResource(capability); registerReq.setNodeId(nodeId); registerReq.setHttpPort(1234); registerReq.setNMVersion(YarnVersionInfo.getVersion()); registerReq.setNodeLabels(toNodeLabelSet("A")); RegisterNodeManagerResponse registerResponse = resourceTrackerService.registerNodeManager(registerReq); NodeHeartbeatRequest heartbeatReq = Records.newRecord(NodeHeartbeatRequest.class); heartbeatReq.setNodeLabels(toNodeLabelSet("B", "#C")); // Invalid heart beat labels heartbeatReq.setNodeStatus(getNodeStatusObject(nodeId)); heartbeatReq.setLastKnownNMTokenMasterKey(registerResponse.getNMTokenMasterKey()); heartbeatReq.setLastKnownContainerTokenMasterKey(registerResponse.getContainerTokenMasterKey()); NodeHeartbeatResponse nodeHeartbeatResponse = resourceTrackerService.nodeHeartbeat(heartbeatReq); // response should be NORMAL when RM heartbeat labels are rejected Assert.assertEquals( "Response should be NORMAL when RM heartbeat labels" + " are rejected", NodeAction.NORMAL, nodeHeartbeatResponse.getNodeAction()); Assert.assertFalse(nodeHeartbeatResponse.getAreNodeLabelsAcceptedByRM()); Assert.assertNotNull(nodeHeartbeatResponse.getDiagnosticsMessage()); rm.stop(); }
@Test public void testNodeHeartbeatWithCentralLabelConfig() throws Exception { writeToHostsFile("host2"); Configuration conf = new Configuration(); conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile.getAbsolutePath()); conf.set( YarnConfiguration.NODELABEL_CONFIGURATION_TYPE, YarnConfiguration.DEFAULT_NODELABEL_CONFIGURATION_TYPE); final RMNodeLabelsManager nodeLabelsMgr = new NullRMNodeLabelsManager(); rm = new MockRM(conf) { @Override protected RMNodeLabelsManager createNodeLabelManager() { return nodeLabelsMgr; } }; rm.start(); ResourceTrackerService resourceTrackerService = rm.getResourceTrackerService(); RegisterNodeManagerRequest req = Records.newRecord(RegisterNodeManagerRequest.class); NodeId nodeId = NodeId.newInstance("host2", 1234); Resource capability = BuilderUtils.newResource(1024, 1); req.setResource(capability); req.setNodeId(nodeId); req.setHttpPort(1234); req.setNMVersion(YarnVersionInfo.getVersion()); req.setNodeLabels(toNodeLabelSet("A", "B", "C")); RegisterNodeManagerResponse registerResponse = resourceTrackerService.registerNodeManager(req); NodeHeartbeatRequest heartbeatReq = Records.newRecord(NodeHeartbeatRequest.class); heartbeatReq.setNodeLabels(toNodeLabelSet("B")); // Valid heart beat labels heartbeatReq.setNodeStatus(getNodeStatusObject(nodeId)); heartbeatReq.setLastKnownNMTokenMasterKey(registerResponse.getNMTokenMasterKey()); heartbeatReq.setLastKnownContainerTokenMasterKey(registerResponse.getContainerTokenMasterKey()); NodeHeartbeatResponse nodeHeartbeatResponse = resourceTrackerService.nodeHeartbeat(heartbeatReq); // response should be ok but the RMacceptNodeLabelsUpdate should be false Assert.assertEquals(NodeAction.NORMAL, nodeHeartbeatResponse.getNodeAction()); // no change in the labels, Assert.assertNull(nodeLabelsMgr.getNodeLabels().get(nodeId)); // heartbeat labels rejected Assert.assertFalse( "Invalid Node Labels should not accepted by RM", nodeHeartbeatResponse.getAreNodeLabelsAcceptedByRM()); if (rm != null) { rm.stop(); } }
@Test public void testInvalidNMUnregistration() throws Exception { Configuration conf = new Configuration(); rm = new MockRM(conf); rm.start(); ResourceTrackerService resourceTrackerService = rm.getResourceTrackerService(); int shutdownNMsCount = ClusterMetrics.getMetrics().getNumShutdownNMs(); int decommisionedNMsCount = ClusterMetrics.getMetrics().getNumDecommisionedNMs(); // Node not found for unregister UnRegisterNodeManagerRequest request = Records.newRecord(UnRegisterNodeManagerRequest.class); request.setNodeId(BuilderUtils.newNodeId("host", 1234)); resourceTrackerService.unRegisterNodeManager(request); checkShutdownNMCount(rm, 0); checkDecommissionedNMCount(rm, 0); // 1. Register the Node Manager // 2. Exclude the same Node Manager host // 3. Give NM heartbeat to RM // 4. Unregister the Node Manager MockNM nm1 = new MockNM("host1:1234", 5120, resourceTrackerService); RegisterNodeManagerResponse response = nm1.registerNode(); Assert.assertEquals(NodeAction.NORMAL, response.getNodeAction()); writeToHostsFile("host2"); conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile.getAbsolutePath()); rm.getNodesListManager().refreshNodes(conf); NodeHeartbeatResponse heartbeatResponse = nm1.nodeHeartbeat(true); Assert.assertEquals(NodeAction.SHUTDOWN, heartbeatResponse.getNodeAction()); checkShutdownNMCount(rm, shutdownNMsCount); checkDecommissionedNMCount(rm, ++decommisionedNMsCount); request.setNodeId(nm1.getNodeId()); resourceTrackerService.unRegisterNodeManager(request); checkShutdownNMCount(rm, shutdownNMsCount); checkDecommissionedNMCount(rm, decommisionedNMsCount); // 1. Register the Node Manager // 2. Exclude the same Node Manager host // 3. Unregister the Node Manager MockNM nm2 = new MockNM("host2:1234", 5120, resourceTrackerService); RegisterNodeManagerResponse response2 = nm2.registerNode(); Assert.assertEquals(NodeAction.NORMAL, response2.getNodeAction()); writeToHostsFile("host1"); conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile.getAbsolutePath()); rm.getNodesListManager().refreshNodes(conf); request.setNodeId(nm2.getNodeId()); resourceTrackerService.unRegisterNodeManager(request); checkShutdownNMCount(rm, shutdownNMsCount); checkDecommissionedNMCount(rm, ++decommisionedNMsCount); }
/** * Test RM read NM next heartBeat Interval correctly from Configuration file, and NM get next * heartBeat Interval from RM correctly */ @Test(timeout = 50000) public void testGetNextHeartBeatInterval() throws Exception { Configuration conf = new Configuration(); conf.set(YarnConfiguration.RM_NM_HEARTBEAT_INTERVAL_MS, "4000"); rm = new MockRM(conf); rm.start(); MockNM nm1 = rm.registerNode("host1:1234", 5120); MockNM nm2 = rm.registerNode("host2:5678", 10240); NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertEquals(4000, nodeHeartbeat.getNextHeartBeatInterval()); NodeHeartbeatResponse nodeHeartbeat2 = nm2.nodeHeartbeat(true); Assert.assertEquals(4000, nodeHeartbeat2.getNextHeartBeatInterval()); }
@Test public void testReboot() throws Exception { Configuration conf = new Configuration(); rm = new MockRM(conf); rm.start(); MockNM nm1 = rm.registerNode("host1:1234", 5120); MockNM nm2 = rm.registerNode("host2:1234", 2048); int initialMetricCount = ClusterMetrics.getMetrics().getNumRebootedNMs(); NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); nodeHeartbeat = nm2.nodeHeartbeat(new HashMap<ApplicationId, List<ContainerStatus>>(), true, -100); Assert.assertTrue(NodeAction.RESYNC.equals(nodeHeartbeat.getNodeAction())); Assert.assertEquals( "Too far behind rm response id:0 nm response id:-100", nodeHeartbeat.getDiagnosticsMessage()); checkRebootedNMCount(rm, ++initialMetricCount); }
/** Decommissioning using a post-configured exclude hosts file */ @Test public void testAddNewExcludePathToConfiguration() throws Exception { Configuration conf = new Configuration(); rm = new MockRM(conf); rm.start(); MockNM nm1 = rm.registerNode("host1:1234", 5120); MockNM nm2 = rm.registerNode("host2:5678", 10240); ClusterMetrics metrics = ClusterMetrics.getMetrics(); assert (metrics != null); int initialMetricCount = metrics.getNumDecommisionedNMs(); NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertEquals(NodeAction.NORMAL, nodeHeartbeat.getNodeAction()); nodeHeartbeat = nm2.nodeHeartbeat(true); Assert.assertEquals(NodeAction.NORMAL, nodeHeartbeat.getNodeAction()); writeToHostsFile("host2"); conf.set(YarnConfiguration.RM_NODES_EXCLUDE_FILE_PATH, hostFile.getAbsolutePath()); rm.getNodesListManager().refreshNodes(conf); checkDecommissionedNMCount(rm, ++initialMetricCount); nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertEquals( "Node should not have been decomissioned.", NodeAction.NORMAL, nodeHeartbeat.getNodeAction()); nodeHeartbeat = nm2.nodeHeartbeat(true); Assert.assertEquals( "Node should have been decomissioned but is in state" + nodeHeartbeat.getNodeAction(), NodeAction.SHUTDOWN, nodeHeartbeat.getNodeAction()); }
@Test public void testNMUnregistration() throws Exception { Configuration conf = new Configuration(); rm = new MockRM(conf); rm.start(); ResourceTrackerService resourceTrackerService = rm.getResourceTrackerService(); MockNM nm1 = rm.registerNode("host1:1234", 5120); int shutdownNMsCount = ClusterMetrics.getMetrics().getNumShutdownNMs(); NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); UnRegisterNodeManagerRequest request = Records.newRecord(UnRegisterNodeManagerRequest.class); request.setNodeId(nm1.getNodeId()); resourceTrackerService.unRegisterNodeManager(request); checkShutdownNMCount(rm, ++shutdownNMsCount); // The RM should remove the node after unregistration, hence send a reboot // command. nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertTrue(NodeAction.RESYNC.equals(nodeHeartbeat.getNodeAction())); }
/** Decommissioning using a pre-configured include hosts file */ @Test public void testDecommissionWithIncludeHosts() throws Exception { writeToHostsFile("localhost", "host1", "host2"); Configuration conf = new Configuration(); conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile.getAbsolutePath()); rm = new MockRM(conf); rm.start(); MockNM nm1 = rm.registerNode("host1:1234", 5120); MockNM nm2 = rm.registerNode("host2:5678", 10240); MockNM nm3 = rm.registerNode("localhost:4433", 1024); ClusterMetrics metrics = ClusterMetrics.getMetrics(); assert (metrics != null); int metricCount = metrics.getNumDecommisionedNMs(); NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); nodeHeartbeat = nm2.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); nodeHeartbeat = nm3.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); // To test that IPs also work String ip = NetUtils.normalizeHostName("localhost"); writeToHostsFile("host1", ip); rm.getNodesListManager().refreshNodes(conf); checkDecommissionedNMCount(rm, ++metricCount); nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); Assert.assertEquals(1, ClusterMetrics.getMetrics().getNumDecommisionedNMs()); nodeHeartbeat = nm2.nodeHeartbeat(true); Assert.assertTrue( "Node is not decommisioned.", NodeAction.SHUTDOWN.equals(nodeHeartbeat.getNodeAction())); nodeHeartbeat = nm3.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); Assert.assertEquals(metricCount, ClusterMetrics.getMetrics().getNumDecommisionedNMs()); }
@Test(timeout = 2000000) public void testUpdateHeartbeatResponseForCleanup() throws IOException { RMNodeImpl node = getRunningNode(); NodeId nodeId = node.getNodeID(); int rpcID = HopYarnAPIUtilities.getRPCID(); byte[] allNMRequestData = new byte[1]; allNMRequestData[0] = 0xA; try { RMUtilities.persistAppMasterRPC(rpcID, RPC.Type.RegisterNM, allNMRequestData); } catch (IOException ex) { Logger.getLogger(TestRMNodeTransitions.class.getName()).log(Level.SEVERE, null, ex); } TransactionState ts = new TransactionStateImpl(TransactionType.RM); // Expire a container ContainerId completedContainerId = BuilderUtils.newContainerId( BuilderUtils.newApplicationAttemptId(BuilderUtils.newApplicationId(0, 0), 0), 0); node.handle(new RMNodeCleanContainerEvent(nodeId, completedContainerId, ts)); ts.decCounter(TransactionState.TransactionType.INIT); Assert.assertEquals(1, node.getContainersToCleanUp().size()); rpcID = HopYarnAPIUtilities.getRPCID(); allNMRequestData = new byte[1]; allNMRequestData[0] = 0xA; try { RMUtilities.persistAppMasterRPC(rpcID, RPC.Type.FinishApplicationMaster, allNMRequestData); } catch (IOException ex) { Logger.getLogger(TestRMNodeTransitions.class.getName()).log(Level.SEVERE, null, ex); } TransactionState ts2 = new TransactionStateImpl(TransactionType.RM); // TransactionStateRM.newInstance(rpcID); // Finish an application ApplicationId finishedAppId = BuilderUtils.newApplicationId(0, 1); node.handle(new RMNodeCleanAppEvent(nodeId, finishedAppId, ts2)); ts2.decCounter(TransactionState.TransactionType.INIT); Assert.assertEquals(1, node.getAppsToCleanup().size()); rpcID = HopYarnAPIUtilities.getRPCID(); allNMRequestData = new byte[1]; allNMRequestData[0] = 0xA; try { RMUtilities.persistAppMasterRPC(rpcID, RPC.Type.FinishApplicationMaster, allNMRequestData); } catch (IOException ex) { Logger.getLogger(TestRMNodeTransitions.class.getName()).log(Level.SEVERE, null, ex); } TransactionState ts3 = new TransactionStateImpl(TransactionType.RM); // TransactionStateRM.newInstance(rpcID); // Verify status update does not clear containers/apps to cleanup // but updating heartbeat response for cleanup does RMNodeStatusEvent statusEvent = getMockRMNodeStatusEvent(); RMNodeStatusEvent se = new RMNodeStatusEvent( statusEvent.getNodeId(), statusEvent.getNodeHealthStatus(), statusEvent.getContainers(), statusEvent.getKeepAliveAppIds(), statusEvent.getLatestResponse(), ts3); node.handle(se); ts3.decCounter(TransactionState.TransactionType.INIT); Assert.assertEquals(1, node.getContainersToCleanUp().size()); Assert.assertEquals(1, node.getAppsToCleanup().size()); NodeHeartbeatResponse hbrsp = Records.newRecord(NodeHeartbeatResponse.class); node.updateNodeHeartbeatResponseForCleanup(hbrsp, ts); Assert.assertEquals(0, node.getContainersToCleanUp().size()); Assert.assertEquals(0, node.getAppsToCleanup().size()); Assert.assertEquals(1, hbrsp.getContainersToCleanup().size()); Assert.assertEquals(completedContainerId, hbrsp.getContainersToCleanup().get(0)); Assert.assertEquals(1, hbrsp.getApplicationsToCleanup().size()); Assert.assertEquals(finishedAppId, hbrsp.getApplicationsToCleanup().get(0)); }
@Test public void testReconnectNode() throws Exception { final DrainDispatcher dispatcher = new DrainDispatcher(); rm = new MockRM() { @Override protected EventHandler<SchedulerEvent> createSchedulerEventDispatcher() { return new SchedulerEventDispatcher(this.scheduler) { @Override public void handle(SchedulerEvent event) { scheduler.handle(event); } }; } @Override protected Dispatcher createDispatcher() { return dispatcher; } }; rm.start(); MockNM nm1 = rm.registerNode("host1:1234", 5120); MockNM nm2 = rm.registerNode("host2:5678", 5120); nm1.nodeHeartbeat(true); nm2.nodeHeartbeat(false); dispatcher.await(); checkUnealthyNMCount(rm, nm2, true, 1); final int expectedNMs = ClusterMetrics.getMetrics().getNumActiveNMs(); QueueMetrics metrics = rm.getResourceScheduler().getRootQueueMetrics(); // TODO Metrics incorrect in case of the FifoScheduler Assert.assertEquals(5120, metrics.getAvailableMB()); // reconnect of healthy node nm1 = rm.registerNode("host1:1234", 5120); NodeHeartbeatResponse response = nm1.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(response.getNodeAction())); dispatcher.await(); Assert.assertEquals(expectedNMs, ClusterMetrics.getMetrics().getNumActiveNMs()); checkUnealthyNMCount(rm, nm2, true, 1); // reconnect of unhealthy node nm2 = rm.registerNode("host2:5678", 5120); response = nm2.nodeHeartbeat(false); Assert.assertTrue(NodeAction.NORMAL.equals(response.getNodeAction())); dispatcher.await(); Assert.assertEquals(expectedNMs, ClusterMetrics.getMetrics().getNumActiveNMs()); checkUnealthyNMCount(rm, nm2, true, 1); // unhealthy node changed back to healthy nm2 = rm.registerNode("host2:5678", 5120); dispatcher.await(); response = nm2.nodeHeartbeat(true); response = nm2.nodeHeartbeat(true); dispatcher.await(); Assert.assertEquals(5120 + 5120, metrics.getAvailableMB()); // reconnect of node with changed capability nm1 = rm.registerNode("host2:5678", 10240); dispatcher.await(); response = nm1.nodeHeartbeat(true); dispatcher.await(); Assert.assertTrue(NodeAction.NORMAL.equals(response.getNodeAction())); Assert.assertEquals(5120 + 10240, metrics.getAvailableMB()); // reconnect of node with changed capability and running applications List<ApplicationId> runningApps = new ArrayList<ApplicationId>(); runningApps.add(ApplicationId.newInstance(1, 0)); nm1 = rm.registerNode("host2:5678", 15360, 2, runningApps); dispatcher.await(); response = nm1.nodeHeartbeat(true); dispatcher.await(); Assert.assertTrue(NodeAction.NORMAL.equals(response.getNodeAction())); Assert.assertEquals(5120 + 15360, metrics.getAvailableMB()); // reconnect healthy node changing http port nm1 = new MockNM("host1:1234", 5120, rm.getResourceTrackerService()); nm1.setHttpPort(3); nm1.registerNode(); dispatcher.await(); response = nm1.nodeHeartbeat(true); response = nm1.nodeHeartbeat(true); dispatcher.await(); RMNode rmNode = rm.getRMContext().getRMNodes().get(nm1.getNodeId()); Assert.assertEquals(3, rmNode.getHttpPort()); Assert.assertEquals(5120, rmNode.getTotalCapability().getMemory()); Assert.assertEquals(5120 + 15360, metrics.getAvailableMB()); }
@Test public void testNodeHeartBeatWithLabels() throws Exception { writeToHostsFile("host2"); Configuration conf = new Configuration(); conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile.getAbsolutePath()); conf.set( YarnConfiguration.NODELABEL_CONFIGURATION_TYPE, YarnConfiguration.DISTRIBUTED_NODELABEL_CONFIGURATION_TYPE); final RMNodeLabelsManager nodeLabelsMgr = new NullRMNodeLabelsManager(); rm = new MockRM(conf) { @Override protected RMNodeLabelsManager createNodeLabelManager() { return nodeLabelsMgr; } }; rm.start(); // adding valid labels try { nodeLabelsMgr.addToCluserNodeLabelsWithDefaultExclusivity(toSet("A", "B", "C")); } catch (IOException e) { Assert.fail("Caught Exception while intializing"); e.printStackTrace(); } // Registering of labels and other required info to RM ResourceTrackerService resourceTrackerService = rm.getResourceTrackerService(); RegisterNodeManagerRequest registerReq = Records.newRecord(RegisterNodeManagerRequest.class); NodeId nodeId = NodeId.newInstance("host2", 1234); Resource capability = BuilderUtils.newResource(1024, 1); registerReq.setResource(capability); registerReq.setNodeId(nodeId); registerReq.setHttpPort(1234); registerReq.setNMVersion(YarnVersionInfo.getVersion()); registerReq.setNodeLabels(toNodeLabelSet("A")); // Node register label RegisterNodeManagerResponse registerResponse = resourceTrackerService.registerNodeManager(registerReq); // modification of labels during heartbeat NodeHeartbeatRequest heartbeatReq = Records.newRecord(NodeHeartbeatRequest.class); heartbeatReq.setNodeLabels(toNodeLabelSet("B")); // Node heartbeat label update NodeStatus nodeStatusObject = getNodeStatusObject(nodeId); heartbeatReq.setNodeStatus(nodeStatusObject); heartbeatReq.setLastKnownNMTokenMasterKey(registerResponse.getNMTokenMasterKey()); heartbeatReq.setLastKnownContainerTokenMasterKey(registerResponse.getContainerTokenMasterKey()); NodeHeartbeatResponse nodeHeartbeatResponse = resourceTrackerService.nodeHeartbeat(heartbeatReq); Assert.assertEquals( "InValid Node Labels were not accepted by RM", NodeAction.NORMAL, nodeHeartbeatResponse.getNodeAction()); assertCollectionEquals( nodeLabelsMgr.getNodeLabels().get(nodeId), ResourceTrackerService.convertToStringSet(heartbeatReq.getNodeLabels())); Assert.assertTrue( "Valid Node Labels were not accepted by RM", nodeHeartbeatResponse.getAreNodeLabelsAcceptedByRM()); // After modification of labels next heartbeat sends null informing no update Set<String> oldLabels = nodeLabelsMgr.getNodeLabels().get(nodeId); int responseId = nodeStatusObject.getResponseId(); heartbeatReq = Records.newRecord(NodeHeartbeatRequest.class); heartbeatReq.setNodeLabels(null); // Node heartbeat label update nodeStatusObject = getNodeStatusObject(nodeId); nodeStatusObject.setResponseId(responseId + 2); heartbeatReq.setNodeStatus(nodeStatusObject); heartbeatReq.setLastKnownNMTokenMasterKey(registerResponse.getNMTokenMasterKey()); heartbeatReq.setLastKnownContainerTokenMasterKey(registerResponse.getContainerTokenMasterKey()); nodeHeartbeatResponse = resourceTrackerService.nodeHeartbeat(heartbeatReq); Assert.assertEquals( "InValid Node Labels were not accepted by RM", NodeAction.NORMAL, nodeHeartbeatResponse.getNodeAction()); assertCollectionEquals(nodeLabelsMgr.getNodeLabels().get(nodeId), oldLabels); Assert.assertFalse( "Node Labels should not accepted by RM", nodeHeartbeatResponse.getAreNodeLabelsAcceptedByRM()); rm.stop(); }
/** Decommissioning using a pre-configured exclude hosts file */ @Test public void testDecommissionWithExcludeHosts() throws Exception { Configuration conf = new Configuration(); conf.set(YarnConfiguration.RM_NODES_EXCLUDE_FILE_PATH, hostFile.getAbsolutePath()); writeToHostsFile(""); final DrainDispatcher dispatcher = new DrainDispatcher(); rm = new MockRM(conf) { @Override protected Dispatcher createDispatcher() { return dispatcher; } }; rm.start(); MockNM nm1 = rm.registerNode("host1:1234", 5120); MockNM nm2 = rm.registerNode("host2:5678", 10240); MockNM nm3 = rm.registerNode("localhost:4433", 1024); dispatcher.await(); int metricCount = ClusterMetrics.getMetrics().getNumDecommisionedNMs(); NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); nodeHeartbeat = nm2.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); dispatcher.await(); // To test that IPs also work String ip = NetUtils.normalizeHostName("localhost"); writeToHostsFile("host2", ip); rm.getNodesListManager().refreshNodes(conf); checkDecommissionedNMCount(rm, metricCount + 2); nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); nodeHeartbeat = nm2.nodeHeartbeat(true); Assert.assertTrue( "The decommisioned metrics are not updated", NodeAction.SHUTDOWN.equals(nodeHeartbeat.getNodeAction())); nodeHeartbeat = nm3.nodeHeartbeat(true); Assert.assertTrue( "The decommisioned metrics are not updated", NodeAction.SHUTDOWN.equals(nodeHeartbeat.getNodeAction())); dispatcher.await(); writeToHostsFile(""); rm.getNodesListManager().refreshNodes(conf); nm3 = rm.registerNode("localhost:4433", 1024); dispatcher.await(); nodeHeartbeat = nm3.nodeHeartbeat(true); dispatcher.await(); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); // decommissined node is 1 since 1 node is rejoined after updating exclude // file checkDecommissionedNMCount(rm, metricCount + 1); }