@Test public void testNodeRegistrationVersionLessThanRM() throws Exception { writeToHostsFile("host2"); Configuration conf = new Configuration(); conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile.getAbsolutePath()); conf.set(YarnConfiguration.RM_NODEMANAGER_MINIMUM_VERSION, "EqualToRM"); rm = new MockRM(conf); rm.start(); String nmVersion = "1.9.9"; ResourceTrackerService resourceTrackerService = rm.getResourceTrackerService(); RegisterNodeManagerRequest req = Records.newRecord(RegisterNodeManagerRequest.class); NodeId nodeId = NodeId.newInstance("host2", 1234); Resource capability = BuilderUtils.newResource(1024, 1); req.setResource(capability); req.setNodeId(nodeId); req.setHttpPort(1234); req.setNMVersion(nmVersion); // trying to register a invalid node. RegisterNodeManagerResponse response = resourceTrackerService.registerNodeManager(req); Assert.assertEquals(NodeAction.SHUTDOWN, response.getNodeAction()); Assert.assertTrue( "Diagnostic message did not contain: 'Disallowed NodeManager " + "Version " + nmVersion + ", is less than the minimum version'", response .getDiagnosticsMessage() .contains( "Disallowed NodeManager Version " + nmVersion + ", is less than the minimum version ")); }
@Test public void testNodeHeartBeatWithInvalidLabels() throws Exception { writeToHostsFile("host2"); Configuration conf = new Configuration(); conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile.getAbsolutePath()); conf.set( YarnConfiguration.NODELABEL_CONFIGURATION_TYPE, YarnConfiguration.DISTRIBUTED_NODELABEL_CONFIGURATION_TYPE); final RMNodeLabelsManager nodeLabelsMgr = new NullRMNodeLabelsManager(); rm = new MockRM(conf) { @Override protected RMNodeLabelsManager createNodeLabelManager() { return nodeLabelsMgr; } }; rm.start(); try { nodeLabelsMgr.addToCluserNodeLabelsWithDefaultExclusivity(toSet("A", "B", "C")); } catch (IOException e) { Assert.fail("Caught Exception while intializing"); e.printStackTrace(); } ResourceTrackerService resourceTrackerService = rm.getResourceTrackerService(); RegisterNodeManagerRequest registerReq = Records.newRecord(RegisterNodeManagerRequest.class); NodeId nodeId = NodeId.newInstance("host2", 1234); Resource capability = BuilderUtils.newResource(1024, 1); registerReq.setResource(capability); registerReq.setNodeId(nodeId); registerReq.setHttpPort(1234); registerReq.setNMVersion(YarnVersionInfo.getVersion()); registerReq.setNodeLabels(toNodeLabelSet("A")); RegisterNodeManagerResponse registerResponse = resourceTrackerService.registerNodeManager(registerReq); NodeHeartbeatRequest heartbeatReq = Records.newRecord(NodeHeartbeatRequest.class); heartbeatReq.setNodeLabels(toNodeLabelSet("B", "#C")); // Invalid heart beat labels heartbeatReq.setNodeStatus(getNodeStatusObject(nodeId)); heartbeatReq.setLastKnownNMTokenMasterKey(registerResponse.getNMTokenMasterKey()); heartbeatReq.setLastKnownContainerTokenMasterKey(registerResponse.getContainerTokenMasterKey()); NodeHeartbeatResponse nodeHeartbeatResponse = resourceTrackerService.nodeHeartbeat(heartbeatReq); // response should be NORMAL when RM heartbeat labels are rejected Assert.assertEquals( "Response should be NORMAL when RM heartbeat labels" + " are rejected", NodeAction.NORMAL, nodeHeartbeatResponse.getNodeAction()); Assert.assertFalse(nodeHeartbeatResponse.getAreNodeLabelsAcceptedByRM()); Assert.assertNotNull(nodeHeartbeatResponse.getDiagnosticsMessage()); rm.stop(); }
@Test public void testNodeRegistrationWithInvalidLabelsSyntax() throws Exception { writeToHostsFile("host2"); Configuration conf = new Configuration(); conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile.getAbsolutePath()); conf.set( YarnConfiguration.NODELABEL_CONFIGURATION_TYPE, YarnConfiguration.DISTRIBUTED_NODELABEL_CONFIGURATION_TYPE); final RMNodeLabelsManager nodeLabelsMgr = new NullRMNodeLabelsManager(); rm = new MockRM(conf) { @Override protected RMNodeLabelsManager createNodeLabelManager() { return nodeLabelsMgr; } }; rm.start(); try { nodeLabelsMgr.addToCluserNodeLabelsWithDefaultExclusivity(toSet("X", "Y", "Z")); } catch (IOException e) { Assert.fail("Caught Exception while intializing"); e.printStackTrace(); } ResourceTrackerService resourceTrackerService = rm.getResourceTrackerService(); RegisterNodeManagerRequest req = Records.newRecord(RegisterNodeManagerRequest.class); NodeId nodeId = NodeId.newInstance("host2", 1234); Resource capability = BuilderUtils.newResource(1024, 1); req.setResource(capability); req.setNodeId(nodeId); req.setHttpPort(1234); req.setNMVersion(YarnVersionInfo.getVersion()); req.setNodeLabels(toNodeLabelSet("#Y")); RegisterNodeManagerResponse response = resourceTrackerService.registerNodeManager(req); Assert.assertEquals( "On Invalid Node Labels action is expected to be normal", NodeAction.NORMAL, response.getNodeAction()); Assert.assertNull(nodeLabelsMgr.getNodeLabels().get(nodeId)); Assert.assertNotNull(response.getDiagnosticsMessage()); Assert.assertFalse( "Node Labels should not accepted by RM If Invalid", response.getAreNodeLabelsAcceptedByRM()); if (rm != null) { rm.stop(); } }
@Test public void testSetRMIdentifierInRegistration() throws Exception { Configuration conf = new Configuration(); rm = new MockRM(conf); rm.start(); MockNM nm = new MockNM("host1:1234", 5120, rm.getResourceTrackerService()); RegisterNodeManagerResponse response = nm.registerNode(); // Verify the RMIdentifier is correctly set in RegisterNodeManagerResponse Assert.assertEquals(ResourceManager.getClusterTimeStamp(), response.getRMIdentifier()); }
@Test public void testNodeHeartbeatWithCentralLabelConfig() throws Exception { writeToHostsFile("host2"); Configuration conf = new Configuration(); conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile.getAbsolutePath()); conf.set( YarnConfiguration.NODELABEL_CONFIGURATION_TYPE, YarnConfiguration.DEFAULT_NODELABEL_CONFIGURATION_TYPE); final RMNodeLabelsManager nodeLabelsMgr = new NullRMNodeLabelsManager(); rm = new MockRM(conf) { @Override protected RMNodeLabelsManager createNodeLabelManager() { return nodeLabelsMgr; } }; rm.start(); ResourceTrackerService resourceTrackerService = rm.getResourceTrackerService(); RegisterNodeManagerRequest req = Records.newRecord(RegisterNodeManagerRequest.class); NodeId nodeId = NodeId.newInstance("host2", 1234); Resource capability = BuilderUtils.newResource(1024, 1); req.setResource(capability); req.setNodeId(nodeId); req.setHttpPort(1234); req.setNMVersion(YarnVersionInfo.getVersion()); req.setNodeLabels(toNodeLabelSet("A", "B", "C")); RegisterNodeManagerResponse registerResponse = resourceTrackerService.registerNodeManager(req); NodeHeartbeatRequest heartbeatReq = Records.newRecord(NodeHeartbeatRequest.class); heartbeatReq.setNodeLabels(toNodeLabelSet("B")); // Valid heart beat labels heartbeatReq.setNodeStatus(getNodeStatusObject(nodeId)); heartbeatReq.setLastKnownNMTokenMasterKey(registerResponse.getNMTokenMasterKey()); heartbeatReq.setLastKnownContainerTokenMasterKey(registerResponse.getContainerTokenMasterKey()); NodeHeartbeatResponse nodeHeartbeatResponse = resourceTrackerService.nodeHeartbeat(heartbeatReq); // response should be ok but the RMacceptNodeLabelsUpdate should be false Assert.assertEquals(NodeAction.NORMAL, nodeHeartbeatResponse.getNodeAction()); // no change in the labels, Assert.assertNull(nodeLabelsMgr.getNodeLabels().get(nodeId)); // heartbeat labels rejected Assert.assertFalse( "Invalid Node Labels should not accepted by RM", nodeHeartbeatResponse.getAreNodeLabelsAcceptedByRM()); if (rm != null) { rm.stop(); } }
@Test public void testInvalidNMUnregistration() throws Exception { Configuration conf = new Configuration(); rm = new MockRM(conf); rm.start(); ResourceTrackerService resourceTrackerService = rm.getResourceTrackerService(); int shutdownNMsCount = ClusterMetrics.getMetrics().getNumShutdownNMs(); int decommisionedNMsCount = ClusterMetrics.getMetrics().getNumDecommisionedNMs(); // Node not found for unregister UnRegisterNodeManagerRequest request = Records.newRecord(UnRegisterNodeManagerRequest.class); request.setNodeId(BuilderUtils.newNodeId("host", 1234)); resourceTrackerService.unRegisterNodeManager(request); checkShutdownNMCount(rm, 0); checkDecommissionedNMCount(rm, 0); // 1. Register the Node Manager // 2. Exclude the same Node Manager host // 3. Give NM heartbeat to RM // 4. Unregister the Node Manager MockNM nm1 = new MockNM("host1:1234", 5120, resourceTrackerService); RegisterNodeManagerResponse response = nm1.registerNode(); Assert.assertEquals(NodeAction.NORMAL, response.getNodeAction()); writeToHostsFile("host2"); conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile.getAbsolutePath()); rm.getNodesListManager().refreshNodes(conf); NodeHeartbeatResponse heartbeatResponse = nm1.nodeHeartbeat(true); Assert.assertEquals(NodeAction.SHUTDOWN, heartbeatResponse.getNodeAction()); checkShutdownNMCount(rm, shutdownNMsCount); checkDecommissionedNMCount(rm, ++decommisionedNMsCount); request.setNodeId(nm1.getNodeId()); resourceTrackerService.unRegisterNodeManager(request); checkShutdownNMCount(rm, shutdownNMsCount); checkDecommissionedNMCount(rm, decommisionedNMsCount); // 1. Register the Node Manager // 2. Exclude the same Node Manager host // 3. Unregister the Node Manager MockNM nm2 = new MockNM("host2:1234", 5120, resourceTrackerService); RegisterNodeManagerResponse response2 = nm2.registerNode(); Assert.assertEquals(NodeAction.NORMAL, response2.getNodeAction()); writeToHostsFile("host1"); conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile.getAbsolutePath()); rm.getNodesListManager().refreshNodes(conf); request.setNodeId(nm2.getNodeId()); resourceTrackerService.unRegisterNodeManager(request); checkShutdownNMCount(rm, shutdownNMsCount); checkDecommissionedNMCount(rm, ++decommisionedNMsCount); }
@Test public void testNodeRegistrationWithLabels() throws Exception { writeToHostsFile("host2"); Configuration conf = new Configuration(); conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile.getAbsolutePath()); conf.set( YarnConfiguration.NODELABEL_CONFIGURATION_TYPE, YarnConfiguration.DISTRIBUTED_NODELABEL_CONFIGURATION_TYPE); final RMNodeLabelsManager nodeLabelsMgr = new NullRMNodeLabelsManager(); rm = new MockRM(conf) { @Override protected RMNodeLabelsManager createNodeLabelManager() { return nodeLabelsMgr; } }; rm.start(); try { nodeLabelsMgr.addToCluserNodeLabelsWithDefaultExclusivity(toSet("A", "B", "C")); } catch (IOException e) { Assert.fail("Caught Exception while intializing"); e.printStackTrace(); } ResourceTrackerService resourceTrackerService = rm.getResourceTrackerService(); RegisterNodeManagerRequest registerReq = Records.newRecord(RegisterNodeManagerRequest.class); NodeId nodeId = NodeId.newInstance("host2", 1234); Resource capability = BuilderUtils.newResource(1024, 1); registerReq.setResource(capability); registerReq.setNodeId(nodeId); registerReq.setHttpPort(1234); registerReq.setNMVersion(YarnVersionInfo.getVersion()); registerReq.setNodeLabels(toSet(NodeLabel.newInstance("A"))); RegisterNodeManagerResponse response = resourceTrackerService.registerNodeManager(registerReq); Assert.assertEquals( "Action should be normal on valid Node Labels", NodeAction.NORMAL, response.getNodeAction()); assertCollectionEquals( nodeLabelsMgr.getNodeLabels().get(nodeId), ResourceTrackerService.convertToStringSet(registerReq.getNodeLabels())); Assert.assertTrue( "Valid Node Labels were not accepted by RM", response.getAreNodeLabelsAcceptedByRM()); rm.stop(); }
@Test public void testNodeRegistrationFailure() throws Exception { writeToHostsFile("host1"); Configuration conf = new Configuration(); conf.set("yarn.resourcemanager.nodes.include-path", hostFile.getAbsolutePath()); rm = new MockRM(conf); rm.start(); ResourceTrackerService resourceTrackerService = rm.getResourceTrackerService(); RegisterNodeManagerRequest req = Records.newRecord(RegisterNodeManagerRequest.class); NodeId nodeId = Records.newRecord(NodeId.class); nodeId.setHost("host2"); nodeId.setPort(1234); req.setNodeId(nodeId); req.setHttpPort(1234); // trying to register a invalid node. RegisterNodeManagerResponse response = resourceTrackerService.registerNodeManager(req); Assert.assertEquals(NodeAction.SHUTDOWN, response.getRegistrationResponse().getNodeAction()); }
@Test public void testNodeRegistrationFailure() throws Exception { writeToHostsFile("host1"); Configuration conf = new Configuration(); conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile.getAbsolutePath()); rm = new MockRM(conf); rm.start(); ResourceTrackerService resourceTrackerService = rm.getResourceTrackerService(); RegisterNodeManagerRequest req = Records.newRecord(RegisterNodeManagerRequest.class); NodeId nodeId = NodeId.newInstance("host2", 1234); req.setNodeId(nodeId); req.setHttpPort(1234); // trying to register a invalid node. RegisterNodeManagerResponse response = resourceTrackerService.registerNodeManager(req); Assert.assertEquals(NodeAction.SHUTDOWN, response.getNodeAction()); Assert.assertEquals( "Disallowed NodeManager from host2, Sending SHUTDOWN signal to the NodeManager.", response.getDiagnosticsMessage()); }
@Test public void testNodeRegistrationSuccess() throws Exception { writeToHostsFile("host2"); Configuration conf = new Configuration(); conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile.getAbsolutePath()); rm = new MockRM(conf); rm.start(); ResourceTrackerService resourceTrackerService = rm.getResourceTrackerService(); RegisterNodeManagerRequest req = Records.newRecord(RegisterNodeManagerRequest.class); NodeId nodeId = NodeId.newInstance("host2", 1234); Resource capability = BuilderUtils.newResource(1024, 1); req.setResource(capability); req.setNodeId(nodeId); req.setHttpPort(1234); req.setNMVersion(YarnVersionInfo.getVersion()); // trying to register a invalid node. RegisterNodeManagerResponse response = resourceTrackerService.registerNodeManager(req); Assert.assertEquals(NodeAction.NORMAL, response.getNodeAction()); }
@Test public void testReboot() throws Exception { Configuration conf = new Configuration(); rm = new MockRM(conf); rm.start(); MockNM nm1 = rm.registerNode("host1:1234", 5120); MockNM nm2 = new MockNM("host2:1234", 2048, rm.getResourceTrackerService()); int initialMetricCount = ClusterMetrics.getMetrics().getNumRebootedNMs(); HeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); nodeHeartbeat = nm2.nodeHeartbeat( new HashMap<ApplicationId, List<ContainerStatus>>(), true, recordFactory.newRecordInstance(NodeId.class)); Assert.assertTrue(NodeAction.REBOOT.equals(nodeHeartbeat.getNodeAction())); checkRebootedNMCount(rm, ++initialMetricCount); }
@Test public void testUnhealthyNMUnregistration() throws Exception { Configuration conf = new Configuration(); rm = new MockRM(conf); rm.start(); ResourceTrackerService resourceTrackerService = rm.getResourceTrackerService(); MockNM nm1 = rm.registerNode("host1:1234", 5120); Assert.assertEquals(0, ClusterMetrics.getMetrics().getUnhealthyNMs()); // node healthy nm1.nodeHeartbeat(true); int shutdownNMsCount = ClusterMetrics.getMetrics().getNumShutdownNMs(); // node unhealthy nm1.nodeHeartbeat(false); checkUnealthyNMCount(rm, nm1, true, 1); UnRegisterNodeManagerRequest request = Records.newRecord(UnRegisterNodeManagerRequest.class); request.setNodeId(nm1.getNodeId()); resourceTrackerService.unRegisterNodeManager(request); checkShutdownNMCount(rm, ++shutdownNMsCount); }
@Test public void testNMUnregistration() throws Exception { Configuration conf = new Configuration(); rm = new MockRM(conf); rm.start(); ResourceTrackerService resourceTrackerService = rm.getResourceTrackerService(); MockNM nm1 = rm.registerNode("host1:1234", 5120); int shutdownNMsCount = ClusterMetrics.getMetrics().getNumShutdownNMs(); NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); UnRegisterNodeManagerRequest request = Records.newRecord(UnRegisterNodeManagerRequest.class); request.setNodeId(nm1.getNodeId()); resourceTrackerService.unRegisterNodeManager(request); checkShutdownNMCount(rm, ++shutdownNMsCount); // The RM should remove the node after unregistration, hence send a reboot // command. nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertTrue(NodeAction.RESYNC.equals(nodeHeartbeat.getNodeAction())); }
@Test public void testNodeRegistrationWithMinimumAllocations() throws Exception { Configuration conf = new Configuration(); conf.set(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, "2048"); conf.set(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, "4"); rm = new MockRM(conf); rm.start(); ResourceTrackerService resourceTrackerService = rm.getResourceTrackerService(); RegisterNodeManagerRequest req = Records.newRecord(RegisterNodeManagerRequest.class); NodeId nodeId = BuilderUtils.newNodeId("host", 1234); req.setNodeId(nodeId); Resource capability = BuilderUtils.newResource(1024, 1); req.setResource(capability); RegisterNodeManagerResponse response1 = resourceTrackerService.registerNodeManager(req); Assert.assertEquals(NodeAction.SHUTDOWN, response1.getNodeAction()); capability.setMemory(2048); capability.setVirtualCores(1); req.setResource(capability); RegisterNodeManagerResponse response2 = resourceTrackerService.registerNodeManager(req); Assert.assertEquals(NodeAction.SHUTDOWN, response2.getNodeAction()); capability.setMemory(1024); capability.setVirtualCores(4); req.setResource(capability); RegisterNodeManagerResponse response3 = resourceTrackerService.registerNodeManager(req); Assert.assertEquals(NodeAction.SHUTDOWN, response3.getNodeAction()); capability.setMemory(2048); capability.setVirtualCores(4); req.setResource(capability); RegisterNodeManagerResponse response4 = resourceTrackerService.registerNodeManager(req); Assert.assertEquals(NodeAction.NORMAL, response4.getNodeAction()); }
@SuppressWarnings({"unchecked", "rawtypes"}) @Test public void testHandleContainerStatusInvalidCompletions() throws Exception { rm = new MockRM(new YarnConfiguration()); rm.start(); EventHandler handler = spy(rm.getRMContext().getDispatcher().getEventHandler()); // Case 1: Unmanaged AM RMApp app = rm.submitApp(1024, true); // Case 1.1: AppAttemptId is null NMContainerStatus report = NMContainerStatus.newInstance( ContainerId.newContainerId( ApplicationAttemptId.newInstance(app.getApplicationId(), 2), 1), ContainerState.COMPLETE, Resource.newInstance(1024, 1), "Dummy Completed", 0, Priority.newInstance(10), 1234); rm.getResourceTrackerService().handleNMContainerStatus(report, null); verify(handler, never()).handle((Event) any()); // Case 1.2: Master container is null RMAppAttemptImpl currentAttempt = (RMAppAttemptImpl) app.getCurrentAppAttempt(); currentAttempt.setMasterContainer(null); report = NMContainerStatus.newInstance( ContainerId.newContainerId(currentAttempt.getAppAttemptId(), 0), ContainerState.COMPLETE, Resource.newInstance(1024, 1), "Dummy Completed", 0, Priority.newInstance(10), 1234); rm.getResourceTrackerService().handleNMContainerStatus(report, null); verify(handler, never()).handle((Event) any()); // Case 2: Managed AM app = rm.submitApp(1024); // Case 2.1: AppAttemptId is null report = NMContainerStatus.newInstance( ContainerId.newContainerId( ApplicationAttemptId.newInstance(app.getApplicationId(), 2), 1), ContainerState.COMPLETE, Resource.newInstance(1024, 1), "Dummy Completed", 0, Priority.newInstance(10), 1234); try { rm.getResourceTrackerService().handleNMContainerStatus(report, null); } catch (Exception e) { // expected - ignore } verify(handler, never()).handle((Event) any()); // Case 2.2: Master container is null currentAttempt = (RMAppAttemptImpl) app.getCurrentAppAttempt(); currentAttempt.setMasterContainer(null); report = NMContainerStatus.newInstance( ContainerId.newContainerId(currentAttempt.getAppAttemptId(), 0), ContainerState.COMPLETE, Resource.newInstance(1024, 1), "Dummy Completed", 0, Priority.newInstance(10), 1234); try { rm.getResourceTrackerService().handleNMContainerStatus(report, null); } catch (Exception e) { // expected - ignore } verify(handler, never()).handle((Event) any()); }
@Test public void testReconnectNode() throws Exception { final DrainDispatcher dispatcher = new DrainDispatcher(); rm = new MockRM() { @Override protected EventHandler<SchedulerEvent> createSchedulerEventDispatcher() { return new SchedulerEventDispatcher(this.scheduler) { @Override public void handle(SchedulerEvent event) { scheduler.handle(event); } }; } @Override protected Dispatcher createDispatcher() { return dispatcher; } }; rm.start(); MockNM nm1 = rm.registerNode("host1:1234", 5120); MockNM nm2 = rm.registerNode("host2:5678", 5120); nm1.nodeHeartbeat(true); nm2.nodeHeartbeat(false); dispatcher.await(); checkUnealthyNMCount(rm, nm2, true, 1); final int expectedNMs = ClusterMetrics.getMetrics().getNumActiveNMs(); QueueMetrics metrics = rm.getResourceScheduler().getRootQueueMetrics(); // TODO Metrics incorrect in case of the FifoScheduler Assert.assertEquals(5120, metrics.getAvailableMB()); // reconnect of healthy node nm1 = rm.registerNode("host1:1234", 5120); NodeHeartbeatResponse response = nm1.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(response.getNodeAction())); dispatcher.await(); Assert.assertEquals(expectedNMs, ClusterMetrics.getMetrics().getNumActiveNMs()); checkUnealthyNMCount(rm, nm2, true, 1); // reconnect of unhealthy node nm2 = rm.registerNode("host2:5678", 5120); response = nm2.nodeHeartbeat(false); Assert.assertTrue(NodeAction.NORMAL.equals(response.getNodeAction())); dispatcher.await(); Assert.assertEquals(expectedNMs, ClusterMetrics.getMetrics().getNumActiveNMs()); checkUnealthyNMCount(rm, nm2, true, 1); // unhealthy node changed back to healthy nm2 = rm.registerNode("host2:5678", 5120); dispatcher.await(); response = nm2.nodeHeartbeat(true); response = nm2.nodeHeartbeat(true); dispatcher.await(); Assert.assertEquals(5120 + 5120, metrics.getAvailableMB()); // reconnect of node with changed capability nm1 = rm.registerNode("host2:5678", 10240); dispatcher.await(); response = nm1.nodeHeartbeat(true); dispatcher.await(); Assert.assertTrue(NodeAction.NORMAL.equals(response.getNodeAction())); Assert.assertEquals(5120 + 10240, metrics.getAvailableMB()); // reconnect of node with changed capability and running applications List<ApplicationId> runningApps = new ArrayList<ApplicationId>(); runningApps.add(ApplicationId.newInstance(1, 0)); nm1 = rm.registerNode("host2:5678", 15360, 2, runningApps); dispatcher.await(); response = nm1.nodeHeartbeat(true); dispatcher.await(); Assert.assertTrue(NodeAction.NORMAL.equals(response.getNodeAction())); Assert.assertEquals(5120 + 15360, metrics.getAvailableMB()); // reconnect healthy node changing http port nm1 = new MockNM("host1:1234", 5120, rm.getResourceTrackerService()); nm1.setHttpPort(3); nm1.registerNode(); dispatcher.await(); response = nm1.nodeHeartbeat(true); response = nm1.nodeHeartbeat(true); dispatcher.await(); RMNode rmNode = rm.getRMContext().getRMNodes().get(nm1.getNodeId()); Assert.assertEquals(3, rmNode.getHttpPort()); Assert.assertEquals(5120, rmNode.getTotalCapability().getMemory()); Assert.assertEquals(5120 + 15360, metrics.getAvailableMB()); }
@Test public void testNodeHeartBeatWithLabels() throws Exception { writeToHostsFile("host2"); Configuration conf = new Configuration(); conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile.getAbsolutePath()); conf.set( YarnConfiguration.NODELABEL_CONFIGURATION_TYPE, YarnConfiguration.DISTRIBUTED_NODELABEL_CONFIGURATION_TYPE); final RMNodeLabelsManager nodeLabelsMgr = new NullRMNodeLabelsManager(); rm = new MockRM(conf) { @Override protected RMNodeLabelsManager createNodeLabelManager() { return nodeLabelsMgr; } }; rm.start(); // adding valid labels try { nodeLabelsMgr.addToCluserNodeLabelsWithDefaultExclusivity(toSet("A", "B", "C")); } catch (IOException e) { Assert.fail("Caught Exception while intializing"); e.printStackTrace(); } // Registering of labels and other required info to RM ResourceTrackerService resourceTrackerService = rm.getResourceTrackerService(); RegisterNodeManagerRequest registerReq = Records.newRecord(RegisterNodeManagerRequest.class); NodeId nodeId = NodeId.newInstance("host2", 1234); Resource capability = BuilderUtils.newResource(1024, 1); registerReq.setResource(capability); registerReq.setNodeId(nodeId); registerReq.setHttpPort(1234); registerReq.setNMVersion(YarnVersionInfo.getVersion()); registerReq.setNodeLabels(toNodeLabelSet("A")); // Node register label RegisterNodeManagerResponse registerResponse = resourceTrackerService.registerNodeManager(registerReq); // modification of labels during heartbeat NodeHeartbeatRequest heartbeatReq = Records.newRecord(NodeHeartbeatRequest.class); heartbeatReq.setNodeLabels(toNodeLabelSet("B")); // Node heartbeat label update NodeStatus nodeStatusObject = getNodeStatusObject(nodeId); heartbeatReq.setNodeStatus(nodeStatusObject); heartbeatReq.setLastKnownNMTokenMasterKey(registerResponse.getNMTokenMasterKey()); heartbeatReq.setLastKnownContainerTokenMasterKey(registerResponse.getContainerTokenMasterKey()); NodeHeartbeatResponse nodeHeartbeatResponse = resourceTrackerService.nodeHeartbeat(heartbeatReq); Assert.assertEquals( "InValid Node Labels were not accepted by RM", NodeAction.NORMAL, nodeHeartbeatResponse.getNodeAction()); assertCollectionEquals( nodeLabelsMgr.getNodeLabels().get(nodeId), ResourceTrackerService.convertToStringSet(heartbeatReq.getNodeLabels())); Assert.assertTrue( "Valid Node Labels were not accepted by RM", nodeHeartbeatResponse.getAreNodeLabelsAcceptedByRM()); // After modification of labels next heartbeat sends null informing no update Set<String> oldLabels = nodeLabelsMgr.getNodeLabels().get(nodeId); int responseId = nodeStatusObject.getResponseId(); heartbeatReq = Records.newRecord(NodeHeartbeatRequest.class); heartbeatReq.setNodeLabels(null); // Node heartbeat label update nodeStatusObject = getNodeStatusObject(nodeId); nodeStatusObject.setResponseId(responseId + 2); heartbeatReq.setNodeStatus(nodeStatusObject); heartbeatReq.setLastKnownNMTokenMasterKey(registerResponse.getNMTokenMasterKey()); heartbeatReq.setLastKnownContainerTokenMasterKey(registerResponse.getContainerTokenMasterKey()); nodeHeartbeatResponse = resourceTrackerService.nodeHeartbeat(heartbeatReq); Assert.assertEquals( "InValid Node Labels were not accepted by RM", NodeAction.NORMAL, nodeHeartbeatResponse.getNodeAction()); assertCollectionEquals(nodeLabelsMgr.getNodeLabels().get(nodeId), oldLabels); Assert.assertFalse( "Node Labels should not accepted by RM", nodeHeartbeatResponse.getAreNodeLabelsAcceptedByRM()); rm.stop(); }