@Test public void testAdminRefreshQueuesWithFileSystemBasedConfigurationProvider() throws IOException, YarnException { configuration.set( YarnConfiguration.RM_CONFIGURATION_PROVIDER_CLASS, "org.apache.hadoop.yarn.FileSystemBasedConfigurationProvider"); // upload default configurations uploadDefaultConfiguration(); try { rm = new MockRM(configuration); rm.init(configuration); rm.start(); } catch (Exception ex) { fail("Should not get any exceptions"); } CapacityScheduler cs = (CapacityScheduler) rm.getRMContext().getScheduler(); int maxAppsBefore = cs.getConfiguration().getMaximumSystemApplications(); CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration(); csConf.set("yarn.scheduler.capacity.maximum-applications", "5000"); uploadConfiguration(csConf, "capacity-scheduler.xml"); rm.adminService.refreshQueues(RefreshQueuesRequest.newInstance()); int maxAppsAfter = cs.getConfiguration().getMaximumSystemApplications(); Assert.assertEquals(maxAppsAfter, 5000); Assert.assertTrue(maxAppsAfter != maxAppsBefore); }
/** Decommissioning using a post-configured exclude hosts file */ @Test public void testAddNewExcludePathToConfiguration() throws Exception { Configuration conf = new Configuration(); rm = new MockRM(conf); rm.start(); MockNM nm1 = rm.registerNode("host1:1234", 5120); MockNM nm2 = rm.registerNode("host2:5678", 10240); ClusterMetrics metrics = ClusterMetrics.getMetrics(); assert (metrics != null); int initialMetricCount = metrics.getNumDecommisionedNMs(); NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertEquals(NodeAction.NORMAL, nodeHeartbeat.getNodeAction()); nodeHeartbeat = nm2.nodeHeartbeat(true); Assert.assertEquals(NodeAction.NORMAL, nodeHeartbeat.getNodeAction()); writeToHostsFile("host2"); conf.set(YarnConfiguration.RM_NODES_EXCLUDE_FILE_PATH, hostFile.getAbsolutePath()); rm.getNodesListManager().refreshNodes(conf); checkDecommissionedNMCount(rm, ++initialMetricCount); nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertEquals( "Node should not have been decomissioned.", NodeAction.NORMAL, nodeHeartbeat.getNodeAction()); nodeHeartbeat = nm2.nodeHeartbeat(true); Assert.assertEquals( "Node should have been decomissioned but is in state" + nodeHeartbeat.getNodeAction(), NodeAction.SHUTDOWN, nodeHeartbeat.getNodeAction()); }
@Test public void testNodeRegistrationVersionLessThanRM() throws Exception { writeToHostsFile("host2"); Configuration conf = new Configuration(); conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile.getAbsolutePath()); conf.set(YarnConfiguration.RM_NODEMANAGER_MINIMUM_VERSION, "EqualToRM"); rm = new MockRM(conf); rm.start(); String nmVersion = "1.9.9"; ResourceTrackerService resourceTrackerService = rm.getResourceTrackerService(); RegisterNodeManagerRequest req = Records.newRecord(RegisterNodeManagerRequest.class); NodeId nodeId = NodeId.newInstance("host2", 1234); Resource capability = BuilderUtils.newResource(1024, 1); req.setResource(capability); req.setNodeId(nodeId); req.setHttpPort(1234); req.setNMVersion(nmVersion); // trying to register a invalid node. RegisterNodeManagerResponse response = resourceTrackerService.registerNodeManager(req); Assert.assertEquals(NodeAction.SHUTDOWN, response.getNodeAction()); Assert.assertTrue( "Diagnostic message did not contain: 'Disallowed NodeManager " + "Version " + nmVersion + ", is less than the minimum version'", response .getDiagnosticsMessage() .contains( "Disallowed NodeManager Version " + nmVersion + ", is less than the minimum version ")); }
/** decommissioning using a exclude hosts file */ @Test public void testDecommissionWithExcludeHosts() throws Exception { Configuration conf = new Configuration(); conf.set("yarn.resourcemanager.nodes.exclude-path", hostFile.getAbsolutePath()); writeToHostsFile(""); rm = new MockRM(conf); rm.start(); MockNM nm1 = rm.registerNode("host1:1234", 5120); MockNM nm2 = rm.registerNode("host2:5678", 10240); int initialMetricCount = ClusterMetrics.getMetrics().getNumDecommisionedNMs(); HeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); nodeHeartbeat = nm2.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); writeToHostsFile("host2"); rm.getNodesListManager().refreshNodes(); nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); nodeHeartbeat = nm2.nodeHeartbeat(true); Assert.assertTrue( "The decommisioned metrics are not updated", NodeAction.SHUTDOWN.equals(nodeHeartbeat.getNodeAction())); checkDecommissionedNMCount(rm, ++initialMetricCount); }
@Test public void testAdminAclsWithFileSystemBasedConfigurationProvider() throws IOException, YarnException { configuration.set( YarnConfiguration.RM_CONFIGURATION_PROVIDER_CLASS, "org.apache.hadoop.yarn.FileSystemBasedConfigurationProvider"); // upload default configurations uploadDefaultConfiguration(); try { rm = new MockRM(configuration); rm.init(configuration); rm.start(); } catch (Exception ex) { fail("Should not get any exceptions"); } String aclStringBefore = rm.adminService.getAccessControlList().getAclString().trim(); YarnConfiguration yarnConf = new YarnConfiguration(); yarnConf.set(YarnConfiguration.YARN_ADMIN_ACL, "world:anyone:rwcda"); uploadConfiguration(yarnConf, "yarn-site.xml"); rm.adminService.refreshAdminAcls(RefreshAdminAclsRequest.newInstance()); String aclStringAfter = rm.adminService.getAccessControlList().getAclString().trim(); Assert.assertTrue(!aclStringAfter.equals(aclStringBefore)); Assert.assertEquals(aclStringAfter, "world:anyone:rwcda"); }
@Test public void testRefreshSuperUserGroupsWithFileSystemBasedConfigurationProvider() throws IOException, YarnException { configuration.set( YarnConfiguration.RM_CONFIGURATION_PROVIDER_CLASS, "org.apache.hadoop.yarn.FileSystemBasedConfigurationProvider"); // upload default configurations uploadDefaultConfiguration(); try { rm = new MockRM(configuration); rm.init(configuration); rm.start(); } catch (Exception ex) { fail("Should not get any exceptions"); } Configuration coreConf = new Configuration(false); coreConf.set("hadoop.proxyuser.test.groups", "test_groups"); coreConf.set("hadoop.proxyuser.test.hosts", "test_hosts"); uploadConfiguration(coreConf, "core-site.xml"); rm.adminService.refreshSuperUserGroupsConfiguration( RefreshSuperUserGroupsConfigurationRequest.newInstance()); Assert.assertTrue(ProxyUsers.getProxyGroups().get("hadoop.proxyuser.test.groups").size() == 1); Assert.assertTrue( ProxyUsers.getProxyGroups().get("hadoop.proxyuser.test.groups").contains("test_groups")); Assert.assertTrue(ProxyUsers.getProxyHosts().get("hadoop.proxyuser.test.hosts").size() == 1); Assert.assertTrue( ProxyUsers.getProxyHosts().get("hadoop.proxyuser.test.hosts").contains("test_hosts")); }
@Test public void testNodeHeartBeatWithInvalidLabels() throws Exception { writeToHostsFile("host2"); Configuration conf = new Configuration(); conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile.getAbsolutePath()); conf.set( YarnConfiguration.NODELABEL_CONFIGURATION_TYPE, YarnConfiguration.DISTRIBUTED_NODELABEL_CONFIGURATION_TYPE); final RMNodeLabelsManager nodeLabelsMgr = new NullRMNodeLabelsManager(); rm = new MockRM(conf) { @Override protected RMNodeLabelsManager createNodeLabelManager() { return nodeLabelsMgr; } }; rm.start(); try { nodeLabelsMgr.addToCluserNodeLabelsWithDefaultExclusivity(toSet("A", "B", "C")); } catch (IOException e) { Assert.fail("Caught Exception while intializing"); e.printStackTrace(); } ResourceTrackerService resourceTrackerService = rm.getResourceTrackerService(); RegisterNodeManagerRequest registerReq = Records.newRecord(RegisterNodeManagerRequest.class); NodeId nodeId = NodeId.newInstance("host2", 1234); Resource capability = BuilderUtils.newResource(1024, 1); registerReq.setResource(capability); registerReq.setNodeId(nodeId); registerReq.setHttpPort(1234); registerReq.setNMVersion(YarnVersionInfo.getVersion()); registerReq.setNodeLabels(toNodeLabelSet("A")); RegisterNodeManagerResponse registerResponse = resourceTrackerService.registerNodeManager(registerReq); NodeHeartbeatRequest heartbeatReq = Records.newRecord(NodeHeartbeatRequest.class); heartbeatReq.setNodeLabels(toNodeLabelSet("B", "#C")); // Invalid heart beat labels heartbeatReq.setNodeStatus(getNodeStatusObject(nodeId)); heartbeatReq.setLastKnownNMTokenMasterKey(registerResponse.getNMTokenMasterKey()); heartbeatReq.setLastKnownContainerTokenMasterKey(registerResponse.getContainerTokenMasterKey()); NodeHeartbeatResponse nodeHeartbeatResponse = resourceTrackerService.nodeHeartbeat(heartbeatReq); // response should be NORMAL when RM heartbeat labels are rejected Assert.assertEquals( "Response should be NORMAL when RM heartbeat labels" + " are rejected", NodeAction.NORMAL, nodeHeartbeatResponse.getNodeAction()); Assert.assertFalse(nodeHeartbeatResponse.getAreNodeLabelsAcceptedByRM()); Assert.assertNotNull(nodeHeartbeatResponse.getDiagnosticsMessage()); rm.stop(); }
@Test public void testRefreshUserToGroupsMappingsWithLocalConfigurationProvider() { rm = new MockRM(configuration); rm.init(configuration); rm.start(); try { rm.adminService.refreshUserToGroupsMappings(RefreshUserToGroupsMappingsRequest.newInstance()); } catch (Exception ex) { fail("Using localConfigurationProvider. Should not get any exception."); } }
@Test public void testNodeRegistrationWithInvalidLabelsSyntax() throws Exception { writeToHostsFile("host2"); Configuration conf = new Configuration(); conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile.getAbsolutePath()); conf.set( YarnConfiguration.NODELABEL_CONFIGURATION_TYPE, YarnConfiguration.DISTRIBUTED_NODELABEL_CONFIGURATION_TYPE); final RMNodeLabelsManager nodeLabelsMgr = new NullRMNodeLabelsManager(); rm = new MockRM(conf) { @Override protected RMNodeLabelsManager createNodeLabelManager() { return nodeLabelsMgr; } }; rm.start(); try { nodeLabelsMgr.addToCluserNodeLabelsWithDefaultExclusivity(toSet("X", "Y", "Z")); } catch (IOException e) { Assert.fail("Caught Exception while intializing"); e.printStackTrace(); } ResourceTrackerService resourceTrackerService = rm.getResourceTrackerService(); RegisterNodeManagerRequest req = Records.newRecord(RegisterNodeManagerRequest.class); NodeId nodeId = NodeId.newInstance("host2", 1234); Resource capability = BuilderUtils.newResource(1024, 1); req.setResource(capability); req.setNodeId(nodeId); req.setHttpPort(1234); req.setNMVersion(YarnVersionInfo.getVersion()); req.setNodeLabels(toNodeLabelSet("#Y")); RegisterNodeManagerResponse response = resourceTrackerService.registerNodeManager(req); Assert.assertEquals( "On Invalid Node Labels action is expected to be normal", NodeAction.NORMAL, response.getNodeAction()); Assert.assertNull(nodeLabelsMgr.getNodeLabels().get(nodeId)); Assert.assertNotNull(response.getDiagnosticsMessage()); Assert.assertFalse( "Node Labels should not accepted by RM If Invalid", response.getAreNodeLabelsAcceptedByRM()); if (rm != null) { rm.stop(); } }
@Test public void testSetRMIdentifierInRegistration() throws Exception { Configuration conf = new Configuration(); rm = new MockRM(conf); rm.start(); MockNM nm = new MockNM("host1:1234", 5120, rm.getResourceTrackerService()); RegisterNodeManagerResponse response = nm.registerNode(); // Verify the RMIdentifier is correctly set in RegisterNodeManagerResponse Assert.assertEquals(ResourceManager.getClusterTimeStamp(), response.getRMIdentifier()); }
@Test public void testNodeHeartbeatWithCentralLabelConfig() throws Exception { writeToHostsFile("host2"); Configuration conf = new Configuration(); conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile.getAbsolutePath()); conf.set( YarnConfiguration.NODELABEL_CONFIGURATION_TYPE, YarnConfiguration.DEFAULT_NODELABEL_CONFIGURATION_TYPE); final RMNodeLabelsManager nodeLabelsMgr = new NullRMNodeLabelsManager(); rm = new MockRM(conf) { @Override protected RMNodeLabelsManager createNodeLabelManager() { return nodeLabelsMgr; } }; rm.start(); ResourceTrackerService resourceTrackerService = rm.getResourceTrackerService(); RegisterNodeManagerRequest req = Records.newRecord(RegisterNodeManagerRequest.class); NodeId nodeId = NodeId.newInstance("host2", 1234); Resource capability = BuilderUtils.newResource(1024, 1); req.setResource(capability); req.setNodeId(nodeId); req.setHttpPort(1234); req.setNMVersion(YarnVersionInfo.getVersion()); req.setNodeLabels(toNodeLabelSet("A", "B", "C")); RegisterNodeManagerResponse registerResponse = resourceTrackerService.registerNodeManager(req); NodeHeartbeatRequest heartbeatReq = Records.newRecord(NodeHeartbeatRequest.class); heartbeatReq.setNodeLabels(toNodeLabelSet("B")); // Valid heart beat labels heartbeatReq.setNodeStatus(getNodeStatusObject(nodeId)); heartbeatReq.setLastKnownNMTokenMasterKey(registerResponse.getNMTokenMasterKey()); heartbeatReq.setLastKnownContainerTokenMasterKey(registerResponse.getContainerTokenMasterKey()); NodeHeartbeatResponse nodeHeartbeatResponse = resourceTrackerService.nodeHeartbeat(heartbeatReq); // response should be ok but the RMacceptNodeLabelsUpdate should be false Assert.assertEquals(NodeAction.NORMAL, nodeHeartbeatResponse.getNodeAction()); // no change in the labels, Assert.assertNull(nodeLabelsMgr.getNodeLabels().get(nodeId)); // heartbeat labels rejected Assert.assertFalse( "Invalid Node Labels should not accepted by RM", nodeHeartbeatResponse.getAreNodeLabelsAcceptedByRM()); if (rm != null) { rm.stop(); } }
@Test public void testNodeRegistrationWithLabels() throws Exception { writeToHostsFile("host2"); Configuration conf = new Configuration(); conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile.getAbsolutePath()); conf.set( YarnConfiguration.NODELABEL_CONFIGURATION_TYPE, YarnConfiguration.DISTRIBUTED_NODELABEL_CONFIGURATION_TYPE); final RMNodeLabelsManager nodeLabelsMgr = new NullRMNodeLabelsManager(); rm = new MockRM(conf) { @Override protected RMNodeLabelsManager createNodeLabelManager() { return nodeLabelsMgr; } }; rm.start(); try { nodeLabelsMgr.addToCluserNodeLabelsWithDefaultExclusivity(toSet("A", "B", "C")); } catch (IOException e) { Assert.fail("Caught Exception while intializing"); e.printStackTrace(); } ResourceTrackerService resourceTrackerService = rm.getResourceTrackerService(); RegisterNodeManagerRequest registerReq = Records.newRecord(RegisterNodeManagerRequest.class); NodeId nodeId = NodeId.newInstance("host2", 1234); Resource capability = BuilderUtils.newResource(1024, 1); registerReq.setResource(capability); registerReq.setNodeId(nodeId); registerReq.setHttpPort(1234); registerReq.setNMVersion(YarnVersionInfo.getVersion()); registerReq.setNodeLabels(toSet(NodeLabel.newInstance("A"))); RegisterNodeManagerResponse response = resourceTrackerService.registerNodeManager(registerReq); Assert.assertEquals( "Action should be normal on valid Node Labels", NodeAction.NORMAL, response.getNodeAction()); assertCollectionEquals( nodeLabelsMgr.getNodeLabels().get(nodeId), ResourceTrackerService.convertToStringSet(registerReq.getNodeLabels())); Assert.assertTrue( "Valid Node Labels were not accepted by RM", response.getAreNodeLabelsAcceptedByRM()); rm.stop(); }
@Test public void testInvalidNMUnregistration() throws Exception { Configuration conf = new Configuration(); rm = new MockRM(conf); rm.start(); ResourceTrackerService resourceTrackerService = rm.getResourceTrackerService(); int shutdownNMsCount = ClusterMetrics.getMetrics().getNumShutdownNMs(); int decommisionedNMsCount = ClusterMetrics.getMetrics().getNumDecommisionedNMs(); // Node not found for unregister UnRegisterNodeManagerRequest request = Records.newRecord(UnRegisterNodeManagerRequest.class); request.setNodeId(BuilderUtils.newNodeId("host", 1234)); resourceTrackerService.unRegisterNodeManager(request); checkShutdownNMCount(rm, 0); checkDecommissionedNMCount(rm, 0); // 1. Register the Node Manager // 2. Exclude the same Node Manager host // 3. Give NM heartbeat to RM // 4. Unregister the Node Manager MockNM nm1 = new MockNM("host1:1234", 5120, resourceTrackerService); RegisterNodeManagerResponse response = nm1.registerNode(); Assert.assertEquals(NodeAction.NORMAL, response.getNodeAction()); writeToHostsFile("host2"); conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile.getAbsolutePath()); rm.getNodesListManager().refreshNodes(conf); NodeHeartbeatResponse heartbeatResponse = nm1.nodeHeartbeat(true); Assert.assertEquals(NodeAction.SHUTDOWN, heartbeatResponse.getNodeAction()); checkShutdownNMCount(rm, shutdownNMsCount); checkDecommissionedNMCount(rm, ++decommisionedNMsCount); request.setNodeId(nm1.getNodeId()); resourceTrackerService.unRegisterNodeManager(request); checkShutdownNMCount(rm, shutdownNMsCount); checkDecommissionedNMCount(rm, decommisionedNMsCount); // 1. Register the Node Manager // 2. Exclude the same Node Manager host // 3. Unregister the Node Manager MockNM nm2 = new MockNM("host2:1234", 5120, resourceTrackerService); RegisterNodeManagerResponse response2 = nm2.registerNode(); Assert.assertEquals(NodeAction.NORMAL, response2.getNodeAction()); writeToHostsFile("host1"); conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile.getAbsolutePath()); rm.getNodesListManager().refreshNodes(conf); request.setNodeId(nm2.getNodeId()); resourceTrackerService.unRegisterNodeManager(request); checkShutdownNMCount(rm, shutdownNMsCount); checkDecommissionedNMCount(rm, ++decommisionedNMsCount); }
private ApplicationId submitAppAndGetAppId(String submitter, String queueName, boolean setupACLs) throws Exception { GetNewApplicationRequest newAppRequest = GetNewApplicationRequest.newInstance(); ApplicationClientProtocol submitterClient = getRMClientForUser(submitter); ApplicationId applicationId = submitterClient.getNewApplication(newAppRequest).getApplicationId(); Resource resource = BuilderUtils.newResource(1024, 1); Map<ApplicationAccessType, String> acls = createACLs(submitter, setupACLs); ContainerLaunchContext amContainerSpec = ContainerLaunchContext.newInstance(null, null, null, null, null, acls); ApplicationSubmissionContext appSubmissionContext = ApplicationSubmissionContext.newInstance( applicationId, "applicationName", queueName, null, amContainerSpec, false, true, 1, resource, "applicationType"); appSubmissionContext.setApplicationId(applicationId); appSubmissionContext.setQueue(queueName); SubmitApplicationRequest submitRequest = SubmitApplicationRequest.newInstance(appSubmissionContext); submitterClient.submitApplication(submitRequest); resourceManager.waitForState(applicationId, RMAppState.ACCEPTED); return applicationId; }
@Test public void testAdminRefreshQueuesWithLocalConfigurationProvider() throws IOException, YarnException { rm = new MockRM(configuration); rm.init(configuration); rm.start(); CapacityScheduler cs = (CapacityScheduler) rm.getRMContext().getScheduler(); int maxAppsBefore = cs.getConfiguration().getMaximumSystemApplications(); try { rm.adminService.refreshQueues(RefreshQueuesRequest.newInstance()); Assert.assertEquals(maxAppsBefore, cs.getConfiguration().getMaximumSystemApplications()); } catch (Exception ex) { fail("Using localConfigurationProvider. Should not get any exception."); } }
/** * Test RM read NM next heartBeat Interval correctly from Configuration file, and NM get next * heartBeat Interval from RM correctly */ @Test(timeout = 50000) public void testGetNextHeartBeatInterval() throws Exception { Configuration conf = new Configuration(); conf.set(YarnConfiguration.RM_NM_HEARTBEAT_INTERVAL_MS, "4000"); rm = new MockRM(conf); rm.start(); MockNM nm1 = rm.registerNode("host1:1234", 5120); MockNM nm2 = rm.registerNode("host2:5678", 10240); NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertEquals(4000, nodeHeartbeat.getNextHeartBeatInterval()); NodeHeartbeatResponse nodeHeartbeat2 = nm2.nodeHeartbeat(true); Assert.assertEquals(4000, nodeHeartbeat2.getNextHeartBeatInterval()); }
@After public void tearDown() throws IOException { if (rm != null) { rm.stop(); } fs.delete(workingPath, true); fs.delete(tmpDir, true); }
private void checkUnealthyNMCount(MockRM rm, MockNM nm1, boolean health, int count) throws Exception { int waitCount = 0; while ((rm.getRMContext().getRMNodes().get(nm1.getNodeId()).getState() != NodeState.UNHEALTHY) == health && waitCount++ < 20) { synchronized (this) { wait(100); } } Assert.assertFalse( (rm.getRMContext().getRMNodes().get(nm1.getNodeId()).getState() != NodeState.UNHEALTHY) == health); Assert.assertEquals( "Unhealthy metrics not incremented", count, ClusterMetrics.getMetrics().getUnhealthyNMs()); }
/** Decommissioning using a pre-configured include hosts file */ @Test public void testDecommissionWithIncludeHosts() throws Exception { writeToHostsFile("localhost", "host1", "host2"); Configuration conf = new Configuration(); conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile.getAbsolutePath()); rm = new MockRM(conf); rm.start(); MockNM nm1 = rm.registerNode("host1:1234", 5120); MockNM nm2 = rm.registerNode("host2:5678", 10240); MockNM nm3 = rm.registerNode("localhost:4433", 1024); ClusterMetrics metrics = ClusterMetrics.getMetrics(); assert (metrics != null); int metricCount = metrics.getNumDecommisionedNMs(); NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); nodeHeartbeat = nm2.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); nodeHeartbeat = nm3.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); // To test that IPs also work String ip = NetUtils.normalizeHostName("localhost"); writeToHostsFile("host1", ip); rm.getNodesListManager().refreshNodes(conf); checkDecommissionedNMCount(rm, ++metricCount); nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); Assert.assertEquals(1, ClusterMetrics.getMetrics().getNumDecommisionedNMs()); nodeHeartbeat = nm2.nodeHeartbeat(true); Assert.assertTrue( "Node is not decommisioned.", NodeAction.SHUTDOWN.equals(nodeHeartbeat.getNodeAction())); nodeHeartbeat = nm3.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); Assert.assertEquals(metricCount, ClusterMetrics.getMetrics().getNumDecommisionedNMs()); }
@After public void tearDown() { if (hostFile != null && hostFile.exists()) { hostFile.delete(); } if (rm != null) { rm.stop(); } }
@Before public void setup() throws InterruptedException, IOException { conf = createConfiguration(); rpc = YarnRPC.create(conf); rmAddress = conf.getSocketAddr( YarnConfiguration.RM_ADDRESS, YarnConfiguration.DEFAULT_RM_ADDRESS, YarnConfiguration.DEFAULT_RM_PORT); AccessControlList adminACL = new AccessControlList(""); conf.set(YarnConfiguration.YARN_ADMIN_ACL, adminACL.getAclString()); resourceManager = new MockRM(conf) { protected ClientRMService createClientRMService() { return new ClientRMService( getRMContext(), this.scheduler, this.rmAppManager, this.applicationACLsManager, this.queueACLsManager, getRMContext().getRMDelegationTokenSecretManager()); }; @Override protected void doSecureLogin() throws IOException {} }; new Thread() { public void run() { resourceManager.start(); }; }.start(); int waitCount = 0; while (resourceManager.getServiceState() == STATE.INITED && waitCount++ < 60) { LOG.info("Waiting for RM to start..."); Thread.sleep(1500); } if (resourceManager.getServiceState() != STATE.STARTED) { // RM could have failed. throw new IOException( "ResourceManager failed to start. Final state is " + resourceManager.getServiceState()); } }
@Test public void testNodeRegistrationFailure() throws Exception { writeToHostsFile("host1"); Configuration conf = new Configuration(); conf.set("yarn.resourcemanager.nodes.include-path", hostFile.getAbsolutePath()); rm = new MockRM(conf); rm.start(); ResourceTrackerService resourceTrackerService = rm.getResourceTrackerService(); RegisterNodeManagerRequest req = Records.newRecord(RegisterNodeManagerRequest.class); NodeId nodeId = Records.newRecord(NodeId.class); nodeId.setHost("host2"); nodeId.setPort(1234); req.setNodeId(nodeId); req.setHttpPort(1234); // trying to register a invalid node. RegisterNodeManagerResponse response = resourceTrackerService.registerNodeManager(req); Assert.assertEquals(NodeAction.SHUTDOWN, response.getRegistrationResponse().getNodeAction()); }
@Test public void testRMStartsWithoutConfigurationFilesProvided() { // enable FileSystemBasedConfigurationProvider without uploading // any configuration files into Remote File System. configuration.set( YarnConfiguration.RM_CONFIGURATION_PROVIDER_CLASS, "org.apache.hadoop.yarn.FileSystemBasedConfigurationProvider"); // The configurationProvider will return NULL instead of // throwing out Exceptions, if there are no configuration files provided. // RM will not load the remote Configuration files, // and should start successfully. try { rm = new MockRM(configuration); rm.init(configuration); rm.start(); } catch (Exception ex) { fail("Should not get any exceptions"); } }
@Test public void testNodeRegistrationFailure() throws Exception { writeToHostsFile("host1"); Configuration conf = new Configuration(); conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile.getAbsolutePath()); rm = new MockRM(conf); rm.start(); ResourceTrackerService resourceTrackerService = rm.getResourceTrackerService(); RegisterNodeManagerRequest req = Records.newRecord(RegisterNodeManagerRequest.class); NodeId nodeId = NodeId.newInstance("host2", 1234); req.setNodeId(nodeId); req.setHttpPort(1234); // trying to register a invalid node. RegisterNodeManagerResponse response = resourceTrackerService.registerNodeManager(req); Assert.assertEquals(NodeAction.SHUTDOWN, response.getNodeAction()); Assert.assertEquals( "Disallowed NodeManager from host2, Sending SHUTDOWN signal to the NodeManager.", response.getDiagnosticsMessage()); }
@Test public void testNodeRegistrationSuccess() throws Exception { writeToHostsFile("host2"); Configuration conf = new Configuration(); conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile.getAbsolutePath()); rm = new MockRM(conf); rm.start(); ResourceTrackerService resourceTrackerService = rm.getResourceTrackerService(); RegisterNodeManagerRequest req = Records.newRecord(RegisterNodeManagerRequest.class); NodeId nodeId = NodeId.newInstance("host2", 1234); Resource capability = BuilderUtils.newResource(1024, 1); req.setResource(capability); req.setNodeId(nodeId); req.setHttpPort(1234); req.setNMVersion(YarnVersionInfo.getVersion()); // trying to register a invalid node. RegisterNodeManagerResponse response = resourceTrackerService.registerNodeManager(req); Assert.assertEquals(NodeAction.NORMAL, response.getNodeAction()); }
@Test public void testReboot() throws Exception { Configuration conf = new Configuration(); rm = new MockRM(conf); rm.start(); MockNM nm1 = rm.registerNode("host1:1234", 5120); MockNM nm2 = new MockNM("host2:1234", 2048, rm.getResourceTrackerService()); int initialMetricCount = ClusterMetrics.getMetrics().getNumRebootedNMs(); HeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); nodeHeartbeat = nm2.nodeHeartbeat( new HashMap<ApplicationId, List<ContainerStatus>>(), true, recordFactory.newRecordInstance(NodeId.class)); Assert.assertTrue(NodeAction.REBOOT.equals(nodeHeartbeat.getNodeAction())); checkRebootedNMCount(rm, ++initialMetricCount); }
@Test public void testUnhealthyNodeStatus() throws Exception { Configuration conf = new Configuration(); conf.set("yarn.resourcemanager.nodes.exclude-path", hostFile.getAbsolutePath()); MockRM rm = new MockRM(conf); rm.start(); MockNM nm1 = rm.registerNode("host1:1234", 5120); Assert.assertEquals(0, ClusterMetrics.getMetrics().getUnhealthyNMs()); // node healthy nm1.nodeHeartbeat(true); // node unhealthy nm1.nodeHeartbeat(false); checkUnealthyNMCount(rm, nm1, true, 1); // node healthy again nm1.nodeHeartbeat(true); checkUnealthyNMCount(rm, nm1, false, 0); }
@Test public void testUnhealthyNodeStatus() throws Exception { Configuration conf = new Configuration(); conf.set(YarnConfiguration.RM_NODES_EXCLUDE_FILE_PATH, hostFile.getAbsolutePath()); rm = new MockRM(conf); rm.start(); MockNM nm1 = rm.registerNode("host1:1234", 5120); Assert.assertEquals(0, ClusterMetrics.getMetrics().getUnhealthyNMs()); // node healthy nm1.nodeHeartbeat(true); // node unhealthy nm1.nodeHeartbeat(false); checkUnealthyNMCount(rm, nm1, true, 1); // node healthy again nm1.nodeHeartbeat(true); checkUnealthyNMCount(rm, nm1, false, 0); }
@Test public void testReboot() throws Exception { Configuration conf = new Configuration(); rm = new MockRM(conf); rm.start(); MockNM nm1 = rm.registerNode("host1:1234", 5120); MockNM nm2 = rm.registerNode("host2:1234", 2048); int initialMetricCount = ClusterMetrics.getMetrics().getNumRebootedNMs(); NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); nodeHeartbeat = nm2.nodeHeartbeat(new HashMap<ApplicationId, List<ContainerStatus>>(), true, -100); Assert.assertTrue(NodeAction.RESYNC.equals(nodeHeartbeat.getNodeAction())); Assert.assertEquals( "Too far behind rm response id:0 nm response id:-100", nodeHeartbeat.getDiagnosticsMessage()); checkRebootedNMCount(rm, ++initialMetricCount); }
@Test public void testUnhealthyNMUnregistration() throws Exception { Configuration conf = new Configuration(); rm = new MockRM(conf); rm.start(); ResourceTrackerService resourceTrackerService = rm.getResourceTrackerService(); MockNM nm1 = rm.registerNode("host1:1234", 5120); Assert.assertEquals(0, ClusterMetrics.getMetrics().getUnhealthyNMs()); // node healthy nm1.nodeHeartbeat(true); int shutdownNMsCount = ClusterMetrics.getMetrics().getNumShutdownNMs(); // node unhealthy nm1.nodeHeartbeat(false); checkUnealthyNMCount(rm, nm1, true, 1); UnRegisterNodeManagerRequest request = Records.newRecord(UnRegisterNodeManagerRequest.class); request.setNodeId(nm1.getNodeId()); resourceTrackerService.unRegisterNodeManager(request); checkShutdownNMCount(rm, ++shutdownNMsCount); }