@Test public void testAdminRefreshQueuesWithFileSystemBasedConfigurationProvider() throws IOException, YarnException { configuration.set( YarnConfiguration.RM_CONFIGURATION_PROVIDER_CLASS, "org.apache.hadoop.yarn.FileSystemBasedConfigurationProvider"); // upload default configurations uploadDefaultConfiguration(); try { rm = new MockRM(configuration); rm.init(configuration); rm.start(); } catch (Exception ex) { fail("Should not get any exceptions"); } CapacityScheduler cs = (CapacityScheduler) rm.getRMContext().getScheduler(); int maxAppsBefore = cs.getConfiguration().getMaximumSystemApplications(); CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration(); csConf.set("yarn.scheduler.capacity.maximum-applications", "5000"); uploadConfiguration(csConf, "capacity-scheduler.xml"); rm.adminService.refreshQueues(RefreshQueuesRequest.newInstance()); int maxAppsAfter = cs.getConfiguration().getMaximumSystemApplications(); Assert.assertEquals(maxAppsAfter, 5000); Assert.assertTrue(maxAppsAfter != maxAppsBefore); }
/**
 * Polls the RM's view of a node until its UNHEALTHY-ness matches what the
 * caller expects, then asserts both the node state and the cluster-wide
 * unhealthy-node metric.
 *
 * @param rm the resource manager whose node map is inspected
 * @param nm1 the node manager whose state is checked
 * @param health when {@code true} the node is expected to be in state
 *     UNHEALTHY; when {@code false} it is expected to be in any other state
 * @param count expected value of {@code ClusterMetrics.getUnhealthyNMs()}
 * @throws Exception if the wait is interrupted or a lookup fails
 */
private void checkUnealthyNMCount(MockRM rm, MockNM nm1, boolean health, int count)
    throws Exception {
  final int maxPolls = 20;

  // Wait in 100 ms slices, at most maxPolls times, for the state to settle.
  for (int poll = 0; poll < maxPolls; poll++) {
    boolean notUnhealthy =
        rm.getRMContext().getRMNodes().get(nm1.getNodeId()).getState()
            != NodeState.UNHEALTHY;
    if (notUnhealthy != health) {
      break;
    }
    synchronized (this) {
      wait(100);
    }
  }

  boolean notUnhealthyAtEnd =
      rm.getRMContext().getRMNodes().get(nm1.getNodeId()).getState()
          != NodeState.UNHEALTHY;
  Assert.assertFalse(notUnhealthyAtEnd == health);

  Assert.assertEquals(
      "Unhealthy metrics not incremented",
      count,
      ClusterMetrics.getMetrics().getUnhealthyNMs());
}
/**
 * Starts the RM with the test's base configuration (no provider override is
 * set in this method), calls refreshQueues and checks that the scheduler's
 * maximum-applications value is the same before and after the refresh.
 * Any exception raised by the refresh fails the test.
 */
@Test
public void testAdminRefreshQueuesWithLocalConfigurationProvider()
    throws IOException, YarnException {
  rm = new MockRM(configuration);
  rm.init(configuration);
  rm.start();

  CapacityScheduler scheduler = (CapacityScheduler) rm.getRMContext().getScheduler();
  final int initialMaxApps = scheduler.getConfiguration().getMaximumSystemApplications();

  try {
    RefreshQueuesRequest refreshRequest = RefreshQueuesRequest.newInstance();
    rm.adminService.refreshQueues(refreshRequest);

    int refreshedMaxApps = scheduler.getConfiguration().getMaximumSystemApplications();
    Assert.assertEquals(initialMaxApps, refreshedMaxApps);
  } catch (Exception ex) {
    fail("Using localConfigurationProvider. Should not get any exception.");
  }
}
@Test public void testRMHAWithFileSystemBasedConfiguration() throws IOException, YarnException { StateChangeRequestInfo requestInfo = new StateChangeRequestInfo(HAServiceProtocol.RequestSource.REQUEST_BY_USER); configuration.set( YarnConfiguration.RM_CONFIGURATION_PROVIDER_CLASS, "org.apache.hadoop.yarn.FileSystemBasedConfigurationProvider"); configuration.setBoolean(YarnConfiguration.RM_HA_ENABLED, true); configuration.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false); configuration.set(YarnConfiguration.RM_HA_IDS, "rm1,rm2"); int base = 100; for (String confKey : YarnConfiguration.getServiceAddressConfKeys(configuration)) { configuration.set(HAUtil.addSuffix(confKey, "rm1"), "0.0.0.0:" + (base + 20)); configuration.set(HAUtil.addSuffix(confKey, "rm2"), "0.0.0.0:" + (base + 40)); base = base * 2; } Configuration conf1 = new Configuration(configuration); conf1.set(YarnConfiguration.RM_HA_ID, "rm1"); Configuration conf2 = new Configuration(configuration); conf2.set(YarnConfiguration.RM_HA_ID, "rm2"); // upload default configurations uploadDefaultConfiguration(); MockRM rm1 = null; MockRM rm2 = null; try { rm1 = new MockRM(conf1); rm1.init(conf1); rm1.start(); Assert.assertTrue(rm1.getRMContext().getHAServiceState() == HAServiceState.STANDBY); rm2 = new MockRM(conf2); rm2.init(conf1); rm2.start(); Assert.assertTrue(rm2.getRMContext().getHAServiceState() == HAServiceState.STANDBY); rm1.adminService.transitionToActive(requestInfo); Assert.assertTrue(rm1.getRMContext().getHAServiceState() == HAServiceState.ACTIVE); CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration(); csConf.set("yarn.scheduler.capacity.maximum-applications", "5000"); uploadConfiguration(csConf, "capacity-scheduler.xml"); rm1.adminService.refreshQueues(RefreshQueuesRequest.newInstance()); int maxApps = ((CapacityScheduler) rm1.getRMContext().getScheduler()) .getConfiguration() .getMaximumSystemApplications(); Assert.assertEquals(maxApps, 5000); // Before failover 
happens, the maxApps is // still the default value on the standby rm : rm2 int maxAppsBeforeFailOver = ((CapacityScheduler) rm2.getRMContext().getScheduler()) .getConfiguration() .getMaximumSystemApplications(); Assert.assertEquals(maxAppsBeforeFailOver, 10000); // Do the failover rm1.adminService.transitionToStandby(requestInfo); rm2.adminService.transitionToActive(requestInfo); Assert.assertTrue(rm1.getRMContext().getHAServiceState() == HAServiceState.STANDBY); Assert.assertTrue(rm2.getRMContext().getHAServiceState() == HAServiceState.ACTIVE); int maxAppsAfter = ((CapacityScheduler) rm2.getRMContext().getScheduler()) .getConfiguration() .getMaximumSystemApplications(); Assert.assertEquals(maxAppsAfter, 5000); } finally { if (rm1 != null) { rm1.stop(); } if (rm2 != null) { rm2.stop(); } } }
@Test public void testReconnectNode() throws Exception { final DrainDispatcher dispatcher = new DrainDispatcher(); rm = new MockRM() { @Override protected EventHandler<SchedulerEvent> createSchedulerEventDispatcher() { return new SchedulerEventDispatcher(this.scheduler) { @Override public void handle(SchedulerEvent event) { scheduler.handle(event); } }; } @Override protected Dispatcher createDispatcher() { return dispatcher; } }; rm.start(); MockNM nm1 = rm.registerNode("host1:1234", 5120); MockNM nm2 = rm.registerNode("host2:5678", 5120); nm1.nodeHeartbeat(true); nm2.nodeHeartbeat(false); dispatcher.await(); checkUnealthyNMCount(rm, nm2, true, 1); final int expectedNMs = ClusterMetrics.getMetrics().getNumActiveNMs(); QueueMetrics metrics = rm.getResourceScheduler().getRootQueueMetrics(); // TODO Metrics incorrect in case of the FifoScheduler Assert.assertEquals(5120, metrics.getAvailableMB()); // reconnect of healthy node nm1 = rm.registerNode("host1:1234", 5120); NodeHeartbeatResponse response = nm1.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(response.getNodeAction())); dispatcher.await(); Assert.assertEquals(expectedNMs, ClusterMetrics.getMetrics().getNumActiveNMs()); checkUnealthyNMCount(rm, nm2, true, 1); // reconnect of unhealthy node nm2 = rm.registerNode("host2:5678", 5120); response = nm2.nodeHeartbeat(false); Assert.assertTrue(NodeAction.NORMAL.equals(response.getNodeAction())); dispatcher.await(); Assert.assertEquals(expectedNMs, ClusterMetrics.getMetrics().getNumActiveNMs()); checkUnealthyNMCount(rm, nm2, true, 1); // unhealthy node changed back to healthy nm2 = rm.registerNode("host2:5678", 5120); dispatcher.await(); response = nm2.nodeHeartbeat(true); response = nm2.nodeHeartbeat(true); dispatcher.await(); Assert.assertEquals(5120 + 5120, metrics.getAvailableMB()); // reconnect of node with changed capability nm1 = rm.registerNode("host2:5678", 10240); dispatcher.await(); response = nm1.nodeHeartbeat(true); dispatcher.await(); 
Assert.assertTrue(NodeAction.NORMAL.equals(response.getNodeAction())); Assert.assertEquals(5120 + 10240, metrics.getAvailableMB()); // reconnect of node with changed capability and running applications List<ApplicationId> runningApps = new ArrayList<ApplicationId>(); runningApps.add(ApplicationId.newInstance(1, 0)); nm1 = rm.registerNode("host2:5678", 15360, 2, runningApps); dispatcher.await(); response = nm1.nodeHeartbeat(true); dispatcher.await(); Assert.assertTrue(NodeAction.NORMAL.equals(response.getNodeAction())); Assert.assertEquals(5120 + 15360, metrics.getAvailableMB()); // reconnect healthy node changing http port nm1 = new MockNM("host1:1234", 5120, rm.getResourceTrackerService()); nm1.setHttpPort(3); nm1.registerNode(); dispatcher.await(); response = nm1.nodeHeartbeat(true); response = nm1.nodeHeartbeat(true); dispatcher.await(); RMNode rmNode = rm.getRMContext().getRMNodes().get(nm1.getNodeId()); Assert.assertEquals(3, rmNode.getHttpPort()); Assert.assertEquals(5120, rmNode.getTotalCapability().getMemory()); Assert.assertEquals(5120 + 15360, metrics.getAvailableMB()); }
@SuppressWarnings({"unchecked", "rawtypes"}) @Test public void testHandleContainerStatusInvalidCompletions() throws Exception { rm = new MockRM(new YarnConfiguration()); rm.start(); EventHandler handler = spy(rm.getRMContext().getDispatcher().getEventHandler()); // Case 1: Unmanaged AM RMApp app = rm.submitApp(1024, true); // Case 1.1: AppAttemptId is null NMContainerStatus report = NMContainerStatus.newInstance( ContainerId.newContainerId( ApplicationAttemptId.newInstance(app.getApplicationId(), 2), 1), ContainerState.COMPLETE, Resource.newInstance(1024, 1), "Dummy Completed", 0, Priority.newInstance(10), 1234); rm.getResourceTrackerService().handleNMContainerStatus(report, null); verify(handler, never()).handle((Event) any()); // Case 1.2: Master container is null RMAppAttemptImpl currentAttempt = (RMAppAttemptImpl) app.getCurrentAppAttempt(); currentAttempt.setMasterContainer(null); report = NMContainerStatus.newInstance( ContainerId.newContainerId(currentAttempt.getAppAttemptId(), 0), ContainerState.COMPLETE, Resource.newInstance(1024, 1), "Dummy Completed", 0, Priority.newInstance(10), 1234); rm.getResourceTrackerService().handleNMContainerStatus(report, null); verify(handler, never()).handle((Event) any()); // Case 2: Managed AM app = rm.submitApp(1024); // Case 2.1: AppAttemptId is null report = NMContainerStatus.newInstance( ContainerId.newContainerId( ApplicationAttemptId.newInstance(app.getApplicationId(), 2), 1), ContainerState.COMPLETE, Resource.newInstance(1024, 1), "Dummy Completed", 0, Priority.newInstance(10), 1234); try { rm.getResourceTrackerService().handleNMContainerStatus(report, null); } catch (Exception e) { // expected - ignore } verify(handler, never()).handle((Event) any()); // Case 2.2: Master container is null currentAttempt = (RMAppAttemptImpl) app.getCurrentAppAttempt(); currentAttempt.setMasterContainer(null); report = NMContainerStatus.newInstance( ContainerId.newContainerId(currentAttempt.getAppAttemptId(), 0), 
ContainerState.COMPLETE, Resource.newInstance(1024, 1), "Dummy Completed", 0, Priority.newInstance(10), 1234); try { rm.getResourceTrackerService().handleNMContainerStatus(report, null); } catch (Exception e) { // expected - ignore } verify(handler, never()).handle((Event) any()); }