@Test public void testAppAttemptMetrics() throws Exception { AsyncDispatcher dispatcher = new InlineDispatcher(); RMContext rmContext = new RMContextImpl(null, dispatcher, null, null, null, null, null); FifoScheduler schedular = new FifoScheduler(); schedular.reinitialize(new Configuration(), null, rmContext); ApplicationId appId = BuilderUtils.newApplicationId(200, 1); ApplicationAttemptId appAttemptId = BuilderUtils.newApplicationAttemptId(appId, 1); SchedulerEvent event = new AppAddedSchedulerEvent(appAttemptId, "queue", "user"); schedular.handle(event); appAttemptId = BuilderUtils.newApplicationAttemptId(appId, 2); event = new AppAddedSchedulerEvent(appAttemptId, "queue", "user"); schedular.handle(event); QueueMetrics metrics = schedular.getRootQueueMetrics(); Assert.assertEquals(1, metrics.getAppsSubmitted()); }
public void verifyClusterMetrics( int submittedApps, int completedApps, int reservedMB, int availableMB, int allocMB, int containersAlloc, int totalMB, int totalNodes, int lostNodes, int unhealthyNodes, int decommissionedNodes, int rebootedNodes, int activeNodes) throws JSONException, Exception { ResourceScheduler rs = rm.getResourceScheduler(); QueueMetrics metrics = rs.getRootQueueMetrics(); ClusterMetrics clusterMetrics = ClusterMetrics.getMetrics(); long totalMBExpect = metrics.getAvailableMB() + metrics.getAllocatedMB(); assertEquals("appsSubmitted doesn't match", metrics.getAppsSubmitted(), submittedApps); assertEquals("appsCompleted doesn't match", metrics.getAppsCompleted(), completedApps); assertEquals("reservedMB doesn't match", metrics.getReservedMB(), reservedMB); assertEquals("availableMB doesn't match", metrics.getAvailableMB(), availableMB); assertEquals("allocatedMB doesn't match", metrics.getAllocatedMB(), allocMB); assertEquals("containersAllocated doesn't match", 0, containersAlloc); assertEquals("totalMB doesn't match", totalMBExpect, totalMB); assertEquals( "totalNodes doesn't match", clusterMetrics.getNumActiveNMs() + clusterMetrics.getNumLostNMs() + clusterMetrics.getNumDecommisionedNMs() + clusterMetrics.getNumRebootedNMs() + clusterMetrics.getUnhealthyNMs(), totalNodes); assertEquals("lostNodes doesn't match", clusterMetrics.getNumLostNMs(), lostNodes); assertEquals("unhealthyNodes doesn't match", clusterMetrics.getUnhealthyNMs(), unhealthyNodes); assertEquals( "decommissionedNodes doesn't match", clusterMetrics.getNumDecommisionedNMs(), decommissionedNodes); assertEquals("rebootedNodes doesn't match", clusterMetrics.getNumRebootedNMs(), rebootedNodes); assertEquals("activeNodes doesn't match", clusterMetrics.getNumActiveNMs(), activeNodes); }
@Test public void testReconnectNode() throws Exception { final DrainDispatcher dispatcher = new DrainDispatcher(); rm = new MockRM() { @Override protected EventHandler<SchedulerEvent> createSchedulerEventDispatcher() { return new SchedulerEventDispatcher(this.scheduler) { @Override public void handle(SchedulerEvent event) { scheduler.handle(event); } }; } @Override protected Dispatcher createDispatcher() { return dispatcher; } }; rm.start(); MockNM nm1 = rm.registerNode("host1:1234", 5120); MockNM nm2 = rm.registerNode("host2:5678", 5120); nm1.nodeHeartbeat(true); nm2.nodeHeartbeat(false); dispatcher.await(); checkUnealthyNMCount(rm, nm2, true, 1); final int expectedNMs = ClusterMetrics.getMetrics().getNumActiveNMs(); QueueMetrics metrics = rm.getResourceScheduler().getRootQueueMetrics(); // TODO Metrics incorrect in case of the FifoScheduler Assert.assertEquals(5120, metrics.getAvailableMB()); // reconnect of healthy node nm1 = rm.registerNode("host1:1234", 5120); NodeHeartbeatResponse response = nm1.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(response.getNodeAction())); dispatcher.await(); Assert.assertEquals(expectedNMs, ClusterMetrics.getMetrics().getNumActiveNMs()); checkUnealthyNMCount(rm, nm2, true, 1); // reconnect of unhealthy node nm2 = rm.registerNode("host2:5678", 5120); response = nm2.nodeHeartbeat(false); Assert.assertTrue(NodeAction.NORMAL.equals(response.getNodeAction())); dispatcher.await(); Assert.assertEquals(expectedNMs, ClusterMetrics.getMetrics().getNumActiveNMs()); checkUnealthyNMCount(rm, nm2, true, 1); // unhealthy node changed back to healthy nm2 = rm.registerNode("host2:5678", 5120); dispatcher.await(); response = nm2.nodeHeartbeat(true); response = nm2.nodeHeartbeat(true); dispatcher.await(); Assert.assertEquals(5120 + 5120, metrics.getAvailableMB()); // reconnect of node with changed capability nm1 = rm.registerNode("host2:5678", 10240); dispatcher.await(); response = nm1.nodeHeartbeat(true); dispatcher.await(); Assert.assertTrue(NodeAction.NORMAL.equals(response.getNodeAction())); Assert.assertEquals(5120 + 10240, metrics.getAvailableMB()); // reconnect of node with changed capability and running applications List<ApplicationId> runningApps = new ArrayList<ApplicationId>(); runningApps.add(ApplicationId.newInstance(1, 0)); nm1 = rm.registerNode("host2:5678", 15360, 2, runningApps); dispatcher.await(); response = nm1.nodeHeartbeat(true); dispatcher.await(); Assert.assertTrue(NodeAction.NORMAL.equals(response.getNodeAction())); Assert.assertEquals(5120 + 15360, metrics.getAvailableMB()); // reconnect healthy node changing http port nm1 = new MockNM("host1:1234", 5120, rm.getResourceTrackerService()); nm1.setHttpPort(3); nm1.registerNode(); dispatcher.await(); response = nm1.nodeHeartbeat(true); response = nm1.nodeHeartbeat(true); dispatcher.await(); RMNode rmNode = rm.getRMContext().getRMNodes().get(nm1.getNodeId()); Assert.assertEquals(3, rmNode.getHttpPort()); Assert.assertEquals(5120, rmNode.getTotalCapability().getMemory()); Assert.assertEquals(5120 + 15360, metrics.getAvailableMB()); }