/** * create a summary of the application's runtime. * * @param app {@link RMApp} whose summary is to be created, cannot be <code>null</code>. */ public static SummaryBuilder createAppSummary(RMApp app) { String trackingUrl = "N/A"; String host = "N/A"; RMAppAttempt attempt = app.getCurrentAppAttempt(); if (attempt != null) { trackingUrl = attempt.getTrackingUrl(); host = attempt.getHost(); } RMAppMetrics metrics = app.getRMAppMetrics(); SummaryBuilder summary = new SummaryBuilder() .add("appId", app.getApplicationId()) .add("name", app.getName()) .add("user", app.getUser()) .add("queue", app.getQueue()) .add("state", app.getState()) .add("trackingUrl", trackingUrl) .add("appMasterHost", host) .add("startTime", app.getStartTime()) .add("finishTime", app.getFinishTime()) .add("finalStatus", app.getFinalApplicationStatus()) .add("memorySeconds", metrics.getMemorySeconds()) .add("vcoreSeconds", metrics.getVcoreSeconds()) .add("preemptedAMContainers", metrics.getNumAMContainersPreempted()) .add("preemptedNonAMContainers", metrics.getNumNonAMContainersPreempted()) .add("preemptedResources", metrics.getResourcePreempted()) .add("applicationType", app.getApplicationType()); return summary; }
public AppAttemptInfo(RMAppAttempt attempt) { this.startTime = 0; this.containerId = ""; this.nodeHttpAddress = ""; this.nodeId = ""; this.logsLink = ""; if (attempt != null) { this.id = attempt.getAppAttemptId().getAttemptId(); this.startTime = attempt.getStartTime(); Container masterContainer = attempt.getMasterContainer(); if (masterContainer != null) { this.containerId = masterContainer.getId().toString(); this.nodeHttpAddress = masterContainer.getNodeHttpAddress(); this.nodeId = masterContainer.getNodeId().toString(); this.logsLink = join( HttpConfig.getSchemePrefix(), masterContainer.getNodeHttpAddress(), "/node", "/containerlogs/", ConverterUtils.toString(masterContainer.getId()), "/", attempt.getSubmissionContext().getUser()); } } }
// This is to test fetching AM container will be retried, if AM container is // not fetchable since DNS is unavailable causing container token/NMtoken // creation failure. @Test(timeout = 20000) public void testAMContainerAllocationWhenDNSUnavailable() throws Exception { final YarnConfiguration conf = new YarnConfiguration(); MockRM rm1 = new MockRM(conf) { @Override protected RMSecretManagerService createRMSecretManagerService() { return new TestRMSecretManagerService(conf, rmContext); } }; rm1.start(); MockNM nm1 = rm1.registerNode("unknownhost:1234", 8000); SecurityUtilTestHelper.setTokenServiceUseIp(true); RMApp app1 = rm1.submitApp(200); RMAppAttempt attempt = app1.getCurrentAppAttempt(); nm1.nodeHeartbeat(true); // fetching am container will fail, keep retrying 5 times. while (numRetries <= 5) { nm1.nodeHeartbeat(true); Thread.sleep(1000); Assert.assertEquals(RMAppAttemptState.SCHEDULED, attempt.getAppAttemptState()); System.out.println("Waiting for am container to be allocated."); } SecurityUtilTestHelper.setTokenServiceUseIp(false); rm1.waitForState(attempt.getAppAttemptId(), RMAppAttemptState.ALLOCATED); MockRM.launchAndRegisterAM(app1, rm1, nm1); }
/** * create a summary of the application's runtime. * * @param app {@link RMApp} whose summary is to be created, cannot be <code>null</code>. */ public static SummaryBuilder createAppSummary(RMApp app) { String trackingUrl = "N/A"; String host = "N/A"; RMAppAttempt attempt = app.getCurrentAppAttempt(); if (attempt != null) { trackingUrl = attempt.getTrackingUrl(); host = attempt.getHost(); } SummaryBuilder summary = new SummaryBuilder() .add("appId", app.getApplicationId()) .add("name", app.getName()) .add("user", app.getUser()) .add("queue", app.getQueue()) .add("state", app.getState()) .add("trackingUrl", trackingUrl) .add("appMasterHost", host) .add("startTime", app.getStartTime()) .add("finishTime", app.getFinishTime()) .add("finalStatus", app.getFinalApplicationStatus()); return summary; }
@Test public void testARRMResponseId() throws Exception { MockNM nm1 = rm.registerNode("h1:1234", 5000); RMApp app = rm.submitApp(2000); // Trigger the scheduling so the AM gets 'launched' nm1.nodeHeartbeat(true); RMAppAttempt attempt = app.getCurrentAppAttempt(); MockAM am = rm.sendAMLaunched(attempt.getAppAttemptId()); am.registerAppAttempt(); AllocateRequest allocateRequest = BuilderUtils.newAllocateRequest(attempt.getAppAttemptId(), 0, 0F, null, null); AllocateResponse response = amService.allocate(allocateRequest); Assert.assertEquals(1, response.getResponseId()); Assert.assertFalse(response.getReboot()); allocateRequest = BuilderUtils.newAllocateRequest( attempt.getAppAttemptId(), response.getResponseId(), 0F, null, null); response = amService.allocate(allocateRequest); Assert.assertEquals(2, response.getResponseId()); /* try resending */ response = amService.allocate(allocateRequest); Assert.assertEquals(2, response.getResponseId()); /** try sending old request again * */ allocateRequest = BuilderUtils.newAllocateRequest(attempt.getAppAttemptId(), 0, 0F, null, null); response = amService.allocate(allocateRequest); Assert.assertTrue(response.getReboot()); }
@Test(timeout = 30000) public void testExcessReservationThanNodeManagerCapacity() throws Exception { YarnConfiguration conf = new YarnConfiguration(); YarnAPIStorageFactory.setConfiguration(conf); RMStorageFactory.setConfiguration(conf); MockRM rm = new MockRM(conf); try { rm.start(); // Register node1 MockNM nm1 = rm.registerNode("127.0.0.1:1234", 2 * GB, 4); MockNM nm2 = rm.registerNode("127.0.0.1:2234", 3 * GB, 4); nm1.nodeHeartbeat(true); nm2.nodeHeartbeat(true); // HOP :: Sleep to allow previous events to be processed Thread.sleep( conf.getInt( YarnConfiguration.HOPS_PENDING_EVENTS_RETRIEVAL_PERIOD, YarnConfiguration.DEFAULT_HOPS_PENDING_EVENTS_RETRIEVAL_PERIOD) * 2); // wait.. int waitCount = 20; int size = rm.getRMContext().getActiveRMNodes().size(); while ((size = rm.getRMContext().getActiveRMNodes().size()) != 2 && waitCount-- > 0) { LOG.info("Waiting for node managers to register : " + size); Thread.sleep(100); } Assert.assertEquals(2, rm.getRMContext().getActiveRMNodes().size()); // Submit an application RMApp app1 = rm.submitApp(128); // kick the scheduling nm1.nodeHeartbeat(true); RMAppAttempt attempt1 = app1.getCurrentAppAttempt(); MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId(), nm1); am1.registerAppAttempt(); LOG.info("sending container requests "); am1.addRequests(new String[] {"*"}, 3 * GB, 1, 1); AllocateResponse alloc1Response = am1.schedule(); // send the request // kick the scheduler nm1.nodeHeartbeat(true); int waitCounter = 20; LOG.info("heartbeating nm1"); while (alloc1Response.getAllocatedContainers().size() < 1 && waitCounter-- > 0) { LOG.info("Waiting for containers to be created for app 1..."); Thread.sleep(500); alloc1Response = am1.schedule(); } LOG.info("received container : " + alloc1Response.getAllocatedContainers().size()); // No container should be allocated. // Internally it should not been reserved. Assert.assertTrue(alloc1Response.getAllocatedContainers().size() == 0); LOG.info("heartbeating nm2"); waitCounter = 20; nm2.nodeHeartbeat(true); while (alloc1Response.getAllocatedContainers().size() < 1 && waitCounter-- > 0) { LOG.info("Waiting for containers to be created for app 1..."); Thread.sleep(500); alloc1Response = am1.schedule(); } LOG.info("received container : " + alloc1Response.getAllocatedContainers().size()); Assert.assertTrue(alloc1Response.getAllocatedContainers().size() == 1); } finally { rm.stop(); } }
@Test public void testAMRMUnusableNodes() throws Exception { MockNM nm1 = rm.registerNode("h1:1234", 5000); MockNM nm2 = rm.registerNode("h2:1234", 5000); MockNM nm3 = rm.registerNode("h3:1234", 5000); MockNM nm4 = rm.registerNode("h4:1234", 5000); RMApp app1 = rm.submitApp(2000); // Trigger the scheduling so the AM gets 'launched' on nm1 nm1.nodeHeartbeat(true); RMAppAttempt attempt1 = app1.getCurrentAppAttempt(); MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId()); // register AM returns no unusable node am1.registerAppAttempt(); // allocate request returns no updated node AllocateRequest allocateRequest1 = BuilderUtils.newAllocateRequest(attempt1.getAppAttemptId(), 0, 0F, null, null); AMResponse response1 = amService.allocate(allocateRequest1).getAMResponse(); List<NodeReport> updatedNodes = response1.getUpdatedNodes(); Assert.assertEquals(0, updatedNodes.size()); syncNodeHeartbeat(nm4, false); // allocate request returns updated node allocateRequest1 = BuilderUtils.newAllocateRequest( attempt1.getAppAttemptId(), response1.getResponseId(), 0F, null, null); response1 = amService.allocate(allocateRequest1).getAMResponse(); updatedNodes = response1.getUpdatedNodes(); Assert.assertEquals(1, updatedNodes.size()); NodeReport nr = updatedNodes.iterator().next(); Assert.assertEquals(nm4.getNodeId(), nr.getNodeId()); Assert.assertEquals(NodeState.UNHEALTHY, nr.getNodeState()); // resending the allocate request returns the same result response1 = amService.allocate(allocateRequest1).getAMResponse(); updatedNodes = response1.getUpdatedNodes(); Assert.assertEquals(1, updatedNodes.size()); nr = updatedNodes.iterator().next(); Assert.assertEquals(nm4.getNodeId(), nr.getNodeId()); Assert.assertEquals(NodeState.UNHEALTHY, nr.getNodeState()); syncNodeLost(nm3); // subsequent allocate request returns delta allocateRequest1 = BuilderUtils.newAllocateRequest( attempt1.getAppAttemptId(), response1.getResponseId(), 0F, null, null); response1 = amService.allocate(allocateRequest1).getAMResponse(); updatedNodes = response1.getUpdatedNodes(); Assert.assertEquals(1, updatedNodes.size()); nr = updatedNodes.iterator().next(); Assert.assertEquals(nm3.getNodeId(), nr.getNodeId()); Assert.assertEquals(NodeState.LOST, nr.getNodeState()); // registering another AM gives it the complete failed list RMApp app2 = rm.submitApp(2000); // Trigger nm2 heartbeat so that AM gets launched on it nm2.nodeHeartbeat(true); RMAppAttempt attempt2 = app2.getCurrentAppAttempt(); MockAM am2 = rm.sendAMLaunched(attempt2.getAppAttemptId()); // register AM returns all unusable nodes am2.registerAppAttempt(); // allocate request returns no updated node AllocateRequest allocateRequest2 = BuilderUtils.newAllocateRequest(attempt2.getAppAttemptId(), 0, 0F, null, null); AMResponse response2 = amService.allocate(allocateRequest2).getAMResponse(); updatedNodes = response2.getUpdatedNodes(); Assert.assertEquals(0, updatedNodes.size()); syncNodeHeartbeat(nm4, true); // both AM's should get delta updated nodes allocateRequest1 = BuilderUtils.newAllocateRequest( attempt1.getAppAttemptId(), response1.getResponseId(), 0F, null, null); response1 = amService.allocate(allocateRequest1).getAMResponse(); updatedNodes = response1.getUpdatedNodes(); Assert.assertEquals(1, updatedNodes.size()); nr = updatedNodes.iterator().next(); Assert.assertEquals(nm4.getNodeId(), nr.getNodeId()); Assert.assertEquals(NodeState.RUNNING, nr.getNodeState()); allocateRequest2 = BuilderUtils.newAllocateRequest( attempt2.getAppAttemptId(), response2.getResponseId(), 0F, null, null); response2 = amService.allocate(allocateRequest2).getAMResponse(); updatedNodes = response2.getUpdatedNodes(); Assert.assertEquals(1, updatedNodes.size()); nr = updatedNodes.iterator().next(); Assert.assertEquals(nm4.getNodeId(), nr.getNodeId()); Assert.assertEquals(NodeState.RUNNING, nr.getNodeState()); // subsequent allocate calls should return no updated nodes allocateRequest2 = BuilderUtils.newAllocateRequest( attempt2.getAppAttemptId(), response2.getResponseId(), 0F, null, null); response2 = amService.allocate(allocateRequest2).getAMResponse(); updatedNodes = response2.getUpdatedNodes(); Assert.assertEquals(0, updatedNodes.size()); // how to do the above for LOST node }