/** * Get all the nodes in the cluster, this method generate RPC * * @return host names * @throws YarnException */ private List<String> getClusterNodes() throws YarnException { List<String> result = new ArrayList<String>(); GetClusterNodesRequest clusterNodesReq = Records.newRecord(GetClusterNodesRequest.class); try { GetClusterNodesResponse clusterNodesResp = applicationsManager.getClusterNodes(clusterNodesReq); List<NodeReport> nodeReports = clusterNodesResp.getNodeReports(); for (NodeReport nodeReport : nodeReports) { result.add(nodeReport.getNodeId().getHost()); } } catch (IOException e) { LOG.error("error getting cluster nodes from AM"); throw new YarnException(e); } return result; }
public static NodeReport newNodeReport( NodeId nodeId, NodeState nodeState, String httpAddress, String rackName, Resource used, Resource capability, int numContainers, String healthReport, long lastHealthReportTime) { NodeReport nodeReport = recordFactory.newRecordInstance(NodeReport.class); nodeReport.setNodeId(nodeId); nodeReport.setNodeState(nodeState); nodeReport.setHttpAddress(httpAddress); nodeReport.setRackName(rackName); nodeReport.setUsed(used); nodeReport.setCapability(capability); nodeReport.setNumContainers(numContainers); nodeReport.setHealthReport(healthReport); nodeReport.setLastHealthReportTime(lastHealthReportTime); return nodeReport; }
@Test public void testSetupShutdown() throws Exception { GetClusterNodesRequest request = Records.newRecord(GetClusterNodesRequest.class); ClientRMService clientRMService = yarnCluster.getResourceManager().getClientRMService(); GetClusterNodesResponse response = clientRMService.getClusterNodes(request); List<NodeReport> nodeReports = response.getNodeReports(); LOG.info("{}", nodeReports); for (NodeReport nr : nodeReports) { LOG.info("Node: {}", nr.getNodeId()); LOG.info("Total memory: {}", nr.getCapability()); LOG.info("Used memory: {}", nr.getUsed()); LOG.info("Number containers: {}", nr.getNumContainers()); } String appMasterJar = JarFinder.getJar(StreamingAppMaster.class); LOG.info("appmaster jar: " + appMasterJar); String testJar = JarFinder.getJar(StramMiniClusterTest.class); LOG.info("testJar: " + testJar); // create test application Properties dagProps = new Properties(); // input module (ensure shutdown works while windows are generated) dagProps.put( StreamingApplication.DT_PREFIX + "operator.numGen.classname", TestGeneratorInputOperator.class.getName()); dagProps.put(StreamingApplication.DT_PREFIX + "operator.numGen.maxTuples", "1"); // fake output adapter - to be ignored when determine shutdown // props.put(DAGContext.DT_PREFIX + "stream.output.classname", // HDFSOutputStream.class.getName()); // props.put(DAGContext.DT_PREFIX + "stream.output.inputNode", "module2"); // props.put(DAGContext.DT_PREFIX + "stream.output.filepath", // "miniclustertest-testSetupShutdown.out"); dagProps.put( StreamingApplication.DT_PREFIX + "operator.module1.classname", GenericTestOperator.class.getName()); dagProps.put( StreamingApplication.DT_PREFIX + "operator.module2.classname", GenericTestOperator.class.getName()); dagProps.put(StreamingApplication.DT_PREFIX + "stream.fromNumGen.source", "numGen.outport"); dagProps.put(StreamingApplication.DT_PREFIX + "stream.fromNumGen.sinks", "module1.inport1"); dagProps.put(StreamingApplication.DT_PREFIX + "stream.n1n2.source", "module1.outport1"); dagProps.put(StreamingApplication.DT_PREFIX + "stream.n1n2.sinks", "module2.inport1"); dagProps.setProperty( StreamingApplication.DT_PREFIX + LogicalPlan.MASTER_MEMORY_MB.getName(), "128"); dagProps.setProperty( StreamingApplication.DT_PREFIX + LogicalPlan.CONTAINER_JVM_OPTIONS.getName(), "-Dlog4j.properties=custom_log4j.properties"); dagProps.setProperty( StreamingApplication.DT_PREFIX + "operator.*." + OperatorContext.MEMORY_MB.getName(), "64"); dagProps.setProperty( StreamingApplication.DT_PREFIX + "operator.*." + OperatorContext.VCORES.getName(), "1"); dagProps.setProperty( StreamingApplication.DT_PREFIX + "operator.*.port.*." + Context.PortContext.BUFFER_MEMORY_MB.getName(), "32"); dagProps.setProperty(StreamingApplication.DT_PREFIX + LogicalPlan.DEBUG.getName(), "true"); // dagProps.setProperty(StreamingApplication.DT_PREFIX + // LogicalPlan.CONTAINERS_MAX_COUNT.getName(), "2"); LOG.info("dag properties: {}", dagProps); LOG.info("Initializing Client"); LogicalPlanConfiguration tb = new LogicalPlanConfiguration(conf); tb.addFromProperties(dagProps, null); LogicalPlan dag = createDAG(tb); Configuration yarnConf = new Configuration(yarnCluster.getConfig()); StramClient client = new StramClient(yarnConf, dag); try { client.start(); if (StringUtils.isBlank(System.getenv("JAVA_HOME"))) { client.javaCmd = "java"; // JAVA_HOME not set in the yarn mini cluster } LOG.info("Running client"); client.startApplication(); boolean result = client.monitorApplication(); LOG.info("Client run completed. Result=" + result); Assert.assertTrue(result); } finally { client.stop(); } }
@Test public void testAMRMUnusableNodes() throws Exception { MockNM nm1 = rm.registerNode("h1:1234", 5000); MockNM nm2 = rm.registerNode("h2:1234", 5000); MockNM nm3 = rm.registerNode("h3:1234", 5000); MockNM nm4 = rm.registerNode("h4:1234", 5000); RMApp app1 = rm.submitApp(2000); // Trigger the scheduling so the AM gets 'launched' on nm1 nm1.nodeHeartbeat(true); RMAppAttempt attempt1 = app1.getCurrentAppAttempt(); MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId()); // register AM returns no unusable node am1.registerAppAttempt(); // allocate request returns no updated node AllocateRequest allocateRequest1 = BuilderUtils.newAllocateRequest(attempt1.getAppAttemptId(), 0, 0F, null, null); AMResponse response1 = amService.allocate(allocateRequest1).getAMResponse(); List<NodeReport> updatedNodes = response1.getUpdatedNodes(); Assert.assertEquals(0, updatedNodes.size()); syncNodeHeartbeat(nm4, false); // allocate request returns updated node allocateRequest1 = BuilderUtils.newAllocateRequest( attempt1.getAppAttemptId(), response1.getResponseId(), 0F, null, null); response1 = amService.allocate(allocateRequest1).getAMResponse(); updatedNodes = response1.getUpdatedNodes(); Assert.assertEquals(1, updatedNodes.size()); NodeReport nr = updatedNodes.iterator().next(); Assert.assertEquals(nm4.getNodeId(), nr.getNodeId()); Assert.assertEquals(NodeState.UNHEALTHY, nr.getNodeState()); // resending the allocate request returns the same result response1 = amService.allocate(allocateRequest1).getAMResponse(); updatedNodes = response1.getUpdatedNodes(); Assert.assertEquals(1, updatedNodes.size()); nr = updatedNodes.iterator().next(); Assert.assertEquals(nm4.getNodeId(), nr.getNodeId()); Assert.assertEquals(NodeState.UNHEALTHY, nr.getNodeState()); syncNodeLost(nm3); // subsequent allocate request returns delta allocateRequest1 = BuilderUtils.newAllocateRequest( attempt1.getAppAttemptId(), response1.getResponseId(), 0F, null, null); response1 = amService.allocate(allocateRequest1).getAMResponse(); updatedNodes = response1.getUpdatedNodes(); Assert.assertEquals(1, updatedNodes.size()); nr = updatedNodes.iterator().next(); Assert.assertEquals(nm3.getNodeId(), nr.getNodeId()); Assert.assertEquals(NodeState.LOST, nr.getNodeState()); // registering another AM gives it the complete failed list RMApp app2 = rm.submitApp(2000); // Trigger nm2 heartbeat so that AM gets launched on it nm2.nodeHeartbeat(true); RMAppAttempt attempt2 = app2.getCurrentAppAttempt(); MockAM am2 = rm.sendAMLaunched(attempt2.getAppAttemptId()); // register AM returns all unusable nodes am2.registerAppAttempt(); // allocate request returns no updated node AllocateRequest allocateRequest2 = BuilderUtils.newAllocateRequest(attempt2.getAppAttemptId(), 0, 0F, null, null); AMResponse response2 = amService.allocate(allocateRequest2).getAMResponse(); updatedNodes = response2.getUpdatedNodes(); Assert.assertEquals(0, updatedNodes.size()); syncNodeHeartbeat(nm4, true); // both AM's should get delta updated nodes allocateRequest1 = BuilderUtils.newAllocateRequest( attempt1.getAppAttemptId(), response1.getResponseId(), 0F, null, null); response1 = amService.allocate(allocateRequest1).getAMResponse(); updatedNodes = response1.getUpdatedNodes(); Assert.assertEquals(1, updatedNodes.size()); nr = updatedNodes.iterator().next(); Assert.assertEquals(nm4.getNodeId(), nr.getNodeId()); Assert.assertEquals(NodeState.RUNNING, nr.getNodeState()); allocateRequest2 = BuilderUtils.newAllocateRequest( attempt2.getAppAttemptId(), response2.getResponseId(), 0F, null, null); response2 = amService.allocate(allocateRequest2).getAMResponse(); updatedNodes = response2.getUpdatedNodes(); Assert.assertEquals(1, updatedNodes.size()); nr = updatedNodes.iterator().next(); Assert.assertEquals(nm4.getNodeId(), nr.getNodeId()); Assert.assertEquals(NodeState.RUNNING, nr.getNodeState()); // subsequent allocate calls should return no updated nodes allocateRequest2 = BuilderUtils.newAllocateRequest( attempt2.getAppAttemptId(), response2.getResponseId(), 0F, null, null); response2 = amService.allocate(allocateRequest2).getAMResponse(); updatedNodes = response2.getUpdatedNodes(); Assert.assertEquals(0, updatedNodes.size()); // how to do the above for LOST node }