/**
  * Collects the host name of every node reported for the cluster. Each call sends a
  * cluster-nodes request through {@code applicationsManager}, i.e. it results in an RPC.
  *
  * @return the host of each reported node, in the order the reports were returned
  * @throws YarnException wrapping the {@link IOException} raised while fetching the reports
  */
 private List<String> getClusterNodes() throws YarnException {
   GetClusterNodesRequest request = Records.newRecord(GetClusterNodesRequest.class);
   List<String> hosts = new ArrayList<String>();
   try {
     // One report per node; only the host part of the node id is of interest here.
     for (NodeReport report : applicationsManager.getClusterNodes(request).getNodeReports()) {
       hosts.add(report.getNodeId().getHost());
     }
   } catch (IOException e) {
     LOG.error("error getting cluster nodes from AM");
     throw new YarnException(e);
   }
   return hosts;
 }
Example no. 2
0
 /**
  * Builds a {@code NodeReport} record obtained from {@code recordFactory} and fills it with the
  * supplied values, one setter call per argument.
  *
  * @param nodeId value stored via {@code setNodeId}
  * @param nodeState value stored via {@code setNodeState}
  * @param httpAddress value stored via {@code setHttpAddress}
  * @param rackName value stored via {@code setRackName}
  * @param used value stored via {@code setUsed}
  * @param capability value stored via {@code setCapability}
  * @param numContainers value stored via {@code setNumContainers}
  * @param healthReport value stored via {@code setHealthReport}
  * @param lastHealthReportTime value stored via {@code setLastHealthReportTime}
  * @return the populated report
  */
 public static NodeReport newNodeReport(
     NodeId nodeId,
     NodeState nodeState,
     String httpAddress,
     String rackName,
     Resource used,
     Resource capability,
     int numContainers,
     String healthReport,
     long lastHealthReportTime) {
   final NodeReport report = recordFactory.newRecordInstance(NodeReport.class);

   // Identity and location of the node.
   report.setNodeId(nodeId);
   report.setNodeState(nodeState);
   report.setHttpAddress(httpAddress);
   report.setRackName(rackName);

   // Resource figures.
   report.setUsed(used);
   report.setCapability(capability);
   report.setNumContainers(numContainers);

   // Health details.
   report.setHealthReport(healthReport);
   report.setLastHealthReportTime(lastHealthReportTime);

   return report;
 }
Example no. 3
0
  /**
   * Launches a small DAG on the YARN mini cluster through {@code StramClient} and asserts that
   * {@code monitorApplication()} reports success.
   *
   * <p>The DAG is described purely through properties: operator {@code numGen} feeds
   * {@code module1} (stream {@code fromNumGen}), which feeds {@code module2} (stream
   * {@code n1n2}). Before the launch, the node reports returned by the resource manager's client
   * service are logged for diagnostics.
   *
   * @throws Exception if querying the cluster, building the DAG, or running the client fails
   */
  @Test
  public void testSetupShutdown() throws Exception {
    GetClusterNodesRequest request = Records.newRecord(GetClusterNodesRequest.class);
    ClientRMService clientRMService = yarnCluster.getResourceManager().getClientRMService();
    GetClusterNodesResponse response = clientRMService.getClusterNodes(request);
    List<NodeReport> nodeReports = response.getNodeReports();
    LOG.info("{}", nodeReports);

    for (NodeReport nr : nodeReports) {
      LOG.info("Node: {}", nr.getNodeId());
      LOG.info("Total memory: {}", nr.getCapability());
      LOG.info("Used memory: {}", nr.getUsed());
      LOG.info("Number containers: {}", nr.getNumContainers());
    }

    String appMasterJar = JarFinder.getJar(StreamingAppMaster.class);
    LOG.info("appmaster jar: {}", appMasterJar);
    String testJar = JarFinder.getJar(StramMiniClusterTest.class);
    LOG.info("testJar: {}", testJar);

    // create test application
    // All entries are String/String, so setProperty is used throughout; Properties.put would also
    // accept non-String values and is discouraged by the JDK documentation.
    Properties dagProps = new Properties();

    // input module (ensure shutdown works while windows are generated)
    dagProps.setProperty(
        StreamingApplication.DT_PREFIX + "operator.numGen.classname",
        TestGeneratorInputOperator.class.getName());
    dagProps.setProperty(StreamingApplication.DT_PREFIX + "operator.numGen.maxTuples", "1");

    // fake output adapter - to be ignored when determine shutdown
    // props.put(DAGContext.DT_PREFIX + "stream.output.classname",
    // HDFSOutputStream.class.getName());
    // props.put(DAGContext.DT_PREFIX + "stream.output.inputNode", "module2");
    // props.put(DAGContext.DT_PREFIX + "stream.output.filepath",
    // "miniclustertest-testSetupShutdown.out");

    dagProps.setProperty(
        StreamingApplication.DT_PREFIX + "operator.module1.classname",
        GenericTestOperator.class.getName());

    dagProps.setProperty(
        StreamingApplication.DT_PREFIX + "operator.module2.classname",
        GenericTestOperator.class.getName());

    // numGen -> module1
    dagProps.setProperty(
        StreamingApplication.DT_PREFIX + "stream.fromNumGen.source", "numGen.outport");
    dagProps.setProperty(
        StreamingApplication.DT_PREFIX + "stream.fromNumGen.sinks", "module1.inport1");

    // module1 -> module2
    dagProps.setProperty(StreamingApplication.DT_PREFIX + "stream.n1n2.source", "module1.outport1");
    dagProps.setProperty(StreamingApplication.DT_PREFIX + "stream.n1n2.sinks", "module2.inport1");

    // Resource and debug settings for the application.
    dagProps.setProperty(
        StreamingApplication.DT_PREFIX + LogicalPlan.MASTER_MEMORY_MB.getName(), "128");
    dagProps.setProperty(
        StreamingApplication.DT_PREFIX + LogicalPlan.CONTAINER_JVM_OPTIONS.getName(),
        "-Dlog4j.properties=custom_log4j.properties");
    dagProps.setProperty(
        StreamingApplication.DT_PREFIX + "operator.*." + OperatorContext.MEMORY_MB.getName(), "64");
    dagProps.setProperty(
        StreamingApplication.DT_PREFIX + "operator.*." + OperatorContext.VCORES.getName(), "1");
    dagProps.setProperty(
        StreamingApplication.DT_PREFIX
            + "operator.*.port.*."
            + Context.PortContext.BUFFER_MEMORY_MB.getName(),
        "32");
    dagProps.setProperty(StreamingApplication.DT_PREFIX + LogicalPlan.DEBUG.getName(), "true");
    // dagProps.setProperty(StreamingApplication.DT_PREFIX +
    // LogicalPlan.CONTAINERS_MAX_COUNT.getName(), "2");
    LOG.info("dag properties: {}", dagProps);

    LOG.info("Initializing Client");
    LogicalPlanConfiguration tb = new LogicalPlanConfiguration(conf);
    tb.addFromProperties(dagProps, null);
    LogicalPlan dag = createDAG(tb);
    Configuration yarnConf = new Configuration(yarnCluster.getConfig());
    StramClient client = new StramClient(yarnConf, dag);
    try {
      client.start();
      if (StringUtils.isBlank(System.getenv("JAVA_HOME"))) {
        client.javaCmd = "java"; // JAVA_HOME not set in the yarn mini cluster
      }
      LOG.info("Running client");
      client.startApplication();
      boolean result = client.monitorApplication();

      LOG.info("Client run completed. Result={}", result);
      Assert.assertTrue(result);
    } finally {
      // Always stop the client, even when the launch or an assertion fails.
      client.stop();
    }
  }
  /**
   * Checks which node reports the allocate responses hand back to application masters while
   * nodes turn unhealthy, get lost, and become healthy again.
   *
   * @throws Exception if any interaction with the mock resource manager fails
   */
  @Test
  public void testAMRMUnusableNodes() throws Exception {
    MockNM firstAmNode = rm.registerNode("h1:1234", 5000);
    MockNM secondAmNode = rm.registerNode("h2:1234", 5000);
    MockNM lostNode = rm.registerNode("h3:1234", 5000);
    MockNM flakyNode = rm.registerNode("h4:1234", 5000);

    RMApp firstApp = rm.submitApp(2000);

    // Heartbeat from h1 so that scheduling runs and the AM gets 'launched' there.
    firstAmNode.nodeHeartbeat(true);

    RMAppAttempt firstAttempt = firstApp.getCurrentAppAttempt();
    MockAM firstAm = rm.sendAMLaunched(firstAttempt.getAppAttemptId());

    // Registering the AM reports no unusable node.
    firstAm.registerAppAttempt();

    // Nothing has changed yet: the first allocate carries no updated node.
    AllocateRequest firstAmRequest =
        BuilderUtils.newAllocateRequest(firstAttempt.getAppAttemptId(), 0, 0F, null, null);
    AMResponse firstAmResponse = amService.allocate(firstAmRequest).getAMResponse();
    Assert.assertEquals(0, firstAmResponse.getUpdatedNodes().size());

    syncNodeHeartbeat(flakyNode, false);

    // The next allocate reports h4 as UNHEALTHY.
    firstAmRequest =
        BuilderUtils.newAllocateRequest(
            firstAttempt.getAppAttemptId(), firstAmResponse.getResponseId(), 0F, null, null);
    firstAmResponse = amService.allocate(firstAmRequest).getAMResponse();
    assertSingleUpdatedNode(firstAmResponse.getUpdatedNodes(), flakyNode, NodeState.UNHEALTHY);

    // Sending the very same request again yields the very same answer.
    firstAmResponse = amService.allocate(firstAmRequest).getAMResponse();
    assertSingleUpdatedNode(firstAmResponse.getUpdatedNodes(), flakyNode, NodeState.UNHEALTHY);

    syncNodeLost(lostNode);

    // A follow-up allocate only carries the delta, i.e. h3 as LOST.
    firstAmRequest =
        BuilderUtils.newAllocateRequest(
            firstAttempt.getAppAttemptId(), firstAmResponse.getResponseId(), 0F, null, null);
    firstAmResponse = amService.allocate(firstAmRequest).getAMResponse();
    assertSingleUpdatedNode(firstAmResponse.getUpdatedNodes(), lostNode, NodeState.LOST);

    // Bring up a second AM; registering it gives it the complete failed list.
    RMApp secondApp = rm.submitApp(2000);
    // Heartbeat from h2 so that the second AM gets launched on it.
    secondAmNode.nodeHeartbeat(true);
    RMAppAttempt secondAttempt = secondApp.getCurrentAppAttempt();
    MockAM secondAm = rm.sendAMLaunched(secondAttempt.getAppAttemptId());

    // Registering the AM returns all unusable nodes.
    secondAm.registerAppAttempt();

    // Its first allocate carries no updated node.
    AllocateRequest secondAmRequest =
        BuilderUtils.newAllocateRequest(secondAttempt.getAppAttemptId(), 0, 0F, null, null);
    AMResponse secondAmResponse = amService.allocate(secondAmRequest).getAMResponse();
    Assert.assertEquals(0, secondAmResponse.getUpdatedNodes().size());

    syncNodeHeartbeat(flakyNode, true);

    // Both AMs must now see h4 as RUNNING in their delta.
    firstAmRequest =
        BuilderUtils.newAllocateRequest(
            firstAttempt.getAppAttemptId(), firstAmResponse.getResponseId(), 0F, null, null);
    firstAmResponse = amService.allocate(firstAmRequest).getAMResponse();
    assertSingleUpdatedNode(firstAmResponse.getUpdatedNodes(), flakyNode, NodeState.RUNNING);

    secondAmRequest =
        BuilderUtils.newAllocateRequest(
            secondAttempt.getAppAttemptId(), secondAmResponse.getResponseId(), 0F, null, null);
    secondAmResponse = amService.allocate(secondAmRequest).getAMResponse();
    assertSingleUpdatedNode(secondAmResponse.getUpdatedNodes(), flakyNode, NodeState.RUNNING);

    // Once the delta has been delivered, further allocates carry no updated node.
    secondAmRequest =
        BuilderUtils.newAllocateRequest(
            secondAttempt.getAppAttemptId(), secondAmResponse.getResponseId(), 0F, null, null);
    secondAmResponse = amService.allocate(secondAmRequest).getAMResponse();
    Assert.assertEquals(0, secondAmResponse.getUpdatedNodes().size());

    // Open question: how to do the above for a LOST node.
  }

  /**
   * Asserts that {@code updatedNodes} holds exactly one report and that this report carries the
   * node id of {@code expectedNode} together with {@code expectedState}.
   */
  private void assertSingleUpdatedNode(
      List<NodeReport> updatedNodes, MockNM expectedNode, NodeState expectedState) {
    Assert.assertEquals(1, updatedNodes.size());
    NodeReport report = updatedNodes.iterator().next();
    Assert.assertEquals(expectedNode.getNodeId(), report.getNodeId());
    Assert.assertEquals(expectedState, report.getNodeState());
  }