예제 #1
0
 /**
  * Builds the key/value summary describing an application's runtime.
  *
  * <p>The tracking URL and the application-master host are reported as {@code "N/A"} when the
  * application has no current attempt.
  *
  * @param app {@link RMApp} whose summary is to be created, cannot be <code>null</code>.
  * @return a {@link SummaryBuilder} holding the application's identity, state, timing, resource
  *     usage and preemption fields, in the order they are added below.
  */
 public static SummaryBuilder createAppSummary(RMApp app) {
   RMAppAttempt currentAttempt = app.getCurrentAppAttempt();
   boolean hasAttempt = currentAttempt != null;

   // Fall back to "N/A" for attempt-derived fields when there is no current attempt.
   String url = hasAttempt ? currentAttempt.getTrackingUrl() : "N/A";
   String amHost = hasAttempt ? currentAttempt.getHost() : "N/A";

   RMAppMetrics appMetrics = app.getRMAppMetrics();

   return new SummaryBuilder()
       .add("appId", app.getApplicationId())
       .add("name", app.getName())
       .add("user", app.getUser())
       .add("queue", app.getQueue())
       .add("state", app.getState())
       .add("trackingUrl", url)
       .add("appMasterHost", amHost)
       .add("startTime", app.getStartTime())
       .add("finishTime", app.getFinishTime())
       .add("finalStatus", app.getFinalApplicationStatus())
       .add("memorySeconds", appMetrics.getMemorySeconds())
       .add("vcoreSeconds", appMetrics.getVcoreSeconds())
       .add("preemptedAMContainers", appMetrics.getNumAMContainersPreempted())
       .add("preemptedNonAMContainers", appMetrics.getNumNonAMContainersPreempted())
       .add("preemptedResources", appMetrics.getResourcePreempted())
       .add("applicationType", app.getApplicationType());
 }
예제 #2
0
 /**
  * Captures the details of an application attempt.
  *
  * <p>The start time defaults to 0 and the container id, node HTTP address, node id and logs link
  * default to the empty string. They are overwritten only when {@code attempt} is non-null and,
  * for the container-derived fields, when the attempt has a master container.
  *
  * @param attempt the attempt to describe; may be {@code null}, in which case only the defaults
  *     are set.
  */
 public AppAttemptInfo(RMAppAttempt attempt) {
   // Defaults used when the attempt or its master container is unavailable.
   this.logsLink = "";
   this.nodeId = "";
   this.nodeHttpAddress = "";
   this.containerId = "";
   this.startTime = 0;

   if (attempt == null) {
     return;
   }
   this.id = attempt.getAppAttemptId().getAttemptId();
   this.startTime = attempt.getStartTime();

   Container amContainer = attempt.getMasterContainer();
   if (amContainer == null) {
     return;
   }
   this.containerId = amContainer.getId().toString();
   this.nodeHttpAddress = amContainer.getNodeHttpAddress();
   this.nodeId = amContainer.getNodeId().toString();

   // <scheme><node http address>/node/containerlogs/<container id>/<user>
   String containerLogsUrl =
       join(
           HttpConfig.getSchemePrefix(),
           amContainer.getNodeHttpAddress(),
           "/node",
           "/containerlogs/",
           ConverterUtils.toString(amContainer.getId()),
           "/",
           attempt.getSubmissionContext().getUser());
   this.logsLink = containerLogsUrl;
 }
  // Verifies that fetching the AM container is retried when the AM container is
  // not fetchable because DNS is unavailable, which makes container token/NMtoken
  // creation fail.
  @Test(timeout = 20000)
  public void testAMContainerAllocationWhenDNSUnavailable() throws Exception {
    final YarnConfiguration conf = new YarnConfiguration();
    // RM whose secret-manager service is replaced by the test implementation.
    MockRM rm1 =
        new MockRM(conf) {
          @Override
          protected RMSecretManagerService createRMSecretManagerService() {
            return new TestRMSecretManagerService(conf, rmContext);
          }
        };
    try {
      rm1.start();

      // The node is registered under a host name that is not expected to resolve.
      MockNM nm1 = rm1.registerNode("unknownhost:1234", 8000);
      SecurityUtilTestHelper.setTokenServiceUseIp(true);
      RMApp app1 = rm1.submitApp(200);
      RMAppAttempt attempt = app1.getCurrentAppAttempt();
      nm1.nodeHeartbeat(true);

      // fetching am container will fail, keep retrying 5 times.
      // NOTE(review): numRetries is declared outside this method and is not modified here;
      // presumably TestRMSecretManagerService advances it on each token-creation attempt — confirm.
      while (numRetries <= 5) {
        nm1.nodeHeartbeat(true);
        Thread.sleep(1000);
        // While token creation keeps failing the attempt must stay in SCHEDULED.
        Assert.assertEquals(RMAppAttemptState.SCHEDULED, attempt.getAppAttemptState());
        System.out.println("Waiting for am container to be allocated.");
      }

      // With the token service no longer using IPs, the allocation is expected to go through.
      SecurityUtilTestHelper.setTokenServiceUseIp(false);
      rm1.waitForState(attempt.getAppAttemptId(), RMAppAttemptState.ALLOCATED);
      MockRM.launchAndRegisterAM(app1, rm1, nm1);
    } finally {
      // Always stop the RM so a failed assertion does not leak a running ResourceManager
      // into subsequent tests.
      rm1.stop();
    }
  }
예제 #4
0
 /**
  * Builds the key/value summary describing an application's runtime.
  *
  * <p>When the application has no current attempt, the tracking URL and the application-master
  * host are both reported as {@code "N/A"}.
  *
  * @param app {@link RMApp} whose summary is to be created, cannot be <code>null</code>.
  * @return a {@link SummaryBuilder} holding the application's identity, state and timing fields,
  *     in the order they are added below.
  */
 public static SummaryBuilder createAppSummary(RMApp app) {
   RMAppAttempt currentAttempt = app.getCurrentAppAttempt();
   boolean hasAttempt = currentAttempt != null;

   // Attempt-derived fields fall back to "N/A" when no attempt exists yet.
   String url = hasAttempt ? currentAttempt.getTrackingUrl() : "N/A";
   String amHost = hasAttempt ? currentAttempt.getHost() : "N/A";

   return new SummaryBuilder()
       .add("appId", app.getApplicationId())
       .add("name", app.getName())
       .add("user", app.getUser())
       .add("queue", app.getQueue())
       .add("state", app.getState())
       .add("trackingUrl", url)
       .add("appMasterHost", amHost)
       .add("startTime", app.getStartTime())
       .add("finishTime", app.getFinishTime())
       .add("finalStatus", app.getFinalApplicationStatus());
 }
  /**
   * Checks the response ids returned by successive allocate calls: the first call (request id 0)
   * is answered with id 1, the follow-up with id 2, a resend of the follow-up again with id 2,
   * and a repeat of the original id-0 request is answered with the reboot flag set.
   */
  @Test
  public void testARRMResponseId() throws Exception {
    MockNM node = rm.registerNode("h1:1234", 5000);
    RMApp application = rm.submitApp(2000);

    // Heartbeat to trigger scheduling so the AM gets 'launched'
    node.nodeHeartbeat(true);

    RMAppAttempt currentAttempt = application.getCurrentAppAttempt();
    MockAM appMaster = rm.sendAMLaunched(currentAttempt.getAppAttemptId());
    appMaster.registerAppAttempt();

    // First allocate call, carrying response id 0.
    AllocateRequest initialRequest =
        BuilderUtils.newAllocateRequest(currentAttempt.getAppAttemptId(), 0, 0F, null, null);
    AllocateResponse reply = amService.allocate(initialRequest);
    Assert.assertEquals(1, reply.getResponseId());
    Assert.assertFalse(reply.getReboot());

    // Follow-up call, carrying the response id that was just returned.
    AllocateRequest followUpRequest =
        BuilderUtils.newAllocateRequest(
            currentAttempt.getAppAttemptId(), reply.getResponseId(), 0F, null, null);
    reply = amService.allocate(followUpRequest);
    Assert.assertEquals(2, reply.getResponseId());

    // Resending the same request must yield the same response id.
    reply = amService.allocate(followUpRequest);
    Assert.assertEquals(2, reply.getResponseId());

    // Sending the old (id 0) request again must be answered with a reboot.
    AllocateRequest staleRequest =
        BuilderUtils.newAllocateRequest(currentAttempt.getAppAttemptId(), 0, 0F, null, null);
    reply = amService.allocate(staleRequest);
    Assert.assertTrue(reply.getReboot());
  }
예제 #6
0
  /**
   * Registers two node managers (2 * GB and 3 * GB), requests a single 3 * GB container and
   * asserts that nothing is allocated after heartbeating the smaller node, while exactly one
   * container is allocated after heartbeating the larger node.
   */
  @Test(timeout = 30000)
  public void testExcessReservationThanNodeManagerCapacity() throws Exception {
    YarnConfiguration conf = new YarnConfiguration();
    YarnAPIStorageFactory.setConfiguration(conf);
    RMStorageFactory.setConfiguration(conf);
    MockRM rm = new MockRM(conf);
    try {
      rm.start();

      // Register both node managers: nm1 with 2 * GB and nm2 with 3 * GB
      MockNM nm1 = rm.registerNode("127.0.0.1:1234", 2 * GB, 4);
      MockNM nm2 = rm.registerNode("127.0.0.1:2234", 3 * GB, 4);

      nm1.nodeHeartbeat(true);
      nm2.nodeHeartbeat(true);
      // HOP :: Sleep to allow previous events to be processed
      Thread.sleep(
          conf.getInt(
                  YarnConfiguration.HOPS_PENDING_EVENTS_RETRIEVAL_PERIOD,
                  YarnConfiguration.DEFAULT_HOPS_PENDING_EVENTS_RETRIEVAL_PERIOD)
              * 2);
      // Poll (at most 20 times, 100 ms apart) until both nodes show up as active.
      int waitCount = 20;
      int size; // assigned by the loop condition before every use
      while ((size = rm.getRMContext().getActiveRMNodes().size()) != 2 && waitCount-- > 0) {
        LOG.info("Waiting for node managers to register : " + size);
        Thread.sleep(100);
      }
      Assert.assertEquals(2, rm.getRMContext().getActiveRMNodes().size());
      // Submit an application
      RMApp app1 = rm.submitApp(128);

      // kick the scheduling
      nm1.nodeHeartbeat(true);
      RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
      MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId(), nm1);
      am1.registerAppAttempt();

      LOG.info("sending container requests ");
      am1.addRequests(new String[] {"*"}, 3 * GB, 1, 1);
      AllocateResponse alloc1Response = am1.schedule(); // send the request

      // kick the scheduler
      nm1.nodeHeartbeat(true);
      int waitCounter = 20;
      LOG.info("heartbeating nm1");
      // Give the scheduler up to 20 polls (500 ms apart) to hand out a container.
      while (alloc1Response.getAllocatedContainers().size() < 1 && waitCounter-- > 0) {
        LOG.info("Waiting for containers to be created for app 1...");
        Thread.sleep(500);
        alloc1Response = am1.schedule();
      }
      LOG.info("received container : " + alloc1Response.getAllocatedContainers().size());

      // No container should be allocated.
      // Internally it should not have been reserved either.
      Assert.assertEquals(0, alloc1Response.getAllocatedContainers().size());

      LOG.info("heartbeating nm2");
      waitCounter = 20;
      nm2.nodeHeartbeat(true);
      while (alloc1Response.getAllocatedContainers().size() < 1 && waitCounter-- > 0) {
        LOG.info("Waiting for containers to be created for app 1...");
        Thread.sleep(500);
        alloc1Response = am1.schedule();
      }
      LOG.info("received container : " + alloc1Response.getAllocatedContainers().size());
      // After nm2's heartbeat exactly one container is expected.
      Assert.assertEquals(1, alloc1Response.getAllocatedContainers().size());
    } finally {
      // Stop the RM even when an assertion above fails.
      rm.stop();
    }
  }
  /**
   * Exercises the "updated nodes" list returned to application masters by allocate calls.
   *
   * <p>Four nodes are registered and two applications are submitted one after the other. The test
   * asserts that an allocate response carries no updated nodes initially, reports nm4 as
   * UNHEALTHY after {@code syncNodeHeartbeat(nm4, false)}, repeats that report when the same
   * request is resent, reports only nm3 as LOST after {@code syncNodeLost(nm3)}, and reports nm4
   * as RUNNING to both application masters after {@code syncNodeHeartbeat(nm4, true)}.
   */
  @Test
  public void testAMRMUnusableNodes() throws Exception {

    MockNM nm1 = rm.registerNode("h1:1234", 5000);
    MockNM nm2 = rm.registerNode("h2:1234", 5000);
    MockNM nm3 = rm.registerNode("h3:1234", 5000);
    MockNM nm4 = rm.registerNode("h4:1234", 5000);

    RMApp app1 = rm.submitApp(2000);

    // Trigger the scheduling so the AM gets 'launched' on nm1
    nm1.nodeHeartbeat(true);

    RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
    MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId());

    // register AM returns no unusable node
    am1.registerAppAttempt();

    // allocate request returns no updated node
    AllocateRequest allocateRequest1 =
        BuilderUtils.newAllocateRequest(attempt1.getAppAttemptId(), 0, 0F, null, null);
    AMResponse response1 = amService.allocate(allocateRequest1).getAMResponse();
    List<NodeReport> updatedNodes = response1.getUpdatedNodes();
    Assert.assertEquals(0, updatedNodes.size());

    // NOTE(review): presumably sends an unhealthy heartbeat for nm4 and waits for it to be
    // processed — confirm against the helper's definition.
    syncNodeHeartbeat(nm4, false);

    // allocate request returns updated node
    allocateRequest1 =
        BuilderUtils.newAllocateRequest(
            attempt1.getAppAttemptId(), response1.getResponseId(), 0F, null, null);
    response1 = amService.allocate(allocateRequest1).getAMResponse();
    updatedNodes = response1.getUpdatedNodes();
    Assert.assertEquals(1, updatedNodes.size());
    NodeReport nr = updatedNodes.iterator().next();
    Assert.assertEquals(nm4.getNodeId(), nr.getNodeId());
    Assert.assertEquals(NodeState.UNHEALTHY, nr.getNodeState());

    // resending the allocate request returns the same result
    response1 = amService.allocate(allocateRequest1).getAMResponse();
    updatedNodes = response1.getUpdatedNodes();
    Assert.assertEquals(1, updatedNodes.size());
    nr = updatedNodes.iterator().next();
    Assert.assertEquals(nm4.getNodeId(), nr.getNodeId());
    Assert.assertEquals(NodeState.UNHEALTHY, nr.getNodeState());

    // NOTE(review): presumably marks nm3 as lost and waits for the event to be processed —
    // confirm against the helper's definition.
    syncNodeLost(nm3);

    // subsequent allocate request returns delta
    allocateRequest1 =
        BuilderUtils.newAllocateRequest(
            attempt1.getAppAttemptId(), response1.getResponseId(), 0F, null, null);
    response1 = amService.allocate(allocateRequest1).getAMResponse();
    updatedNodes = response1.getUpdatedNodes();
    // Only nm3 is expected here; nm4's earlier report is not repeated.
    Assert.assertEquals(1, updatedNodes.size());
    nr = updatedNodes.iterator().next();
    Assert.assertEquals(nm3.getNodeId(), nr.getNodeId());
    Assert.assertEquals(NodeState.LOST, nr.getNodeState());

    // registering another AM gives it the complete failed list
    RMApp app2 = rm.submitApp(2000);
    // Trigger nm2 heartbeat so that AM gets launched on it
    nm2.nodeHeartbeat(true);
    RMAppAttempt attempt2 = app2.getCurrentAppAttempt();
    MockAM am2 = rm.sendAMLaunched(attempt2.getAppAttemptId());

    // register AM returns all unusable nodes
    am2.registerAppAttempt();

    // allocate request returns no updated node
    AllocateRequest allocateRequest2 =
        BuilderUtils.newAllocateRequest(attempt2.getAppAttemptId(), 0, 0F, null, null);
    AMResponse response2 = amService.allocate(allocateRequest2).getAMResponse();
    updatedNodes = response2.getUpdatedNodes();
    Assert.assertEquals(0, updatedNodes.size());

    // NOTE(review): presumably sends a healthy heartbeat for nm4 — confirm against the helper.
    syncNodeHeartbeat(nm4, true);

    // both AM's should get delta updated nodes
    allocateRequest1 =
        BuilderUtils.newAllocateRequest(
            attempt1.getAppAttemptId(), response1.getResponseId(), 0F, null, null);
    response1 = amService.allocate(allocateRequest1).getAMResponse();
    updatedNodes = response1.getUpdatedNodes();
    Assert.assertEquals(1, updatedNodes.size());
    nr = updatedNodes.iterator().next();
    Assert.assertEquals(nm4.getNodeId(), nr.getNodeId());
    Assert.assertEquals(NodeState.RUNNING, nr.getNodeState());

    allocateRequest2 =
        BuilderUtils.newAllocateRequest(
            attempt2.getAppAttemptId(), response2.getResponseId(), 0F, null, null);
    response2 = amService.allocate(allocateRequest2).getAMResponse();
    updatedNodes = response2.getUpdatedNodes();
    Assert.assertEquals(1, updatedNodes.size());
    nr = updatedNodes.iterator().next();
    Assert.assertEquals(nm4.getNodeId(), nr.getNodeId());
    Assert.assertEquals(NodeState.RUNNING, nr.getNodeState());

    // subsequent allocate calls should return no updated nodes
    allocateRequest2 =
        BuilderUtils.newAllocateRequest(
            attempt2.getAppAttemptId(), response2.getResponseId(), 0F, null, null);
    response2 = amService.allocate(allocateRequest2).getAMResponse();
    updatedNodes = response2.getUpdatedNodes();
    Assert.assertEquals(0, updatedNodes.size());

    // TODO: how to do the above for LOST node

  }