Example #1
0
  /**
   * Builds the attempt view from the given {@link AMInfo}.
   *
   * <p>The node address and node id stay empty strings when the AM info carries no node-manager
   * host; the container id and logs link stay empty strings when it carries no container id.
   *
   * @param amInfo source of the attempt id, start time, node-manager location and container id
   * @param jobId not read by this constructor
   * @param user last path segment of the container-logs link
   */
  public AMAttemptInfo(AMInfo amInfo, String jobId, String user) {
    final String host = amInfo.getNodeManagerHost();
    final int httpPort = amInfo.getNodeManagerHttpPort();
    final int port = amInfo.getNodeManagerPort();
    if (host == null) {
      this.nodeHttpAddress = "";
      this.nodeId = "";
    } else {
      this.nodeHttpAddress = host + ":" + httpPort;
      this.nodeId = BuilderUtils.newNodeId(host, port).toString();
    }

    this.id = amInfo.getAppAttemptId().getAttemptId();
    this.startTime = amInfo.getStartTime();

    final ContainerId amContainer = amInfo.getContainerId();
    if (amContainer == null) {
      this.containerId = "";
      this.logsLink = "";
    } else {
      this.containerId = amContainer.toString();
      // The link is rooted at the node HTTP address computed above (possibly empty).
      final String base = HttpConfig.getSchemePrefix() + nodeHttpAddress;
      this.logsLink = join(base, ujoin("node", "containerlogs", this.containerId, user));
    }
  }
  /**
   * Decides whether the logs of a container are uploaded under the configured retention policy.
   *
   * <p>A container whose id is 1 is treated as the application master's container.
   *
   * @param containerId container whose logs are being considered
   * @param wasContainerSuccessful whether the container finished successfully
   * @return {@code true} if the logs should be uploaded; {@code false} otherwise, including for
   *     any policy not handled here
   */
  private boolean shouldUploadLogs(ContainerId containerId, boolean wasContainerSuccessful) {
    // Every container is kept.
    if (this.retentionPolicy.equals(ContainerLogsRetentionPolicy.ALL_CONTAINERS)) {
      return true;
    }

    // Only the AM container is kept.
    if (this.retentionPolicy.equals(ContainerLogsRetentionPolicy.APPLICATION_MASTER_ONLY)) {
      return containerId.getId() == 1;
    }

    // The AM container and any container that did not succeed are kept.
    if (this.retentionPolicy.equals(ContainerLogsRetentionPolicy.AM_AND_FAILED_CONTAINERS_ONLY)) {
      return containerId.getId() == 1 || !wasContainerSuccessful;
    }

    return false;
  }
  /**
   * Sends one allocate call to the resource manager, adding the given asks and releases.
   *
   * <p>The return type is {@code AllocateResponse}, which used to hold a reference to
   * {@code AMResponse}; {@code AMResponse} seems to have disappeared in CDH 4.6.
   *
   * @param requestedContainers resource requests added to the call as asks
   * @param releasedContainers ids of the containers added to the call as releases
   * @return the response returned by the resource manager for this call
   * @throws YarnRemoteException declared for failures of the underlying allocate call
   * @throws IllegalStateException if the resource-manager handle has not been set up yet
   */
  public AllocateResponse allocateRequest(
      List<ResourceRequest> requestedContainers, List<ContainerId> releasedContainers)
      throws YarnRemoteException {

    if (amResourceManager == null) {
      throw new IllegalStateException(
          "Cannot send allocation request before connecting to the resource manager!");
    }

    LOG.info(
        "Sending allocation request"
            + ", requestedSize="
            + requestedContainers.size()
            + ", releasedSize="
            + releasedContainers.size());

    for (ResourceRequest req : requestedContainers) {
      LOG.info(
          "Requesting container, host="
              + req.getHostName()
              + ", amount="
              + req.getNumContainers()
              + ", memory="
              + req.getCapability().getMemory()
              + ", priority="
              + req.getPriority().getPriority());
    }

    for (ContainerId rel : releasedContainers) {
      LOG.info("Releasing container: " + rel.getId());
    }

    AllocateRequest request = Records.newRecord(AllocateRequest.class);
    request.setResponseId(rmRequestId.incrementAndGet());
    request.setApplicationAttemptId(appAttemptId);
    request.addAllAsks(requestedContainers);
    request.addAllReleases(releasedContainers);

    AllocateResponse response = amResourceManager.allocate(request);

    // Only walk the response and build the message when debug output is actually enabled.
    if (LOG.isDebugEnabled()) {
      LOG.debug(
          "Got an allocation response"
              + ", responseId="
              + response.getResponseId()
              + ", numClusterNodes="
              + response.getNumClusterNodes()
              + ", headroom="
              + response.getAvailableResources().getMemory()
              + ", allocatedSize="
              + response.getAllocatedContainers().size()
              + ", updatedNodes="
              + response.getUpdatedNodes().size()
              + ", reboot="
              + response.getReboot()
              + ", completedSize="
              + response.getCompletedContainersStatuses().size());
    }

    return response;
  }
  /**
   * Entry point of the DAG application master process.
   *
   * <p>Reads the container id, node-manager host and ports, and the application submit time from
   * the process environment, builds the {@code DAGAppMaster}, registers its shutdown hook and
   * starts it. Any {@link Throwable} raised along the way is logged as fatal and the JVM exits
   * with status 1.
   *
   * @param args command-line arguments; the session-mode flag is the only registered option
   */
  public static void main(String[] args) {
    try {
      Thread.setDefaultUncaughtExceptionHandler(new YarnUncaughtExceptionHandler());
      String containerIdStr = System.getenv(Environment.CONTAINER_ID.name());
      String nodeHostString = System.getenv(Environment.NM_HOST.name());
      String nodePortString = System.getenv(Environment.NM_PORT.name());
      String nodeHttpPortString = System.getenv(Environment.NM_HTTP_PORT.name());
      String appSubmitTimeStr = System.getenv(ApplicationConstants.APP_SUBMIT_TIME_ENV);

      // Check every required variable up front so that a missing one is reported by name
      // rather than surfacing later as an unnamed conversion failure.
      // NOTE(review): validateInputParam is defined outside this block; presumably it rejects
      // null values - confirm.
      validateInputParam(containerIdStr, Environment.CONTAINER_ID.name());
      validateInputParam(nodeHostString, Environment.NM_HOST.name());
      validateInputParam(nodePortString, Environment.NM_PORT.name());
      validateInputParam(nodeHttpPortString, Environment.NM_HTTP_PORT.name());
      validateInputParam(appSubmitTimeStr, ApplicationConstants.APP_SUBMIT_TIME_ENV);

      ContainerId containerId = ConverterUtils.toContainerId(containerIdStr);
      ApplicationAttemptId applicationAttemptId = containerId.getApplicationAttemptId();

      long appSubmitTime = Long.parseLong(appSubmitTimeStr);

      Configuration conf = new Configuration(new YarnConfiguration());
      TezUtils.addUserSpecifiedTezConfiguration(conf);

      String jobUserName = System.getenv(ApplicationConstants.Environment.USER.name());

      // Do not automatically close FileSystem objects so that in case of
      // SIGTERM I have a chance to write out the job history. I'll be closing
      // the objects myself.
      conf.setBoolean("fs.automatic.close", false);

      // Command line options
      Options opts = new Options();
      opts.addOption(
          TezConstants.TEZ_SESSION_MODE_CLI_OPTION,
          false,
          "Run Tez Application Master in Session mode");

      CommandLine cliParser = new GnuParser().parse(opts, args);

      DAGAppMaster appMaster =
          new DAGAppMaster(
              applicationAttemptId,
              containerId,
              nodeHostString,
              Integer.parseInt(nodePortString),
              Integer.parseInt(nodeHttpPortString),
              appSubmitTime,
              cliParser.hasOption(TezConstants.TEZ_SESSION_MODE_CLI_OPTION));
      ShutdownHookManager.get()
          .addShutdownHook(new DAGAppMasterShutdownHook(appMaster), SHUTDOWN_HOOK_PRIORITY);

      initAndStartAppMaster(appMaster, conf, jobUserName);

    } catch (Throwable t) {
      LOG.fatal("Error starting DAGAppMaster", t);
      System.exit(1);
    }
  }
Example #5
0
  // Verifies that a container token is generated when the AM acquires the
  // container, and not earlier when the container is merely allocated.
  @Test
  public void testContainerTokenGeneratedOnPullRequest() throws Exception {
    YarnConfiguration yarnConf = new YarnConfiguration();
    YarnAPIStorageFactory.setConfiguration(yarnConf);
    RMStorageFactory.setConfiguration(yarnConf);
    yarnConf.setClass(
        YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, ResourceScheduler.class);
    MockRM rm = new MockRM(yarnConf);
    try {
      rm.start();

      MockNM nm = rm.registerNode("127.0.0.1:1234", 8000);
      RMApp app = rm.submitApp(200);
      MockAM am = MockRM.launchAndRegisterAM(app, rm, nm);

      // Ask for one container and wait until the scheduler has allocated it.
      am.allocate("127.0.0.1", 1024, 1, new ArrayList<ContainerId>());
      ContainerId secondId = ContainerId.newInstance(am.getApplicationAttemptId(), 2);
      rm.waitForState(nm, secondId, RMContainerState.ALLOCATED);

      // Allocated but not yet pulled by the AM: there must be no token.
      RMContainer allocated = rm.getResourceScheduler().getRMContainer(secondId);
      Assert.assertEquals(secondId, allocated.getContainerId());
      Assert.assertNull(allocated.getContainer().getContainerToken());

      // Pull the container with an empty allocate call.
      List<Container> acquired =
          am.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>())
              .getAllocatedContainers();
      Container first = acquired.get(0);
      Assert.assertEquals(secondId, first.getId());
      // Now the token must be present.
      Assert.assertNotNull(first.getContainerToken());
    } finally {
      rm.stop();
    }
  }
Example #6
0
  /**
   * Checks that an allocated container on a node registered as "unknownhost" is not handed to
   * the AM while the token service is switched to use IPs, and is handed over once that switch
   * is turned off again.
   */
  @Test
  public void testNormalContainerAllocationWhenDNSUnavailable() throws Exception {
    YarnConfiguration conf = new YarnConfiguration();
    YarnAPIStorageFactory.setConfiguration(conf);
    RMStorageFactory.setConfiguration(conf);
    MockRM rm1 = new MockRM(conf);
    try {
      rm1.start();
      MockNM nm1 = rm1.registerNode("unknownhost:1234", 8000);
      RMApp app1 = rm1.submitApp(200);
      MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);

      // request a container.
      am1.allocate("127.0.0.1", 1024, 1, new ArrayList<ContainerId>());
      ContainerId containerId2 = ContainerId.newInstance(am1.getApplicationAttemptId(), 2);
      rm1.waitForState(nm1, containerId2, RMContainerState.ALLOCATED);

      // acquire the container.
      List<Container> containers;
      SecurityUtilTestHelper.setTokenServiceUseIp(true);
      try {
        containers =
            am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>())
                .getAllocatedContainers();
        // not able to fetch the container;
        Assert.assertEquals(0, containers.size());
      } finally {
        // The switch is static; turn it off even when the check above fails so the setting
        // does not leak into whatever runs next in the same JVM.
        SecurityUtilTestHelper.setTokenServiceUseIp(false);
      }

      containers =
          am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>())
              .getAllocatedContainers();
      // should be able to fetch the container;
      Assert.assertEquals(1, containers.size());
    } finally {
      rm1.stop();
    }
  }
Example #7
0
 /**
  * Creates a {@link ContainerId} for the given application attempt and numeric id.
  *
  * @param recordFactory not read by this method
  * @param appId not read by this method
  * @param appAttemptId attempt the container belongs to
  * @param containerId numeric id of the container
  * @return the id produced by {@code ContainerId.newInstance(appAttemptId, containerId)}
  */
 public static ContainerId newContainerId(
     RecordFactory recordFactory,
     ApplicationId appId,
     ApplicationAttemptId appAttemptId,
     int containerId) {
   final ContainerId created = ContainerId.newInstance(appAttemptId, containerId);
   return created;
 }
 /**
  * A helper api for creating an audit log for a failure event.
  *
  * <p>Entries are added in a fixed order: user, remote IP, operation, target, result
  * ({@code AuditConstants.FAILURE}), description, permissions, then the application id,
  * attempt id and container id (each only when non-null), and finally the caller context.
  *
  * @return the text accumulated for the audit entry
  */
 static String createFailureLog(
     String user,
     String operation,
     String perm,
     String target,
     String description,
     ApplicationId appId,
     ApplicationAttemptId attemptId,
     ContainerId containerId,
     CallerContext callerContext) {
   final StringBuilder entry = new StringBuilder();

   // Mandatory part of the entry.
   start(Keys.USER, user, entry);
   addRemoteIP(entry);
   add(Keys.OPERATION, operation, entry);
   add(Keys.TARGET, target, entry);
   add(Keys.RESULT, AuditConstants.FAILURE, entry);
   add(Keys.DESCRIPTION, description, entry);
   add(Keys.PERMISSIONS, perm, entry);

   // Identifiers are written only when the caller supplied them.
   if (appId != null) {
     add(Keys.APPID, appId.toString(), entry);
   }
   if (attemptId != null) {
     add(Keys.APPATTEMPTID, attemptId.toString(), entry);
   }
   if (containerId != null) {
     add(Keys.CONTAINERID, containerId.toString(), entry);
   }

   appendCallerContext(entry, callerContext);
   return entry.toString();
 }
 /**
  * A helper api for creating an audit log for a successful event.
  *
  * <p>Entries are added in a fixed order: user, remote IP, operation, target, result
  * ({@code AuditConstants.SUCCESS}), then the application id, attempt id and container id
  * (each only when non-null), and finally the caller context.
  *
  * @return the text accumulated for the audit entry
  */
 static String createSuccessLog(
     String user,
     String operation,
     String target,
     ApplicationId appId,
     ApplicationAttemptId attemptId,
     ContainerId containerId,
     CallerContext callerContext) {
   final StringBuilder entry = new StringBuilder();

   // Mandatory part of the entry.
   start(Keys.USER, user, entry);
   addRemoteIP(entry);
   add(Keys.OPERATION, operation, entry);
   add(Keys.TARGET, target, entry);
   add(Keys.RESULT, AuditConstants.SUCCESS, entry);

   // Identifiers are written only when the caller supplied them.
   if (appId != null) {
     add(Keys.APPID, appId.toString(), entry);
   }
   if (attemptId != null) {
     add(Keys.APPATTEMPTID, attemptId.toString(), entry);
   }
   if (containerId != null) {
     add(Keys.CONTAINERID, containerId.toString(), entry);
   }

   appendCallerContext(entry, callerContext);
   return entry.toString();
 }
  /**
   * Checks that the mock client returns container reports for an application attempt and that
   * the first two reports carry container ids 1 and 2 of that attempt.
   */
  @Test(timeout = 10000)
  public void testGetContainers() throws YarnException, IOException {
    Configuration conf = new Configuration();
    final YarnClient client = new MockYarnClient();
    client.init(conf);
    client.start();
    try {
      ApplicationId applicationId = ApplicationId.newInstance(1234, 5);
      ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(applicationId, 1);
      List<ContainerReport> reports = client.getContainers(appAttemptId);
      Assert.assertNotNull(reports);
      // assertEquals takes (expected, actual); keeping that order makes failure messages
      // read the right way round.
      Assert.assertEquals(
          ContainerId.newContainerId(appAttemptId, 1), reports.get(0).getContainerId());
      Assert.assertEquals(
          ContainerId.newContainerId(appAttemptId, 2), reports.get(1).getContainerId());
    } finally {
      // Stop the client even when an assertion above fails.
      client.stop();
    }
  }
 /**
  * Builds a mocked {@link RMContainer} for the given attempt, container number and resource.
  *
  * <p>The mock answers {@code getContainerId()} and {@code getContainer()}; the nested mocked
  * {@link Container} answers {@code getResource()} and {@code getNodeId()} (the latter with this
  * object's {@code nodeId} field).
  */
 private RMContainer createRMContainer(ApplicationAttemptId appAttId, int id, Resource resource) {
   // Inner container: reports the requested resource and this object's node id.
   final Container innerMock = mock(Container.class);
   when(innerMock.getResource()).thenReturn(resource);
   when(innerMock.getNodeId()).thenReturn(nodeId);

   // Outer RM container: exposes the inner container and a freshly created id.
   final ContainerId mockedId = ContainerId.newInstance(appAttId, id);
   final RMContainer outerMock = mock(RMContainer.class);
   when(outerMock.getContainer()).thenReturn(innerMock);
   when(outerMock.getContainerId()).thenReturn(mockedId);
   return outerMock;
 }
Example #12
0
  /**
   * Performs the bookkeeping for a container that has finished.
   *
   * <p>Depending on the current container state this updates the completed/failed/killed
   * metrics and writes a matching audit record; states not listed in the switch get neither.
   * Afterwards, for every state, the container's resource is released in the metrics and three
   * events are handed to the dispatcher's event handler: one for the application, one for the
   * resource monitor and one for the log handler.
   */
  @SuppressWarnings({"fallthrough", "unchecked"})
  private void finished() {
    ApplicationId applicationId = containerId.getApplicationAttemptId().getApplicationId();
    switch (getContainerState()) {
      case EXITED_WITH_SUCCESS:
        // Successful exit: counted as completed and audited as a success.
        metrics.endRunningContainer();
        metrics.completedContainer();
        NMAuditLogger.logSuccess(
            user,
            AuditConstants.FINISH_SUCCESS_CONTAINER,
            "ContainerImpl",
            applicationId,
            containerId);
        break;
      case EXITED_WITH_FAILURE:
        // The running-container metric is only ended if the container was launched.
        if (wasLaunched) {
          metrics.endRunningContainer();
        }
        // fall through
      case LOCALIZATION_FAILED:
        // Both failure states are counted as failed and audited as a failure.
        metrics.failedContainer();
        NMAuditLogger.logFailure(
            user,
            AuditConstants.FINISH_FAILED_CONTAINER,
            "ContainerImpl",
            "Container failed with state: " + getContainerState(),
            applicationId,
            containerId);
        break;
      case CONTAINER_CLEANEDUP_AFTER_KILL:
        // The running-container metric is only ended if the container was launched.
        if (wasLaunched) {
          metrics.endRunningContainer();
        }
        // fall through
      case NEW:
        // Killed containers are counted as killed; the audit record is a success record.
        metrics.killedContainer();
        NMAuditLogger.logSuccess(
            user,
            AuditConstants.FINISH_KILLED_CONTAINER,
            "ContainerImpl",
            applicationId,
            containerId);
    }

    // Runs for every state, including those without a case above.
    metrics.releaseContainer(this.resource);

    // Inform the application
    @SuppressWarnings("rawtypes")
    EventHandler eventHandler = dispatcher.getEventHandler();
    eventHandler.handle(new ApplicationContainerFinishedEvent(containerId));
    // Remove the container from the resource-monitor
    eventHandler.handle(new ContainerStopMonitoringEvent(containerId));
    // Tell the logService too
    eventHandler.handle(new LogHandlerContainerFinishedEvent(containerId, exitCode));
  }
  /**
   * Checks that the mock client returns a report for a container id and that the reported id
   * matches container 1 of the first expected application's current attempt.
   */
  @Test(timeout = 10000)
  public void testGetContainerReport() throws YarnException, IOException {
    Configuration conf = new Configuration();
    final YarnClient client = new MockYarnClient();
    client.init(conf);
    client.start();
    try {
      List<ApplicationReport> expectedReports = ((MockYarnClient) client).getReports();

      ApplicationId applicationId = ApplicationId.newInstance(1234, 5);
      ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(applicationId, 1);
      ContainerId containerId = ContainerId.newContainerId(appAttemptId, 1);
      ContainerReport report = client.getContainerReport(containerId);
      Assert.assertNotNull(report);
      // assertEquals takes (expected, actual); keeping that order makes failure messages
      // read the right way round.
      Assert.assertEquals(
          (ContainerId.newContainerId(expectedReports.get(0).getCurrentApplicationAttemptId(), 1))
              .toString(),
          report.getContainerId().toString());
    } finally {
      // Stop the client even when an assertion above fails.
      client.stop();
    }
  }
Example #14
0
  /**
   * Entry point of the DragonAppMaster process.
   *
   * <p>Reads the required parameters from the process environment (container id, node-manager
   * host, port and HTTP port, application submit time), validates each of them, builds the
   * {@code DragonAppMaster}, registers its shutdown hook, prepares the configuration and starts
   * the master. Any {@link Throwable} raised along the way is logged as fatal and the JVM exits
   * with status 1.
   *
   * @param args command-line arguments; not read by this method
   */
  public static void main(String[] args) {
    try {
      String containerIdStr = System.getenv(ApplicationConstants.AM_CONTAINER_ID_ENV);
      String nodeHostString = System.getenv(ApplicationConstants.NM_HOST_ENV);
      String nodePortString = System.getenv(ApplicationConstants.NM_PORT_ENV);
      String nodeHttpPortString = System.getenv(ApplicationConstants.NM_HTTP_PORT_ENV);
      String appSubmitTimeStr = System.getenv(ApplicationConstants.APP_SUBMIT_TIME_ENV);

      validateInputParam(containerIdStr, ApplicationConstants.AM_CONTAINER_ID_ENV);
      validateInputParam(nodeHostString, ApplicationConstants.NM_HOST_ENV);
      validateInputParam(nodePortString, ApplicationConstants.NM_PORT_ENV);
      validateInputParam(nodeHttpPortString, ApplicationConstants.NM_HTTP_PORT_ENV);
      validateInputParam(appSubmitTimeStr, ApplicationConstants.APP_SUBMIT_TIME_ENV);
      ContainerId containerId = ConverterUtils.toContainerId(containerIdStr);
      ApplicationAttemptId applicationAttemptId = containerId.getApplicationAttemptId();
      long appSubmitTime = Long.parseLong(appSubmitTimeStr);

      DragonAppMaster appMaster =
          new DragonAppMaster(
              applicationAttemptId,
              containerId,
              nodeHostString,
              Integer.parseInt(nodePortString),
              Integer.parseInt(nodeHttpPortString),
              appSubmitTime);
      Runtime.getRuntime().addShutdownHook(new CompositeServiceShutdownHook(appMaster));
      YarnConfiguration conf = new YarnConfiguration(new DragonConfiguration());
      conf.addResource(new Path(DragonJobConfig.JOB_CONF_FILE));
      String jobUserName = System.getenv(ApplicationConstants.Environment.USER.name());
      conf.set(DragonJobConfig.USER_NAME, jobUserName);

      // Do not automatically close FileSystem objects so that in case of
      // SIGTERM I have a chance to write out the job history. I'll be closing
      // the objects myself.
      conf.setBoolean("fs.automatic.close", false);
      initAndStartAppMaster(appMaster, conf, jobUserName);
    } catch (Throwable t) {
      // Name the master that is actually being started here.
      LOG.fatal("Error starting DragonAppMaster", t);
      System.exit(1);
    }
  }
 /**
  * Creates fresh identifiers before each test: an application id and attempt id, a chain of
  * DAG, vertex, task and task-attempt ids (each built from the previous one with a random
  * number), a DAG plan named "DAGPlanMock", a container id and a node id.
  */
 @Before
 public void setup() {
   // Upper-case L suffix: a lower-case 'l' is easily misread as the digit 1.
   applicationId = ApplicationId.newInstance(9999L, 1);
   applicationAttemptId = ApplicationAttemptId.newInstance(applicationId, 1);
   tezDAGID = TezDAGID.getInstance(applicationId, random.nextInt());
   tezVertexID = TezVertexID.getInstance(tezDAGID, random.nextInt());
   tezTaskID = TezTaskID.getInstance(tezVertexID, random.nextInt());
   tezTaskAttemptID = TezTaskAttemptID.getInstance(tezTaskID, random.nextInt());
   dagPlan = DAGPlan.newBuilder().setName("DAGPlanMock").build();
   containerId = ContainerId.newInstance(applicationAttemptId, 111);
   nodeId = NodeId.newInstance("node", 13435);
 }
  /**
   * Updates the per-queue counters for every container reported as completed in a node update.
   *
   * <p>For a container that exited with {@code SUCCESS}, the released memory and vcores are
   * taken from the matching live container of its application. For one that exited with
   * {@code ABORTED}, they are taken from (and removed from) {@code preemptionContainerMap}.
   * Any other exit status contributes zero. Completed containers whose application is no
   * longer known to the scheduler are skipped.
   *
   * @param eventWrapper node-update event whose node carries the container updates
   */
  private void updateQueueWithNodeUpdate(NodeUpdateSchedulerEventWrapper eventWrapper) {
    RMNodeWrapper node = (RMNodeWrapper) eventWrapper.getRMNode();
    List<UpdatedContainerInfo> containerList = node.getContainerUpdates();
    for (UpdatedContainerInfo info : containerList) {
      for (ContainerStatus status : info.getCompletedContainers()) {
        ContainerId containerId = status.getContainerId();
        SchedulerAppReport app =
            scheduler.getSchedulerAppInfo(containerId.getApplicationAttemptId());

        if (app == null) {
          // this happens for the AM container
          // The app have already removed when the NM sends the release
          // information.
          continue;
        }

        String queue = appQueueMap.get(containerId.getApplicationAttemptId().getApplicationId());
        int releasedMemory = 0;
        int releasedVCores = 0;
        if (status.getExitStatus() == ContainerExitStatus.SUCCESS) {
          for (RMContainer rmc : app.getLiveContainers()) {
            // Compare ids by value: the id held by the live container is not guaranteed to be
            // the same object as the one carried by the status report.
            if (containerId.equals(rmc.getContainerId())) {
              releasedMemory += rmc.getContainer().getResource().getMemory();
              releasedVCores += rmc.getContainer().getResource().getVirtualCores();
              break;
            }
          }
        } else if (status.getExitStatus() == ContainerExitStatus.ABORTED) {
          // remove() hands back the previous mapping, so a single call both looks up and
          // clears the entry.
          Resource preResource = preemptionContainerMap.remove(containerId);
          if (preResource != null) {
            releasedMemory += preResource.getMemory();
            releasedVCores += preResource.getVirtualCores();
          }
        }
        // update queue counters
        updateQueueMetrics(queue, releasedMemory, releasedVCores);
      }
    }
  }
Example #17
0
    /**
     * Callback for a started container: submits a CONTAINER_STARTED event tagged with the
     * container id when an event submitter is present, then logs the start.
     */
    @Override
    public void onContainerStarted(
        ContainerId containerId, Map<String, ByteBuffer> allServiceResponse) {
      final boolean submitterAvailable = eventSubmitter.isPresent();
      if (submitterAvailable) {
        eventSubmitter.get().submit(
            GobblinYarnEventConstants.EventNames.CONTAINER_STARTED,
            GobblinYarnMetricTagNames.CONTAINER_ID,
            containerId.toString());
      }

      final String message = String.format("Container %s has been started", containerId);
      LOGGER.info(message);
    }
Example #18
0
 /**
  * Converts this event into its {@link TaskAttemptStartedProto} form.
  *
  * <p>The creation-causal task attempt is only written when it is non-null.
  *
  * @return the built protobuf message
  */
 public TaskAttemptStartedProto toProto() {
   final TaskAttemptStartedProto.Builder proto = TaskAttemptStartedProto.newBuilder();

   // Fields that are always written.
   proto.setTaskAttemptId(taskAttemptId.toString());
   proto.setStartTime(launchTime);
   proto.setContainerId(containerId.toString());
   proto.setNodeId(nodeId.toString());
   proto.setCreationTime(creationTime);
   proto.setAllocationTime(allocationTime);

   // Optional field.
   if (creationCausalTA != null) {
     proto.setCreationCausalTA(creationCausalTA.toString());
   }

   return proto.build();
 }
Example #19
0
    /**
     * Callback for a failed container stop: submits a CONTAINER_STOP_ERROR event carrying the
     * container id and the stack trace when an event submitter is present, then logs the error.
     *
     * @param containerId container that could not be stopped
     * @param t the error reported for the stop attempt
     */
    @Override
    public void onStopContainerError(ContainerId containerId, Throwable t) {
      if (eventSubmitter.isPresent()) {
        eventSubmitter
            .get()
            .submit(
                GobblinYarnEventConstants.EventNames.CONTAINER_STOP_ERROR,
                GobblinYarnMetricTagNames.CONTAINER_ID,
                containerId.toString(),
                GobblinYarnEventConstants.EventMetadata.ERROR_EXCEPTION,
                Throwables.getStackTraceAsString(t));
      }

      // Hand the throwable to the logger as well, so the log keeps the stack trace and not
      // only the throwable's toString().
      LOGGER.error(String.format("Failed to stop container %s due to error %s", containerId, t), t);
    }
 /**
  * Creates the test application master with fixed values passed to the superclass: a container
  * id numbered 1 within the given attempt, the host string "testhost", the numbers 2222 and
  * 3333, and the current time.
  *
  * @param applicationAttemptId attempt this app master runs as
  * @param allocator container allocator stored on this instance
  * @param maxAppAttempts forwarded unchanged to the superclass constructor
  */
 public TestMRApp(
     ApplicationAttemptId applicationAttemptId,
     ContainerAllocator allocator,
     int maxAppAttempts) {
   super(
       applicationAttemptId,
       ContainerId.newInstance(applicationAttemptId, 1),
       "testhost",
       2222,
       3333,
       System.currentTimeMillis(),
       maxAppAttempts);
   this.allocator = allocator;
   // The instance starts out flagged as successfully unregistered.
   this.successfullyUnregistered.set(true);
 }
  /**
   * Builds the "cgroups=" option string for a container.
   *
   * <p>When CPU weighting is enabled the option lists the container's CPU cgroup path followed
   * by "/tasks"; otherwise nothing follows the "cgroups=" prefix. A trailing comma is never
   * part of the result.
   *
   * @param containerId container whose string form names the cgroup
   * @return the assembled option string
   */
  public String getResourcesOption(ContainerId containerId) {
    final String cgroupName = containerId.toString();
    final StringBuilder option = new StringBuilder("cgroups=");

    if (isCpuWeightEnabled()) {
      option.append(pathForCgroup(CONTROLLER_CPU, cgroupName) + "/tasks").append(",");
    }

    // Drop the separator left behind by the last entry, if any.
    final int lastIndex = option.length() - 1;
    if (option.charAt(lastIndex) == ',') {
      option.setLength(lastIndex);
    }

    return option.toString();
  }
Example #22
0
    /**
     * Callback for a stopped container: submits a CONTAINER_STOPPED event tagged with the
     * container id when an event submitter is present, logs the stop, removes the container
     * from {@code containerMap} and, once that map is empty, notifies a waiter on
     * {@code allContainersStopped}.
     */
    @Override
    public void onContainerStopped(ContainerId containerId) {
      final boolean submitterAvailable = eventSubmitter.isPresent();
      if (submitterAvailable) {
        eventSubmitter.get().submit(
            GobblinYarnEventConstants.EventNames.CONTAINER_STOPPED,
            GobblinYarnMetricTagNames.CONTAINER_ID,
            containerId.toString());
      }

      final String message = String.format("Container %s has been stopped", containerId);
      LOGGER.info(message);

      containerMap.remove(containerId);
      if (!containerMap.isEmpty()) {
        // Other containers are still tracked; nothing to signal yet.
        return;
      }
      synchronized (allContainersStopped) {
        allContainersStopped.notify();
      }
    }
Example #23
0
    /**
     * Builds the fixture around an {@code AMContainerImpl}: a real {@code Container} record,
     * mocked collaborators, and a mocked {@code AppContext} that hands those collaborators out.
     *
     * @param shouldProfile not read anywhere in this constructor body
     * @param profileString not read anywhere in this constructor body
     */
    public WrappedContainer(boolean shouldProfile, String profileString) {
      // Identifiers and the container record under test.
      applicationID = ApplicationId.newInstance(rmIdentifier, 1);
      appAttemptID = ApplicationAttemptId.newInstance(applicationID, 1);
      containerID = ContainerId.newInstance(appAttemptID, 1);
      nodeID = NodeId.newInstance("host", 12500);
      nodeHttpAddress = "host:12501";
      resource = Resource.newInstance(1024, 1);
      priority = Priority.newInstance(1);
      container =
          Container.newInstance(containerID, nodeID, nodeHttpAddress, resource, priority, null);

      chh = mock(ContainerHeartbeatHandler.class);

      // The mocked listener reports a localhost address with port 0.
      InetSocketAddress addr = new InetSocketAddress("localhost", 0);
      tal = mock(TaskAttemptListener.class);
      doReturn(addr).when(tal).getAddress();

      // DAG / vertex / task / attempt ids, each numbered 1 under its parent.
      dagID = TezDAGID.getInstance(applicationID, 1);
      vertexID = TezVertexID.getInstance(dagID, 1);
      taskID = TezTaskID.getInstance(vertexID, 1);
      taskAttemptID = TezTaskAttemptID.getInstance(taskID, 1);

      eventHandler = mock(EventHandler.class);
      historyEventHandler = mock(HistoryEventHandler.class);

      // Mocked application context returning the objects created above.
      Configuration conf = new Configuration(false);
      appContext = mock(AppContext.class);
      doReturn(new HashMap<ApplicationAccessType, String>()).when(appContext).getApplicationACLs();
      doReturn(eventHandler).when(appContext).getEventHandler();
      doReturn(appAttemptID).when(appContext).getApplicationAttemptId();
      doReturn(applicationID).when(appContext).getApplicationID();
      doReturn(new SystemClock()).when(appContext).getClock();
      doReturn(historyEventHandler).when(appContext).getHistoryHandler();
      doReturn(conf).when(appContext).getAMConf();
      // NOTE(review): mockDAGID() is defined outside this block; presumably it stubs the
      // current DAG id on appContext - confirm.
      mockDAGID();

      taskSpec = mock(TaskSpec.class);
      doReturn(taskAttemptID).when(taskSpec).getTaskAttemptID();

      // The object under test, wired with the collaborators prepared above.
      amContainer =
          new AMContainerImpl(container, chh, tal, new ContainerContextMatcher(), appContext);
    }
  /**
   * Creates the CPU cgroup for a container and writes its limits. Does nothing when CPU
   * weighting is disabled.
   *
   * <p>The "shares" value is the default weight times the container's vcores, with a floor of
   * 10. In strict resource usage mode, and only when the container's vcores differ from the
   * node's configured vcores, the period and quota values are written as well.
   *
   * <p>An earlier comment described this method as package private for testing purposes; it
   * is declared public.
   *
   * @param containerId container whose string form names the cgroup
   * @param containerResource resource whose virtual cores drive the limits
   * @throws IOException declared for failures while creating or updating the cgroup
   */
  public void setupLimits(ContainerId containerId, Resource containerResource) throws IOException {
    final String cgroupName = containerId.toString();

    if (!isCpuWeightEnabled()) {
      return;
    }

    final int vcores = containerResource.getVirtualCores();
    createCgroup(CONTROLLER_CPU, cgroupName);

    // absolute minimum of 10 shares for zero CPU containers
    final int shares = Math.max(CPU_DEFAULT_WEIGHT * vcores, 10);
    updateCgroup(CONTROLLER_CPU, cgroupName, "shares", String.valueOf(shares));

    if (!strictResourceUsageMode) {
      return;
    }

    final int nodeVCores =
        conf.getInt(YarnConfiguration.NM_VCORES, YarnConfiguration.DEFAULT_NM_VCORES);
    if (nodeVCores == vcores) {
      return;
    }

    float containerCPU = (vcores * yarnProcessors) / (float) nodeVCores;
    int[] periodAndQuota = getOverallLimits(containerCPU);
    updateCgroup(CONTROLLER_CPU, cgroupName, CPU_PERIOD_US, String.valueOf(periodAndQuota[0]));
    updateCgroup(CONTROLLER_CPU, cgroupName, CPU_QUOTA_US, String.valueOf(periodAndQuota[1]));
  }
 /**
  * Creates a mocked {@link ContainerId} whose {@code toString()} returns "CONTAINER_" followed
  * by the next id from {@code getNextId()}.
  */
 private ContainerId getNextContainerId() {
   final ContainerId mockedId = mock(ContainerId.class);
   final String label = "CONTAINER_" + getNextId();
   when(mockedId.toString()).thenReturn(label);
   return mockedId;
 }
 /**
  * Deletes the container's CPU cgroup; does nothing when CPU weighting is disabled.
  *
  * @param containerId container whose string form names the cgroup
  */
 private void clearLimits(ContainerId containerId) {
   if (!isCpuWeightEnabled()) {
     return;
   }
   deleteCgroup(pathForCgroup(CONTROLLER_CPU, containerId.toString()));
 }
Example #27
0
  /**
   * Starts a single container through a {@code DummyContainerManager}, waits for it to reach
   * {@code RUNNING}, stops it, and waits for it to reach {@code COMPLETE}.
   *
   * <p>The node status updater used here is a subclass whose RM client is a
   * {@code LocalRMInterface} and whose status-updater start and RM-proxy stop are no-ops.
   */
  @Test
  public void testSuccessfulContainerLaunch()
      throws InterruptedException, IOException, YarnException {

    FileContext localFS = FileContext.getLocalFSFileContext();

    // Start from empty local, log and remote-log directories.
    localFS.delete(new Path(localDir.getAbsolutePath()), true);
    localFS.delete(new Path(localLogDir.getAbsolutePath()), true);
    localFS.delete(new Path(remoteLogDir.getAbsolutePath()), true);
    localDir.mkdir();
    localLogDir.mkdir();
    remoteLogDir.mkdir();

    YarnConfiguration conf = new YarnConfiguration();

    // NM context with a fixed HTTP port.
    Context context =
        new NMContext(
            new NMContainerTokenSecretManager(conf),
            new NMTokenSecretManagerInNM(),
            null,
            null,
            new NMNullStateStoreService()) {
          @Override
          public int getHttpPort() {
            return 1234;
          }
        };

    conf.set(YarnConfiguration.NM_LOCAL_DIRS, localDir.getAbsolutePath());
    conf.set(YarnConfiguration.NM_LOG_DIRS, localLogDir.getAbsolutePath());
    conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, remoteLogDir.getAbsolutePath());

    ContainerExecutor exec = new DefaultContainerExecutor();
    exec.setConf(conf);

    DeletionService del = new DeletionService(exec);
    Dispatcher dispatcher = new AsyncDispatcher();
    LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService();
    NodeHealthCheckerService healthChecker =
        new NodeHealthCheckerService(NodeManager.getNodeHealthScriptRunner(conf), dirsHandler);
    healthChecker.init(conf);
    NodeManagerMetrics metrics = NodeManagerMetrics.create();
    // Status updater stubbed so that it uses a local RM interface and starts no threads.
    NodeStatusUpdater nodeStatusUpdater =
        new NodeStatusUpdaterImpl(context, dispatcher, healthChecker, metrics) {
          @Override
          protected ResourceTracker getRMClient() {
            return new LocalRMInterface();
          };

          @Override
          protected void stopRMProxy() {
            return;
          }

          @Override
          protected void startStatusUpdater() {
            return; // Don't start any updating thread.
          }

          @Override
          public long getRMIdentifier() {
            return SIMULATED_RM_IDENTIFIER;
          }
        };

    DummyContainerManager containerManager =
        new DummyContainerManager(
            context,
            exec,
            del,
            nodeStatusUpdater,
            metrics,
            new ApplicationACLsManager(conf),
            dirsHandler);
    // Bring up the status updater and the container manager.
    nodeStatusUpdater.init(conf);
    ((NMContext) context).setContainerManager(containerManager);
    nodeStatusUpdater.start();
    containerManager.init(conf);
    containerManager.start();

    // Build a start request for container 0 of attempt 0 of application 0.
    ContainerLaunchContext launchContext =
        recordFactory.newRecordInstance(ContainerLaunchContext.class);
    ApplicationId applicationId = ApplicationId.newInstance(0, 0);
    ApplicationAttemptId applicationAttemptId = ApplicationAttemptId.newInstance(applicationId, 0);
    ContainerId cID = ContainerId.newContainerId(applicationAttemptId, 0);

    String user = "******";
    StartContainerRequest scRequest =
        StartContainerRequest.newInstance(
            launchContext,
            TestContainerManager.createContainerToken(
                cID,
                SIMULATED_RM_IDENTIFIER,
                context.getNodeId(),
                user,
                context.getContainerTokenSecretManager()));
    List<StartContainerRequest> list = new ArrayList<StartContainerRequest>();
    list.add(scRequest);
    StartContainersRequest allRequests = StartContainersRequest.newInstance(list);
    containerManager.startContainers(allRequests);

    // The container must reach RUNNING before it is stopped.
    BaseContainerManagerTest.waitForContainerState(containerManager, cID, ContainerState.RUNNING);

    // Stop the container and wait for it to reach COMPLETE.
    List<ContainerId> containerIds = new ArrayList<ContainerId>();
    containerIds.add(cID);
    StopContainersRequest stopRequest = StopContainersRequest.newInstance(containerIds);
    containerManager.stopContainers(stopRequest);
    BaseContainerManagerTest.waitForContainerState(containerManager, cID, ContainerState.COMPLETE);

    containerManager.stop();
  }
 /**
  * Builds a {@link ContainerId} from its raw components.
  *
  * @param ts first argument of {@code ApplicationId.newInstance}
  * @param appId second argument of {@code ApplicationId.newInstance}
  * @param attemptId attempt number within the application
  * @param id container number within the attempt
  * @return the container id nested under the application and attempt built from the arguments
  */
 public static ContainerId makeContainerId(long ts, int appId, int attemptId, int id) {
   final ApplicationId application = ApplicationId.newInstance(ts, appId);
   final ApplicationAttemptId attempt = ApplicationAttemptId.newInstance(application, attemptId);
   return ContainerId.newInstance(attempt, id);
 }
 public LogKey(ContainerId containerId) {
   this.keyString = containerId.toString();
 }
Example #30
0
  /**
   * Entry point of the application master process.
   *
   * <p>In order, this method:
   *
   * <ol>
   *   <li>logs every environment variable of the process;
   *   <li>deletes the local {@code dataDir} and {@code logDir} directories and starts a
   *       {@link ZkServer} on them;
   *   <li>starts a {@link GenericApplicationMaster} for the application attempt derived from the
   *       container id reported by {@link AppMasterConfig};
   *   <li>loads the {@link ApplicationSpec} from the YAML spec file and publishes it, the app
   *       master and its config through the static fields of {@code YarnProvisioner};
   *   <li>creates the Helix cluster and adds one resource per service in the spec;
   *   <li>starts a Helix controller and submits any pre-specified task workflows;
   *   <li>registers a shutdown hook that shuts the {@link ZkServer} down, then sleeps 10 seconds.
   * </ol>
   *
   * @param args command-line arguments; not read by this method
   * @throws Exception if any step fails, except directory cleanup and application master start,
   *     whose failures are logged and execution continues
   */
  public static void main(String[] args) throws Exception {
    LOG.info("Starting app master with the following environment variables");
    for (Map.Entry<String, String> variable : System.getenv().entrySet()) {
      LOG.info(variable.getKey() + "\t\t=" + variable.getValue());
    }

    // START ZOOKEEPER
    String dataDir = "dataDir";
    String logDir = "logDir";
    IDefaultNameSpace defaultNameSpace =
        new IDefaultNameSpace() {
          @Override
          public void createDefaultNameSpace(ZkClient zkClient) {}
        };
    try {
      FileUtils.deleteDirectory(new File(dataDir));
      FileUtils.deleteDirectory(new File(logDir));
    } catch (IOException e) {
      // Best effort: keep going, but log the cause with its stack trace.
      LOG.error("Unable to clean up ZooKeeper directories " + dataDir + " and " + logDir, e);
    }

    final ZkServer server = new ZkServer(dataDir, logDir, defaultNameSpace);
    server.start();

    // start Generic AppMaster that interacts with Yarn RM
    AppMasterConfig appMasterConfig = new AppMasterConfig();
    String containerIdStr = appMasterConfig.getContainerId();
    ContainerId containerId = ConverterUtils.toContainerId(containerIdStr);
    ApplicationAttemptId appAttemptID = containerId.getApplicationAttemptId();

    String configFile = AppMasterConfig.AppEnvironment.APP_SPEC_FILE.toString();
    String className = appMasterConfig.getApplicationSpecFactory();

    GenericApplicationMaster genericApplicationMaster = new GenericApplicationMaster(appAttemptID);
    try {
      genericApplicationMaster.start();
    } catch (Exception e) {
      // NOTE(review): execution continues with an app master that failed to start; confirm
      // whether this is intentional or whether the process should abort here.
      LOG.error("Unable to start application master: ", e);
    }
    ApplicationSpecFactory factory = HelixYarnUtil.createInstance(className);

    // TODO: Avoid setting static variable.
    YarnProvisioner.applicationMaster = genericApplicationMaster;
    YarnProvisioner.applicationMasterConfig = appMasterConfig;
    // Close the spec file once it has been parsed so the file handle is not leaked.
    ApplicationSpec applicationSpec;
    InputStream specStream = new FileInputStream(configFile);
    try {
      applicationSpec = factory.fromYaml(specStream);
    } finally {
      specStream.close();
    }
    YarnProvisioner.applicationSpec = applicationSpec;
    String zkAddress = appMasterConfig.getZKAddress();
    String clusterName = appMasterConfig.getAppName();

    // CREATE CLUSTER and setup the resources
    // connect
    ZkHelixConnection connection = new ZkHelixConnection(zkAddress);
    connection.connect();

    // create the cluster
    ClusterId clusterId = ClusterId.from(clusterName);
    ClusterAccessor clusterAccessor = connection.createClusterAccessor(clusterId);
    StateModelDefinition statelessService =
        new StateModelDefinition(StateModelConfigGenerator.generateConfigForStatelessService());
    StateModelDefinition taskStateModel =
        new StateModelDefinition(StateModelConfigGenerator.generateConfigForTaskStateModel());
    clusterAccessor.createCluster(
        new ClusterConfig.Builder(clusterId)
            .addStateModelDefinition(statelessService)
            .addStateModelDefinition(taskStateModel)
            .build());
    for (String service : applicationSpec.getServices()) {
      // add the resource with the local provisioner; the service name doubles as resource name
      ResourceId resourceId = ResourceId.from(service);

      ServiceConfig serviceConfig = applicationSpec.getServiceConfig(service);
      serviceConfig.setSimpleField("service_name", service);
      // Falls back to a single container when the service does not set "num_containers".
      int numContainers = serviceConfig.getIntField("num_containers", 1);

      YarnProvisionerConfig provisionerConfig = new YarnProvisionerConfig(resourceId);
      provisionerConfig.setNumContainers(numContainers);

      AutoRebalanceModeISBuilder idealStateBuilder = new AutoRebalanceModeISBuilder(resourceId);
      idealStateBuilder.setStateModelDefId(statelessService.getStateModelDefId());
      idealStateBuilder.add(PartitionId.from(resourceId, "0"));
      idealStateBuilder.setNumReplica(1);
      ResourceConfig resourceConfig =
          new ResourceConfig.Builder(resourceId)
              .provisionerConfig(provisionerConfig)
              .idealState(idealStateBuilder.build())
              .build();
      clusterAccessor.addResource(resourceConfig);
    }
    // start controller
    ControllerId controllerId = ControllerId.from("controller1");
    HelixController controller = connection.createController(clusterId, controllerId);
    controller.start();

    // Start any pre-specified jobs
    List<TaskConfig> taskConfigs = applicationSpec.getTaskConfigs();
    if (taskConfigs != null) {
      YarnConfiguration conf = new YarnConfiguration();
      FileSystem fs = FileSystem.get(conf);
      for (TaskConfig taskConfig : taskConfigs) {
        URI yamlUri = taskConfig.getYamlURI();
        // Only tasks that carry both a name and a YAML location are submitted as workflows.
        if (yamlUri != null && taskConfig.name != null) {
          InputStream is =
              readFromHDFS(
                  fs, taskConfig.name, yamlUri, applicationSpec, appAttemptID.getApplicationId());
          Workflow workflow;
          try {
            workflow = Workflow.parse(is);
          } finally {
            // Release the workflow definition stream as soon as it has been parsed.
            if (is != null) {
              is.close();
            }
          }
          TaskDriver taskDriver = new TaskDriver(new ZKHelixManager(controller));
          taskDriver.start(workflow);
        }
      }
    }

    Thread shutdownhook =
        new Thread(
            new Runnable() {
              @Override
              public void run() {
                server.shutdown();
              }
            });
    Runtime.getRuntime().addShutdownHook(shutdownhook);
    // NOTE(review): main returns after this sleep; presumably threads started above keep the
    // process alive — confirm.
    Thread.sleep(10000);
  }