/**
 * Creates the application master and brings up both YARN clients.
 *
 * <p>The NodeManager client is initialised and started first. The asynchronous
 * ResourceManager client is created afterwards with a heartbeat interval of 100 and this
 * instance as its callback handler, then initialised and started as well.
 *
 * @param _jarPath value stored in the {@code jarPath} field
 */
public SocialiteAppMaster(Path _jarPath) {
    // Plain state first; nothing below may run before these are set.
    this.jarPath = _jarPath;
    this.configuration = new YarnConfiguration();
    this.numAllocedContainers = 0;

    // NodeManager client.
    this.nmClient = NMClient.createNMClient();
    this.nmClient.init(this.configuration);
    this.nmClient.start();

    // ResourceManager client; `this` receives the allocation callbacks, so the field is
    // assigned before the client is started.
    this.rmClient = AMRMClientAsync.createAMRMClientAsync(100, this);
    this.rmClient.init(getConfiguration());
    this.rmClient.start();
  }
  /**
   * Runs the application master's main loop.
   *
   * <p>Registers with the ResourceManager, initialises the registry, starts the master
   * node, submits one container request per configured worker, and then polls once per
   * second until {@code alive} becomes false. When the loop ends, {@code finish()} is called.
   *
   * @throws Exception if registration, start-up, the wait, or {@code finish()} fails
   */
  public void runMainLoop() throws Exception {

    rmClient.registerApplicationMaster("", 0, "");

    initRegistry();

    startMasterNode();

    Priority priority = Records.newRecord(Priority.class);
    priority.setPriority(0);

    // Every worker container is requested with the same capability: the configured worker
    // heap size as memory and the configured worker thread count as virtual cores.
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(ClusterConf.get().getWorkerHeapSize());
    capability.setVirtualCores(ClusterConf.get().getNumWorkerThreads());

    // No node or rack constraints (both null): any host may satisfy the request.
    for (int i = 0; i < ClusterConf.get().getNumWorkers(); ++i) {
      rmClient.addContainerRequest(new ContainerRequest(capability, null, null, priority));
    }

    // Block here until some other code path clears `alive`.
    while (alive) {
      Thread.sleep(1000);
    }
    finish();
  }
Example #3
0
  public YarnService(
      Config config,
      String applicationName,
      String applicationId,
      YarnConfiguration yarnConfiguration,
      FileSystem fs,
      EventBus eventBus)
      throws Exception {
    this.applicationName = applicationName;
    this.applicationId = applicationId;

    this.config = config;

    this.eventBus = eventBus;

    this.gobblinMetrics =
        config.getBoolean(ConfigurationKeys.METRICS_ENABLED_KEY)
            ? Optional.of(buildGobblinMetrics())
            : Optional.<GobblinMetrics>absent();

    this.eventSubmitter =
        config.getBoolean(ConfigurationKeys.METRICS_ENABLED_KEY)
            ? Optional.of(buildEventSubmitter())
            : Optional.<EventSubmitter>absent();

    this.yarnConfiguration = yarnConfiguration;
    this.fs = fs;

    this.amrmClientAsync =
        closer.register(
            AMRMClientAsync.createAMRMClientAsync(1000, new AMRMClientCallbackHandler()));
    this.amrmClientAsync.init(this.yarnConfiguration);
    this.nmClientAsync =
        closer.register(NMClientAsync.createNMClientAsync(new NMClientCallbackHandler()));
    this.nmClientAsync.init(this.yarnConfiguration);

    this.initialContainers = config.getInt(GobblinYarnConfigurationKeys.INITIAL_CONTAINERS_KEY);
    this.requestedContainerMemoryMbs =
        config.getInt(GobblinYarnConfigurationKeys.CONTAINER_MEMORY_MBS_KEY);
    this.requestedContainerCores = config.getInt(GobblinYarnConfigurationKeys.CONTAINER_CORES_KEY);
    this.containerHostAffinityEnabled =
        config.getBoolean(GobblinYarnConfigurationKeys.CONTAINER_HOST_AFFINITY_ENABLED);

    this.helixInstanceMaxRetries =
        config.getInt(GobblinYarnConfigurationKeys.HELIX_INSTANCE_MAX_RETRIES);

    this.containerJvmArgs =
        config.hasPath(GobblinYarnConfigurationKeys.CONTAINER_JVM_ARGS_KEY)
            ? Optional.of(config.getString(GobblinYarnConfigurationKeys.CONTAINER_JVM_ARGS_KEY))
            : Optional.<String>absent();

    this.containerLaunchExecutor =
        Executors.newFixedThreadPool(
            10,
            ExecutorsUtils.newThreadFactory(
                Optional.of(LOGGER), Optional.of("ContainerLaunchExecutor")));

    this.tokens = getSecurityTokens();
  }
 /**
  * Unregisters this application master from the ResourceManager and stops both YARN
  * clients.
  *
  * <p>The clients are stopped even when unregistration throws, so that neither is left
  * running after shutdown.
  *
  * @throws IOException if the unregister call fails with an I/O error
  * @throws YarnException if the unregister call fails with a YARN error
  */
 void finish() throws IOException, YarnException {
   try {
     rmClient.unregisterApplicationMaster(finalStatus, appMessage, null);
   } finally {
     try {
       nmClient.stop();
     } finally {
       // Stop the RM client last: it is the one that performed the unregister call above.
       rmClient.stop();
     }
   }
 }
  /**
   * Waits for the application to complete, then shuts down the YARN clients and reports the
   * final status to the ResourceManager.
   *
   * <p>The method polls every 200 ms until {@code done} is set or every container has
   * completed, gives each launch thread up to 10 seconds to finish, stops the NodeManager
   * client, unregisters from the ResourceManager and finally stops the ResourceManager
   * client. Interrupts received while waiting do not cut the wait short; instead the
   * calling thread's interrupt status is restored just before this method returns.
   *
   * @return {@code true} if no container failed and the number of completed containers
   *     equals {@code numTotalContainers}; {@code false} otherwise
   */
  @VisibleForTesting
  protected boolean finish() {
    // Set when an InterruptedException is caught so the flag can be re-asserted on exit.
    boolean interrupted = false;
    try {
      // wait for completion.
      while (!done && (numCompletedContainers.get() != numTotalContainers)) {
        try {
          Thread.sleep(200);
        } catch (InterruptedException ex) {
          // Keep waiting, but remember that an interrupt was requested.
          interrupted = true;
        }
      }

      // Join all launched threads
      // needed for when we time out
      // and we need to release containers
      for (Thread launchThread : launchThreads) {
        try {
          launchThread.join(10000);
        } catch (InterruptedException e) {
          interrupted = true;
          LOG.info("Exception thrown in thread join: " + e.getMessage(), e);
        }
      }

      // When the application completes, it should stop all running containers
      LOG.info("Application completed. Stopping running containers");
      nmClientAsync.stop();

      // When the application completes, it should send a finish application
      // signal to the RM
      LOG.info("Application completed. Signalling finish to RM");

      FinalApplicationStatus appStatus;
      String appMessage = null;
      boolean success = true;
      if (numFailedContainers.get() == 0 && numCompletedContainers.get() == numTotalContainers) {
        appStatus = FinalApplicationStatus.SUCCEEDED;
      } else {
        appStatus = FinalApplicationStatus.FAILED;
        appMessage =
            "Diagnostics."
                + ", total="
                + numTotalContainers
                + ", completed="
                + numCompletedContainers.get()
                + ", allocated="
                + numAllocatedContainers.get()
                + ", failed="
                + numFailedContainers.get();
        success = false;
      }

      // A failed unregister is logged rather than propagated so that the RM client is
      // still stopped below.
      try {
        amRMClient.unregisterApplicationMaster(appStatus, appMessage, null);
      } catch (YarnException ex) {
        LOG.error("Failed to unregister application", ex);
      } catch (IOException e) {
        LOG.error("Failed to unregister application", e);
      }

      amRMClient.stop();

      return success;
    } finally {
      if (interrupted) {
        // Restore the status only now, so the shutdown calls above ran uninterrupted.
        Thread.currentThread().interrupt();
      }
    }
  }
  /**
   * Main run function for the application master.
   *
   * <p>In order, this method:
   * <ol>
   *   <li>publishes the application-attempt start event (failures are only logged);</li>
   *   <li>serialises the current user's credentials into {@code allTokens}, removes the
   *       AM-to-RM token from the credentials object, and attaches those credentials to a
   *       remote-user UGI for the submitting user;</li>
   *   <li>creates, initialises and starts the asynchronous ResourceManager and NodeManager
   *       clients;</li>
   *   <li>registers with the ResourceManager and caps {@code containerMemory} and
   *       {@code containerVirtualCores} at the maxima reported in the response;</li>
   *   <li>counts containers still running from previous attempts as allocated and submits
   *       requests for the remainder of {@code numTotalContainers};</li>
   *   <li>publishes the application-attempt end event (failures are only logged).</li>
   * </ol>
   *
   * @throws YarnException if registration with the ResourceManager fails with a YARN error
   * @throws IOException if reading credentials or talking to the ResourceManager fails
   */
  @SuppressWarnings({"unchecked"})
  public void run() throws YarnException, IOException {
    LOG.info("Starting ApplicationMaster");
    try {
      publishApplicationAttemptEvent(
          timelineClient, appAttemptID.toString(), DSEvent.DS_APP_ATTEMPT_START);
    } catch (Exception e) {
      LOG.error("App Attempt start event coud not be pulished for " + appAttemptID.toString(), e);
    }

    Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
    DataOutputBuffer dob = new DataOutputBuffer();
    credentials.writeTokenStorageToStream(dob);
    // Now remove the AM->RM token so that containers cannot access it.
    // NOTE(review): the buffer that backs allTokens was written above, before this removal,
    // so the removal only affects `credentials` (and therefore appSubmitterUgi) - confirm
    // that this ordering is intended.
    Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
    LOG.info("Executing with tokens:");
    while (iter.hasNext()) {
      Token<?> token = iter.next();
      LOG.info(token);
      if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) {
        iter.remove();
      }
    }
    allTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());

    // Create appSubmitterUgi and add original tokens to it
    String appSubmitterUserName = System.getenv(ApplicationConstants.Environment.USER.name());
    appSubmitterUgi = UserGroupInformation.createRemoteUser(appSubmitterUserName);
    appSubmitterUgi.addCredentials(credentials);

    AMRMClientAsync.CallbackHandler allocListener = new RMCallbackHandler();
    amRMClient = AMRMClientAsync.createAMRMClientAsync(1000, allocListener);
    amRMClient.init(conf);
    amRMClient.start();

    containerListener = createNMCallbackHandler();
    nmClientAsync = new NMClientAsyncImpl(containerListener);
    nmClientAsync.init(conf);
    nmClientAsync.start();

    // Setup local RPC Server to accept status requests directly from clients
    // TODO need to setup a protocol for client to be able to communicate to
    // the RPC server
    // TODO use the rpc port info to register with the RM for the client to
    // send requests to this app master

    // Register self with ResourceManager
    // This will start heartbeating to the RM
    appMasterHostname = NetUtils.getHostname();
    RegisterApplicationMasterResponse response =
        amRMClient.registerApplicationMaster(
            appMasterHostname, appMasterRpcPort, appMasterTrackingUrl);
    // Dump out information about cluster capability as seen by the
    // resource manager
    int maxMem = response.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capabililty of resources in this cluster " + maxMem);

    int maxVCores = response.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max vcores capabililty of resources in this cluster " + maxVCores);

    // A resource ask cannot exceed the max.
    if (containerMemory > maxMem) {
      LOG.info(
          "Container memory specified above max threshold of cluster."
              + " Using max value."
              + ", specified="
              + containerMemory
              + ", max="
              + maxMem);
      containerMemory = maxMem;
    }

    if (containerVirtualCores > maxVCores) {
      LOG.info(
          "Container virtual cores specified above max threshold of cluster."
              + " Using max value."
              + ", specified="
              + containerVirtualCores
              + ", max="
              + maxVCores);
      containerVirtualCores = maxVCores;
    }

    // Containers that survived a previous attempt already count as allocated, so only the
    // remainder has to be requested.
    List<Container> previousAMRunningContainers = response.getContainersFromPreviousAttempts();
    LOG.info(
        "Received "
            + previousAMRunningContainers.size()
            + " previous AM's running containers on AM registration.");
    numAllocatedContainers.addAndGet(previousAMRunningContainers.size());

    int numTotalContainersToRequest = numTotalContainers - previousAMRunningContainers.size();
    // Submit one container request per outstanding container. This loop only queues the
    // requests with the RM client; it does not wait for them to be granted.
    for (int i = 0; i < numTotalContainersToRequest; ++i) {
      ContainerRequest containerAsk = setupContainerAskForRM();
      amRMClient.addContainerRequest(containerAsk);
    }
    numRequestedContainers.set(numTotalContainersToRequest);
    try {
      publishApplicationAttemptEvent(
          timelineClient, appAttemptID.toString(), DSEvent.DS_APP_ATTEMPT_END);
    } catch (Exception e) {
      // This is the END event; the message previously claimed it was the start event.
      LOG.error("App Attempt end event coud not be pulished for " + appAttemptID.toString(), e);
    }
  }