예제 #1
1
  /**
   * Records in the audit log how the application identified by {@code appId} ended.
   *
   * <p>Applications in state FINISHED or KILLED are logged through
   * {@code RMAuditLogger.logSuccess}. FAILED, and every state without a dedicated case, is
   * logged through {@code RMAuditLogger.logFailure} together with the application's
   * diagnostics (or {@code null} when there are none).
   *
   * @param appId key used to look the application up in {@code rmContext.getRMApps()}
   */
  protected void writeAuditLog(ApplicationId appId) {
    // NOTE(review): the lookup result is dereferenced without a null check; presumably callers
    // only pass ids that are still registered -- confirm.
    RMApp app = rmContext.getRMApps().get(appId);
    // Fallback operation name for states that have no dedicated audit constant below.
    String operation = "UNKNOWN";
    boolean success = false;
    switch (app.getState()) {
      case FAILED:
        operation = AuditConstants.FINISH_FAILED_APP;
        break;
      case FINISHED:
        operation = AuditConstants.FINISH_SUCCESS_APP;
        success = true;
        break;
      case KILLED:
        // A killed application is still audited as a successful finish operation.
        operation = AuditConstants.FINISH_KILLED_APP;
        success = true;
        break;
      default:
        // Any other state keeps the fallback name and is reported as a failure.
        break;
    }

    if (success) {
      RMAuditLogger.logSuccess(app.getUser(), operation, "RMAppManager", app.getApplicationId());
    } else {
      StringBuilder diag = app.getDiagnostics();
      String msg = diag == null ? null : diag.toString();
      RMAuditLogger.logFailure(
          app.getUser(),
          operation,
          msg,
          "RMAppManager",
          "App failed with state: " + app.getState(),
          appId);
    }
  }
예제 #2
0
  /**
   * Refreshes the user-to-groups mapping service using the core-site configuration file.
   *
   * <p>The caller is first passed through {@code checkAcls}. If {@code isRMActive()} is false,
   * a failure is audit-logged and {@code throwStandbyException()} is invoked. Otherwise the
   * mapping service is refreshed and a success is audit-logged.
   *
   * @param request the refresh request; not read by this method
   * @return a new {@code RefreshUserToGroupsMappingsResponse} record
   * @throws YarnException declared by the interface; propagated from the helpers called here
   * @throws IOException declared by the interface; propagated from the helpers called here
   */
  @Override
  public RefreshUserToGroupsMappingsResponse refreshUserToGroupsMappings(
      RefreshUserToGroupsMappingsRequest request) throws YarnException, IOException {
    final String operation = "refreshUserToGroupsMappings";
    final UserGroupInformation caller = checkAcls(operation);

    if (!isRMActive()) {
      final String notActive = "ResourceManager is not active. Can not refresh user-groups.";
      RMAuditLogger.logFailure(
          caller.getShortUserName(), operation, adminAcl.toString(), "AdminService", notActive);
      throwStandbyException();
    }

    // Load a fresh copy of the core-site settings and hand it to the group mapping service.
    final Configuration coreSiteConf =
        getConfiguration(new Configuration(false), YarnConfiguration.CORE_SITE_CONFIGURATION_FILE);
    Groups.getUserToGroupsMappingService(coreSiteConf).refresh();

    RMAuditLogger.logSuccess(caller.getShortUserName(), operation, "AdminService");

    final RefreshUserToGroupsMappingsResponse response =
        recordFactory.newRecordInstance(RefreshUserToGroupsMappingsResponse.class);
    return response;
  }
예제 #3
0
  /**
   * Asks the nodes list manager to refresh itself from the yarn-site configuration file.
   *
   * <p>The caller is first passed through {@code checkAcls}. If {@code isRMActive()} is false,
   * a failure is audit-logged and {@code throwStandbyException()} is invoked. An
   * {@code IOException} raised during the refresh is logged, audit-logged as a failure and
   * rethrown as the exception produced by {@code RPCUtil.getRemoteException}.
   *
   * @param request the refresh request; not read by this method
   * @return a new {@code RefreshNodesResponse} record
   * @throws YarnException declared by the interface; also the wrapped form of an I/O failure
   * @throws StandbyException declared by the interface
   */
  @Override
  public RefreshNodesResponse refreshNodes(RefreshNodesRequest request)
      throws YarnException, StandbyException {
    final String operation = "refreshNodes";
    final UserGroupInformation caller = checkAcls(operation);

    if (!isRMActive()) {
      final String notActive = "ResourceManager is not active. Can not refresh nodes.";
      RMAuditLogger.logFailure(
          caller.getShortUserName(), operation, adminAcl.toString(), "AdminService", notActive);
      throwStandbyException();
    }

    try {
      final Configuration yarnSiteConf =
          getConfiguration(
              new Configuration(false), YarnConfiguration.YARN_SITE_CONFIGURATION_FILE);
      rmContext.getNodesListManager().refreshNodes(yarnSiteConf);
      RMAuditLogger.logSuccess(caller.getShortUserName(), operation, "AdminService");
      return recordFactory.newRecordInstance(RefreshNodesResponse.class);
    } catch (IOException refreshFailure) {
      LOG.info("Exception refreshing nodes ", refreshFailure);
      final String failed = "Exception refreshing nodes";
      RMAuditLogger.logFailure(
          caller.getShortUserName(), operation, adminAcl.toString(), "AdminService", failed);
      throw RPCUtil.getRemoteException(refreshFailure);
    }
  }
예제 #4
0
  /**
   * Reinitializes the scheduler with this service's configuration.
   *
   * <p>The caller is first passed through {@code checkAcls}. If {@code isRMActive()} is false,
   * a failure is audit-logged and {@code throwStandbyException()} is invoked. An
   * {@code IOException} raised by the scheduler is logged, audit-logged as a failure and
   * rethrown as the exception produced by {@code RPCUtil.getRemoteException}.
   *
   * @param request the refresh request; not read by this method
   * @return a new {@code RefreshQueuesResponse} record
   * @throws YarnException declared by the interface; also the wrapped form of an I/O failure
   * @throws StandbyException declared by the interface
   */
  @Override
  public RefreshQueuesResponse refreshQueues(RefreshQueuesRequest request)
      throws YarnException, StandbyException {
    final String operation = "refreshQueues";
    final UserGroupInformation caller = checkAcls(operation);

    if (!isRMActive()) {
      final String notActive = "ResourceManager is not active. Can not refresh queues.";
      RMAuditLogger.logFailure(
          caller.getShortUserName(), operation, adminAcl.toString(), "AdminService", notActive);
      throwStandbyException();
    }

    // The response record is created up front, before the scheduler is touched.
    final RefreshQueuesResponse result =
        recordFactory.newRecordInstance(RefreshQueuesResponse.class);
    try {
      rmContext.getScheduler().reinitialize(getConfig(), this.rmContext);
      RMAuditLogger.logSuccess(caller.getShortUserName(), operation, "AdminService");
      return result;
    } catch (IOException reinitFailure) {
      LOG.info("Exception refreshing queues ", reinitFailure);
      final String failed = "Exception refreshing queues";
      RMAuditLogger.logFailure(
          caller.getShortUserName(), operation, adminAcl.toString(), "AdminService", failed);
      throw RPCUtil.getRemoteException(reinitFailure);
    }
  }
예제 #5
0
 /**
  * Moves the ResourceManager to standby on behalf of an HA state-change request.
  *
  * <p>Admin ACLs are reloaded first (without the active-state check) because they may have
  * been updated on the previously active RM. The caller is then passed through
  * {@code checkAccess} and the request through {@code checkHaStateChange} before
  * {@code rm.transitionToStandby(true)} is invoked. The outcome is audit-logged either way.
  *
  * @param reqInfo the state-change request, handed to {@code checkHaStateChange}
  * @throws IOException a {@code ServiceFailedException} is thrown when the ACL reload raises
  *     a {@code YarnException} or when the transition itself raises any exception
  */
 @Override
 public synchronized void transitionToStandby(HAServiceProtocol.StateChangeRequestInfo reqInfo)
     throws IOException {
   final String operation = "transitionToStandby";

   // Reload admin ACLs before the HA transition, in case they were changed on the
   // previously active RM.
   try {
     refreshAdminAcls(false);
   } catch (YarnException aclFailure) {
     throw new ServiceFailedException("Can not execute refreshAdminAcls", aclFailure);
   }

   final UserGroupInformation caller = checkAccess(operation);
   checkHaStateChange(reqInfo);

   try {
     LOG.info("Transitioning to standby admin" + masterServiceAddress.toString());
     // TODO transition leader election to standby?
     rm.transitionToStandby(true);
     RMAuditLogger.logSuccess(caller.getShortUserName(), operation, "RMHAProtocolService");
   } catch (Exception transitionFailure) {
     final String failed = "Exception transitioning to standby";
     RMAuditLogger.logFailure(
         caller.getShortUserName(),
         operation,
         adminAcl.toString(),
         "RMHAProtocolService",
         failed);
     throw new ServiceFailedException(
         "Error when transitioning to Standby mode", transitionFailure);
   }
 }
예제 #6
0
  /**
   * Moves the ResourceManager to active on behalf of an HA state-change request.
   *
   * <p>Admin ACLs are reloaded first (without the active-state check) because they may have
   * been updated on the previously active RM. The caller is then passed through
   * {@code checkAccess} and the request through {@code checkHaStateChange}. The transition,
   * the follow-up {@code refreshAll()} and the success audit entry only happen when
   * {@code autoFailoverEnabled} is false; when it is true the method returns without doing
   * or logging anything further.
   *
   * @param reqInfo the state-change request, handed to {@code checkHaStateChange}
   * @throws IOException a {@code ServiceFailedException} is thrown when the ACL reload raises
   *     a {@code YarnException} or when the transition or refresh raises any exception
   */
  @Override
  public synchronized void transitionToActive(HAServiceProtocol.StateChangeRequestInfo reqInfo)
      throws IOException {
    final String operation = "transitionToActive";

    // Reload admin ACLs before the HA transition, in case they were changed on the
    // previously active RM.
    try {
      refreshAdminAcls(false);
    } catch (YarnException aclFailure) {
      throw new ServiceFailedException("Can not execute refreshAdminAcls", aclFailure);
    }

    final UserGroupInformation caller = checkAccess(operation);
    checkHaStateChange(reqInfo);

    try {
      if (!autoFailoverEnabled) {
        LOG.info("admin transition to active " + masterServiceAddress.toString());
        rm.transitionToActive();
        // Run every refresh* so the newly active RM picks up the latest configuration.
        refreshAll();
        RMAuditLogger.logSuccess(caller.getShortUserName(), operation, "RMHAProtocolService");
      }
    } catch (Exception transitionFailure) {
      final String failed = "Exception transitioning to active";
      RMAuditLogger.logFailure(
          caller.getShortUserName(),
          operation,
          adminAcl.toString(),
          "RMHAProtocolService",
          failed);
      throw new ServiceFailedException(
          "Error when transitioning to Active mode", transitionFailure);
    }
  }
예제 #7
0
  /**
   * Reloads the service-level authorization policy from the Hadoop policy configuration file
   * and applies it to this service and to the client, application-master and resource-tracker
   * services.
   *
   * <p>The request is rejected when service authorization is disabled in this service's
   * configuration. Like the other refresh operations of this service, the caller is passed
   * through {@code checkAcls} before anything is changed, and both the standby rejection and
   * the successful refresh are audit-logged.
   *
   * @param request the refresh request; not read by this method
   * @return a new {@code RefreshServiceAclsResponse} record
   * @throws YarnException when service authorization is not enabled (an {@code IOException}
   *     wrapped by {@code RPCUtil.getRemoteException}), or propagated from the helpers called
   * @throws IOException declared by the interface; propagated from the helpers called here
   */
  @Override
  public RefreshServiceAclsResponse refreshServiceAcls(RefreshServiceAclsRequest request)
      throws YarnException, IOException {
    if (!getConfig()
        .getBoolean(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION, false)) {
      throw RPCUtil.getRemoteException(
          new IOException(
              "Service Authorization ("
                  + CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION
                  + ") not enabled."));
    }

    String argName = "refreshServiceAcls";
    // Gate this admin operation behind the same ACL check the sibling refresh* methods use;
    // previously any caller could trigger a policy reload.
    UserGroupInformation user = checkAcls(argName);

    if (!isRMActive()) {
      RMAuditLogger.logFailure(
          user.getShortUserName(),
          argName,
          adminAcl.toString(),
          "AdminService",
          "ResourceManager is not active. Can not refresh Service ACLs.");
      throwStandbyException();
    }

    PolicyProvider policyProvider = RMPolicyProvider.getInstance();
    Configuration conf =
        getConfiguration(
            new Configuration(false), YarnConfiguration.HADOOP_POLICY_CONFIGURATION_FILE);

    // Apply the reloaded policy to this service first, then to each RM-side RPC service.
    refreshServiceAcls(conf, policyProvider);
    rmContext.getClientRMService().refreshServiceAcls(conf, policyProvider);
    rmContext.getApplicationMasterService().refreshServiceAcls(conf, policyProvider);
    rmContext.getResourceTrackerService().refreshServiceAcls(conf, policyProvider);

    RMAuditLogger.logSuccess(user.getShortUserName(), argName, "AdminService");

    return recordFactory.newRecordInstance(RefreshServiceAclsResponse.class);
  }
예제 #8
0
  /**
   * Reloads the admin ACL from the yarn-site configuration file and replaces {@code adminAcl}.
   *
   * <p>The caller is first passed through {@code checkAcls}. When {@code checkRMHAState} is
   * true and {@code isRMActive()} is false, a failure is audit-logged and
   * {@code throwStandbyException()} is invoked. A successful reload is audit-logged.
   *
   * @param checkRMHAState whether to require the ResourceManager to be active; the HA
   *     transition methods pass {@code false} so ACLs can be reloaded in any state
   * @return a new {@code RefreshAdminAclsResponse} record
   * @throws YarnException propagated from the helpers called here
   * @throws IOException propagated from the helpers called here
   */
  private RefreshAdminAclsResponse refreshAdminAcls(boolean checkRMHAState)
      throws YarnException, IOException {
    String argName = "refreshAdminAcls";
    UserGroupInformation user = checkAcls(argName);

    if (checkRMHAState && !isRMActive()) {
      // The audit message names admin ACLs; it previously said "user-groups", copied from
      // refreshUserToGroupsMappings.
      RMAuditLogger.logFailure(
          user.getShortUserName(),
          argName,
          adminAcl.toString(),
          "AdminService",
          "ResourceManager is not active. Can not refresh Admin ACLs.");
      throwStandbyException();
    }
    Configuration conf =
        getConfiguration(new Configuration(false), YarnConfiguration.YARN_SITE_CONFIGURATION_FILE);
    // Fall back to the default admin ACL when the key is absent from the loaded configuration.
    adminAcl =
        new AccessControlList(
            conf.get(YarnConfiguration.YARN_ADMIN_ACL, YarnConfiguration.DEFAULT_YARN_ADMIN_ACL));
    RMAuditLogger.logSuccess(user.getShortUserName(), argName, "AdminService");

    return recordFactory.newRecordInstance(RefreshAdminAclsResponse.class);
  }
예제 #9
0
  /**
   * Handles the completion of one of this application's containers.
   *
   * <p>The container is dropped from the newly-allocated list, sent a finished event, removed
   * from the live-container map, audit-logged as released, subtracted from the queue metrics
   * and from {@code currentConsumption}, and removed from the preemption map. Finally the
   * cached memory-aggregate timestamp is reset to -1.
   *
   * @param rmContainer the container that completed
   * @param containerStatus status carried in the finished event
   * @param event event type carried in the finished event and included in the log line
   */
  public synchronized void containerCompleted(
      RMContainer rmContainer, ContainerStatus containerStatus, RMContainerEventType event) {

    final Container finished = rmContainer.getContainer();
    final ContainerId finishedId = finished.getId();

    // It may still be sitting in the newly-allocated list; drop it if so.
    newlyAllocatedContainers.remove(rmContainer);

    // Notify the container itself, then log the state it reports after handling the event.
    rmContainer.handle(new RMContainerFinishedEvent(finishedId, containerStatus, event));
    final String summary =
        "Completed container: "
            + rmContainer.getContainerId()
            + " in state: "
            + rmContainer.getState()
            + " event:"
            + event;
    LOG.info(summary);

    // It is no longer live.
    liveContainers.remove(rmContainer.getContainerId());

    RMAuditLogger.logSuccess(
        getUser(), AuditConstants.RELEASE_CONTAINER, "SchedulerApp", getApplicationId(), finishedId);

    // Give the resources back in the queue metrics and in this app's consumption.
    final Resource released = rmContainer.getContainer().getResource();
    queue.getMetrics().releaseResources(getUser(), 1, released);
    Resources.subtractFrom(currentConsumption, released);

    // A completed container can no longer be preempted.
    preemptionMap.remove(rmContainer);

    // Invalidate the cached resource utilization metrics.
    lastMemoryAggregateAllocationUpdateTime = -1;
  }
예제 #10
0
  /**
   * Records the allocation of {@code container} on {@code node} for this application.
   *
   * <p>If the allocation is more local than the locality level currently allowed for
   * {@code priority}, that level is reset. When no resources are required any more at this
   * priority the method returns {@code null} without allocating. Otherwise a new
   * {@code RMContainerImpl} is created, tracked as newly allocated and live, accounted for in
   * the scheduling info and in {@code currentConsumption}, sent a START event and
   * audit-logged.
   *
   * @param type locality of this allocation
   * @param node node the container is placed on
   * @param priority priority of the request being satisfied
   * @param request the resource request being satisfied
   * @param container the container being allocated
   * @return the newly created RM container, or {@code null} if
   *     {@code getTotalRequiredResources(priority)} is not positive
   */
  public synchronized RMContainer allocate(
      NodeType type,
      FSSchedulerNode node,
      Priority priority,
      ResourceRequest request,
      Container container) {
    // Reset the allowed locality level when this allocation is more local than what is
    // currently allowed for the priority.
    final NodeType allowed = allowedLocalityLevel.get(priority);
    if (allowed != null) {
      final boolean betterThanOffSwitch =
          allowed.equals(NodeType.OFF_SWITCH)
              && (type.equals(NodeType.NODE_LOCAL) || type.equals(NodeType.RACK_LOCAL));
      if (betterThanOffSwitch
          || (allowed.equals(NodeType.RACK_LOCAL) && type.equals(NodeType.NODE_LOCAL))) {
        this.resetAllowedLocalityLevel(priority, type);
      }
    }

    // Required sanity check: the AM can call 'allocate' to update its resource request
    // without locking the scheduler, so the demand may already be gone.
    if (getTotalRequiredResources(priority) <= 0) {
      return null;
    }

    final ContainerId allocatedId = container.getId();

    // Declared with the concrete type so setResourceRequests can be called without a cast.
    final RMContainerImpl allocated =
        new RMContainerImpl(
            container,
            getApplicationAttemptId(),
            node.getNodeID(),
            appSchedulingInfo.getUser(),
            rmContext);

    // Track it as both newly allocated and live.
    newlyAllocatedContainers.add(allocated);
    liveContainers.put(allocatedId, allocated);

    // Update the scheduling info and this app's consumption.
    final List<ResourceRequest> satisfiedRequests =
        appSchedulingInfo.allocate(type, node, priority, request, container);
    Resources.addTo(currentConsumption, container.getResource());

    // Keep the resource requests tied to "request" on the container itself.
    allocated.setResourceRequests(satisfiedRequests);

    // Tell the container it has started.
    allocated.handle(new RMContainerEvent(allocatedId, RMContainerEventType.START));

    if (LOG.isDebugEnabled()) {
      final String detail =
          "allocate: applicationAttemptId="
              + allocatedId.getApplicationAttemptId()
              + " container="
              + allocatedId
              + " host="
              + container.getNodeId().getHost()
              + " type="
              + type;
      LOG.debug(detail);
    }
    RMAuditLogger.logSuccess(
        getUser(), AuditConstants.ALLOC_CONTAINER, "SchedulerApp", getApplicationId(), allocatedId);

    return allocated;
  }