protected void writeAuditLog(ApplicationId appId) { RMApp app = rmContext.getRMApps().get(appId); String operation = "UNKONWN"; boolean success = false; switch (app.getState()) { case FAILED: operation = AuditConstants.FINISH_FAILED_APP; break; case FINISHED: operation = AuditConstants.FINISH_SUCCESS_APP; success = true; break; case KILLED: operation = AuditConstants.FINISH_KILLED_APP; success = true; break; default: } if (success) { RMAuditLogger.logSuccess(app.getUser(), operation, "RMAppManager", app.getApplicationId()); } else { StringBuilder diag = app.getDiagnostics(); String msg = diag == null ? null : diag.toString(); RMAuditLogger.logFailure( app.getUser(), operation, msg, "RMAppManager", "App failed with state: " + app.getState(), appId); } }
@Override public RefreshUserToGroupsMappingsResponse refreshUserToGroupsMappings( RefreshUserToGroupsMappingsRequest request) throws YarnException, IOException { String argName = "refreshUserToGroupsMappings"; UserGroupInformation user = checkAcls(argName); if (!isRMActive()) { RMAuditLogger.logFailure( user.getShortUserName(), argName, adminAcl.toString(), "AdminService", "ResourceManager is not active. Can not refresh user-groups."); throwStandbyException(); } Groups.getUserToGroupsMappingService( getConfiguration( new Configuration(false), YarnConfiguration.CORE_SITE_CONFIGURATION_FILE)) .refresh(); RMAuditLogger.logSuccess(user.getShortUserName(), argName, "AdminService"); return recordFactory.newRecordInstance(RefreshUserToGroupsMappingsResponse.class); }
@Override public RefreshNodesResponse refreshNodes(RefreshNodesRequest request) throws YarnException, StandbyException { String argName = "refreshNodes"; UserGroupInformation user = checkAcls("refreshNodes"); if (!isRMActive()) { RMAuditLogger.logFailure( user.getShortUserName(), argName, adminAcl.toString(), "AdminService", "ResourceManager is not active. Can not refresh nodes."); throwStandbyException(); } try { Configuration conf = getConfiguration( new Configuration(false), YarnConfiguration.YARN_SITE_CONFIGURATION_FILE); rmContext.getNodesListManager().refreshNodes(conf); RMAuditLogger.logSuccess(user.getShortUserName(), argName, "AdminService"); return recordFactory.newRecordInstance(RefreshNodesResponse.class); } catch (IOException ioe) { LOG.info("Exception refreshing nodes ", ioe); RMAuditLogger.logFailure( user.getShortUserName(), argName, adminAcl.toString(), "AdminService", "Exception refreshing nodes"); throw RPCUtil.getRemoteException(ioe); } }
@Override public RefreshQueuesResponse refreshQueues(RefreshQueuesRequest request) throws YarnException, StandbyException { String argName = "refreshQueues"; UserGroupInformation user = checkAcls(argName); if (!isRMActive()) { RMAuditLogger.logFailure( user.getShortUserName(), argName, adminAcl.toString(), "AdminService", "ResourceManager is not active. Can not refresh queues."); throwStandbyException(); } RefreshQueuesResponse response = recordFactory.newRecordInstance(RefreshQueuesResponse.class); try { rmContext.getScheduler().reinitialize(getConfig(), this.rmContext); RMAuditLogger.logSuccess(user.getShortUserName(), argName, "AdminService"); return response; } catch (IOException ioe) { LOG.info("Exception refreshing queues ", ioe); RMAuditLogger.logFailure( user.getShortUserName(), argName, adminAcl.toString(), "AdminService", "Exception refreshing queues"); throw RPCUtil.getRemoteException(ioe); } }
@Override public synchronized void transitionToStandby(HAServiceProtocol.StateChangeRequestInfo reqInfo) throws IOException { // call refreshAdminAcls before HA state transition // for the case that adminAcls have been updated in previous active RM try { refreshAdminAcls(false); } catch (YarnException ex) { throw new ServiceFailedException("Can not execute refreshAdminAcls", ex); } UserGroupInformation user = checkAccess("transitionToStandby"); checkHaStateChange(reqInfo); try { LOG.info("Transitioning to standby admin" + masterServiceAddress.toString()); // TODO transition leader election to standby? rm.transitionToStandby(true); RMAuditLogger.logSuccess( user.getShortUserName(), "transitionToStandby", "RMHAProtocolService"); } catch (Exception e) { RMAuditLogger.logFailure( user.getShortUserName(), "transitionToStandby", adminAcl.toString(), "RMHAProtocolService", "Exception transitioning to standby"); throw new ServiceFailedException("Error when transitioning to Standby mode", e); } }
@Override public synchronized void transitionToActive(HAServiceProtocol.StateChangeRequestInfo reqInfo) throws IOException { // call refreshAdminAcls before HA state transition // for the case that adminAcls have been updated in previous active RM try { refreshAdminAcls(false); } catch (YarnException ex) { throw new ServiceFailedException("Can not execute refreshAdminAcls", ex); } UserGroupInformation user = checkAccess("transitionToActive"); checkHaStateChange(reqInfo); try { if (!autoFailoverEnabled) { LOG.info("admin transition to active " + masterServiceAddress.toString()); rm.transitionToActive(); // call all refresh*s for active RM to get the updated configurations. refreshAll(); RMAuditLogger.logSuccess( user.getShortUserName(), "transitionToActive", "RMHAProtocolService"); } } catch (Exception e) { RMAuditLogger.logFailure( user.getShortUserName(), "transitionToActive", adminAcl.toString(), "RMHAProtocolService", "Exception transitioning to active"); throw new ServiceFailedException("Error when transitioning to Active mode", e); } }
public synchronized void containerCompleted( RMContainer rmContainer, ContainerStatus containerStatus, RMContainerEventType event) { Container container = rmContainer.getContainer(); ContainerId containerId = container.getId(); // Remove from the list of newly allocated containers if found newlyAllocatedContainers.remove(rmContainer); // Inform the container rmContainer.handle(new RMContainerFinishedEvent(containerId, containerStatus, event)); LOG.info( "Completed container: " + rmContainer.getContainerId() + " in state: " + rmContainer.getState() + " event:" + event); // Remove from the list of containers liveContainers.remove(rmContainer.getContainerId()); RMAuditLogger.logSuccess( getUser(), AuditConstants.RELEASE_CONTAINER, "SchedulerApp", getApplicationId(), containerId); // Update usage metrics Resource containerResource = rmContainer.getContainer().getResource(); queue.getMetrics().releaseResources(getUser(), 1, containerResource); Resources.subtractFrom(currentConsumption, containerResource); // remove from preemption map if it is completed preemptionMap.remove(rmContainer); // Clear resource utilization metrics cache. lastMemoryAggregateAllocationUpdateTime = -1; }
private RefreshAdminAclsResponse refreshAdminAcls(boolean checkRMHAState) throws YarnException, IOException { String argName = "refreshAdminAcls"; UserGroupInformation user = checkAcls(argName); if (checkRMHAState && !isRMActive()) { RMAuditLogger.logFailure( user.getShortUserName(), argName, adminAcl.toString(), "AdminService", "ResourceManager is not active. Can not refresh user-groups."); throwStandbyException(); } Configuration conf = getConfiguration(new Configuration(false), YarnConfiguration.YARN_SITE_CONFIGURATION_FILE); adminAcl = new AccessControlList( conf.get(YarnConfiguration.YARN_ADMIN_ACL, YarnConfiguration.DEFAULT_YARN_ADMIN_ACL)); RMAuditLogger.logSuccess(user.getShortUserName(), argName, "AdminService"); return recordFactory.newRecordInstance(RefreshAdminAclsResponse.class); }
public synchronized RMContainer allocate( NodeType type, FSSchedulerNode node, Priority priority, ResourceRequest request, Container container) { // Update allowed locality level NodeType allowed = allowedLocalityLevel.get(priority); if (allowed != null) { if (allowed.equals(NodeType.OFF_SWITCH) && (type.equals(NodeType.NODE_LOCAL) || type.equals(NodeType.RACK_LOCAL))) { this.resetAllowedLocalityLevel(priority, type); } else if (allowed.equals(NodeType.RACK_LOCAL) && type.equals(NodeType.NODE_LOCAL)) { this.resetAllowedLocalityLevel(priority, type); } } // Required sanity check - AM can call 'allocate' to update resource // request without locking the scheduler, hence we need to check if (getTotalRequiredResources(priority) <= 0) { return null; } // Create RMContainer RMContainer rmContainer = new RMContainerImpl( container, getApplicationAttemptId(), node.getNodeID(), appSchedulingInfo.getUser(), rmContext); // Add it to allContainers list. newlyAllocatedContainers.add(rmContainer); liveContainers.put(container.getId(), rmContainer); // Update consumption and track allocations List<ResourceRequest> resourceRequestList = appSchedulingInfo.allocate(type, node, priority, request, container); Resources.addTo(currentConsumption, container.getResource()); // Update resource requests related to "request" and store in RMContainer ((RMContainerImpl) rmContainer).setResourceRequests(resourceRequestList); // Inform the container rmContainer.handle(new RMContainerEvent(container.getId(), RMContainerEventType.START)); if (LOG.isDebugEnabled()) { LOG.debug( "allocate: applicationAttemptId=" + container.getId().getApplicationAttemptId() + " container=" + container.getId() + " host=" + container.getNodeId().getHost() + " type=" + type); } RMAuditLogger.logSuccess( getUser(), AuditConstants.ALLOC_CONTAINER, "SchedulerApp", getApplicationId(), container.getId()); return rmContainer; }