@Override protected void render(Block html) { ContainerId containerID; try { containerID = ConverterUtils.toContainerId($(CONTAINER_ID)); } catch (IllegalArgumentException e) { html.p()._("Invalid containerId " + $(CONTAINER_ID))._(); return; } DIV<Hamlet> div = html.div("#content"); Container container = this.nmContext.getContainers().get(containerID); if (container == null) { div.h1("Unknown Container. Container might have completed, " + "please go back to the previous page and retry.")._(); return; } ContainerStatus containerData = container.cloneAndGetContainerStatus(); int exitCode = containerData.getExitStatus(); String exiStatus = (exitCode == YarnConfiguration.INVALID_CONTAINER_EXIT_STATUS) ? "N/A" : String.valueOf(exitCode); info("Container information") ._("ContainerID", $(CONTAINER_ID)) ._("ContainerState", container.getContainerState()) ._("ExitStatus", exiStatus) ._("Diagnostics", containerData.getDiagnostics()) ._("User", container.getUser()) ._("TotalMemoryNeeded", container.getLaunchContext().getResource().getMemory()) ._("logs", ujoin("containerlogs", $(CONTAINER_ID), container.getUser()), "Link to logs"); html._(InfoBlock.class); }
public static ContainerStatus newContainerStatus( ContainerId containerId, ContainerState containerState, String diagnostics, int exitStatus) { ContainerStatus containerStatus = recordFactory.newRecordInstance(ContainerStatus.class); containerStatus.setState(containerState); containerStatus.setContainerId(containerId); containerStatus.setDiagnostics(diagnostics); containerStatus.setExitStatus(exitStatus); return containerStatus; }
/* * (non-Javadoc) * @see org.apache.hadoop.yarn.client.api.async.AMRMClientAsync.CallbackHandler#onContainersCompleted(java.util.List) */ @Override public void onContainersCompleted(List<ContainerStatus> statuses) { LOG.info("onContainersCompleted() called"); for (ContainerStatus status : statuses) { LOG.info("container '" + status.getContainerId() + "' status is " + status); synchronized (ApplicationMasterAsync.lock) { ApplicationMasterAsync.completedContainers++; } } }
@Private @Unstable public static ContainerStatus newInstance( ContainerId containerId, ContainerState containerState, String diagnostics, int exitStatus) { ContainerStatus containerStatus = Records.newRecord(ContainerStatus.class); containerStatus.setState(containerState); containerStatus.setContainerId(containerId); containerStatus.setDiagnostics(diagnostics); containerStatus.setExitStatus(exitStatus); return containerStatus; }
private static void publishContainerEndEvent( TimelineClient timelineClient, ContainerStatus container) throws IOException, YarnException { TimelineEntity entity = new TimelineEntity(); entity.setEntityId(container.getContainerId().toString()); entity.setEntityType(DSEntity.DS_CONTAINER.toString()); entity.addPrimaryFilter("user", UserGroupInformation.getCurrentUser().toString()); TimelineEvent event = new TimelineEvent(); event.setTimestamp(System.currentTimeMillis()); event.setEventType(DSEvent.DS_CONTAINER_END.toString()); event.addEventInfo("State", container.getState().name()); event.addEventInfo("Exit Status", container.getExitStatus()); entity.addEvent(event); timelineClient.putEntities(entity); }
@Override protected void handleCompletedContainers(List<ContainerStatus> containerStatuses) { // strip away containers which were already marked // garbage by allocate tracker. system // never knew those even exist and might create mess // with monitor component. monitor only sees // complete status which is also the case for garbage // when it's released. List<ContainerStatus> garbageFree = new ArrayList<ContainerStatus>(); for (ContainerStatus status : containerStatuses) { if (!garbageContainers.contains(status.getContainerId())) { garbageFree.add(status); } } allocatorListener.completed(garbageFree); }
public void containerStatus(ContainerStatus containerStatus) throws Exception { Map<ApplicationId, List<ContainerStatus>> conts = new HashMap<ApplicationId, List<ContainerStatus>>(); conts.put( containerStatus.getContainerId().getApplicationAttemptId().getApplicationId(), Arrays.asList(new ContainerStatus[] {containerStatus})); nodeHeartbeat(conts, true); }
private NodeStatus getNodeStatus() { NodeStatus nodeStatus = recordFactory.newRecordInstance(NodeStatus.class); nodeStatus.setNodeId(this.nodeId); int numActiveContainers = 0; List<ContainerStatus> containersStatuses = new ArrayList<ContainerStatus>(); for (Iterator<Entry<ContainerId, Container>> i = this.context.getContainers().entrySet().iterator(); i.hasNext(); ) { Entry<ContainerId, Container> e = i.next(); ContainerId containerId = e.getKey(); Container container = e.getValue(); // Clone the container to send it to the RM org.apache.hadoop.yarn.api.records.ContainerStatus containerStatus = container.cloneAndGetContainerStatus(); containersStatuses.add(containerStatus); ++numActiveContainers; LOG.info("Sending out status for container: " + containerStatus); if (containerStatus.getState() == ContainerState.COMPLETE) { // Remove i.remove(); LOG.info("Removed completed container " + containerId); } } nodeStatus.setContainersStatuses(containersStatuses); LOG.debug(this.nodeId + " sending out status for " + numActiveContainers + " containers"); NodeHealthStatus nodeHealthStatus = this.context.getNodeHealthStatus(); if (this.healthChecker != null) { this.healthChecker.setHealthStatus(nodeHealthStatus); } LOG.debug( "Node's health-status : " + nodeHealthStatus.getIsNodeHealthy() + ", " + nodeHealthStatus.getHealthReport()); nodeStatus.setNodeHealthStatus(nodeHealthStatus); return nodeStatus; }
private void updateQueueWithNodeUpdate(NodeUpdateSchedulerEventWrapper eventWrapper) { RMNodeWrapper node = (RMNodeWrapper) eventWrapper.getRMNode(); List<UpdatedContainerInfo> containerList = node.getContainerUpdates(); for (UpdatedContainerInfo info : containerList) { for (ContainerStatus status : info.getCompletedContainers()) { ContainerId containerId = status.getContainerId(); SchedulerAppReport app = scheduler.getSchedulerAppInfo(containerId.getApplicationAttemptId()); if (app == null) { // this happens for the AM container // The app have already removed when the NM sends the release // information. continue; } String queue = appQueueMap.get(containerId.getApplicationAttemptId().getApplicationId()); int releasedMemory = 0, releasedVCores = 0; if (status.getExitStatus() == ContainerExitStatus.SUCCESS) { for (RMContainer rmc : app.getLiveContainers()) { if (rmc.getContainerId() == containerId) { releasedMemory += rmc.getContainer().getResource().getMemory(); releasedVCores += rmc.getContainer().getResource().getVirtualCores(); break; } } } else if (status.getExitStatus() == ContainerExitStatus.ABORTED) { if (preemptionContainerMap.containsKey(containerId)) { Resource preResource = preemptionContainerMap.get(containerId); releasedMemory += preResource.getMemory(); releasedVCores += preResource.getVirtualCores(); preemptionContainerMap.remove(containerId); } } // update queue counters updateQueueMetrics(queue, releasedMemory, releasedVCores); } } }
private ImmutableMap.Builder<String, String> buildContainerStatusEventMetadata( ContainerStatus containerStatus) { ImmutableMap.Builder<String, String> eventMetadataBuilder = new ImmutableMap.Builder<>(); eventMetadataBuilder.put( GobblinYarnMetricTagNames.CONTAINER_ID, containerStatus.getContainerId().toString()); eventMetadataBuilder.put( GobblinYarnEventConstants.EventMetadata.CONTAINER_STATUS_CONTAINER_STATE, containerStatus.getState().toString()); if (ContainerExitStatus.INVALID != containerStatus.getExitStatus()) { eventMetadataBuilder.put( GobblinYarnEventConstants.EventMetadata.CONTAINER_STATUS_EXIT_STATUS, containerStatus.getExitStatus() + ""); } if (!Strings.isNullOrEmpty(containerStatus.getDiagnostics())) { eventMetadataBuilder.put( GobblinYarnEventConstants.EventMetadata.CONTAINER_STATUS_EXIT_DIAGNOSTICS, containerStatus.getDiagnostics()); } return eventMetadataBuilder; }
protected void removePendingReleaseRequests(List<ContainerStatus> completedContainersStatuses) { for (ContainerStatus containerStatus : completedContainersStatuses) { pendingRelease.remove(containerStatus.getContainerId()); } }
/** * Handle the completion of a container. A new container will be requested to replace the one that * just exited. Depending on the exit status and if container host affinity is enabled, the new * container may or may not try to be started on the same node. * * <p>A container completes in either of the following conditions: 1) some error happens in the * container and caused the container to exit, 2) the container gets killed due to some reason, * for example, if it runs over the allowed amount of virtual or physical memory, 3) the gets * preempted by the ResourceManager, or 4) the container gets stopped by the ApplicationMaster. A * replacement container is needed in all but the last case. */ private void handleContainerCompletion(ContainerStatus containerStatus) { Map.Entry<Container, String> completedContainerEntry = this.containerMap.remove(containerStatus.getContainerId()); String completedInstanceName = completedContainerEntry.getValue(); LOGGER.info( String.format( "Container %s running Helix instance %s has completed with exit status %d", containerStatus.getContainerId(), completedInstanceName, containerStatus.getExitStatus())); if (!Strings.isNullOrEmpty(containerStatus.getDiagnostics())) { LOGGER.info( String.format( "Received the following diagnostics information for container %s: %s", containerStatus.getContainerId(), containerStatus.getDiagnostics())); } if (this.shutdownInProgress) { return; } int retryCount = this.helixInstanceRetryCount .putIfAbsent(completedInstanceName, new AtomicInteger(0)) .incrementAndGet(); // Populate event metadata Optional<ImmutableMap.Builder<String, String>> eventMetadataBuilder = Optional.absent(); if (this.eventSubmitter.isPresent()) { eventMetadataBuilder = Optional.of(buildContainerStatusEventMetadata(containerStatus)); eventMetadataBuilder .get() .put(GobblinYarnEventConstants.EventMetadata.HELIX_INSTANCE_ID, completedInstanceName); eventMetadataBuilder .get() .put( GobblinYarnEventConstants.EventMetadata.CONTAINER_STATUS_RETRY_ATTEMPT, retryCount + ""); } if (this.helixInstanceMaxRetries > 0 && retryCount > this.helixInstanceMaxRetries) { if (this.eventSubmitter.isPresent()) { this.eventSubmitter .get() .submit( GobblinYarnEventConstants.EventNames.HELIX_INSTANCE_COMPLETION, eventMetadataBuilder.get().build()); } LOGGER.warn( "Maximum number of retries has been achieved for Helix instance " + completedInstanceName); return; } // Add the Helix instance name of the completed container to the queue of unused // instance names so they can be reused by a replacement container. this.unusedHelixInstanceNames.offer(completedInstanceName); if (this.eventSubmitter.isPresent()) { this.eventSubmitter .get() .submit( GobblinYarnEventConstants.EventNames.HELIX_INSTANCE_COMPLETION, eventMetadataBuilder.get().build()); } LOGGER.info( String.format( "Requesting a new container to replace %s to run Helix instance %s", containerStatus.getContainerId(), completedInstanceName)); this.eventBus.post( new NewContainerRequest( shouldStickToTheSameNode(containerStatus.getExitStatus()) ? Optional.of(completedContainerEntry.getKey()) : Optional.<Container>absent())); }
@SuppressWarnings("unchecked") @Override public void onContainersCompleted(List<ContainerStatus> completedContainers) { LOG.info( "Got response from RM for container ask, completedCnt=" + completedContainers.size()); for (ContainerStatus containerStatus : completedContainers) { LOG.info( "Got container status for containerID=" + containerStatus.getContainerId() + ", state=" + containerStatus.getState() + ", exitStatus=" + containerStatus.getExitStatus() + ", diagnostics=" + containerStatus.getDiagnostics()); // non complete containers should not be here assert (containerStatus.getState() == ContainerState.COMPLETE); // increment counters for completed/failed containers int exitStatus = containerStatus.getExitStatus(); if (0 != exitStatus) { // container failed if (ContainerExitStatus.ABORTED != exitStatus) { // shell script failed // counts as completed numCompletedContainers.incrementAndGet(); numFailedContainers.incrementAndGet(); } else { // container was killed by framework, possibly preempted // we should re-try as the container was lost for some reason numAllocatedContainers.decrementAndGet(); numRequestedContainers.decrementAndGet(); // we do not need to release the container as it would be done // by the RM } } else { // nothing to do // container completed successfully numCompletedContainers.incrementAndGet(); LOG.info( "Container completed successfully." + ", containerId=" + containerStatus.getContainerId()); } try { publishContainerEndEvent(timelineClient, containerStatus); } catch (Exception e) { LOG.error( "Container start event could not be pulished for " + containerStatus.getContainerId().toString(), e); } } // ask for more containers if any failed int askCount = numTotalContainers - numRequestedContainers.get(); numRequestedContainers.addAndGet(askCount); if (askCount > 0) { for (int i = 0; i < askCount; ++i) { ContainerRequest containerAsk = setupContainerAskForRM(); amRMClient.addContainerRequest(containerAsk); } } if (numCompletedContainers.get() == numTotalContainers) { done = true; } }