private static void publishContainerEndEvent( TimelineClient timelineClient, ContainerStatus container) throws IOException, YarnException { TimelineEntity entity = new TimelineEntity(); entity.setEntityId(container.getContainerId().toString()); entity.setEntityType(DSEntity.DS_CONTAINER.toString()); entity.addPrimaryFilter("user", UserGroupInformation.getCurrentUser().toString()); TimelineEvent event = new TimelineEvent(); event.setTimestamp(System.currentTimeMillis()); event.setEventType(DSEvent.DS_CONTAINER_END.toString()); event.addEventInfo("State", container.getState().name()); event.addEventInfo("Exit Status", container.getExitStatus()); entity.addEvent(event); timelineClient.putEntities(entity); }
private NodeStatus getNodeStatus() { NodeStatus nodeStatus = recordFactory.newRecordInstance(NodeStatus.class); nodeStatus.setNodeId(this.nodeId); int numActiveContainers = 0; List<ContainerStatus> containersStatuses = new ArrayList<ContainerStatus>(); for (Iterator<Entry<ContainerId, Container>> i = this.context.getContainers().entrySet().iterator(); i.hasNext(); ) { Entry<ContainerId, Container> e = i.next(); ContainerId containerId = e.getKey(); Container container = e.getValue(); // Clone the container to send it to the RM org.apache.hadoop.yarn.api.records.ContainerStatus containerStatus = container.cloneAndGetContainerStatus(); containersStatuses.add(containerStatus); ++numActiveContainers; LOG.info("Sending out status for container: " + containerStatus); if (containerStatus.getState() == ContainerState.COMPLETE) { // Remove i.remove(); LOG.info("Removed completed container " + containerId); } } nodeStatus.setContainersStatuses(containersStatuses); LOG.debug(this.nodeId + " sending out status for " + numActiveContainers + " containers"); NodeHealthStatus nodeHealthStatus = this.context.getNodeHealthStatus(); if (this.healthChecker != null) { this.healthChecker.setHealthStatus(nodeHealthStatus); } LOG.debug( "Node's health-status : " + nodeHealthStatus.getIsNodeHealthy() + ", " + nodeHealthStatus.getHealthReport()); nodeStatus.setNodeHealthStatus(nodeHealthStatus); return nodeStatus; }
private ImmutableMap.Builder<String, String> buildContainerStatusEventMetadata( ContainerStatus containerStatus) { ImmutableMap.Builder<String, String> eventMetadataBuilder = new ImmutableMap.Builder<>(); eventMetadataBuilder.put( GobblinYarnMetricTagNames.CONTAINER_ID, containerStatus.getContainerId().toString()); eventMetadataBuilder.put( GobblinYarnEventConstants.EventMetadata.CONTAINER_STATUS_CONTAINER_STATE, containerStatus.getState().toString()); if (ContainerExitStatus.INVALID != containerStatus.getExitStatus()) { eventMetadataBuilder.put( GobblinYarnEventConstants.EventMetadata.CONTAINER_STATUS_EXIT_STATUS, containerStatus.getExitStatus() + ""); } if (!Strings.isNullOrEmpty(containerStatus.getDiagnostics())) { eventMetadataBuilder.put( GobblinYarnEventConstants.EventMetadata.CONTAINER_STATUS_EXIT_DIAGNOSTICS, containerStatus.getDiagnostics()); } return eventMetadataBuilder; }
@SuppressWarnings("unchecked") @Override public void onContainersCompleted(List<ContainerStatus> completedContainers) { LOG.info( "Got response from RM for container ask, completedCnt=" + completedContainers.size()); for (ContainerStatus containerStatus : completedContainers) { LOG.info( "Got container status for containerID=" + containerStatus.getContainerId() + ", state=" + containerStatus.getState() + ", exitStatus=" + containerStatus.getExitStatus() + ", diagnostics=" + containerStatus.getDiagnostics()); // non complete containers should not be here assert (containerStatus.getState() == ContainerState.COMPLETE); // increment counters for completed/failed containers int exitStatus = containerStatus.getExitStatus(); if (0 != exitStatus) { // container failed if (ContainerExitStatus.ABORTED != exitStatus) { // shell script failed // counts as completed numCompletedContainers.incrementAndGet(); numFailedContainers.incrementAndGet(); } else { // container was killed by framework, possibly preempted // we should re-try as the container was lost for some reason numAllocatedContainers.decrementAndGet(); numRequestedContainers.decrementAndGet(); // we do not need to release the container as it would be done // by the RM } } else { // nothing to do // container completed successfully numCompletedContainers.incrementAndGet(); LOG.info( "Container completed successfully." + ", containerId=" + containerStatus.getContainerId()); } try { publishContainerEndEvent(timelineClient, containerStatus); } catch (Exception e) { LOG.error( "Container start event could not be pulished for " + containerStatus.getContainerId().toString(), e); } } // ask for more containers if any failed int askCount = numTotalContainers - numRequestedContainers.get(); numRequestedContainers.addAndGet(askCount); if (askCount > 0) { for (int i = 0; i < askCount; ++i) { ContainerRequest containerAsk = setupContainerAskForRM(); amRMClient.addContainerRequest(containerAsk); } } if (numCompletedContainers.get() == numTotalContainers) { done = true; } }