Example #1
  @Before
  public void setUp() {
    Configuration conf = new Configuration();
    // Dispatcher that processes events inline
    Dispatcher dispatcher = new InlineDispatcher();

    dispatcher.register(RMNodeEventType.class, new TestRMNodeEventDispatcher());

    RMContext context =
        new RMContextImpl(dispatcher, null, null, null, null, null, null, null, null, null);
    dispatcher.register(SchedulerEventType.class, new InlineDispatcher.EmptyEventHandler());
    dispatcher.register(RMNodeEventType.class, new NodeEventDispatcher(context));
    NMLivelinessMonitor nmLivelinessMonitor = new NMLivelinessMonitor(dispatcher);
    nmLivelinessMonitor.init(conf);
    nmLivelinessMonitor.start();
    NodesListManager nodesListManager = new NodesListManager(context);
    nodesListManager.init(conf);
    RMContainerTokenSecretManager containerTokenSecretManager =
        new RMContainerTokenSecretManager(conf);
    containerTokenSecretManager.start();
    NMTokenSecretManagerInRM nmTokenSecretManager = new NMTokenSecretManagerInRM(conf);
    nmTokenSecretManager.start();
    resourceTrackerService =
        new ResourceTrackerService(
            context,
            nodesListManager,
            nmLivelinessMonitor,
            containerTokenSecretManager,
            nmTokenSecretManager);

    resourceTrackerService.init(conf);
    resourceTrackerService.start();
  }
 private static StubbedJob createStubbedJob(
     Configuration conf, Dispatcher dispatcher, int numSplits, AppContext appContext) {
   JobID jobID = JobID.forName("job_1234567890000_0001");
   JobId jobId = TypeConverter.toYarn(jobID);
   if (appContext == null) {
     appContext = mock(AppContext.class);
     when(appContext.hasSuccessfullyUnregistered()).thenReturn(true);
   }
   StubbedJob job =
       new StubbedJob(
           jobId,
           ApplicationAttemptId.newInstance(ApplicationId.newInstance(0, 0), 0),
           conf,
           dispatcher.getEventHandler(),
           true,
           "somebody",
           numSplits,
           appContext);
   dispatcher.register(JobEventType.class, job);
   EventHandler mockHandler = mock(EventHandler.class);
   dispatcher.register(TaskEventType.class, mockHandler);
   dispatcher.register(org.apache.hadoop.mapreduce.jobhistory.EventType.class, mockHandler);
   dispatcher.register(JobFinishEvent.Type.class, mockHandler);
   return job;
 }
  private static CommitterEventHandler createCommitterEventHandler(
      Dispatcher dispatcher, OutputCommitter committer) {
    final SystemClock clock = new SystemClock();
    AppContext appContext = mock(AppContext.class);
    when(appContext.getEventHandler()).thenReturn(dispatcher.getEventHandler());
    when(appContext.getClock()).thenReturn(clock);
    RMHeartbeatHandler heartbeatHandler =
        new RMHeartbeatHandler() {
          @Override
          public long getLastHeartbeatTime() {
            return clock.getTime();
          }

          @Override
          public void runOnNextHeartbeat(Runnable callback) {
            callback.run();
          }
        };
    ApplicationAttemptId id =
        ConverterUtils.toApplicationAttemptId("appattempt_1234567890000_0001_0");
    when(appContext.getApplicationID()).thenReturn(id.getApplicationId());
    when(appContext.getApplicationAttemptId()).thenReturn(id);
    CommitterEventHandler handler =
        new CommitterEventHandler(appContext, committer, heartbeatHandler);
    dispatcher.register(CommitterEventType.class, handler);
    return handler;
  }
  @SuppressWarnings("unchecked")
  protected void internalRemoveFromClusterNodeLabels(Collection<String> labelsToRemove) {
    // remove labels from nodes
    for (Map.Entry<String, Host> nodeEntry : nodeCollections.entrySet()) {
      Host host = nodeEntry.getValue();
      if (null != host) {
        host.labels.removeAll(labelsToRemove);
        for (Node nm : host.nms.values()) {
          if (nm.labels != null) {
            nm.labels.removeAll(labelsToRemove);
          }
        }
      }
    }

    // remove labels from node labels collection
    for (String label : labelsToRemove) {
      labelCollections.remove(label);
    }

    // create event to remove labels
    if (null != dispatcher) {
      dispatcher.getEventHandler().handle(new RemoveClusterNodeLabels(labelsToRemove));
    }

    LOG.info("Remove labels: [" + StringUtils.join(labelsToRemove.iterator(), ",") + "]");
  }
  @SuppressWarnings("unchecked")
  public void addToCluserNodeLabels(Collection<NodeLabel> labels) throws IOException {
    if (!nodeLabelsEnabled) {
      LOG.error(NODE_LABELS_NOT_ENABLED_ERR);
      throw new IOException(NODE_LABELS_NOT_ENABLED_ERR);
    }
    if (null == labels || labels.isEmpty()) {
      return;
    }
    List<NodeLabel> newLabels = new ArrayList<NodeLabel>();
    normalizeNodeLabels(labels);

    // Validate before actually adding: this throws an exception if any label name
    // does not meet the label-name requirements.
    for (NodeLabel label : labels) {
      checkAndThrowLabelName(label.getName());
    }

    for (NodeLabel label : labels) {
      // Don't overwrite an existing entry; overwriting would reset the label's resource.
      if (this.labelCollections.get(label.getName()) == null) {
        this.labelCollections.put(label.getName(), new RMNodeLabel(label));
        newLabels.add(label);
      }
    }
    if (null != dispatcher && !newLabels.isEmpty()) {
      dispatcher.getEventHandler().handle(new StoreNewClusterNodeLabels(newLabels));
    }

    LOG.info("Add labels: [" + StringUtils.join(labels.iterator(), ",") + "]");
  }
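A minimal sketch of the dispatcher pattern all of these snippets share: register an EventHandler against an event-type enum class, then deliver events through dispatcher.getEventHandler().handle(...). The DemoEventType and DemoEvent names below are illustrative stand-ins, not real Hadoop classes; only AsyncDispatcher, AbstractEvent and EventHandler come from org.apache.hadoop.yarn.event.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.event.AbstractEvent;
import org.apache.hadoop.yarn.event.AsyncDispatcher;
import org.apache.hadoop.yarn.event.EventHandler;

public class DispatcherPatternSketch {
  // Hypothetical event-type enum; each register() call keys off one such enum class.
  enum DemoEventType { DO_WORK }

  // Hypothetical event; AbstractEvent carries the type and a timestamp.
  static class DemoEvent extends AbstractEvent<DemoEventType> {
    DemoEvent(DemoEventType type) {
      super(type);
    }
  }

  @SuppressWarnings("unchecked")
  public static void main(String[] args) {
    AsyncDispatcher dispatcher = new AsyncDispatcher();
    dispatcher.init(new Configuration());
    dispatcher.start();

    // Register a handler for the enum class, just as the examples register handlers
    // for RMNodeEventType, SchedulerEventType, JobEventType, and so on.
    dispatcher.register(DemoEventType.class, new EventHandler<DemoEvent>() {
      @Override
      public void handle(DemoEvent event) {
        System.out.println("handled " + event.getType());
      }
    });

    // Deliver an event; the AsyncDispatcher hands it to the registered handler on
    // its own dispatch thread.
    dispatcher.getEventHandler().handle(new DemoEvent(DemoEventType.DO_WORK));

    dispatcher.stop();
  }
}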
Example #6
 /** Create and initialize (but don't start) a single job. */
 protected Job createJob(Configuration conf) {
   // create single job
   Job newJob = new JobInAppMaster(jobId, conf, context);
   ((RunningAppContext) context).jobs.put(newJob.getID(), newJob);
   dispatcher.register(JobFinishEvent.Type.class, createJobFinishEventHandler());
   return newJob;
 }
  /** Create and initialize (but don't start) a single dag. */
  protected DAG createDAG(DAGPlan dagPB) {
    TezDAGID dagId = new TezDAGID(appAttemptID.getApplicationId(), dagCounter.incrementAndGet());

    Iterator<PlanKeyValuePair> iter = dagPB.getDagKeyValues().getConfKeyValuesList().iterator();
    Configuration dagConf = new Configuration(amConf);

    while (iter.hasNext()) {
      PlanKeyValuePair keyValPair = iter.next();
      dagConf.set(keyValPair.getKey(), keyValPair.getValue());
    }

    // create single dag
    DAG newDag =
        new DAGImpl(
            dagId,
            dagConf,
            dagPB,
            dispatcher.getEventHandler(),
            taskAttemptListener,
            jobTokenSecretManager,
            fsTokens,
            clock,
            currentUser.getShortUserName(),
            taskHeartbeatHandler,
            context);

    return newDag;
  } // end createDAG()
Example #8
  @SuppressWarnings({"fallthrough", "unchecked"})
  private void finished() {
    ApplicationId applicationId = containerId.getApplicationAttemptId().getApplicationId();
    switch (getContainerState()) {
      case EXITED_WITH_SUCCESS:
        metrics.endRunningContainer();
        metrics.completedContainer();
        NMAuditLogger.logSuccess(
            user,
            AuditConstants.FINISH_SUCCESS_CONTAINER,
            "ContainerImpl",
            applicationId,
            containerId);
        break;
      case EXITED_WITH_FAILURE:
        if (wasLaunched) {
          metrics.endRunningContainer();
        }
        // fall through
      case LOCALIZATION_FAILED:
        metrics.failedContainer();
        NMAuditLogger.logFailure(
            user,
            AuditConstants.FINISH_FAILED_CONTAINER,
            "ContainerImpl",
            "Container failed with state: " + getContainerState(),
            applicationId,
            containerId);
        break;
      case CONTAINER_CLEANEDUP_AFTER_KILL:
        if (wasLaunched) {
          metrics.endRunningContainer();
        }
        // fall through
      case NEW:
        metrics.killedContainer();
        NMAuditLogger.logSuccess(
            user,
            AuditConstants.FINISH_KILLED_CONTAINER,
            "ContainerImpl",
            applicationId,
            containerId);
    }

    metrics.releaseContainer(this.resource);

    // Inform the application
    @SuppressWarnings("rawtypes")
    EventHandler eventHandler = dispatcher.getEventHandler();
    eventHandler.handle(new ApplicationContainerFinishedEvent(containerId));
    // Remove the container from the resource-monitor
    eventHandler.handle(new ContainerStopMonitoringEvent(containerId));
    // Tell the logService too
    eventHandler.handle(new LogHandlerContainerFinishedEvent(containerId, exitCode));
  }
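The finished() method above relies on deliberate switch fallthrough: EXITED_WITH_FAILURE shares the failure accounting with LOCALIZATION_FAILED, and CONTAINER_CLEANEDUP_AFTER_KILL shares the kill accounting with NEW, which is why the method carries @SuppressWarnings("fallthrough"). A stripped-down sketch of the same pattern, using made-up state names rather than the real ContainerState values:

// Illustrative states only; not the real ContainerState enum.
enum DemoContainerState { FAILED_AFTER_LAUNCH, FAILED_BEFORE_LAUNCH }

class FallthroughSketch {
  @SuppressWarnings("fallthrough")
  static void account(DemoContainerState state) {
    switch (state) {
      case FAILED_AFTER_LAUNCH:
        System.out.println("end running-container metric");
        // fall through: both states record a failed container
      case FAILED_BEFORE_LAUNCH:
        System.out.println("record failed container");
        break;
    }
  }
}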
Example #9
 @SuppressWarnings("unchecked") // dispatcher not typed
 public void cleanup() {
   Map<LocalResourceVisibility, Collection<LocalResourceRequest>> rsrc =
       new HashMap<LocalResourceVisibility, Collection<LocalResourceRequest>>();
   if (!publicRsrcs.isEmpty()) {
     rsrc.put(LocalResourceVisibility.PUBLIC, publicRsrcs);
   }
   if (!privateRsrcs.isEmpty()) {
     rsrc.put(LocalResourceVisibility.PRIVATE, privateRsrcs);
   }
   if (!appRsrcs.isEmpty()) {
     rsrc.put(LocalResourceVisibility.APPLICATION, appRsrcs);
   }
   dispatcher.getEventHandler().handle(new ContainerLocalizationCleanupEvent(this, rsrc));
 }
  @Override
  protected void serviceStart() throws Exception {
    if (nodeLabelsEnabled) {
      initNodeLabelStore(getConfig());
    }

    // Initialize the dispatcher only at service start: recovery happens during
    // service init, and we don't want to trigger any event handling at that time.
    initDispatcher(getConfig());

    if (null != dispatcher) {
      dispatcher.register(NodeLabelsStoreEventType.class, new ForwardingEventHandler());
    }

    startDispatcher();
  }
  @SuppressWarnings("unchecked")
  @Override
  public void serviceStart() throws Exception {

    // start all the components
    startServices();
    super.serviceStart();

    this.state = DAGAppMasterState.IDLE;

    // Metrics system init is really both init and start;
    // it is more test-friendly to put it here.
    DefaultMetricsSystem.initialize("DAGAppMaster");

    this.appsStartTime = clock.getTime();
    AMStartedEvent startEvent =
        new AMStartedEvent(appAttemptID, startTime, appsStartTime, appSubmitTime);
    dispatcher.getEventHandler().handle(new DAGHistoryEvent(startEvent));

    this.lastDAGCompletionTime = clock.getTime();

    if (!isSession) {
      startDAG();
    } else {
      LOG.info("In Session mode. Waiting for DAG over RPC");
      this.dagSubmissionTimer = new Timer(true);
      this.dagSubmissionTimer.scheduleAtFixedRate(
          new TimerTask() {
            @Override
            public void run() {
              checkAndHandleSessionTimeout();
            }
          },
          sessionTimeoutInterval,
          sessionTimeoutInterval / 10);
    }
  }
  @SuppressWarnings("unchecked")
  protected void internalUpdateLabelsOnNodes(
      Map<NodeId, Set<String>> nodeToLabels, NodeLabelUpdateOperation op) throws IOException {
    // update the labels on the nodes
    Map<NodeId, Set<String>> newNMToLabels = new HashMap<NodeId, Set<String>>();
    Set<String> oldLabels;
    for (Entry<NodeId, Set<String>> entry : nodeToLabels.entrySet()) {
      NodeId nodeId = entry.getKey();
      Set<String> labels = entry.getValue();

      createHostIfNonExisted(nodeId.getHost());
      if (nodeId.getPort() == WILDCARD_PORT) {
        Host host = nodeCollections.get(nodeId.getHost());
        switch (op) {
          case REMOVE:
            removeNodeFromLabels(nodeId, labels);
            host.labels.removeAll(labels);
            for (Node node : host.nms.values()) {
              if (node.labels != null) {
                node.labels.removeAll(labels);
              }
              removeNodeFromLabels(node.nodeId, labels);
            }
            break;
          case ADD:
            addNodeToLabels(nodeId, labels);
            host.labels.addAll(labels);
            for (Node node : host.nms.values()) {
              if (node.labels != null) {
                node.labels.addAll(labels);
              }
              addNodeToLabels(node.nodeId, labels);
            }
            break;
          case REPLACE:
            replaceNodeForLabels(nodeId, host.labels, labels);
            host.labels.clear();
            host.labels.addAll(labels);
            for (Node node : host.nms.values()) {
              replaceNodeForLabels(node.nodeId, node.labels, labels);
              node.labels = null;
            }
            break;
          default:
            break;
        }
        newNMToLabels.put(nodeId, host.labels);
      } else {
        if (EnumSet.of(NodeLabelUpdateOperation.ADD, NodeLabelUpdateOperation.REPLACE)
            .contains(op)) {
          // Add and replace
          createNodeIfNonExisted(nodeId);
          Node nm = getNMInNodeSet(nodeId);
          switch (op) {
            case ADD:
              addNodeToLabels(nodeId, labels);
              if (nm.labels == null) {
                nm.labels = new HashSet<String>();
              }
              nm.labels.addAll(labels);
              break;
            case REPLACE:
              oldLabels = getLabelsByNode(nodeId);
              replaceNodeForLabels(nodeId, oldLabels, labels);
              if (nm.labels == null) {
                nm.labels = new HashSet<String>();
              }
              nm.labels.clear();
              nm.labels.addAll(labels);
              break;
            default:
              break;
          }
          newNMToLabels.put(nodeId, nm.labels);
        } else {
          // remove
          removeNodeFromLabels(nodeId, labels);
          Node nm = getNMInNodeSet(nodeId);
          if (nm.labels != null) {
            nm.labels.removeAll(labels);
            newNMToLabels.put(nodeId, nm.labels);
          }
        }
      }
    }

    if (null != dispatcher && !isDistributedNodeLabelConfiguration) {
      // With DistributedNodeLabelConfiguration there is no need to save the
      // node-labels mapping to the back-end store: on RM restart/failover the
      // labels are collected again from the NMs via register/heartbeat.
      dispatcher.getEventHandler().handle(new UpdateNodeToLabelsMappingsEvent(newNMToLabels));
    }

    // Log the resulting node->labels mapping
    LOG.info(op.name() + " labels on nodes:");
    for (Entry<NodeId, Set<String>> entry : newNMToLabels.entrySet()) {
      LOG.info(
          "  NM="
              + entry.getKey()
              + ", labels=["
              + StringUtils.join(entry.getValue().iterator(), ",")
              + "]");
    }
  }
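The wildcard-port branch above (these label-manager snippets appear to come from, or closely mirror, Hadoop's CommonNodeLabelsManager) means a NodeId whose port equals WILDCARD_PORT addresses the whole host and every NM on it, while a concrete port addresses a single NM. A hedged usage sketch, assuming a started CommonNodeLabelsManager named mgr and a "gpu" label already added via addToCluserNodeLabels:

import java.io.IOException;
import java.util.Collections;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager;

public class NodeLabelWildcardSketch {
  static void applyGpuLabel(CommonNodeLabelsManager mgr) throws IOException {
    // Wildcard port: the label applies to the host "worker1" and every NM on it.
    mgr.replaceLabelsOnNode(Collections.singletonMap(
        NodeId.newInstance("worker1", CommonNodeLabelsManager.WILDCARD_PORT),
        Collections.singleton("gpu")));

    // Concrete port: the label applies only to the single NM at worker2:45454.
    mgr.replaceLabelsOnNode(Collections.singletonMap(
        NodeId.newInstance("worker2", 45454),
        Collections.singleton("gpu")));
  }
}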
Example #13
  @Override
  public void init(final Configuration conf) {

    // Get all needed security tokens.
    downloadTokensAndSetupUGI(conf);

    // Initialize the application context, name, attemptId and jobId
    context = new RunningAppContext();
    appName = conf.get(DragonJobConfig.JOB_NAME, "<missing app name>");
    conf.setInt(DragonJobConfig.APPLICATION_ATTEMPT_ID, appAttemptId.getAttemptId());

    jobId =
        JobId.newJobId(appAttemptId.getApplicationId(), appAttemptId.getApplicationId().getId());

    // service to hand out events
    dispatcher = createDispatcher();
    addIfService(dispatcher);

    // service to handle requests from JobClient
    clientService = createClientService(context);
    addIfService(clientService);

    // service to log job history events
    EventHandler<JobHistoryEvent> historyHandlerService = createJobHistoryHandler(context);
    dispatcher.register(
        org.apache.hadoop.realtime.jobhistory.EventType.class, historyHandlerService);

    // Initialize the JobEventDispatcher
    this.jobEventDispatcher = new JobEventDispatcher();

    // register the event dispatchers
    dispatcher.register(JobEventType.class, jobEventDispatcher);
    dispatcher.register(TaskEventType.class, new TaskEventDispatcher());
    dispatcher.register(TaskAttemptEventType.class, new TaskAttemptEventDispatcher());

    // service to handle requests to TaskUmbilicalProtocol
    childService = createChildService(context);
    addIfService(childService);
    dispatcher.register(ChildExecutionEventType.class, childService);

    // service to allocate containers from RM (if non-uber) or to fake it (uber)
    containerAllocator = createContainerAllocator(clientService, context);
    addIfService(containerAllocator);
    dispatcher.register(ContainerAllocator.EventType.class, containerAllocator);

    // corresponding service to launch allocated containers via NodeManager
    containerLauncher = createContainerLauncher(context);
    addIfService(containerLauncher);
    dispatcher.register(ContainerLauncher.EventType.class, containerLauncher);

    // dragon zookeeper service
    zkService = new DragonZKService(context);
    addIfService(zkService);
    dispatcher.register(ZKEventType.class, zkService);

    // Add the JobHistoryEventHandler last so that it is stopped first. This
    // guarantees that all history events are flushed before the AM proceeds
    // with shutdown.
    // Note: even though the JobHistoryEventHandler is started last, any
    // JobHistoryEvent created by another component in the meantime will simply
    // be queued inside the JobHistoryEventHandler.
    addIfService(historyHandlerService);

    super.init(conf);
  }
  @Override
  public void serviceInit(final Configuration conf) throws Exception {

    this.state = DAGAppMasterState.INITED;

    this.amConf = conf;
    conf.setBoolean(Dispatcher.DISPATCHER_EXIT_ON_ERROR_KEY, true);

    downloadTokensAndSetupUGI(conf);

    context = new RunningAppContext(conf);

    clientHandler = new DAGClientHandler();

    dispatcher = createDispatcher();
    addIfService(dispatcher, false);

    clientRpcServer = new DAGClientServer(clientHandler);
    addIfService(clientRpcServer, true);

    taskHeartbeatHandler = createTaskHeartbeatHandler(context, conf);
    addIfService(taskHeartbeatHandler, true);

    containerHeartbeatHandler = createContainerHeartbeatHandler(context, conf);
    addIfService(containerHeartbeatHandler, true);

    // service to handle requests to TaskUmbilicalProtocol
    taskAttemptListener =
        createTaskAttemptListener(context, taskHeartbeatHandler, containerHeartbeatHandler);
    addIfService(taskAttemptListener, true);

    containers = new AMContainerMap(containerHeartbeatHandler, taskAttemptListener, context);
    addIfService(containers, true);
    dispatcher.register(AMContainerEventType.class, containers);

    nodes = new AMNodeMap(dispatcher.getEventHandler(), context);
    addIfService(nodes, true);
    dispatcher.register(AMNodeEventType.class, nodes);

    // service to do the task cleanup
    taskCleaner = createTaskCleaner(context);
    addIfService(taskCleaner, true);

    this.dagEventDispatcher = new DagEventDispatcher();
    this.vertexEventDispatcher = new VertexEventDispatcher();

    // register the event dispatchers
    dispatcher.register(DAGAppMasterEventType.class, new DAGAppMasterEventHandler());
    dispatcher.register(DAGEventType.class, dagEventDispatcher);
    dispatcher.register(VertexEventType.class, vertexEventDispatcher);
    dispatcher.register(TaskEventType.class, new TaskEventDispatcher());
    dispatcher.register(TaskAttemptEventType.class, new TaskAttemptEventDispatcher());
    dispatcher.register(TaskCleaner.EventType.class, taskCleaner);

    taskSchedulerEventHandler =
        new TaskSchedulerEventHandler(context, clientRpcServer, dispatcher.getEventHandler());
    addIfService(taskSchedulerEventHandler, true);
    dispatcher.register(AMSchedulerEventType.class, taskSchedulerEventHandler);
    addIfServiceDependency(taskSchedulerEventHandler, clientRpcServer);

    containerLauncher = createContainerLauncher(context);
    addIfService(containerLauncher, true);
    dispatcher.register(NMCommunicatorEventType.class, containerLauncher);

    historyEventHandler = new HistoryEventHandler(context);
    addIfService(historyEventHandler, true);
    dispatcher.register(HistoryEventType.class, historyEventHandler);

    this.sessionTimeoutInterval =
        1000
            * amConf.getInt(
                TezConfiguration.TEZ_SESSION_AM_DAG_SUBMIT_TIMEOUT_SECS,
                TezConfiguration.TEZ_SESSION_AM_DAG_SUBMIT_TIMEOUT_SECS_DEFAULT);

    initServices(conf);
    super.serviceInit(conf);
  }
 @SuppressWarnings("unchecked")
 private void sendEvent(Event<?> event) {
   dispatcher.getEventHandler().handle(event);
 }
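The sendEvent() helper above, like the InlineDispatcher used in Example #1's setUp(), ultimately funnels everything through Dispatcher.getEventHandler(). Below is a rough, assumption-laden sketch of a synchronous dispatcher that invokes handlers on the calling thread; it is not Hadoop's actual InlineDispatcher test class, only an illustration of how such a dispatcher can be built against the Dispatcher interface.

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.event.Event;
import org.apache.hadoop.yarn.event.EventHandler;

@SuppressWarnings({"unchecked", "rawtypes"})
public class SynchronousDispatcherSketch implements Dispatcher {
  private final Map<Class<? extends Enum>, EventHandler> handlers =
      new HashMap<Class<? extends Enum>, EventHandler>();

  @Override
  public void register(Class<? extends Enum> eventType, EventHandler handler) {
    handlers.put(eventType, handler);
  }

  @Override
  public EventHandler<Event> getEventHandler() {
    return new EventHandler<Event>() {
      @Override
      public void handle(Event event) {
        // Route the event to the handler registered for its event-type enum class
        // and invoke it inline; there is no dispatcher thread or queue.
        EventHandler handler = handlers.get(event.getType().getDeclaringClass());
        if (handler != null) {
          handler.handle(event);
        }
      }
    };
  }
}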