@Override
public synchronized void start() {
  this.containerAllocator = new RMContainerAllocator(this.context);
  ((Service) this.containerAllocator).init(getConfig());
  ((Service) this.containerAllocator).start();
  super.start();
}
@Override
public synchronized void start() {
  this.containerLauncher = new ContainerLauncherImpl(context);
  ((Service) this.containerLauncher).init(getConfig());
  ((Service) this.containerLauncher).start();
  super.start();
}
@Override
public void start() {
  // NodeManager is the last service to start, so NodeId is available.
  this.nodeId = this.context.getNodeId();

  String httpBindAddressStr = getConfig().get(
      YarnConfiguration.NM_WEBAPP_ADDRESS,
      YarnConfiguration.DEFAULT_NM_WEBAPP_ADDRESS);
  InetSocketAddress httpBindAddress = NetUtils.createSocketAddr(
      httpBindAddressStr,
      YarnConfiguration.DEFAULT_NM_WEBAPP_PORT,
      YarnConfiguration.NM_WEBAPP_ADDRESS);
  try {
    // this.hostName = InetAddress.getLocalHost().getCanonicalHostName();
    this.httpPort = httpBindAddress.getPort();
    // Registration has to be in start so that ContainerManager can get the
    // per-NM tokens needed to authenticate ContainerTokens.
    registerWithRM();
    super.start();
    startStatusUpdater();
  } catch (Exception e) {
    throw new AvroRuntimeException(e);
  }
}
@Override
public synchronized void stop() {
  try {
    accepted.close().awaitUninterruptibly(10, TimeUnit.SECONDS);
    ServerBootstrap bootstrap = new ServerBootstrap(selector);
    bootstrap.releaseExternalResources();
    pipelineFact.destroy();
    localFS.close();
  } catch (Throwable t) {
    LOG.error(t);
  } finally {
    super.stop();
  }
}
@Override
public synchronized void init(Configuration conf) {
  this.rmAddress = conf.get(
      YarnConfiguration.RM_RESOURCE_TRACKER_ADDRESS,
      YarnConfiguration.DEFAULT_RM_RESOURCE_TRACKER_ADDRESS);
  this.heartBeatInterval = conf.getLong(
      YarnConfiguration.NM_TO_RM_HEARTBEAT_INTERVAL_MS,
      YarnConfiguration.DEFAULT_NM_TO_RM_HEARTBEAT_INTERVAL_MS);
  int memoryMb = conf.getInt(YarnConfiguration.NM_PMEM_MB,
      YarnConfiguration.DEFAULT_NM_PMEM_MB);
  this.totalResource = recordFactory.newRecordInstance(Resource.class);
  this.totalResource.setMemory(memoryMb);
  metrics.addResource(totalResource);
  super.init(conf);
}
@Override
public void start() {
  eventHandlingThread = new Thread(new Runnable() {
    @Override
    public void run() {
      JobHistoryEvent event = null;
      while (!stopped && !Thread.currentThread().isInterrupted()) {
        // Log the size of the history-event queue every so often.
        if (eventCounter != 0 && eventCounter % 1000 == 0) {
          eventCounter = 0;
          LOG.info("Size of the JobHistory event queue is "
              + eventQueue.size());
        } else {
          eventCounter++;
        }
        try {
          event = eventQueue.take();
        } catch (InterruptedException e) {
          LOG.info("EventQueue take interrupted. Returning");
          return;
        }
        // An event has been removed from the queue; handle it. Anything
        // left in the queue at shutdown is handled via stop().
        // Clear the interrupt status before calling handleEvent, and
        // restore it afterwards if it was set. Interrupts received from
        // other threads during handleEvent cannot be dealt with -
        // Shell.runCommand() ignores them.
        synchronized (lock) {
          boolean isInterrupted = Thread.interrupted();
          handleEvent(event);
          if (isInterrupted) {
            Thread.currentThread().interrupt();
          }
        }
      }
    }
  });
  eventHandlingThread.start();
  super.start();
}
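// A minimal, self-contained sketch (hypothetical class, not from the handler
// above) of the interrupt-preservation idiom used in the run() loop: clear
// the thread's interrupt status before invoking a handler that swallows
// interrupts, then restore it so the outer loop condition still observes it.
class InterruptPreservingLoop implements Runnable {
  private final java.util.concurrent.BlockingQueue<Runnable> queue =
      new java.util.concurrent.LinkedBlockingQueue<Runnable>();

  @Override
  public void run() {
    while (!Thread.currentThread().isInterrupted()) {
      Runnable task;
      try {
        task = queue.take(); // blocks; throws if interrupted while waiting
      } catch (InterruptedException e) {
        return;
      }
      boolean wasInterrupted = Thread.interrupted(); // reads AND clears flag
      task.run(); // handler that may ignore or clear interrupts internally
      if (wasInterrupted) {
        Thread.currentThread().interrupt(); // restore for the loop check
      }
    }
  }
}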
// TODO change AbstractService to throw InterruptedException
@Override
public synchronized void start() {
  Configuration conf = getConfig();
  ServerBootstrap bootstrap = new ServerBootstrap(selector);
  try {
    pipelineFact = new HttpPipelineFactory(conf);
  } catch (Exception ex) {
    throw new RuntimeException(ex);
  }
  bootstrap.setPipelineFactory(pipelineFact);
  port = conf.getInt(ConfVars.PULLSERVER_PORT.varname,
      ConfVars.PULLSERVER_PORT.defaultIntVal);
  Channel ch = bootstrap.bind(new InetSocketAddress(port));
  accepted.add(ch);
  port = ((InetSocketAddress) ch.getLocalAddress()).getPort();
  conf.set(ConfVars.PULLSERVER_PORT.varname, Integer.toString(port));
  pipelineFact.PullServer.setPort(port);
  LOG.info(getName() + " listening on port " + port);
  super.start();
  sslFileBufferSize = conf.getInt(SUFFLE_SSL_FILE_BUFFER_SIZE_KEY,
      DEFAULT_SUFFLE_SSL_FILE_BUFFER_SIZE);
}
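// Hedged sketch (hypothetical class, not part of the service above) of the
// bind-then-read-back pattern in start(): binding to port 0 asks the OS for
// an ephemeral port, and the port actually chosen must be read back from the
// bound socket before it can be advertised to clients, which is why the code
// above overwrites `port` from ch.getLocalAddress() after bind().
class EphemeralPortExample {
  public static void main(String[] args) throws java.io.IOException {
    java.net.ServerSocket server = new java.net.ServerSocket();
    server.bind(new java.net.InetSocketAddress(0)); // 0 = OS picks a port
    int actualPort = server.getLocalPort(); // read back the chosen port
    System.out.println("listening on port " + actualPort);
    server.close();
  }
}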
@Override
public synchronized void init(Configuration conf) {
  try {
    manageOsCache = conf.getBoolean(SHUFFLE_MANAGE_OS_CACHE,
        DEFAULT_SHUFFLE_MANAGE_OS_CACHE);
    readaheadLength = conf.getInt(SHUFFLE_READAHEAD_BYTES,
        DEFAULT_SHUFFLE_READAHEAD_BYTES);
    ThreadFactory bossFactory = new ThreadFactoryBuilder()
        .setNameFormat("PullServerAuxService Netty Boss #%d")
        .build();
    ThreadFactory workerFactory = new ThreadFactoryBuilder()
        .setNameFormat("PullServerAuxService Netty Worker #%d")
        .build();
    selector = new NioServerSocketChannelFactory(
        Executors.newCachedThreadPool(bossFactory),
        Executors.newCachedThreadPool(workerFactory));
    localFS = new LocalFileSystem();
    super.init(new Configuration(conf));
  } catch (Throwable t) {
    LOG.error(t);
  }
}
@Override
public synchronized void stop() {
  ((Service) this.containerLauncher).stop();
  super.stop();
}
@Override
public synchronized void stop() {
  // Interrupt the updater.
  this.isStopped = true;
  super.stop();
}
@Override
public void stop() {
  LOG.info("Stopping JobHistoryEventHandler. "
      + "Size of the outstanding event queue is " + eventQueue.size());
  stopped = true;
  // Do not interrupt while event handling is in progress.
  synchronized (lock) {
    if (eventHandlingThread != null)
      eventHandlingThread.interrupt();
  }
  try {
    if (eventHandlingThread != null)
      eventHandlingThread.join();
  } catch (InterruptedException ie) {
    LOG.info("InterruptedException while stopping", ie);
  }

  // Cancel all timers so that they aren't invoked during or after
  // the metaInfo object is wrapped up.
  for (MetaInfo mi : fileMap.values()) {
    try {
      mi.shutDownTimer();
    } catch (IOException e) {
      LOG.info("Exception while cancelling delayed flush timer. "
          + "Likely caused by a failed flush " + e.getMessage());
    }
  }

  // Write all the events remaining in the queue.
  Iterator<JobHistoryEvent> it = eventQueue.iterator();
  while (it.hasNext()) {
    JobHistoryEvent ev = it.next();
    LOG.info("In stop, writing event " + ev.getType());
    handleEvent(ev);
  }

  // Process a JobUnsuccessfulCompletionEvent for jobIds which still haven't
  // closed their event writers.
  Iterator<JobId> jobIt = fileMap.keySet().iterator();
  if (isSignalled) {
    while (jobIt.hasNext()) {
      JobId toClose = jobIt.next();
      MetaInfo mi = fileMap.get(toClose);
      if (mi != null && mi.isWriterActive()) {
        LOG.warn("Found jobId " + toClose
            + " to have not been closed. Will close");
        // Create a JobUnsuccessfulCompletionEvent so that it is written to
        // the job history.
        JobUnsuccessfulCompletionEvent jucEvent =
            new JobUnsuccessfulCompletionEvent(
                TypeConverter.fromYarn(toClose),
                System.currentTimeMillis(),
                context.getJob(toClose).getCompletedMaps(),
                context.getJob(toClose).getCompletedReduces(),
                JobState.KILLED.toString());
        JobHistoryEvent jfEvent = new JobHistoryEvent(toClose, jucEvent);
        // Bypass the queue mechanism, which might wait; call the method
        // directly.
        handleEvent(jfEvent);
      }
    }
  }

  // Close all file handles.
  for (MetaInfo mi : fileMap.values()) {
    try {
      mi.closeWriter();
    } catch (IOException e) {
      LOG.info("Exception while closing file " + e.getMessage());
    }
  }
  LOG.info("Stopped JobHistoryEventHandler. super.stop()");
  super.stop();
}
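// Hedged sketch (hypothetical names, not from the handler above) of the
// stop-flag-plus-drain shutdown pattern used in stop(): signal the consumer
// thread, interrupt it out of a blocking take(), join it, then process
// whatever is still queued on the caller's thread so no event is lost.
class DrainOnStop {
  private final java.util.concurrent.BlockingQueue<String> queue =
      new java.util.concurrent.LinkedBlockingQueue<String>();
  private volatile boolean stopped = false;
  private Thread consumer;

  void start() {
    consumer = new Thread(new Runnable() {
      @Override
      public void run() {
        while (!stopped) {
          try {
            handle(queue.take()); // blocks until an event arrives
          } catch (InterruptedException e) {
            return; // interrupted by stop()
          }
        }
      }
    });
    consumer.start();
  }

  void stop() throws InterruptedException {
    stopped = true;       // consumer's loop checks this flag
    consumer.interrupt(); // unblock a pending queue.take()
    consumer.join();      // wait for in-flight handling to finish
    String event;
    while ((event = queue.poll()) != null) {
      handle(event);      // drain remaining events synchronously
    }
  }

  void handle(String event) { /* write the event */ }
}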
/*
 * (non-Javadoc)
 * @see org.apache.hadoop.yarn.service.AbstractService#init(org.apache.hadoop.conf.Configuration)
 *
 * Initializes the FileSystem and Path objects for the log and done
 * directories. Creates these directories if they do not already exist.
 */
@Override
public void init(Configuration conf) {
  String stagingDirStr = null;
  String doneDirStr = null;
  String userDoneDirStr = null;
  try {
    stagingDirStr = JobHistoryUtils.getConfiguredHistoryStagingDirPrefix(conf);
    doneDirStr = JobHistoryUtils.getConfiguredHistoryIntermediateDoneDirPrefix(conf);
    userDoneDirStr = JobHistoryUtils.getHistoryIntermediateDoneDirForUser(conf);
  } catch (IOException e) {
    LOG.error("Failed while getting the configured log directories", e);
    throw new YarnException(e);
  }

  // Check for the existence of the history staging dir. Maybe create it.
  try {
    stagingDirPath = FileSystem.get(conf).makeQualified(new Path(stagingDirStr));
    stagingDirFS = FileSystem.get(stagingDirPath.toUri(), conf);
    mkdir(stagingDirFS, stagingDirPath,
        new FsPermission(JobHistoryUtils.HISTORY_STAGING_DIR_PERMISSIONS));
  } catch (IOException e) {
    LOG.error("Failed while checking for/creating history staging path: ["
        + stagingDirPath + "]", e);
    throw new YarnException(e);
  }

  // Check for the existence of the intermediate done dir.
  Path doneDirPath = null;
  try {
    doneDirPath = FileSystem.get(conf).makeQualified(new Path(doneDirStr));
    doneDirFS = FileSystem.get(doneDirPath.toUri(), conf);
    // This directory will be in a common location, or this may be a cluster
    // meant for a single user. Creating based on the conf. Should ideally be
    // created by the JobHistoryServer or as part of deployment.
    if (!doneDirFS.exists(doneDirPath)) {
      if (JobHistoryUtils.shouldCreateNonUserDirectory(conf)) {
        LOG.info("Creating intermediate history logDir: [" + doneDirPath
            + "] based on conf. Should ideally be created by the JobHistoryServer: "
            + MRJobConfig.MR_AM_CREATE_JH_INTERMEDIATE_BASE_DIR);
        mkdir(doneDirFS, doneDirPath, new FsPermission(
            JobHistoryUtils.HISTORY_INTERMEDIATE_DONE_DIR_PERMISSIONS.toShort()));
        // TODO Temporary toShort till new FsPermission(FsPermissions)
        // respects sticky
      } else {
        String message = "Not creating intermediate history logDir: ["
            + doneDirPath + "] based on conf: "
            + MRJobConfig.MR_AM_CREATE_JH_INTERMEDIATE_BASE_DIR
            + ". Either set to true or pre-create this directory with"
            + " appropriate permissions";
        LOG.error(message);
        throw new YarnException(message);
      }
    }
  } catch (IOException e) {
    LOG.error("Failed checking for the existence of history intermediate "
        + "done directory: [" + doneDirPath + "]");
    throw new YarnException(e);
  }

  // Check/create the user directory under the intermediate done dir.
  try {
    doneDirPrefixPath = FileSystem.get(conf).makeQualified(new Path(userDoneDirStr));
    mkdir(doneDirFS, doneDirPrefixPath, new FsPermission(
        JobHistoryUtils.HISTORY_INTERMEDIATE_USER_DIR_PERMISSIONS));
  } catch (IOException e) {
    LOG.error("Error creating user intermediate history done directory: ["
        + doneDirPrefixPath + "]", e);
    throw new YarnException(e);
  }

  // Maximum number of unflushed completion-events that can stay in the queue
  // before flush kicks in.
  maxUnflushedCompletionEvents = conf.getInt(
      MRJobConfig.MR_AM_HISTORY_MAX_UNFLUSHED_COMPLETE_EVENTS,
      MRJobConfig.DEFAULT_MR_AM_HISTORY_MAX_UNFLUSHED_COMPLETE_EVENTS);

  // We want to cut down flushes after the job completes so as to write
  // quicker, so we increase maxUnflushedCompletionEvents post job completion
  // by the following multiplier.
  postJobCompletionMultiplier = conf.getInt(
      MRJobConfig.MR_AM_HISTORY_JOB_COMPLETE_UNFLUSHED_MULTIPLIER,
      MRJobConfig.DEFAULT_MR_AM_HISTORY_JOB_COMPLETE_UNFLUSHED_MULTIPLIER);

  // Max time until which flush doesn't take place.
  flushTimeout = conf.getLong(
      MRJobConfig.MR_AM_HISTORY_COMPLETE_EVENT_FLUSH_TIMEOUT_MS,
      MRJobConfig.DEFAULT_MR_AM_HISTORY_COMPLETE_EVENT_FLUSH_TIMEOUT_MS);

  minQueueSizeForBatchingFlushes = conf.getInt(
      MRJobConfig.MR_AM_HISTORY_USE_BATCHED_FLUSH_QUEUE_SIZE_THRESHOLD,
      MRJobConfig.DEFAULT_MR_AM_HISTORY_USE_BATCHED_FLUSH_QUEUE_SIZE_THRESHOLD);

  super.init(conf);
}
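// Hedged sketch (hypothetical classes, simplified stand-ins for YARN's
// Service/AbstractService) of the composite-service lifecycle the methods
// above follow: a parent creates a child in start(), pushes its own
// Configuration into the child via init(getConfig()), starts it, and only
// then calls super.start(); stop() tears down in the reverse order.
interface SketchService {
  void init(org.apache.hadoop.conf.Configuration conf);
  void start();
  void stop();
}

abstract class SketchAbstractService implements SketchService {
  private org.apache.hadoop.conf.Configuration config;
  public void init(org.apache.hadoop.conf.Configuration conf) { this.config = conf; }
  public void start() { /* mark STARTED */ }
  public void stop() { /* mark STOPPED */ }
  protected org.apache.hadoop.conf.Configuration getConfig() { return config; }
}

class ParentService extends SketchAbstractService {
  private SketchService child;

  @Override
  public synchronized void start() {
    child = new ChildService(); // create late, once config is final
    child.init(getConfig());    // propagate the parent's configuration
    child.start();              // child must be live before the parent
    super.start();
  }

  @Override
  public synchronized void stop() {
    child.stop();               // reverse order: child first
    super.stop();
  }
}

class ChildService extends SketchAbstractService { }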