/**
 * Attempts to set up the done directory and then the intermediate done directory.
 *
 * <p>The done directory is handled first. The intermediate done directory is only
 * attempted when the done-directory step completed.
 *
 * @param logWait whether to emit an informational "waiting" log entry when a step is
 *     abandoned because the FileSystem is unreachable or in safe mode
 * @return {@code true} if both steps completed; {@code false} if either step was
 *     abandoned because of a {@link ConnectException} or because
 *     {@code isBecauseSafeMode} accepted the failure
 * @throws IOException declared on the signature; an {@link IOException} raised inside
 *     either creation step is caught here and is either reported through the return
 *     value or rethrown wrapped in a {@code YarnRuntimeException}
 */
@VisibleForTesting
boolean tryCreatingHistoryDirs(boolean logWait) throws IOException {
  // ---- Step 1: done directory ----
  final String configuredDoneDir =
      JobHistoryUtils.getConfiguredHistoryServerDoneDirPrefix(conf);
  try {
    doneDirPrefixPath =
        FileContext.getFileContext(conf).makeQualified(new Path(configuredDoneDir));
    doneDirFc = FileContext.getFileContext(doneDirPrefixPath.toUri(), conf);
    doneDirFc.setUMask(JobHistoryUtils.HISTORY_DONE_DIR_UMASK);
    mkdir(
        doneDirFc,
        doneDirPrefixPath,
        new FsPermission(JobHistoryUtils.HISTORY_DONE_DIR_PERMISSION));
  } catch (ConnectException unreachable) {
    // FileSystem could not be contacted: optionally say we are waiting, then give up.
    if (logWait) {
      LOG.waiting_for_filesystem_available(
              String.valueOf(doneDirPrefixPath.toUri().getAuthority()))
          .tag("methodCall")
          .info();
    }
    return false;
  } catch (IOException failure) {
    // Anything that is not a safe-mode condition is fatal for the caller.
    if (!isBecauseSafeMode(failure)) {
      throw new YarnRuntimeException(
          "Error creating done directory: [" + doneDirPrefixPath + "]", failure);
    }
    if (logWait) {
      LOG.waiting_for_filesystem_out_safe_mode(
              String.valueOf(doneDirPrefixPath.toUri().getAuthority()))
          .tag("methodCall")
          .info();
    }
    return false;
  }

  // ---- Step 2: intermediate done directory (reached only when step 1 completed) ----
  final String configuredIntermediateDir =
      JobHistoryUtils.getConfiguredHistoryIntermediateDoneDirPrefix(conf);
  try {
    intermediateDoneDirPath =
        FileContext.getFileContext(conf).makeQualified(new Path(configuredIntermediateDir));
    intermediateDoneDirFc = FileContext.getFileContext(intermediateDoneDirPath.toUri(), conf);
    mkdir(
        intermediateDoneDirFc,
        intermediateDoneDirPath,
        new FsPermission(JobHistoryUtils.HISTORY_INTERMEDIATE_DONE_DIR_PERMISSIONS.toShort()));
  } catch (ConnectException unreachable) {
    if (logWait) {
      LOG.waiting_for_filesystem_available(
              String.valueOf(intermediateDoneDirPath.toUri().getAuthority()))
          .tag("methodCall")
          .info();
    }
    return false;
  } catch (IOException failure) {
    if (!isBecauseSafeMode(failure)) {
      throw new YarnRuntimeException(
          "Error creating intermediate done directory: [" + intermediateDoneDirPath + "]",
          failure);
    }
    if (logWait) {
      LOG.waiting_for_filesystem_out_safe_mode(
              String.valueOf(intermediateDoneDirPath.toUri().getAuthority()))
          .tag("methodCall")
          .info();
    }
    return false;
  }

  return true;
}
/**
 * Initializes this service from the supplied configuration.
 *
 * <p>In order, this method:
 * <ol>
 *   <li>stores {@code conf} and builds the serial-number format string;</li>
 *   <li>qualifies the configured done directory, sets the umask on its
 *       {@code FileContext} and calls {@code mkdir} for it;</li>
 *   <li>does the same (without a umask) for the intermediate done directory;</li>
 *   <li>creates the ACL manager, the job-list cache and the serial-number index from
 *       their configured sizes;</li>
 *   <li>creates the executor used for the "MoveIntermediateToDone" threads;</li>
 *   <li>delegates to {@code super.serviceInit(conf)}.</li>
 * </ol>
 *
 * @param conf configuration to read settings from; also stored in {@code this.conf}
 * @throws YarnRuntimeException if an {@link IOException} occurs while setting up
 *     either directory (the original exception is kept as the cause)
 * @throws Exception as declared by the overridden method
 */
@Override
protected void serviceInit(Configuration conf) throws Exception {
  this.conf = conf;

  // Zero-padded decimal format whose width is the directory digits plus three low digits.
  int serialNumberLowDigits = 3;
  serialNumberFormat =
      ("%0" + (JobHistoryUtils.SERIAL_NUMBER_DIRECTORY_DIGITS + serialNumberLowDigits) + "d");

  String doneDirPrefix = JobHistoryUtils.getConfiguredHistoryServerDoneDirPrefix(conf);
  try {
    doneDirPrefixPath = FileContext.getFileContext(conf).makeQualified(new Path(doneDirPrefix));
    doneDirFc = FileContext.getFileContext(doneDirPrefixPath.toUri(), conf);
    doneDirFc.setUMask(JobHistoryUtils.HISTORY_DONE_DIR_UMASK);
    mkdir(
        doneDirFc,
        doneDirPrefixPath,
        new FsPermission(JobHistoryUtils.HISTORY_DONE_DIR_PERMISSION));
  } catch (IOException e) {
    throw new YarnRuntimeException(
        "Error creating done directory: [" + doneDirPrefixPath + "]", e);
  }

  String intermediateDoneDirPrefix =
      JobHistoryUtils.getConfiguredHistoryIntermediateDoneDirPrefix(conf);
  try {
    intermediateDoneDirPath =
        FileContext.getFileContext(conf).makeQualified(new Path(intermediateDoneDirPrefix));
    intermediateDoneDirFc = FileContext.getFileContext(intermediateDoneDirPath.toUri(), conf);
    mkdir(
        intermediateDoneDirFc,
        intermediateDoneDirPath,
        new FsPermission(JobHistoryUtils.HISTORY_INTERMEDIATE_DONE_DIR_PERMISSIONS.toShort()));
  } catch (IOException e) {
    // This failure aborts initialization, so log it as an error, name the directory
    // that actually failed, and hand the throwable to the logger so the stack trace
    // is not flattened into the message text.
    LOG.error("Error creating intermediate done directory on dfs", e);
    throw new YarnRuntimeException(
        "Error creating intermediate done directory: [" + intermediateDoneDirPath + "]", e);
  }

  this.aclsMgr = new JobACLsManager(conf);

  maxHistoryAge =
      conf.getLong(JHAdminConfig.MR_HISTORY_MAX_AGE_MS, JHAdminConfig.DEFAULT_MR_HISTORY_MAX_AGE);

  jobListCache =
      new JobListCache(
          conf.getInt(
              JHAdminConfig.MR_HISTORY_JOBLIST_CACHE_SIZE,
              JHAdminConfig.DEFAULT_MR_HISTORY_JOBLIST_CACHE_SIZE),
          maxHistoryAge);

  serialNumberIndex =
      new SerialNumberIndex(
          conf.getInt(
              JHAdminConfig.MR_HISTORY_DATESTRING_CACHE_SIZE,
              JHAdminConfig.DEFAULT_MR_HISTORY_DATESTRING_CACHE_SIZE));

  // Executor whose core and maximum pool sizes are both the configured thread count,
  // fed by a LinkedBlockingQueue created without a capacity bound.
  int numMoveThreads =
      conf.getInt(
          JHAdminConfig.MR_HISTORY_MOVE_THREAD_COUNT,
          JHAdminConfig.DEFAULT_MR_HISTORY_MOVE_THREAD_COUNT);
  ThreadFactory tf =
      new ThreadFactoryBuilder().setNameFormat("MoveIntermediateToDone Thread #%d").build();
  moveToDoneExecutor =
      new ThreadPoolExecutor(
          numMoveThreads,
          numMoveThreads,
          1,
          TimeUnit.HOURS,
          new LinkedBlockingQueue<Runnable>(),
          tf);

  super.serviceInit(conf);
}
/* (non-Javadoc) * @see org.apache.hadoop.yarn.service.AbstractService#init(org. * apache.hadoop.conf.Configuration) * Initializes the FileSystem and Path objects for the log and done directories. * Creates these directories if they do not already exist. */ @Override public void init(Configuration conf) { String stagingDirStr = null; String doneDirStr = null; String userDoneDirStr = null; try { stagingDirStr = JobHistoryUtils.getConfiguredHistoryStagingDirPrefix(conf); doneDirStr = JobHistoryUtils.getConfiguredHistoryIntermediateDoneDirPrefix(conf); userDoneDirStr = JobHistoryUtils.getHistoryIntermediateDoneDirForUser(conf); } catch (IOException e) { LOG.error("Failed while getting the configured log directories", e); throw new YarnException(e); } // Check for the existence of the history staging dir. Maybe create it. try { stagingDirPath = FileSystem.get(conf).makeQualified(new Path(stagingDirStr)); stagingDirFS = FileSystem.get(stagingDirPath.toUri(), conf); mkdir( stagingDirFS, stagingDirPath, new FsPermission(JobHistoryUtils.HISTORY_STAGING_DIR_PERMISSIONS)); } catch (IOException e) { LOG.error( "Failed while checking for/creating history staging path: [" + stagingDirPath + "]", e); throw new YarnException(e); } // Check for the existence of intermediate done dir. Path doneDirPath = null; try { doneDirPath = FileSystem.get(conf).makeQualified(new Path(doneDirStr)); doneDirFS = FileSystem.get(doneDirPath.toUri(), conf); // This directory will be in a common location, or this may be a cluster // meant for a single user. Creating based on the conf. Should ideally be // created by the JobHistoryServer or as part of deployment. if (!doneDirFS.exists(doneDirPath)) { if (JobHistoryUtils.shouldCreateNonUserDirectory(conf)) { LOG.info( "Creating intermediate history logDir: [" + doneDirPath + "] + based on conf. 
Should ideally be created by the JobHistoryServer: " + MRJobConfig.MR_AM_CREATE_JH_INTERMEDIATE_BASE_DIR); mkdir( doneDirFS, doneDirPath, new FsPermission( JobHistoryUtils.HISTORY_INTERMEDIATE_DONE_DIR_PERMISSIONS.toShort())); // TODO Temporary toShort till new FsPermission(FsPermissions) // respects // sticky } else { String message = "Not creating intermediate history logDir: [" + doneDirPath + "] based on conf: " + MRJobConfig.MR_AM_CREATE_JH_INTERMEDIATE_BASE_DIR + ". Either set to true or pre-create this directory with" + " appropriate permissions"; LOG.error(message); throw new YarnException(message); } } } catch (IOException e) { LOG.error( "Failed checking for the existance of history intermediate " + "done directory: [" + doneDirPath + "]"); throw new YarnException(e); } // Check/create user directory under intermediate done dir. try { doneDirPrefixPath = FileSystem.get(conf).makeQualified(new Path(userDoneDirStr)); mkdir( doneDirFS, doneDirPrefixPath, new FsPermission(JobHistoryUtils.HISTORY_INTERMEDIATE_USER_DIR_PERMISSIONS)); } catch (IOException e) { LOG.error( "Error creating user intermediate history done directory: [ " + doneDirPrefixPath + "]", e); throw new YarnException(e); } // Maximum number of unflushed completion-events that can stay in the queue // before flush kicks in. maxUnflushedCompletionEvents = conf.getInt( MRJobConfig.MR_AM_HISTORY_MAX_UNFLUSHED_COMPLETE_EVENTS, MRJobConfig.DEFAULT_MR_AM_HISTORY_MAX_UNFLUSHED_COMPLETE_EVENTS); // We want to cut down flushes after job completes so as to write quicker, // so we increase maxUnflushedEvents post Job completion by using the // following multiplier. postJobCompletionMultiplier = conf.getInt( MRJobConfig.MR_AM_HISTORY_JOB_COMPLETE_UNFLUSHED_MULTIPLIER, MRJobConfig.DEFAULT_MR_AM_HISTORY_JOB_COMPLETE_UNFLUSHED_MULTIPLIER); // Max time until which flush doesn't take place. 
flushTimeout = conf.getLong( MRJobConfig.MR_AM_HISTORY_COMPLETE_EVENT_FLUSH_TIMEOUT_MS, MRJobConfig.DEFAULT_MR_AM_HISTORY_COMPLETE_EVENT_FLUSH_TIMEOUT_MS); minQueueSizeForBatchingFlushes = conf.getInt( MRJobConfig.MR_AM_HISTORY_USE_BATCHED_FLUSH_QUEUE_SIZE_THRESHOLD, MRJobConfig.DEFAULT_MR_AM_HISTORY_USE_BATCHED_FLUSH_QUEUE_SIZE_THRESHOLD); super.init(conf); }