/**
 * Registers every history file found under {@code path} in the job list cache.
 *
 * <p>For each file the job index information is decoded from the file name, and the matching
 * configuration and summary file paths are resolved against the history file's parent directory.
 * Entries are added with {@code addIfAbsent}, so an existing cache entry is left in place.
 *
 * @param path directory that is scanned through {@code doneDirFc}
 * @throws IOException propagated from the directory scan
 */
private void addDirectoryToJobListCache(Path path) throws IOException {
  if (LogGlobal.isDebugEnabled()) {
    /* LOG.debug("Adding "+path+" to job list cache.") */
    LOG.adding_job_list_cache(path.toString()).debug();
  }
  for (FileStatus status : scanDirectoryForHistoryFiles(path, doneDirFc)) {
    Path historyFile = status.getPath();
    if (LogGlobal.isDebugEnabled()) {
      /* LOG.debug("Adding in history for "+fs.getPath()) */
      LOG.adding_history_for(String.valueOf(historyFile)).tag("methodCall").debug();
    }
    JobIndexInfo indexInfo = FileNameIndexUtils.getIndexInfo(historyFile.getName());
    String confName = JobHistoryUtils.getIntermediateConfFileName(indexInfo.getJobId());
    String summaryName = JobHistoryUtils.getIntermediateSummaryFileName(indexInfo.getJobId());
    Path parentDir = historyFile.getParent();
    // NOTE(review): the trailing 'true' presumably marks the file as already living in the done
    // directory -- confirm against the HistoryFileInfo constructor.
    HistoryFileInfo info =
        new HistoryFileInfo(
            historyFile,
            new Path(parentDir, confName),
            new Path(parentDir, summaryName),
            indexInfo,
            true);
    jobListCache.addIfAbsent(info);
  }
}
private void makeDoneSubdir(Path path) throws IOException { try { doneDirFc.getFileStatus(path); existingDoneSubdirs.add(path); } catch (FileNotFoundException fnfE) { try { FsPermission fsp = new FsPermission(JobHistoryUtils.HISTORY_DONE_DIR_PERMISSION); doneDirFc.mkdir(path, fsp, true); FileStatus fsStatus = doneDirFc.getFileStatus(path); /* LOG.info("Perms after creating "+fsStatus.getPermission().toShort()+", Expected: "+fsp.toShort()) */ LOG.perms_after_creating_expected( String.valueOf(fsStatus.getPermission().toShort()), String.valueOf(fsp.toShort())) .tag("methodCall") .info(); if (fsStatus.getPermission().toShort() != fsp.toShort()) { /* LOG.info("Explicitly setting permissions to : "+fsp.toShort()+", "+fsp) */ LOG.explicitly_setting_permissions(String.valueOf(fsp.toShort()), fsp.toString()) .tag("methodCall") .info(); doneDirFc.setPermission(path, fsp); } existingDoneSubdirs.add(path); } catch (FileAlreadyExistsException faeE) { // Nothing to do. } } }
/**
 * List input directories. Subclasses may override to, e.g., select only files matching a regular
 * expression.
 *
 * <p>When {@code LIST_STATUS_NUM_THREADS} is 1 the listing is done by
 * {@code singleThreadedListStatus}; otherwise a {@code LocatedFileStatusFetcher} is used.
 *
 * @param job the job to list input paths for
 * @return list of FileStatus objects
 * @throws IOException if no input paths are configured, if listing fails, or if the calling
 *     thread is interrupted while file statuses are being fetched (the
 *     {@link InterruptedException} is attached as the cause and the thread's interrupt flag is
 *     restored)
 */
protected List<FileStatus> listStatus(JobContext job) throws IOException {
  Path[] dirs = getInputPaths(job);
  if (dirs.length == 0) {
    throw new IOException("No input paths specified in job");
  }

  // get tokens for all the required FileSystems..
  TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job.getConfiguration());

  // Whether we need to recursive look into the directory structure
  boolean recursive = getInputDirRecursive(job);

  // creates a MultiPathFilter with the hiddenFileFilter and the
  // user provided one (if any).
  List<PathFilter> filters = new ArrayList<PathFilter>();
  filters.add(hiddenFileFilter);
  PathFilter jobFilter = getInputPathFilter(job);
  if (jobFilter != null) {
    filters.add(jobFilter);
  }
  PathFilter inputFilter = new MultiPathFilter(filters);

  List<FileStatus> result;
  int numThreads =
      job.getConfiguration().getInt(LIST_STATUS_NUM_THREADS, DEFAULT_LIST_STATUS_NUM_THREADS);
  Stopwatch sw = new Stopwatch().start();
  if (numThreads == 1) {
    result = singleThreadedListStatus(job, dirs, inputFilter, recursive);
  } else {
    Iterable<FileStatus> locatedFiles;
    try {
      LocatedFileStatusFetcher locatedFileStatusFetcher =
          new LocatedFileStatusFetcher(
              job.getConfiguration(), dirs, recursive, inputFilter, true);
      locatedFiles = locatedFileStatusFetcher.getFileStatuses();
    } catch (InterruptedException e) {
      // Catching InterruptedException clears the interrupt status; restore it so code further
      // up the stack can still observe the interruption, and keep the original exception as
      // the cause instead of discarding it.
      Thread.currentThread().interrupt();
      throw new IOException("Interrupted while getting file statuses", e);
    }
    result = Lists.newArrayList(locatedFiles);
  }

  sw.stop();
  if (LogGlobal.isDebugEnabled()) {
    /* LOG.debug("Time taken to get FileStatuses: "+sw.elapsedMillis()) */
    LOG.time_taken_get_filestatuses(String.valueOf(sw.elapsedMillis())).tag("methodCall").debug();
  }
  /* LOG.info("Total input paths to process : "+result.size()) */
  LOG.total_input_paths_process(String.valueOf(result.size())).tag("methodCall").info();
  return result;
}
/**
 * Logs the move and then renames {@code src} to {@code target} through
 * {@code intermediateDoneDirFc} using {@code Options.Rename.NONE}.
 *
 * @param src path of the file to move
 * @param target destination path
 * @throws IOException if the rename fails
 */
private void moveToDoneNow(final Path src, final Path target) throws IOException {
  final String from = src.toString();
  final String to = target.toString();
  /* LOG.info("Moving "+src.toString()+" to "+target.toString()) */
  LOG.moving(String.valueOf(from), String.valueOf(to)).tag("methodCall").info();
  intermediateDoneDirFc.rename(src, target, Options.Rename.NONE);
}
/**
 * Removes the cache entry keyed by the job id of {@code fileInfo}.
 *
 * @param fileInfo history file whose job id identifies the entry to drop
 */
public void delete(HistoryFileInfo fileInfo) {
  if (LogGlobal.isDebugEnabled()) {
    /* LOG.debug("Removing from cache "+fileInfo) */
    LOG.removing_from_cache(fileInfo.toString()).debug();
  }
  final JobId evictedId = fileInfo.getJobId();
  cache.remove(evictedId);
}
public HistoryFileInfo addIfAbsent(HistoryFileInfo fileInfo) { JobId jobId = fileInfo.getJobId(); if (LogGlobal.isDebugEnabled()) { /* LOG.debug("Adding "+jobId+" to job list cache with "+fileInfo.getJobIndexInfo()) */ LOG.adding_job_list_cache_with(jobId.toString(), String.valueOf(fileInfo.getJobIndexInfo())) .tag("methodCall") .debug(); } HistoryFileInfo old = cache.putIfAbsent(jobId, fileInfo); if (cache.size() > maxSize) { // There is a race here, where more then one thread could be trying to // remove entries. This could result in too many entries being removed // from the cache. This is considered OK as the size of the cache // should be rather large, and we would rather have performance over // keeping the cache size exactly at the maximum. Iterator<JobId> keys = cache.navigableKeySet().iterator(); long cutoff = System.currentTimeMillis() - maxAge; while (cache.size() > maxSize && keys.hasNext()) { JobId key = keys.next(); HistoryFileInfo firstValue = cache.get(key); if (firstValue != null) { synchronized (firstValue) { if (firstValue.isMovePending()) { if (firstValue.didMoveFail() && firstValue.jobIndexInfo.getFinishTime() <= cutoff) { cache.remove(key); // Now lets try to delete it try { firstValue.delete(); } catch (IOException e) { /* LOG.error("Error while trying to delete history files"+" that could not be moved to done.",e) */ LOG.error_while_trying_delete_history_files_(e.toString()).error(); } } else { /* LOG.warn("Waiting to remove "+key+" from JobListCache because it is not in done yet.") */ LOG.waiting_remove_from_joblistcache_because(key.toString()).warn(); } } else { cache.remove(key); } } } } } return old; }
/**
 * Removes the (serial, timestamp) pair derived from {@code serialDirPath} from the serial number
 * index.
 *
 * <p>The serial part is the last path component; the timestamp part is extracted by
 * {@code JobHistoryUtils.getTimestampPartFromPath}. If either is missing a warning is logged and
 * the index is left untouched.
 *
 * @param serialDirPath path of the serial directory
 */
private void removeDirectoryFromSerialNumberIndex(Path serialDirPath) {
  final String dirText = serialDirPath.toString();
  final String serial = serialDirPath.getName();
  final String timestamp = JobHistoryUtils.getTimestampPartFromPath(dirText);

  if (timestamp != null && serial != null) {
    serialNumberIndex.remove(serial, timestamp);
    return;
  }

  // A missing timestamp is reported in preference to a missing serial part.
  if (timestamp == null) {
    /* LOG.warn("Could not find timestamp portion from path: "+serialDirPath.toString()+". Continuing with next") */
    LOG.could_not_find_timestamp_portion_from_co(String.valueOf(dirText))
        .tag("methodCall")
        .warn();
  } else {
    /* LOG.warn("Could not find serial portion from path: "+serialDirPath.toString()+". Continuing with next") */
    LOG.could_not_find_serial_portion_from_conti(String.valueOf(dirText))
        .tag("methodCall")
        .warn();
  }
}
/**
 * Adds the (serial, timestamp) pair derived from {@code serialDirPath} to the serial number
 * index.
 *
 * <p>The serial part is the last path component; the timestamp part is extracted by
 * {@code JobHistoryUtils.getTimestampPartFromPath}. If either is missing a warning is logged and
 * nothing is added.
 *
 * @param serialDirPath path of the serial directory
 */
private void addDirectoryToSerialNumberIndex(Path serialDirPath) {
  if (LogGlobal.isDebugEnabled()) {
    /* LOG.debug("Adding "+serialDirPath+" to serial index") */
    LOG.adding_serial_index(serialDirPath.toString()).debug();
  }

  final String serial = serialDirPath.getName();
  final String timestamp = JobHistoryUtils.getTimestampPartFromPath(serialDirPath.toString());

  if (timestamp == null) {
    /* LOG.warn("Could not find timestamp portion from path: "+serialDirPath+". Continuing with next") */
    LOG.could_not_find_timestamp_portion_from_co(serialDirPath.toString()).warn();
    return;
  }
  if (serial == null) {
    /* LOG.warn("Could not find serial portion from path: "+serialDirPath.toString()+". Continuing with next") */
    LOG.could_not_find_serial_portion_from_conti(String.valueOf(serialDirPath.toString()))
        .tag("methodCall")
        .warn();
    return;
  }
  serialNumberIndex.add(serial, timestamp);
}
/** Populates index data structures. Should only be called at initialization times. */ @SuppressWarnings("unchecked") void initExisting() throws IOException { /* LOG.info("Initializing Existing Jobs...") */ LOG.initializing_existing_jobs().info(); List<FileStatus> timestampedDirList = findTimestampedDirectories(); // Sort first just so insertion is in a consistent order Collections.sort(timestampedDirList); for (FileStatus fs : timestampedDirList) { // TODO Could verify the correct format for these directories. addDirectoryToSerialNumberIndex(fs.getPath()); addDirectoryToJobListCache(fs.getPath()); } }
/**
 * Creates {@code path} with permission {@code fsp} unless it already exists.
 *
 * <p>After creation the reported permissions are compared with the requested ones and set
 * explicitly when they differ. A directory that appears concurrently (reported as
 * {@code FileAlreadyExistsException}) is logged and otherwise ignored.
 *
 * @param fc file context used for all file-system calls
 * @param path directory to create
 * @param fsp permissions the directory should end up with
 * @throws IOException on any other file-system failure
 */
private void mkdir(FileContext fc, Path path, FsPermission fsp) throws IOException {
  if (fc.util().exists(path)) {
    return;
  }
  try {
    fc.mkdir(path, fsp, true);
    FileStatus created = fc.getFileStatus(path);
    /* LOG.info("Perms after creating "+fsStatus.getPermission().toShort()+", Expected: "+fsp.toShort()) */
    LOG.perms_after_creating_expected(
            String.valueOf(created.getPermission().toShort()), String.valueOf(fsp.toShort()))
        .tag("methodCall")
        .info();
    boolean permsMatch = created.getPermission().toShort() == fsp.toShort();
    if (!permsMatch) {
      /* LOG.info("Explicitly setting permissions to : "+fsp.toShort()+", "+fsp) */
      LOG.explicitly_setting_permissions(String.valueOf(fsp.toShort()), fsp.toString())
          .tag("methodCall")
          .info();
      fc.setPermission(path, fsp);
    }
  } catch (FileAlreadyExistsException alreadyThere) {
    /* LOG.info("Directory: ["+path+"] already exists.") */
    LOG.directory_already_exists(path.toString()).info();
  }
}
/**
 * Records {@code timestampPart} under {@code serialPart}, creating the entry for
 * {@code serialPart} if needed.
 *
 * <p>When creating a new entry pushes the cache above {@code maxSize}, the entry returned by
 * {@code cache.firstKey()} is dropped and an error is logged. If the dropped entry is the one
 * that was just created, there is nothing left to add the timestamp to and the method returns
 * without recording it (previously this case failed with a {@link NullPointerException}).
 *
 * @param serialPart serial-number component used as the cache key
 * @param timestampPart timestamp component stored in the key's set
 */
public synchronized void add(String serialPart, String timestampPart) {
  if (!cache.containsKey(serialPart)) {
    cache.put(serialPart, new HashSet<String>());
    if (cache.size() > maxSize) {
      String key = cache.firstKey();
      /* LOG.error("Dropping "+key+" from the SerialNumberIndex. We will no "+"longer be able to see jobs that are in that serial index for "+cache.get(key)) */
      LOG.dropping_from_serialnumberindex_will_lon(key, String.valueOf(cache.get(key)))
          .tag("methodCall")
          .error();
      cache.remove(key);
    }
  }
  Set<String> datePartSet = cache.get(serialPart);
  if (datePartSet == null) {
    // The freshly inserted key was itself the first key and has just been evicted above; the
    // drop has already been logged, so there is nothing to record.
    return;
  }
  datePartSet.add(timestampPart);
}
/** * Scans the intermediate directory to find user directories. Scans these for history files if the * modification time for the directory has changed. Once it finds history files it starts the * process of moving them to the done directory. * * @throws IOException if there was a error while scanning */ void scanIntermediateDirectory() throws IOException { // TODO it would be great to limit how often this happens, except in the // case where we are looking for a particular job. List<FileStatus> userDirList = JobHistoryUtils.localGlobber(intermediateDoneDirFc, intermediateDoneDirPath, ""); /* LOG.debug("Scanning intermediate dirs") */ LOG.scanning_intermediate_dirs().debug(); for (FileStatus userDir : userDirList) { String name = userDir.getPath().getName(); UserLogDir dir = userDirModificationTimeMap.get(name); if (dir == null) { dir = new UserLogDir(); UserLogDir old = userDirModificationTimeMap.putIfAbsent(name, dir); if (old != null) { dir = old; } } dir.scanIfNeeded(userDir); } }
/** * Generate the list of files and make them into FileSplits. * * @param job the job context * @throws IOException */ public List<InputSplit> getSplits(JobContext job) throws IOException { Stopwatch sw = new Stopwatch().start(); long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job)); long maxSize = getMaxSplitSize(job); // generate splits List<InputSplit> splits = new ArrayList<InputSplit>(); List<FileStatus> files = listStatus(job); for (FileStatus file : files) { Path path = file.getPath(); long length = file.getLen(); if (length != 0) { BlockLocation[] blkLocations; if (file instanceof LocatedFileStatus) { blkLocations = ((LocatedFileStatus) file).getBlockLocations(); } else { FileSystem fs = path.getFileSystem(job.getConfiguration()); blkLocations = fs.getFileBlockLocations(file, 0, length); } if (isSplitable(job, path)) { long blockSize = file.getBlockSize(); long splitSize = computeSplitSize(blockSize, minSize, maxSize); long bytesRemaining = length; while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) { int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); splits.add( makeSplit( path, length - bytesRemaining, splitSize, blkLocations[blkIndex].getHosts())); bytesRemaining -= splitSize; } if (bytesRemaining != 0) { int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); splits.add( makeSplit( path, length - bytesRemaining, bytesRemaining, blkLocations[blkIndex].getHosts())); } } else { // not splitable splits.add(makeSplit(path, 0, length, blkLocations[0].getHosts())); } } else { // Create empty hosts array for zero length files splits.add(makeSplit(path, 0, length, new String[0])); } } // Save the number of input files for metrics/loadgen job.getConfiguration().setLong(NUM_INPUT_FILES, files.size()); sw.stop(); if (LogGlobal.isDebugEnabled()) { /* LOG.debug("Total # of splits generated by getSplits: "+splits.size()+", TimeTaken: "+sw.elapsedMillis()) */ LOG.total_splits_generated_getsplits_timetak( 
String.valueOf(splits.size()), String.valueOf(sw.elapsedMillis())) .tag("methodCall") .debug(); } return splits; }
/** * Scans the specified path and populates the intermediate cache. * * @param absPath * @throws IOException */ private void scanIntermediateDirectory(final Path absPath) throws IOException { if (LogGlobal.isDebugEnabled()) { /* LOG.debug("Scanning intermediate dir "+absPath) */ LOG.scanning_intermediate_dir(absPath.toString()).debug(); } List<FileStatus> fileStatusList = scanDirectoryForHistoryFiles(absPath, intermediateDoneDirFc); if (LogGlobal.isDebugEnabled()) { /* LOG.debug("Found "+fileStatusList.size()+" files") */ LOG.found_files(String.valueOf(fileStatusList.size())).tag("methodCall").debug(); } for (FileStatus fs : fileStatusList) { if (LogGlobal.isDebugEnabled()) { /* LOG.debug("scanning file: "+fs.getPath()) */ LOG.scanning_file(String.valueOf(fs.getPath())).tag("methodCall").debug(); } JobIndexInfo jobIndexInfo = FileNameIndexUtils.getIndexInfo(fs.getPath().getName()); String confFileName = JobHistoryUtils.getIntermediateConfFileName(jobIndexInfo.getJobId()); String summaryFileName = JobHistoryUtils.getIntermediateSummaryFileName(jobIndexInfo.getJobId()); HistoryFileInfo fileInfo = new HistoryFileInfo( fs.getPath(), new Path(fs.getPath().getParent(), confFileName), new Path(fs.getPath().getParent(), summaryFileName), jobIndexInfo, false); final HistoryFileInfo old = jobListCache.addIfAbsent(fileInfo); if (old == null || old.didMoveFail()) { final HistoryFileInfo found = (old == null) ? 
fileInfo : old; long cutoff = System.currentTimeMillis() - maxHistoryAge; if (found.getJobIndexInfo().getFinishTime() <= cutoff) { try { found.delete(); } catch (IOException e) { /* LOG.warn("Error cleaning up a HistoryFile that is out of date.",e) */ LOG.error_cleaning_historyfile_that_out_date(e.toString()).warn(); } } else { if (LogGlobal.isDebugEnabled()) { /* LOG.debug("Scheduling move to done of "+found) */ LOG.scheduling_move_done(found.toString()).debug(); } moveToDoneExecutor.execute( new Runnable() { @Override public void run() { try { found.moveToDone(); } catch (IOException e) { /* LOG.info("Failed to process fileInfo for job: "+found.getJobId(),e) */ LOG.failed_process_fileinfo_for_job( String.valueOf(found.getJobId()), e.toString()) .tag("methodCall") .info(); } } }); } } else if (old != null && !old.isMovePending()) { // This is a duplicate so just delete it if (LogGlobal.isDebugEnabled()) { /* LOG.debug("Duplicate: deleting") */ LOG.duplicate_deleting().debug(); } fileInfo.delete(); } } }
/**
 * Returns TRUE if the history dirs were created, FALSE if they could not be created because the
 * FileSystem is not reachable or in safe mode, and throws an exception otherwise.
 *
 * <p>The done directory is set up first; the intermediate done directory is only attempted once
 * the done directory succeeded. Any {@link IOException} that is neither a
 * {@link ConnectException} nor attributed to safe mode by {@code isBecauseSafeMode} is rethrown
 * wrapped in a {@code YarnRuntimeException}.
 *
 * @param logWait whether to log an informational message when the file system is not yet usable
 * @return {@code true} if both directories are in place, {@code false} if the caller should retry
 */
@VisibleForTesting
boolean tryCreatingHistoryDirs(boolean logWait) throws IOException {
  String doneDirPrefix = JobHistoryUtils.getConfiguredHistoryServerDoneDirPrefix(conf);
  try {
    doneDirPrefixPath = FileContext.getFileContext(conf).makeQualified(new Path(doneDirPrefix));
    doneDirFc = FileContext.getFileContext(doneDirPrefixPath.toUri(), conf);
    doneDirFc.setUMask(JobHistoryUtils.HISTORY_DONE_DIR_UMASK);
    mkdir(
        doneDirFc,
        doneDirPrefixPath,
        new FsPermission(JobHistoryUtils.HISTORY_DONE_DIR_PERMISSION));
  } catch (ConnectException ex) {
    if (logWait) {
      /* LOG.info("Waiting for FileSystem at "+doneDirPrefixPath.toUri().getAuthority()+"to be available") */
      LOG.waiting_for_filesystem_available(
              String.valueOf(doneDirPrefixPath.toUri().getAuthority()))
          .tag("methodCall")
          .info();
    }
    return false;
  } catch (IOException e) {
    if (!isBecauseSafeMode(e)) {
      throw new YarnRuntimeException(
          "Error creating done directory: [" + doneDirPrefixPath + "]", e);
    }
    if (logWait) {
      /* LOG.info("Waiting for FileSystem at "+doneDirPrefixPath.toUri().getAuthority()+"to be out of safe mode") */
      LOG.waiting_for_filesystem_out_safe_mode(
              String.valueOf(doneDirPrefixPath.toUri().getAuthority()))
          .tag("methodCall")
          .info();
    }
    return false;
  }

  String intermediateDoneDirPrefix =
      JobHistoryUtils.getConfiguredHistoryIntermediateDoneDirPrefix(conf);
  try {
    intermediateDoneDirPath =
        FileContext.getFileContext(conf).makeQualified(new Path(intermediateDoneDirPrefix));
    intermediateDoneDirFc = FileContext.getFileContext(intermediateDoneDirPath.toUri(), conf);
    mkdir(
        intermediateDoneDirFc,
        intermediateDoneDirPath,
        new FsPermission(JobHistoryUtils.HISTORY_INTERMEDIATE_DONE_DIR_PERMISSIONS.toShort()));
  } catch (ConnectException ex) {
    if (logWait) {
      /* LOG.info("Waiting for FileSystem at "+intermediateDoneDirPath.toUri().getAuthority()+"to be available") */
      LOG.waiting_for_filesystem_available(
              String.valueOf(intermediateDoneDirPath.toUri().getAuthority()))
          .tag("methodCall")
          .info();
    }
    return false;
  } catch (IOException e) {
    if (!isBecauseSafeMode(e)) {
      throw new YarnRuntimeException(
          "Error creating intermediate done directory: [" + intermediateDoneDirPath + "]", e);
    }
    if (logWait) {
      /* LOG.info("Waiting for FileSystem at "+intermediateDoneDirPath.toUri().getAuthority()+"to be out of safe mode") */
      LOG.waiting_for_filesystem_out_safe_mode(
              String.valueOf(intermediateDoneDirPath.toUri().getAuthority()))
          .tag("methodCall")
          .info();
    }
    return false;
  }

  return true;
}