/**
 * Get a path from the local FS. If size is known, we go round-robin over the set of disks
 * (via the configured dirs) and return the first complete path which has enough space.
 *
 * <p>If size is not known, use roulette selection -- pick directories with probability
 * proportional to their available space.
 */
public synchronized Path getLocalPathForWrite(
    String pathStr, long size, Configuration conf, boolean checkWrite) throws IOException {
  confChanged(conf);
  int numDirs = localDirsPath.length;
  int numDirsSearched = 0;
  // remove the leading slash from the path (to make sure that the uri
  // resolution results in a valid path on the dir being checked)
  if (pathStr.startsWith("/")) {
    pathStr = pathStr.substring(1);
  }
  Path returnPath = null;
  Path path = new Path(pathStr);

  if (size == SIZE_UNKNOWN) {
    // do roulette selection: pick dir with probability
    // proportional to available size
    long[] availableOnDisk = new long[dirDF.length];
    long totalAvailable = 0;

    // build the "roulette wheel"
    for (int i = 0; i < dirDF.length; ++i) {
      availableOnDisk[i] = dirDF[i].getAvailable();
      totalAvailable += availableOnDisk[i];
    }

    // Keep rolling the wheel till we get a valid path
    Random r = new java.util.Random();
    while (numDirsSearched < numDirs && returnPath == null) {
      long randomPosition = Math.abs(r.nextLong()) % totalAvailable;
      int dir = 0;
      while (randomPosition > availableOnDisk[dir]) {
        randomPosition -= availableOnDisk[dir];
        dir++;
      }
      dirNumLastAccessed = dir;
      returnPath = createPath(path, checkWrite);
      if (returnPath == null) {
        totalAvailable -= availableOnDisk[dir];
        availableOnDisk[dir] = 0; // skip this disk
        numDirsSearched++;
      }
    }
  } else {
    while (numDirsSearched < numDirs && returnPath == null) {
      long capacity = dirDF[dirNumLastAccessed].getAvailable();
      if (capacity > size) {
        returnPath = createPath(path, checkWrite);
      }
      dirNumLastAccessed++;
      dirNumLastAccessed = dirNumLastAccessed % numDirs;
      numDirsSearched++;
    }
  }
  if (returnPath != null) {
    return returnPath;
  }

  // no path found
  throw new DiskErrorException("Could not find any valid local directory for " + pathStr);
}
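
// Illustrative usage sketch (not part of the original class): how a caller might
// request a scratch path through the selection logic above. The context key
// "mapred.local.dir" and the relative file name are assumptions for this example;
// passing SIZE_UNKNOWN as the size exercises the roulette branch, while a concrete
// size exercises the round-robin free-space check.
private static Path exampleGetScratchPath(Configuration conf, long expectedSize)
    throws IOException {
  LocalDirAllocator lDirAlloc = new LocalDirAllocator("mapred.local.dir");
  // the three-argument form (as used by getLocalCache below) delegates to the
  // selection logic above
  return lDirAlloc.getLocalPathForWrite("taskTracker/jobcache/spill0.out", expectedSize, conf);
}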
/**
 * This method gets called every time before any read/write to make sure that any change to
 * localDirs is reflected immediately.
 */
private synchronized void confChanged(Configuration conf) throws IOException {
  String newLocalDirs = conf.get(contextCfgItemName);
  if (!newLocalDirs.equals(savedLocalDirs)) {
    String[] localDirs = conf.getStrings(contextCfgItemName);
    localFS = FileSystem.getLocal(conf);
    int numDirs = localDirs.length;
    ArrayList<String> dirs = new ArrayList<String>(numDirs);
    ArrayList<DF> dfList = new ArrayList<DF>(numDirs);
    for (int i = 0; i < numDirs; i++) {
      try {
        // filter problematic directories
        Path tmpDir = new Path(localDirs[i]);
        if (localFS.mkdirs(tmpDir) || localFS.exists(tmpDir)) {
          try {
            DiskChecker.checkDir(new File(localDirs[i]));
            dirs.add(localDirs[i]);
            dfList.add(new DF(new File(localDirs[i]), 30000));
          } catch (DiskErrorException de) {
            LOG.warn(localDirs[i] + " is not writable\n" + StringUtils.stringifyException(de));
          }
        } else {
          LOG.warn("Failed to create " + localDirs[i]);
        }
      } catch (IOException ie) {
        LOG.warn(
            "Failed to create "
                + localDirs[i]
                + ": "
                + ie.getMessage()
                + "\n"
                + StringUtils.stringifyException(ie));
      } // ignore
    }
    localDirsPath = new Path[dirs.size()];
    for (int i = 0; i < localDirsPath.length; i++) {
      localDirsPath[i] = new Path(dirs.get(i));
    }
    dirDF = dfList.toArray(new DF[dirs.size()]);
    savedLocalDirs = newLocalDirs;

    // randomize the first disk picked in the round-robin selection
    dirNumLastAccessed = dirIndexRandomizer.nextInt(dirs.size());
  }
}
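
// Illustrative sketch (not in the original source): confChanged() above only rebuilds
// its state when the value stored under contextCfgItemName changes, so the allocator
// can be repointed at a new set of disks purely through the Configuration. The key
// name "mapred.local.dir" and the directory paths below are assumptions for this example.
private static void exampleRepointLocalDirs(Configuration conf) {
  conf.setStrings("mapred.local.dir", "/disk1/mapred/local", "/disk2/mapred/local");
  // The next call into getLocalPathForWrite(...) will notice the changed value,
  // re-run DiskChecker.checkDir on each directory, rebuild the DF list, and
  // re-randomize the starting disk for round-robin selection.
}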
/**
 * Get the locally cached file or archive; it could either be previously cached (and valid) or
 * copy it from the {@link FileSystem} now.
 *
 * @param cache the cache to be localized, this should be specified as new
 *     URI(hdfs://hostname:port/absolute_path_to_file#LINKNAME). If no scheme or hostname:port
 *     is provided the file is assumed to be in the filesystem being used in the Configuration
 * @param conf The Configuration file which contains the filesystem
 * @param subDir The sub cache dir where you want to localize the files/archives
 * @param fileStatus The file status on the dfs.
 * @param isArchive if the cache is an archive or a file. In case it is an archive with a .zip
 *     or .jar or .tar or .tgz or .tar.gz extension it will be unzipped/unjarred/untarred
 *     automatically and the directory where the archive is unzipped/unjarred/untarred is
 *     returned as the Path. In case of a file, the path to the file is returned
 * @param confFileStamp this is the hdfs file modification timestamp to verify that the file to
 *     be cached hasn't changed since the job started
 * @param fileLength this is the length of the cache file
 * @param currentWorkDir this is the directory where you would want to create symlinks for the
 *     locally cached files/archives
 * @param honorSymLinkConf if this is false, then the symlinks are not created even if conf says
 *     so (this is required for an optimization in task launches)
 * @param lDirAllocator LocalDirAllocator of the tracker
 * @return the path to the directory where the archives are unjarred in case of archives, the
 *     path to the file where the file is copied locally
 * @throws IOException
 */
private static Path getLocalCache(
    URI cache,
    Configuration conf,
    Path subDir,
    FileStatus fileStatus,
    boolean isArchive,
    long confFileStamp,
    long fileLength,
    Path currentWorkDir,
    boolean honorSymLinkConf,
    MRAsyncDiskService asyncDiskService,
    LocalDirAllocator lDirAllocator)
    throws IOException {
  String key = getKey(cache, conf, confFileStamp);
  CacheStatus lcacheStatus;
  Path localizedPath;
  synchronized (cachedArchives) {
    lcacheStatus = cachedArchives.get(key);
    if (lcacheStatus == null) {
      // was never localized
      Path uniqueParentDir = new Path(subDir, String.valueOf(random.nextLong()));
      String cachePath = new Path(uniqueParentDir, makeRelative(cache, conf)).toString();
      Path localPath = lDirAllocator.getLocalPathForWrite(cachePath, fileLength, conf);
      lcacheStatus =
          new CacheStatus(
              new Path(localPath.toString().replace(cachePath, "")), localPath, uniqueParentDir);
      cachedArchives.put(key, lcacheStatus);
    }
    lcacheStatus.refcount++;
  }

  boolean initSuccessful = false;
  try {
    synchronized (lcacheStatus) {
      if (!lcacheStatus.isInited()) {
        localizedPath = localizeCache(conf, cache, confFileStamp, lcacheStatus, isArchive);
        lcacheStatus.initComplete();
      } else {
        if (fileStatus != null) {
          localizedPath =
              checkCacheStatusValidity(
                  conf, cache, confFileStamp, lcacheStatus, fileStatus, isArchive);
        } else {
          // if fileStatus is null, then the md5 must be correct
          // so there is no need to check for cache validity
          localizedPath = lcacheStatus.localizedLoadPath;
        }
      }
      createSymlink(conf, cache, lcacheStatus, isArchive, currentWorkDir, honorSymLinkConf);
    }

    // try deleting stuff if you can
    long size = 0;
    int numberSubDir = 0;
    synchronized (lcacheStatus) {
      synchronized (baseDirSize) {
        Long get = baseDirSize.get(lcacheStatus.getBaseDir());
        if (get != null) {
          size = get.longValue();
        } else {
          LOG.warn("Cannot find size of baseDir: " + lcacheStatus.getBaseDir());
        }
      }
      synchronized (baseDirNumberSubDir) {
        Integer get = baseDirNumberSubDir.get(lcacheStatus.getBaseDir());
        if (get != null) {
          numberSubDir = get.intValue();
        } else {
          LOG.warn("Cannot find subdirectories limit of baseDir: " + lcacheStatus.getBaseDir());
        }
      }
    }
    // setting the cache size to a default of 10GB
    long allowedSize = conf.getLong("local.cache.size", DEFAULT_CACHE_SIZE);
    long allowedNumberSubDir =
        conf.getLong("local.cache.numbersubdir", DEFAULT_CACHE_SUBDIR_LIMIT);
    if (allowedSize < size || allowedNumberSubDir < numberSubDir) {
      // try some cache deletions
      LOG.debug(
          "Start deleting released cache because"
              + " [size, allowedSize, numberSubDir, allowedNumberSubDir] ="
              + " [" + size + ", " + allowedSize + ", " + numberSubDir + ", "
              + allowedNumberSubDir + "]");
      deleteCache(conf, asyncDiskService);
    }
    initSuccessful = true;
    return localizedPath;
  } finally {
    if (!initSuccessful) {
      synchronized (cachedArchives) {
        lcacheStatus.refcount--;
      }
    }
  }
}
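
// Illustrative sketch (not in the original source): the Javadoc above expects cache
// entries of the form hdfs://hostname:port/absolute_path_to_file#LINKNAME, where the
// fragment after '#' becomes the symlink name created in currentWorkDir. The host,
// port, path, and link name below are assumptions for this example.
private static URI exampleCacheUri() {
  return URI.create("hdfs://namenode:8020/apps/shared/parser.jar#parser.jar");
}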