/** * Get a path from the local FS. If size is known, we go round-robin over the set of disks (via * the configured dirs) and return the first complete path which has enough space. * * <p>If size is not known, use roulette selection -- pick directories with probability * proportional to their available space. */ public synchronized Path getLocalPathForWrite( String pathStr, long size, Configuration conf, boolean checkWrite) throws IOException { confChanged(conf); int numDirs = localDirsPath.length; int numDirsSearched = 0; // remove the leading slash from the path (to make sure that the uri // resolution results in a valid path on the dir being checked) if (pathStr.startsWith("/")) { pathStr = pathStr.substring(1); } Path returnPath = null; Path path = new Path(pathStr); if (size == SIZE_UNKNOWN) { // do roulette selection: pick dir with probability // proportional to available size long[] availableOnDisk = new long[dirDF.length]; long totalAvailable = 0; // build the "roulette wheel" for (int i = 0; i < dirDF.length; ++i) { availableOnDisk[i] = dirDF[i].getAvailable(); totalAvailable += availableOnDisk[i]; } // Keep rolling the wheel till we get a valid path Random r = new java.util.Random(); while (numDirsSearched < numDirs && returnPath == null) { long randomPosition = Math.abs(r.nextLong()) % totalAvailable; int dir = 0; while (randomPosition > availableOnDisk[dir]) { randomPosition -= availableOnDisk[dir]; dir++; } dirNumLastAccessed = dir; returnPath = createPath(path, checkWrite); if (returnPath == null) { totalAvailable -= availableOnDisk[dir]; availableOnDisk[dir] = 0; // skip this disk numDirsSearched++; } } } else { while (numDirsSearched < numDirs && returnPath == null) { long capacity = dirDF[dirNumLastAccessed].getAvailable(); if (capacity > size) { returnPath = createPath(path, checkWrite); } dirNumLastAccessed++; dirNumLastAccessed = dirNumLastAccessed % numDirs; numDirsSearched++; } } if (returnPath != null) { return returnPath; } // no path found throw new DiskErrorException("Could not find any valid local " + "directory for " + pathStr); }
/** * Get the locally cached file or archive; it could either be previously cached (and valid) or * copy it from the {@link FileSystem} now. * * @param cache the cache to be localized, this should be specified as new * URI(hdfs://hostname:port/absolute_path_to_file#LINKNAME). If no schema or hostname:port is * provided the file is assumed to be in the filesystem being used in the Configuration * @param conf The Confguration file which contains the filesystem * @param subDir The sub cache Dir where you want to localize the files/archives * @param fileStatus The file status on the dfs. * @param isArchive if the cache is an archive or a file. In case it is an archive with a .zip or * .jar or .tar or .tgz or .tar.gz extension it will be unzipped/unjarred/untarred * automatically and the directory where the archive is unzipped/unjarred/untarred is returned * as the Path. In case of a file, the path to the file is returned * @param confFileStamp this is the hdfs file modification timestamp to verify that the file to be * cached hasn't changed since the job started * @param fileLength this is the length of the cache file * @param currentWorkDir this is the directory where you would want to create symlinks for the * locally cached files/archives * @param honorSymLinkConf if this is false, then the symlinks are not created even if conf says * so (this is required for an optimization in task launches * @param lDirAllocator LocalDirAllocator of the tracker * @return the path to directory where the archives are unjarred in case of archives, the path to * the file where the file is copied locally * @throws IOException */ private static Path getLocalCache( URI cache, Configuration conf, Path subDir, FileStatus fileStatus, boolean isArchive, long confFileStamp, long fileLength, Path currentWorkDir, boolean honorSymLinkConf, MRAsyncDiskService asyncDiskService, LocalDirAllocator lDirAllocator) throws IOException { String key = getKey(cache, conf, confFileStamp); CacheStatus lcacheStatus; Path localizedPath; synchronized (cachedArchives) { lcacheStatus = cachedArchives.get(key); if (lcacheStatus == null) { // was never localized Path uniqueParentDir = new Path(subDir, String.valueOf(random.nextLong())); String cachePath = new Path(uniqueParentDir, makeRelative(cache, conf)).toString(); Path localPath = lDirAllocator.getLocalPathForWrite(cachePath, fileLength, conf); lcacheStatus = new CacheStatus( new Path(localPath.toString().replace(cachePath, "")), localPath, uniqueParentDir); cachedArchives.put(key, lcacheStatus); } lcacheStatus.refcount++; } boolean initSuccessful = false; try { synchronized (lcacheStatus) { if (!lcacheStatus.isInited()) { localizedPath = localizeCache(conf, cache, confFileStamp, lcacheStatus, isArchive); lcacheStatus.initComplete(); } else { if (fileStatus != null) { localizedPath = checkCacheStatusValidity( conf, cache, confFileStamp, lcacheStatus, fileStatus, isArchive); } else { // if fileStatus is null, then the md5 must be correct // so there is no need to check for cache validity localizedPath = lcacheStatus.localizedLoadPath; } } createSymlink(conf, cache, lcacheStatus, isArchive, currentWorkDir, honorSymLinkConf); } // try deleting stuff if you can long size = 0; int numberSubDir = 0; synchronized (lcacheStatus) { synchronized (baseDirSize) { Long get = baseDirSize.get(lcacheStatus.getBaseDir()); if (get != null) { size = get.longValue(); } else { LOG.warn("Cannot find size of baseDir: " + lcacheStatus.getBaseDir()); } } synchronized (baseDirNumberSubDir) { Integer get = baseDirNumberSubDir.get(lcacheStatus.getBaseDir()); if (get != null) { numberSubDir = get.intValue(); } else { LOG.warn("Cannot find subdirectories limit of baseDir: " + lcacheStatus.getBaseDir()); } } } // setting the cache size to a default of 10GB long allowedSize = conf.getLong("local.cache.size", DEFAULT_CACHE_SIZE); long allowedNumberSubDir = conf.getLong("local.cache.numbersubdir", DEFAULT_CACHE_SUBDIR_LIMIT); if (allowedSize < size || allowedNumberSubDir < numberSubDir) { // try some cache deletions LOG.debug( "Start deleting released cache because" + " [size, allowedSize, numberSubDir, allowedNumberSubDir] =" + " [" + size + ", " + allowedSize + ", " + numberSubDir + ", " + allowedNumberSubDir + "]"); deleteCache(conf, asyncDiskService); } initSuccessful = true; return localizedPath; } finally { if (!initSuccessful) { synchronized (cachedArchives) { lcacheStatus.refcount--; } } } }