// Mostly for setting up the symlinks. Note that when we setup the distributed // cache, we didn't create the symlinks. This is done on a per task basis // by the currently executing task. public static void setupWorkDir(JobConf conf) throws IOException { File workDir = new File(".").getAbsoluteFile(); FileUtil.fullyDelete(workDir); if (DistributedCache.getSymlink(conf)) { URI[] archives = DistributedCache.getCacheArchives(conf); URI[] files = DistributedCache.getCacheFiles(conf); Path[] localArchives = DistributedCache.getLocalCacheArchives(conf); Path[] localFiles = DistributedCache.getLocalCacheFiles(conf); if (archives != null) { for (int i = 0; i < archives.length; i++) { String link = archives[i].getFragment(); if (link != null) { link = workDir.toString() + Path.SEPARATOR + link; File flink = new File(link); if (!flink.exists()) { FileUtil.symLink(localArchives[i].toString(), link); } } } } if (files != null) { for (int i = 0; i < files.length; i++) { String link = files[i].getFragment(); if (link != null) { link = workDir.toString() + Path.SEPARATOR + link; File flink = new File(link); if (!flink.exists()) { FileUtil.symLink(localFiles[i].toString(), link); } } } } } File jobCacheDir = null; if (conf.getJar() != null) { jobCacheDir = new File(new Path(conf.getJar()).getParent().toString()); } // create symlinks for all the files in job cache dir in current // workingdir for streaming try { DistributedCache.createAllSymlink(conf, jobCacheDir, workDir); } catch (IOException ie) { // Do not exit even if symlinks have not been created. LOG.warn(StringUtils.stringifyException(ie)); } // add java.io.tmpdir given by mapred.child.tmp String tmp = conf.get("mapred.child.tmp", "./tmp"); Path tmpDir = new Path(tmp); // if temp directory path is not absolute // prepend it with workDir. if (!tmpDir.isAbsolute()) { tmpDir = new Path(workDir.toString(), tmp); FileSystem localFs = FileSystem.getLocal(conf); if (!localFs.mkdirs(tmpDir) && !localFs.getFileStatus(tmpDir).isDir()) { throw new IOException("Mkdirs failed to create " + tmpDir.toString()); } } }
// the method which actually copies the caches locally and unjars/unzips them // and does chmod for the files private static Path localizeCache( Configuration conf, URI cache, long confFileStamp, CacheStatus cacheStatus, boolean isArchive) throws IOException { FileSystem fs = getFileSystem(cache, conf); FileSystem localFs = FileSystem.getLocal(conf); Path parchive = null; if (isArchive) { parchive = new Path( cacheStatus.localizedLoadPath, new Path(cacheStatus.localizedLoadPath.getName())); } else { parchive = cacheStatus.localizedLoadPath; } if (!localFs.mkdirs(parchive.getParent())) { throw new IOException( "Mkdirs failed to create directory " + cacheStatus.localizedLoadPath.toString()); } String cacheId = cache.getPath(); fs.copyToLocalFile(new Path(cacheId), parchive); if (isArchive) { String tmpArchive = parchive.toString().toLowerCase(); File srcFile = new File(parchive.toString()); File destDir = new File(parchive.getParent().toString()); if (tmpArchive.endsWith(".jar")) { RunJar.unJar(srcFile, destDir); } else if (tmpArchive.endsWith(".zip")) { FileUtil.unZip(srcFile, destDir); } else if (isTarFile(tmpArchive)) { FileUtil.unTar(srcFile, destDir); } // else will not do anyhting // and copy the file into the dir as it is } long cacheSize = FileUtil.getDU(new File(parchive.getParent().toString())); cacheStatus.size = cacheSize; addCacheInfoUpdate(cacheStatus); // do chmod here try { // Setting recursive permission to grant everyone read and execute Path localDir = new Path(cacheStatus.localizedBaseDir, cacheStatus.uniqueParentDir); LOG.info("Doing chmod on localdir :" + localDir); FileUtil.chmod(localDir.toString(), "ugo+rx", true); } catch (InterruptedException e) { LOG.warn("Exception in chmod" + e.toString()); } // update cacheStatus to reflect the newly cached file cacheStatus.mtime = getTimestamp(conf, cache); return cacheStatus.localizedLoadPath; }
/** * This method create symlinks for all files in a given dir in another directory * * @param conf the configuration * @param jobCacheDir the target directory for creating symlinks * @param workDir the directory in which the symlinks are created * @throws IOException */ public static void createAllSymlink(Configuration conf, File jobCacheDir, File workDir) throws IOException { if ((jobCacheDir == null || !jobCacheDir.isDirectory()) || workDir == null || (!workDir.isDirectory())) { return; } boolean createSymlink = getSymlink(conf); if (createSymlink) { File[] list = jobCacheDir.listFiles(); for (int i = 0; i < list.length; i++) { FileUtil.symLink( list[i].getAbsolutePath(), new File(workDir, list[i].getName()).toString()); } } }
private static void createSymlink( Configuration conf, URI cache, CacheStatus cacheStatus, boolean isArchive, Path currentWorkDir, boolean honorSymLinkConf) throws IOException { boolean doSymlink = honorSymLinkConf && DistributedCache.getSymlink(conf); if (cache.getFragment() == null) { doSymlink = false; } String link = currentWorkDir.toString() + Path.SEPARATOR + cache.getFragment(); File flink = new File(link); if (doSymlink) { if (!flink.exists()) { FileUtil.symLink(cacheStatus.localizedLoadPath.toString(), link); } } }