Exemplo n.º 1
0
  /**
   * Prepares a task's working directory: empties it, links distributed cache entries into it when
   * symlinking is enabled for the job, links the job cache directory's files into it when a job jar
   * is configured, and finally sets up the child tmp directory.
   *
   * <p>The symlinks are created here, per task, rather than when the distributed cache itself was
   * set up.
   *
   * @param conf The job configuration.
   * @param workDir Working directory. Its contents are deleted; the directory itself is kept.
   * @throws IOException propagated from the underlying file operations. An {@code IOException}
   *     raised while linking the job cache directory is logged as a warning and not rethrown.
   */
  public static void setupWorkDir(JobConf conf, File workDir) throws IOException {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Fully deleting contents of " + workDir);
    }

    // Only the contents are removed: workDir is the current working directory,
    // so the directory itself has to stay in place.
    FileUtil.fullyDeleteContents(workDir);

    if (DistributedCache.getSymlink(conf)) {
      // All four lookups happen before the first link is created.
      URI[] archiveUris = DistributedCache.getCacheArchives(conf);
      URI[] fileUris = DistributedCache.getCacheFiles(conf);
      Path[] archivePaths = DistributedCache.getLocalCacheArchives(conf);
      Path[] filePaths = DistributedCache.getLocalCacheFiles(conf);

      linkCacheEntries(workDir, archiveUris, archivePaths);
      linkCacheEntries(workDir, fileUris, filePaths);
    }

    String jobJar = conf.getJar();
    if (jobJar != null) {
      Path jobCachePath = new Path(jobJar).getParent();
      File jobCacheDir = new File(jobCachePath.toString());

      // Streaming expects every file of the job cache dir to be reachable from
      // the current working directory, hence the symlinks.
      try {
        TrackerDistributedCacheManager.createAllSymlink(conf, jobCacheDir, workDir);
      } catch (IOException ie) {
        // Best effort: the task carries on even when the links could not be made.
        LOG.warn(StringUtils.stringifyException(ie));
      }
    }

    createChildTmpDir(workDir, conf, true);
  }

  /**
   * Links each localized cache entry into {@code workDir}, using the fragment of the matching
   * source URI as the link name. Does nothing when {@code sources} is {@code null}.
   *
   * <p>No check is made that {@code localized} is non-null or as long as {@code sources}; the
   * caller is assumed to pass arrays that line up index by index.
   */
  private static void linkCacheEntries(File workDir, URI[] sources, Path[] localized)
      throws IOException {
    if (sources == null) {
      return;
    }
    for (int idx = 0; idx < sources.length; idx++) {
      String linkName = sources[idx].getFragment();
      String linkTarget = localized[idx].toString();
      symlink(workDir, linkTarget, linkName);
    }
  }