Beispiel #1
0
  private void checkLocalizedPath(boolean visibility)
      throws IOException, LoginException, InterruptedException {
    TrackerDistributedCacheManager manager =
        new TrackerDistributedCacheManager(conf, taskController);
    String userName = getJobOwnerName();
    File workDir = new File(TEST_ROOT_DIR, "workdir");
    Path cacheFile = new Path(TEST_ROOT_DIR, "fourthcachefile");
    if (visibility) {
      createPublicTempFile(cacheFile);
    } else {
      createPrivateTempFile(cacheFile);
    }

    Configuration conf1 = new Configuration(conf);
    conf1.set("user.name", userName);
    DistributedCache.addCacheFile(cacheFile.toUri(), conf1);
    TrackerDistributedCacheManager.determineTimestamps(conf1);
    TrackerDistributedCacheManager.determineCacheVisibilities(conf1);
    dumpState(conf1);

    // Task localizing for job
    TaskDistributedCacheManager handle =
        manager.newTaskDistributedCacheManager(new JobID("jt", 1), conf1);
    handle.setupCache(
        conf1,
        TaskTracker.getPublicDistributedCacheDir(),
        TaskTracker.getPrivateDistributedCacheDir(userName));
    JobLocalizer.downloadPrivateCache(conf1);
    TaskDistributedCacheManager.CacheFile c = handle.getCacheFiles().get(0);
    String distCacheDir;
    if (visibility) {
      distCacheDir = TaskTracker.getPublicDistributedCacheDir();
    } else {
      distCacheDir = TaskTracker.getPrivateDistributedCacheDir(userName);
    }
    Path localizedPath =
        manager.getLocalCache(
            cacheFile.toUri(),
            conf1,
            distCacheDir,
            fs.getFileStatus(cacheFile),
            false,
            c.timestamp,
            visibility,
            c);
    assertTrue(
        "Cache file didn't get localized in the expected directory. "
            + "Expected localization to happen within "
            + ROOT_MAPRED_LOCAL_DIR
            + "/"
            + distCacheDir
            + ", but was localized at "
            + localizedPath,
        localizedPath.toString().contains(distCacheDir));
    if (visibility) {
      checkPublicFilePermissions(new Path[] {localizedPath});
    } else {
      checkFilePermissions(new Path[] {localizedPath});
    }
  }
Beispiel #2
0
  /**
   * This is the typical flow for using the DistributedCache classes.
   *
   * @throws IOException
   * @throws LoginException
   */
  public void testManagerFlow() throws IOException, LoginException {
    if (!canRun()) {
      return;
    }

    // ****** Imitate JobClient code
    // Configures a task/job with both a regular file and a "classpath" file.
    Configuration subConf = new Configuration(conf);
    String userName = getJobOwnerName();
    subConf.set("user.name", userName);
    JobID jobid = new JobID("jt", 1);
    DistributedCache.addCacheFile(firstCacheFile.toUri(), subConf);
    DistributedCache.addFileToClassPath(secondCacheFile, subConf, FileSystem.get(subConf));
    TrackerDistributedCacheManager.determineTimestamps(subConf);
    TrackerDistributedCacheManager.determineCacheVisibilities(subConf);
    // ****** End of imitating JobClient code

    Path jobFile = new Path(TEST_ROOT_DIR, "job.xml");
    FileOutputStream os = new FileOutputStream(new File(jobFile.toString()));
    subConf.writeXml(os);
    os.close();

    // ****** Imitate TaskRunner code.
    TrackerDistributedCacheManager manager =
        new TrackerDistributedCacheManager(conf, taskController);
    TaskDistributedCacheManager handle = manager.newTaskDistributedCacheManager(jobid, subConf);
    assertNull(null, DistributedCache.getLocalCacheFiles(subConf));
    File workDir = new File(new Path(TEST_ROOT_DIR, "workdir").toString());
    handle.setupCache(
        subConf,
        TaskTracker.getPublicDistributedCacheDir(),
        TaskTracker.getPrivateDistributedCacheDir(userName));
    JobLocalizer.downloadPrivateCache(subConf);
    // DOESN'T ACTUALLY HAPPEN IN THE TaskRunner (THIS IS A TODO)
    //    handle.setupPrivateCache(localDirAllocator, TaskTracker
    //        .getPrivateDistributedCacheDir(userName));
    //    // ****** End of imitating TaskRunner code

    Path[] localCacheFiles = DistributedCache.getLocalCacheFiles(subConf);
    assertNotNull(null, localCacheFiles);
    assertEquals(2, localCacheFiles.length);
    Path cachedFirstFile = localCacheFiles[0];
    Path cachedSecondFile = localCacheFiles[1];
    assertFileLengthEquals(firstCacheFile, cachedFirstFile);
    assertFalse("Paths should be different.", firstCacheFile.equals(cachedFirstFile));

    assertEquals(1, handle.getClassPaths().size());
    assertEquals(cachedSecondFile.toString(), handle.getClassPaths().get(0));

    checkFilePermissions(localCacheFiles);

    // Cleanup
    handle.release();
    manager.purgeCache();
    assertFalse(pathToFile(cachedFirstFile).exists());
  }
Beispiel #3
0
  /** test delete cache */
  public void testDeleteCache() throws Exception {
    if (!canRun()) {
      return;
    }
    // This test needs mapred.local.dir to be single directory
    // instead of four, because it assumes that both
    // firstcachefile and secondcachefile will be localized on same directory
    // so that second localization triggers deleteCache.
    // If mapred.local.dir is four directories, second localization might not
    // trigger deleteCache, if it is localized in different directory.
    Configuration conf2 = new Configuration(conf);
    conf2.set("mapred.local.dir", ROOT_MAPRED_LOCAL_DIR.toString());
    conf2.setLong("local.cache.size", LOCAL_CACHE_LIMIT);

    refreshConf(conf2);
    TrackerDistributedCacheManager manager =
        new TrackerDistributedCacheManager(conf2, taskController);
    FileSystem localfs = FileSystem.getLocal(conf2);
    long now = System.currentTimeMillis();
    String userName = getJobOwnerName();
    conf2.set("user.name", userName);

    // We first test the size limit
    FileStatus stat = fs.getFileStatus(firstCacheFilePublic);
    CacheFile cfile1 =
        new CacheFile(
            firstCacheFilePublic.toUri(),
            CacheFile.FileType.REGULAR,
            true,
            stat.getModificationTime(),
            true);
    Path firstLocalCache =
        manager.getLocalCache(
            firstCacheFilePublic.toUri(),
            conf2,
            TaskTracker.getPrivateDistributedCacheDir(userName),
            fs.getFileStatus(firstCacheFilePublic),
            false,
            fs.getFileStatus(firstCacheFilePublic).getModificationTime(),
            true,
            cfile1);
    manager.releaseCache(cfile1.getStatus());
    // in above code,localized a file of size 4K and then release the cache
    // which will cause the cache be deleted when the limit goes out.
    // The below code localize another cache which's designed to
    // sweep away the first cache.
    stat = fs.getFileStatus(secondCacheFilePublic);
    CacheFile cfile2 =
        new CacheFile(
            secondCacheFilePublic.toUri(),
            CacheFile.FileType.REGULAR,
            true,
            stat.getModificationTime(),
            true);
    assertTrue(
        "DistributedCache currently doesn't have cached file", localfs.exists(firstLocalCache));
    Path secondLocalCache =
        manager.getLocalCache(
            secondCacheFilePublic.toUri(),
            conf2,
            TaskTracker.getPrivateDistributedCacheDir(userName),
            fs.getFileStatus(secondCacheFilePublic),
            false,
            fs.getFileStatus(secondCacheFilePublic).getModificationTime(),
            true,
            cfile2);
    assertFalse(
        "DistributedCache failed deleting old" + " cache when the cache store is full.",
        localfs.exists(firstLocalCache));

    // find the root directory of distributed caches
    Path firstCursor = firstLocalCache;
    Path secondCursor = secondLocalCache;

    while (!firstCursor.equals(secondCursor)) {
      // Debug code, to see what these things look like
      System.err.println("cursors: " + firstCursor);
      System.err.println(" and " + secondCursor);

      firstCursor = firstCursor.getParent();
      secondCursor = secondCursor.getParent();
    }

    System.err.println("The final cursor is " + firstCursor);

    System.err.println(
        "That directory ends up with "
            + localfs.listStatus(firstCursor).length
            + " subdirectories");
    Path cachesBase = firstCursor;

    assertFalse(
        "DistributedCache did not delete the gensym'ed distcache "
            + "directory names when it deleted the files they contained "
            + "because they collectively exceeded the size limit.",
        localfs.listStatus(cachesBase).length > 1);
    conf2.setLong("local.cache.size", LOCAL_CACHE_LIMIT * 10);
    conf2.setLong("mapreduce.tasktracker.local.cache.numberdirectories", LOCAL_CACHE_SUBDIR_LIMIT);
    manager = new TrackerDistributedCacheManager(conf2, taskController);
    // Now we test the number of sub directories limit
    // Create the temporary cache files to be used in the tests.
    Path thirdCacheFile = new Path(TEST_ROOT_DIR, "thirdcachefile");
    Path fourthCacheFile = new Path(TEST_ROOT_DIR, "fourthcachefile");
    // Adding two more small files, so it triggers the number of sub directory
    // limit but does not trigger the file size limit.
    createPrivateTempFile(thirdCacheFile);
    createPrivateTempFile(fourthCacheFile);
    DistributedCache.setCacheFiles(new URI[] {thirdCacheFile.toUri()}, conf2);
    TrackerDistributedCacheManager.determineCacheVisibilities(conf2);
    TrackerDistributedCacheManager.determineTimestamps(conf2);
    stat = fs.getFileStatus(thirdCacheFile);
    CacheFile cfile3 =
        new CacheFile(
            thirdCacheFile.toUri(),
            CacheFile.FileType.REGULAR,
            false,
            stat.getModificationTime(),
            true);
    Path thirdLocalCache =
        manager.getLocalCache(
            thirdCacheFile.toUri(),
            conf2,
            TaskTracker.getPrivateDistributedCacheDir(userName),
            fs.getFileStatus(thirdCacheFile),
            false,
            fs.getFileStatus(thirdCacheFile).getModificationTime(),
            false,
            cfile3);
    DistributedCache.setLocalFiles(conf2, thirdLocalCache.toString());
    JobLocalizer.downloadPrivateCache(conf2);
    // Release the third cache so that it can be deleted while sweeping
    manager.releaseCache(cfile3.getStatus());
    // Getting the fourth cache will make the number of sub directories becomes
    // 3 which is greater than 2. So the released cache will be deleted.
    stat = fs.getFileStatus(fourthCacheFile);
    CacheFile cfile4 =
        new CacheFile(
            fourthCacheFile.toUri(),
            CacheFile.FileType.REGULAR,
            false,
            stat.getModificationTime(),
            true);
    assertTrue(
        "DistributedCache currently doesn't have cached file", localfs.exists(thirdLocalCache));

    DistributedCache.setCacheFiles(new URI[] {fourthCacheFile.toUri()}, conf2);
    DistributedCache.setLocalFiles(conf2, thirdCacheFile.toUri().toString());
    TrackerDistributedCacheManager.determineCacheVisibilities(conf2);
    TrackerDistributedCacheManager.determineTimestamps(conf2);
    Path fourthLocalCache =
        manager.getLocalCache(
            fourthCacheFile.toUri(),
            conf2,
            TaskTracker.getPrivateDistributedCacheDir(userName),
            fs.getFileStatus(fourthCacheFile),
            false,
            fs.getFileStatus(fourthCacheFile).getModificationTime(),
            false,
            cfile4);
    assertFalse(
        "DistributedCache failed deleting old"
            + " cache when the cache exceeds the number of sub directories limit.",
        localfs.exists(thirdLocalCache));

    assertFalse(
        "DistributedCache did not delete the gensym'ed distcache "
            + "directory names when it deleted the files they contained "
            + "because there were too many.",
        localfs.listStatus(cachesBase).length > LOCAL_CACHE_SUBDIR_LIMIT);

    // Clean up the files created in this test
    new File(thirdCacheFile.toString()).delete();
    new File(fourthCacheFile.toString()).delete();
  }
Beispiel #4
0
  public void testReferenceCount()
      throws IOException, LoginException, URISyntaxException, InterruptedException {
    if (!canRun()) {
      return;
    }
    TrackerDistributedCacheManager manager = new FakeTrackerDistributedCacheManager(conf);

    String userName = getJobOwnerName();
    File workDir = new File(new Path(TEST_ROOT_DIR, "workdir").toString());

    // Configures a job with a regular file
    Job job1 = new Job(conf);
    Configuration conf1 = job1.getConfiguration();
    conf1.set("user.name", userName);
    DistributedCache.addCacheFile(secondCacheFile.toUri(), conf1);

    TrackerDistributedCacheManager.determineTimestamps(conf1);
    TrackerDistributedCacheManager.determineCacheVisibilities(conf1);

    // Task localizing for first job
    TaskDistributedCacheManager handle =
        manager.newTaskDistributedCacheManager(new JobID("jt", 1), conf1);
    handle.setupCache(
        conf1,
        TaskTracker.getPublicDistributedCacheDir(),
        TaskTracker.getPrivateDistributedCacheDir(userName));
    JobLocalizer.downloadPrivateCache(conf1);
    handle.release();
    for (TaskDistributedCacheManager.CacheFile c : handle.getCacheFiles()) {
      assertEquals(0, manager.getReferenceCount(c.getStatus()));
    }

    Path thirdCacheFile = new Path(TEST_ROOT_DIR, "thirdcachefile");
    createPrivateTempFile(thirdCacheFile);

    // Configures another job with three regular files.
    Job job2 = new Job(conf);
    Configuration conf2 = job2.getConfiguration();
    conf2.set("user.name", userName);
    // add a file that would get failed to localize
    DistributedCache.addCacheFile(firstCacheFilePublic.toUri(), conf2);
    // add a file that is already localized by different job
    DistributedCache.addCacheFile(secondCacheFile.toUri(), conf2);
    // add a file that is never localized
    DistributedCache.addCacheFile(thirdCacheFile.toUri(), conf2);

    TrackerDistributedCacheManager.determineTimestamps(conf2);
    TrackerDistributedCacheManager.determineCacheVisibilities(conf2);

    // Task localizing for second job
    // localization for the "firstCacheFile" will fail.
    handle = manager.newTaskDistributedCacheManager(new JobID("jt", 2), conf2);
    Throwable th = null;
    try {
      handle.setupCache(
          conf2,
          TaskTracker.getPublicDistributedCacheDir(),
          TaskTracker.getPrivateDistributedCacheDir(userName));
      JobLocalizer.downloadPrivateCache(conf2);
    } catch (IOException e) {
      th = e;
      LOG.info("Exception during setup", e);
    }
    assertNotNull(th);
    assertTrue(th.getMessage().contains("fake fail"));
    handle.release();
    th = null;
    for (TaskDistributedCacheManager.CacheFile c : handle.getCacheFiles()) {
      try {
        int refcount = manager.getReferenceCount(c.getStatus());
        LOG.info("checking refcount " + c.uri + " of " + refcount);
        assertEquals(0, refcount);
      } catch (NullPointerException ie) {
        th = ie;
        LOG.info("Exception getting reference count for " + c.uri, ie);
      }
    }
    assertNotNull(th);
    fs.delete(thirdCacheFile, false);
  }