예제 #1
0
 /**
  * Clears all the logs in userlog directory.
  *
  * <p>Adds the job directories for deletion with default retain hours. Deletes all other
  * directories, if any. This is usually called on reinit/restart of the TaskTracker
  *
  * @param conf
  * @throws IOException
  */
 void clearOldUserLogs(Configuration conf) throws IOException {
   File userLogDir = TaskLog.getUserLogDir();
   if (userLogDir.exists()) {
     String[] logDirs = userLogDir.list();
     if (logDirs.length > 0) {
       // add all the log dirs to taskLogsMnonitor.
       long now = clock.getTime();
       for (String logDir : logDirs) {
         if (logDir.equals(logAsyncDisk.TOBEDELETED)) {
           // skip this
           continue;
         }
         JobID jobid = null;
         try {
           jobid = JobID.forName(logDir);
         } catch (IllegalArgumentException ie) {
           // if the directory is not a jobid, delete it immediately
           deleteLogPath(new File(userLogDir, logDir).getAbsolutePath());
           continue;
         }
         // add the job log directory with default retain hours, if it is not
         // already added
         if (!completedJobs.containsKey(jobid)) {
           markJobLogsForDeletion(now, conf, jobid);
         }
       }
     }
   }
 }
예제 #2
0
 UserLogCleaner(Configuration conf) throws IOException {
   threadSleepTime = conf.getLong(TTConfig.TT_USERLOGCLEANUP_SLEEPTIME, DEFAULT_THREAD_SLEEP_TIME);
   logAsyncDisk =
       new MRAsyncDiskService(FileSystem.getLocal(conf), TaskLog.getUserLogDir().toString());
   setClock(new Clock());
 }
 @After
 public void tearDown() throws IOException {
   FileUtil.fullyDelete(TaskLog.getUserLogDir());
   FileUtil.fullyDelete(new File(TEST_ROOT_DIR));
 }
public class TestUserLogCleanup {
  private static String jtid = "test";
  private static long ONE_HOUR = 1000 * 60 * 60;
  private Localizer localizer;
  private UserLogManager userLogManager;
  private UserLogCleaner userLogCleaner;
  private TaskTracker tt;
  private FakeClock myClock;
  private JobID jobid1 = new JobID(jtid, 1);
  private JobID jobid2 = new JobID(jtid, 2);
  private JobID jobid3 = new JobID(jtid, 3);
  private JobID jobid4 = new JobID(jtid, 4);
  private File foo = new File(TaskLog.getUserLogDir(), "foo");
  private File bar = new File(TaskLog.getUserLogDir(), "bar");
  private static String TEST_ROOT_DIR = System.getProperty("test.build.data", "/tmp");

  public TestUserLogCleanup() throws IOException, InterruptedException {
    JobConf conf = new JobConf();
    startTT(conf);
  }

  @After
  public void tearDown() throws IOException {
    FileUtil.fullyDelete(TaskLog.getUserLogDir());
    FileUtil.fullyDelete(new File(TEST_ROOT_DIR));
  }

  private File localizeJob(JobID jobid) throws IOException {
    String user = UserGroupInformation.getCurrentUser().getShortUserName();
    new JobLocalizer(tt.getJobConf(), user, jobid.toString()).initializeJobLogDir();
    File jobUserlog = TaskLog.getJobDir(jobid);
    JobConf conf = new JobConf();
    // localize job log directory
    tt.saveLogDir(jobid, conf);
    assertTrue(jobUserlog + " directory is not created.", jobUserlog.exists());
    return jobUserlog;
  }

  private void jobFinished(JobID jobid, int logRetainHours) {
    JobCompletedEvent jce = new JobCompletedEvent(jobid, myClock.getTime(), logRetainHours);
    userLogManager.addLogEvent(jce);
  }

  private void startTT(JobConf conf) throws IOException, InterruptedException {
    myClock = new FakeClock(); // clock is reset.
    String localdirs = TEST_ROOT_DIR + "/userlogs/local/0," + TEST_ROOT_DIR + "/userlogs/local/1";
    conf.set(JobConf.MAPRED_LOCAL_DIR_PROPERTY, localdirs);
    tt = new TaskTracker();
    tt.setConf(new JobConf(conf));
    LocalDirAllocator localDirAllocator = new LocalDirAllocator("mapred.local.dir");
    tt.setLocalDirAllocator(localDirAllocator);
    LocalStorage localStorage = new LocalStorage(conf.getLocalDirs());
    localStorage.checkLocalDirs();
    tt.setLocalStorage(localStorage);
    localizer =
        new Localizer(FileSystem.get(conf), conf.getStrings(JobConf.MAPRED_LOCAL_DIR_PROPERTY));
    tt.setLocalizer(localizer);
    userLogManager = new UtilsForTests.InLineUserLogManager(conf);
    TaskController taskController = userLogManager.getTaskController();
    taskController.setup(localDirAllocator, localStorage);
    tt.setTaskController(taskController);
    userLogCleaner = userLogManager.getUserLogCleaner();
    userLogCleaner.setClock(myClock);
    tt.setUserLogManager(userLogManager);
    userLogManager.clearOldUserLogs(conf);
  }

  private void ttReinited() throws IOException {
    JobConf conf = new JobConf();
    conf.setInt(JobContext.USER_LOG_RETAIN_HOURS, 3);
    userLogManager.clearOldUserLogs(conf);
  }

  private void ttRestarted() throws IOException, InterruptedException {
    JobConf conf = new JobConf();
    conf.setInt(JobContext.USER_LOG_RETAIN_HOURS, 3);
    startTT(conf);
  }

  /**
   * Tests job user-log directory deletion.
   *
   * <p>Adds two jobs for log deletion. One with one hour retain hours, other with two retain hours.
   * After an hour, TaskLogCleanupThread.processCompletedJobs() call, makes sure job with 1hr retain
   * hours is removed and other is retained. After one more hour, job with 2hr retain hours is also
   * removed.
   *
   * @throws IOException
   */
  @Test
  public void testJobLogCleanup() throws IOException {
    File jobUserlog1 = localizeJob(jobid1);
    File jobUserlog2 = localizeJob(jobid2);

    // add job user log directory for deletion, with 2 hours for deletion
    jobFinished(jobid1, 2);

    // add the job for deletion with one hour as retain hours
    jobFinished(jobid2, 1);

    // remove old logs and see jobid1 is not removed and jobid2 is removed
    myClock.advance(ONE_HOUR);
    userLogCleaner.processCompletedJobs();
    assertTrue(jobUserlog1 + " got deleted", jobUserlog1.exists());
    assertFalse(jobUserlog2 + " still exists.", jobUserlog2.exists());

    myClock.advance(ONE_HOUR);
    // remove old logs and see jobid1 is removed now
    userLogCleaner.processCompletedJobs();
    assertFalse(jobUserlog1 + " still exists.", jobUserlog1.exists());
  }

  /**
   * Tests user-log directory cleanup on a TT re-init with 3 hours as log retain hours for tracker.
   *
   * <p>Adds job1 deletion before the re-init with 2 hour retain hours. Adds job2 for which there
   * are no tasks/killJobAction after the re-init. Adds job3 for which there is localizeJob followed
   * by killJobAction with 3 hours as retain hours. Adds job4 for which there are some tasks after
   * the re-init.
   *
   * @throws IOException
   */
  @Test
  public void testUserLogCleanup() throws IOException {
    File jobUserlog1 = localizeJob(jobid1);
    File jobUserlog2 = localizeJob(jobid2);
    File jobUserlog3 = localizeJob(jobid3);
    File jobUserlog4 = localizeJob(jobid4);
    // create a some files/dirs in userlog
    foo.mkdirs();
    bar.createNewFile();

    // add the jobid1 for deletion with retainhours = 2
    jobFinished(jobid1, 2);

    // time is now 1.
    myClock.advance(ONE_HOUR);

    // mimic TaskTracker reinit
    // re-init the tt with 3 hours as user log retain hours.
    // This re-init clears the user log directory
    // job directories will be added with 3 hours as retain hours.
    // i.e. They will be deleted at time 4.
    ttReinited();

    assertFalse(foo.exists());
    assertFalse(bar.exists());
    assertTrue(jobUserlog1.exists());
    assertTrue(jobUserlog2.exists());
    assertTrue(jobUserlog3.exists());
    assertTrue(jobUserlog4.exists());

    myClock.advance(ONE_HOUR);
    // time is now 2.
    userLogCleaner.processCompletedJobs();
    assertFalse(jobUserlog1.exists());
    assertTrue(jobUserlog2.exists());
    assertTrue(jobUserlog3.exists());
    assertTrue(jobUserlog4.exists());

    // mimic localizeJob followed KillJobAction for jobid3
    // add the job for deletion with retainhours = 3.
    // jobid3 should be deleted at time 5.
    jobUserlog3 = localizeJob(jobid3);
    jobFinished(jobid3, 3);

    // mimic localizeJob for jobid4
    jobUserlog4 = localizeJob(jobid4);

    // do cleanup
    myClock.advance(2 * ONE_HOUR);
    // time is now 4.
    userLogCleaner.processCompletedJobs();

    // jobid2 will be deleted
    assertFalse(jobUserlog1.exists());
    assertFalse(jobUserlog2.exists());
    assertTrue(jobUserlog3.exists());
    assertTrue(jobUserlog4.exists());

    myClock.advance(ONE_HOUR);
    // time is now 5.
    // do cleanup again
    userLogCleaner.processCompletedJobs();

    // jobid3 will be deleted
    assertFalse(jobUserlog1.exists());
    assertFalse(jobUserlog2.exists());
    assertFalse(jobUserlog3.exists());
    assertTrue(jobUserlog4.exists());
  }

  /**
   * Tests user-log directory cleanup on a TT restart.
   *
   * <p>Adds job1 deletion before the restart with 2 hour retain hours. Adds job2 for which there
   * are no tasks/killJobAction after the restart. Adds job3 for which there is localizeJob followed
   * by killJobAction after the restart with 3 hours retain hours. Adds job4 for which there are
   * some tasks after the restart.
   *
   * @throws IOException
   * @throws InterruptedException
   */
  @Test
  public void testUserLogCleanupAfterRestart() throws IOException, InterruptedException {
    File jobUserlog1 = localizeJob(jobid1);
    File jobUserlog2 = localizeJob(jobid2);
    File jobUserlog3 = localizeJob(jobid3);
    File jobUserlog4 = localizeJob(jobid4);
    // create a some files/dirs in userlog
    foo.mkdirs();
    bar.createNewFile();

    // add the jobid1 for deletion with retain hours = 2
    jobFinished(jobid1, 2);

    // time is now 1.
    myClock.advance(ONE_HOUR);

    // Mimic the TaskTracker restart
    // Restart the tt with 3 hours as user log retain hours.
    // This restart clears the user log directory
    // job directories will be added with 3 hours as retain hours.
    // i.e. They will be deleted at time 3 as clock will reset after the restart
    ttRestarted();

    assertFalse(foo.exists());
    assertFalse(bar.exists());
    assertTrue(jobUserlog1.exists());
    assertTrue(jobUserlog2.exists());
    assertTrue(jobUserlog3.exists());
    assertTrue(jobUserlog4.exists());

    myClock.advance(ONE_HOUR);
    // time is now 1.
    userLogCleaner.processCompletedJobs();
    assertTrue(jobUserlog1.exists());
    assertTrue(jobUserlog2.exists());
    assertTrue(jobUserlog3.exists());
    assertTrue(jobUserlog4.exists());

    // mimic localizeJob followed KillJobAction for jobid3
    // add the job for deletion with retainhours = 3.
    // jobid3 should be deleted at time 4.
    jobUserlog3 = localizeJob(jobid3);
    jobFinished(jobid3, 3);

    // mimic localizeJob for jobid4
    jobUserlog4 = localizeJob(jobid4);

    // do cleanup
    myClock.advance(2 * ONE_HOUR);
    // time is now 3.
    userLogCleaner.processCompletedJobs();

    // jobid1 and jobid2 will be deleted
    assertFalse(jobUserlog1.exists());
    assertFalse(jobUserlog2.exists());
    assertTrue(jobUserlog3.exists());
    assertTrue(jobUserlog4.exists());

    myClock.advance(ONE_HOUR);
    // time is now 4.
    // do cleanup again
    userLogCleaner.processCompletedJobs();

    // jobid3 will be deleted
    assertFalse(jobUserlog1.exists());
    assertFalse(jobUserlog2.exists());
    assertFalse(jobUserlog3.exists());
    assertTrue(jobUserlog4.exists());
  }
}