@Override
 public void run() {
   if (LOG.isDebugEnabled()) {
     LOG.debug(this);
   }
   boolean error = false;
   if (null == user) {
     if (baseDirs == null || baseDirs.size() == 0) {
       LOG.debug("NM deleting absolute path : " + subDir);
       try {
         lfs.delete(subDir, true);
       } catch (IOException e) {
         error = true;
         LOG.warn("Failed to delete " + subDir);
       }
     } else {
       for (Path baseDir : baseDirs) {
         Path del = subDir == null ? baseDir : new Path(baseDir, subDir);
         LOG.debug("NM deleting path : " + del);
         try {
           lfs.delete(del, true);
         } catch (IOException e) {
           error = true;
           LOG.warn("Failed to delete " + subDir);
         }
       }
     }
   } else {
     try {
       LOG.debug("Deleting path: [" + subDir + "] as user: [" + user + "]");
       if (baseDirs == null || baseDirs.size() == 0) {
         delService.exec.deleteAsUser(user, subDir, (Path[]) null);
       } else {
         delService.exec.deleteAsUser(user, subDir, baseDirs.toArray(new Path[0]));
       }
     } catch (IOException e) {
       error = true;
       LOG.warn("Failed to delete as user " + user, e);
     } catch (InterruptedException e) {
       error = true;
       LOG.warn("Failed to delete as user " + user, e);
     }
   }
   if (error) {
     setSuccess(!error);
   }
   fileDeletionTaskFinished();
 }
 @BeforeClass
 public static void setup()
     throws AccessControlException, FileNotFoundException, UnsupportedFileSystemException,
         IOException {
   localFS = FileContext.getLocalFSFileContext();
   localFS.delete(new Path(localFSDirBase.getAbsolutePath()), true);
   localFSDirBase.mkdirs();
   // Do not start cluster here
 }
  /**
   * Clean up older history files.
   *
   * @throws IOException on any error trying to remove the entries.
   */
  @SuppressWarnings("unchecked")
  void clean() throws IOException {
    // TODO this should be replaced by something that knows about the directory
    // structure and will put less of a load on HDFS.
    long cutoff = System.currentTimeMillis() - maxHistoryAge;
    boolean halted = false;
    // TODO Delete YYYY/MM/DD directories.
    List<FileStatus> serialDirList = findTimestampedDirectories();
    // Sort in ascending order. Relies on YYYY/MM/DD/Serial
    Collections.sort(serialDirList);
    for (FileStatus serialDir : serialDirList) {
      List<FileStatus> historyFileList =
          scanDirectoryForHistoryFiles(serialDir.getPath(), doneDirFc);
      for (FileStatus historyFile : historyFileList) {
        JobIndexInfo jobIndexInfo =
            FileNameIndexUtils.getIndexInfo(historyFile.getPath().getName());
        long effectiveTimestamp = getEffectiveTimestamp(jobIndexInfo.getFinishTime(), historyFile);
        if (effectiveTimestamp <= cutoff) {
          HistoryFileInfo fileInfo = this.jobListCache.get(jobIndexInfo.getJobId());
          if (fileInfo == null) {
            String confFileName =
                JobHistoryUtils.getIntermediateConfFileName(jobIndexInfo.getJobId());

            fileInfo =
                new HistoryFileInfo(
                    historyFile.getPath(),
                    new Path(historyFile.getPath().getParent(), confFileName),
                    null,
                    jobIndexInfo,
                    true);
          }
          deleteJobFromDone(fileInfo);
        } else {
          halted = true;
          break;
        }
      }
      if (!halted) {
        doneDirFc.delete(doneDirFc.makeQualified(serialDir.getPath()), true);
        removeDirectoryFromSerialNumberIndex(serialDir.getPath());
        existingDoneSubdirs.remove(serialDir.getPath());
      } else {
        break; // Don't scan any more directories.
      }
    }
  }
Exemple #4
0
  @Test
  public void testSuccessfulContainerLaunch()
      throws InterruptedException, IOException, YarnException {

    FileContext localFS = FileContext.getLocalFSFileContext();

    localFS.delete(new Path(localDir.getAbsolutePath()), true);
    localFS.delete(new Path(localLogDir.getAbsolutePath()), true);
    localFS.delete(new Path(remoteLogDir.getAbsolutePath()), true);
    localDir.mkdir();
    localLogDir.mkdir();
    remoteLogDir.mkdir();

    YarnConfiguration conf = new YarnConfiguration();

    Context context =
        new NMContext(
            new NMContainerTokenSecretManager(conf),
            new NMTokenSecretManagerInNM(),
            null,
            null,
            new NMNullStateStoreService()) {
          @Override
          public int getHttpPort() {
            return 1234;
          }
        };

    conf.set(YarnConfiguration.NM_LOCAL_DIRS, localDir.getAbsolutePath());
    conf.set(YarnConfiguration.NM_LOG_DIRS, localLogDir.getAbsolutePath());
    conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, remoteLogDir.getAbsolutePath());

    ContainerExecutor exec = new DefaultContainerExecutor();
    exec.setConf(conf);

    DeletionService del = new DeletionService(exec);
    Dispatcher dispatcher = new AsyncDispatcher();
    LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService();
    NodeHealthCheckerService healthChecker =
        new NodeHealthCheckerService(NodeManager.getNodeHealthScriptRunner(conf), dirsHandler);
    healthChecker.init(conf);
    NodeManagerMetrics metrics = NodeManagerMetrics.create();
    NodeStatusUpdater nodeStatusUpdater =
        new NodeStatusUpdaterImpl(context, dispatcher, healthChecker, metrics) {
          @Override
          protected ResourceTracker getRMClient() {
            return new LocalRMInterface();
          };

          @Override
          protected void stopRMProxy() {
            return;
          }

          @Override
          protected void startStatusUpdater() {
            return; // Don't start any updating thread.
          }

          @Override
          public long getRMIdentifier() {
            return SIMULATED_RM_IDENTIFIER;
          }
        };

    DummyContainerManager containerManager =
        new DummyContainerManager(
            context,
            exec,
            del,
            nodeStatusUpdater,
            metrics,
            new ApplicationACLsManager(conf),
            dirsHandler);
    nodeStatusUpdater.init(conf);
    ((NMContext) context).setContainerManager(containerManager);
    nodeStatusUpdater.start();
    containerManager.init(conf);
    containerManager.start();

    ContainerLaunchContext launchContext =
        recordFactory.newRecordInstance(ContainerLaunchContext.class);
    ApplicationId applicationId = ApplicationId.newInstance(0, 0);
    ApplicationAttemptId applicationAttemptId = ApplicationAttemptId.newInstance(applicationId, 0);
    ContainerId cID = ContainerId.newContainerId(applicationAttemptId, 0);

    String user = "******";
    StartContainerRequest scRequest =
        StartContainerRequest.newInstance(
            launchContext,
            TestContainerManager.createContainerToken(
                cID,
                SIMULATED_RM_IDENTIFIER,
                context.getNodeId(),
                user,
                context.getContainerTokenSecretManager()));
    List<StartContainerRequest> list = new ArrayList<StartContainerRequest>();
    list.add(scRequest);
    StartContainersRequest allRequests = StartContainersRequest.newInstance(list);
    containerManager.startContainers(allRequests);

    BaseContainerManagerTest.waitForContainerState(containerManager, cID, ContainerState.RUNNING);

    List<ContainerId> containerIds = new ArrayList<ContainerId>();
    containerIds.add(cID);
    StopContainersRequest stopRequest = StopContainersRequest.newInstance(containerIds);
    containerManager.stopContainers(stopRequest);
    BaseContainerManagerTest.waitForContainerState(containerManager, cID, ContainerState.COMPLETE);

    containerManager.stop();
  }
 protected boolean deleteDir(FileStatus serialDir)
     throws AccessControlException, FileNotFoundException, UnsupportedFileSystemException,
         IOException {
   return doneDirFc.delete(doneDirFc.makeQualified(serialDir.getPath()), true);
 }
  @Override
  public void serviceInit(Configuration conf) throws Exception {
    conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_TEZ_FRAMEWORK_NAME);
    // blacklisting disabled to prevent scheduling issues
    conf.setBoolean(TezConfiguration.TEZ_AM_NODE_BLACKLISTING_ENABLED, false);
    if (conf.get(MRJobConfig.MR_AM_STAGING_DIR) == null) {
      conf.set(
          MRJobConfig.MR_AM_STAGING_DIR,
          new File(getTestWorkDir(), "apps_staging_dir" + Path.SEPARATOR).getAbsolutePath());
    }

    if (conf.get(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC) == null) {
      // nothing defined. set quick delete value
      conf.setLong(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC, 0l);
    }

    File appJarLocalFile = new File(MiniTezCluster.APPJAR);

    if (!appJarLocalFile.exists()) {
      String message = "TezAppJar " + MiniTezCluster.APPJAR + " not found. Exiting.";
      LOG.info(message);
      throw new TezUncheckedException(message);
    }

    FileSystem fs = FileSystem.get(conf);
    Path testRootDir = fs.makeQualified(new Path("target", getName() + "-tmpDir"));
    Path appRemoteJar = new Path(testRootDir, "TezAppJar.jar");
    // Copy AppJar and make it public.
    Path appMasterJar = new Path(MiniTezCluster.APPJAR);
    fs.copyFromLocalFile(appMasterJar, appRemoteJar);
    fs.setPermission(appRemoteJar, new FsPermission("777"));

    conf.set(TezConfiguration.TEZ_LIB_URIS, appRemoteJar.toUri().toString());
    LOG.info("Set TEZ-LIB-URI to: " + conf.get(TezConfiguration.TEZ_LIB_URIS));

    // VMEM monitoring disabled, PMEM monitoring enabled.
    conf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, false);
    conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false);

    conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "000");

    try {
      Path stagingPath =
          FileContext.getFileContext(conf)
              .makeQualified(new Path(conf.get(MRJobConfig.MR_AM_STAGING_DIR)));
      /*
       * Re-configure the staging path on Windows if the file system is localFs.
       * We need to use a absolute path that contains the drive letter. The unit
       * test could run on a different drive than the AM. We can run into the
       * issue that job files are localized to the drive where the test runs on,
       * while the AM starts on a different drive and fails to find the job
       * metafiles. Using absolute path can avoid this ambiguity.
       */
      if (Path.WINDOWS) {
        if (LocalFileSystem.class.isInstance(stagingPath.getFileSystem(conf))) {
          conf.set(
              MRJobConfig.MR_AM_STAGING_DIR,
              new File(conf.get(MRJobConfig.MR_AM_STAGING_DIR)).getAbsolutePath());
        }
      }
      FileContext fc = FileContext.getFileContext(stagingPath.toUri(), conf);
      if (fc.util().exists(stagingPath)) {
        LOG.info(stagingPath + " exists! deleting...");
        fc.delete(stagingPath, true);
      }
      LOG.info("mkdir: " + stagingPath);
      fc.mkdir(stagingPath, null, true);

      // mkdir done directory as well
      String doneDir = JobHistoryUtils.getConfiguredHistoryServerDoneDirPrefix(conf);
      Path doneDirPath = fc.makeQualified(new Path(doneDir));
      fc.mkdir(doneDirPath, null, true);
    } catch (IOException e) {
      throw new TezUncheckedException("Could not create staging directory. ", e);
    }
    conf.set(MRConfig.MASTER_ADDRESS, "test");

    // configure the shuffle service in NM
    conf.setStrings(
        YarnConfiguration.NM_AUX_SERVICES,
        new String[] {ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID});
    conf.setClass(
        String.format(
            YarnConfiguration.NM_AUX_SERVICE_FMT, ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID),
        ShuffleHandler.class,
        Service.class);

    // Non-standard shuffle port
    conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0);

    conf.setClass(
        YarnConfiguration.NM_CONTAINER_EXECUTOR,
        DefaultContainerExecutor.class,
        ContainerExecutor.class);

    // TestMRJobs is for testing non-uberized operation only; see TestUberAM
    // for corresponding uberized tests.
    conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
    super.serviceInit(conf);
  }
  @Test
  public void testSuccessfulContainerLaunch() throws InterruptedException, IOException {

    FileContext localFS = FileContext.getLocalFSFileContext();

    localFS.delete(new Path(localDir.getAbsolutePath()), true);
    localFS.delete(new Path(localLogDir.getAbsolutePath()), true);
    localFS.delete(new Path(remoteLogDir.getAbsolutePath()), true);
    localDir.mkdir();
    localLogDir.mkdir();
    remoteLogDir.mkdir();

    Context context = new NMContext();

    YarnConfiguration conf = new YarnConfiguration();
    conf.set(YarnConfiguration.NM_LOCAL_DIRS, localDir.getAbsolutePath());
    conf.set(YarnConfiguration.NM_LOG_DIRS, localLogDir.getAbsolutePath());
    conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, remoteLogDir.getAbsolutePath());

    ContainerExecutor exec = new DefaultContainerExecutor();
    exec.setConf(conf);

    DeletionService del = new DeletionService(exec);
    Dispatcher dispatcher = new AsyncDispatcher();
    NodeHealthCheckerService healthChecker = new NodeHealthCheckerService();
    healthChecker.init(conf);
    LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler();
    NodeManagerMetrics metrics = NodeManagerMetrics.create();
    ContainerTokenSecretManager containerTokenSecretManager = new ContainerTokenSecretManager();
    NodeStatusUpdater nodeStatusUpdater =
        new NodeStatusUpdaterImpl(
            context, dispatcher, healthChecker, metrics, containerTokenSecretManager) {
          @Override
          protected ResourceTracker getRMClient() {
            return new LocalRMInterface();
          };

          @Override
          protected void startStatusUpdater() {
            return; // Don't start any updating thread.
          }
        };

    DummyContainerManager containerManager =
        new DummyContainerManager(
            context,
            exec,
            del,
            nodeStatusUpdater,
            metrics,
            containerTokenSecretManager,
            new ApplicationACLsManager(conf),
            dirsHandler);
    containerManager.init(conf);
    containerManager.start();

    ContainerLaunchContext launchContext =
        recordFactory.newRecordInstance(ContainerLaunchContext.class);
    ContainerId cID = recordFactory.newRecordInstance(ContainerId.class);
    ApplicationId applicationId = recordFactory.newRecordInstance(ApplicationId.class);
    applicationId.setClusterTimestamp(0);
    applicationId.setId(0);
    ApplicationAttemptId applicationAttemptId =
        recordFactory.newRecordInstance(ApplicationAttemptId.class);
    applicationAttemptId.setApplicationId(applicationId);
    applicationAttemptId.setAttemptId(0);
    cID.setApplicationAttemptId(applicationAttemptId);
    launchContext.setContainerId(cID);
    launchContext.setUser("testing");
    launchContext.setResource(recordFactory.newRecordInstance(Resource.class));
    StartContainerRequest request = recordFactory.newRecordInstance(StartContainerRequest.class);
    request.setContainerLaunchContext(launchContext);
    containerManager.startContainer(request);

    BaseContainerManagerTest.waitForContainerState(containerManager, cID, ContainerState.RUNNING);

    StopContainerRequest stopRequest = recordFactory.newRecordInstance(StopContainerRequest.class);
    stopRequest.setContainerId(cID);
    containerManager.stopContainer(stopRequest);
    BaseContainerManagerTest.waitForContainerState(containerManager, cID, ContainerState.COMPLETE);

    containerManager.stop();
  }