/**
 * Reads the job summary string stored in the given file.
 *
 * <p>The path is qualified against {@code fc}, opened, and a single string is read from it with
 * {@code readUTF()}.
 *
 * @param fc file context used to qualify and open the path
 * @param path location of the summary file
 * @return the string read from the file
 * @throws IOException if the file cannot be opened, read, or closed
 */
private String getJobSummary(FileContext fc, Path path) throws IOException {
  Path qPath = fc.makeQualified(path);
  FSDataInputStream in = fc.open(qPath);
  try {
    return in.readUTF();
  } finally {
    // Close on every path so a failed read does not leak the stream.
    in.close();
  }
}
Exemple #2
0
 /**
  * Builds the fully qualified path of the staging job-history file written by the attempt that
  * precedes the given application attempt (attempt id minus one).
  *
  * @param conf configuration used to look up the history staging directory and file contexts
  * @param applicationAttemptId the current application attempt
  * @return qualified path of the previous attempt's staging history file
  * @throws IOException if a file context cannot be obtained
  */
 public static Path getPreviousJobHistoryPath(
     Configuration conf, ApplicationAttemptId applicationAttemptId) throws IOException {
   final String jobId =
       TypeConverter.fromYarn(applicationAttemptId.getApplicationId()).toString();
   final String stagingDirName =
       JobHistoryUtils.getConfiguredHistoryStagingDirPrefix(conf, jobId);

   // Qualify the staging directory against the default file context first ...
   final FileContext defaultFc = FileContext.getFileContext(conf);
   final Path stagingDir = defaultFc.makeQualified(new Path(stagingDirName));

   // ... then resolve the history file through the context that owns that directory.
   final FileContext stagingFc = FileContext.getFileContext(stagingDir.toUri(), conf);
   final Path previousHistoryFile =
       JobHistoryUtils.getStagingJobHistoryFile(
           stagingDir, jobId, (applicationAttemptId.getAttemptId() - 1));
   return stagingFc.makeQualified(previousHistoryFile);
 }
 /**
  * Lists the entries directly under a directory and keeps the regular files accepted by the
  * filter.
  *
  * @param path directory to list; it is qualified against {@code fc} before listing
  * @param fc file context used for qualification and listing
  * @param pathFilter filter applied to the path of each regular file
  * @return statuses of the matching files, in iteration order
  * @throws IOException if the directory cannot be listed
  */
 private static List<FileStatus> scanDirectory(Path path, FileContext fc, PathFilter pathFilter)
     throws IOException {
   final Path qualifiedDir = fc.makeQualified(path);
   final List<FileStatus> matches = new ArrayList<FileStatus>();
   final RemoteIterator<FileStatus> entries = fc.listStatus(qualifiedDir);
   while (entries.hasNext()) {
     final FileStatus entry = entries.next();
     if (!entry.isFile()) {
       continue; // only regular files are candidates
     }
     if (pathFilter.accept(entry.getPath())) {
       matches.add(entry);
     }
   }
   return matches;
 }
  /**
   * Removes history files whose effective timestamp is at or before the retention cutoff
   * (current time minus {@code maxHistoryAge}).
   *
   * <p>Timestamped directories are visited in sorted order. Within a directory, files are deleted
   * until the first one newer than the cutoff is seen; at that point the whole scan stops. A
   * directory whose files were all old enough is itself deleted and dropped from the serial-number
   * index and the known done-subdirectory set.
   *
   * @throws IOException on any error trying to remove the entries.
   */
  @SuppressWarnings("unchecked")
  void clean() throws IOException {
    // TODO this should be replaced by something that knows about the directory
    // structure and will put less of a load on HDFS.
    // TODO Delete YYYY/MM/DD directories.
    final long cutoff = System.currentTimeMillis() - maxHistoryAge;

    List<FileStatus> serialDirList = findTimestampedDirectories();
    // Ascending order; relies on the YYYY/MM/DD/Serial layout.
    Collections.sort(serialDirList);

    for (FileStatus serialDir : serialDirList) {
      boolean everyFileExpired = true;

      for (FileStatus historyFile :
          scanDirectoryForHistoryFiles(serialDir.getPath(), doneDirFc)) {
        JobIndexInfo jobIndexInfo =
            FileNameIndexUtils.getIndexInfo(historyFile.getPath().getName());
        long effectiveTimestamp = getEffectiveTimestamp(jobIndexInfo.getFinishTime(), historyFile);
        if (effectiveTimestamp > cutoff) {
          // First file that is still within the retention window: stop here.
          everyFileExpired = false;
          break;
        }

        HistoryFileInfo fileInfo = this.jobListCache.get(jobIndexInfo.getJobId());
        if (fileInfo == null) {
          // Not cached: build a descriptor from the file's location so it can be deleted.
          String confFileName =
              JobHistoryUtils.getIntermediateConfFileName(jobIndexInfo.getJobId());
          Path historyPath = historyFile.getPath();
          Path confPath = new Path(historyFile.getPath().getParent(), confFileName);
          fileInfo = new HistoryFileInfo(historyPath, confPath, null, jobIndexInfo, true);
        }
        deleteJobFromDone(fileInfo);
      }

      if (!everyFileExpired) {
        return; // Don't scan any more directories.
      }

      doneDirFc.delete(doneDirFc.makeQualified(serialDir.getPath()), true);
      removeDirectoryFromSerialNumberIndex(serialDir.getPath());
      existingDoneSubdirs.remove(serialDir.getPath());
    }
  }
  /**
   * Runs an {@code MRAppWithHistoryWithFailedAttempt} job to completion, parses the history file
   * it leaves in the intermediate done directory, and checks that every task attempt carries the
   * expected rack name and that exactly two attempts are recorded as {@code FAILED}.
   *
   * @throws Exception if the job cannot be run or its history file cannot be opened or parsed
   */
  @Test
  public void testHistoryParsingForFailedAttempts() throws Exception {
    LOG.info("STARTING testHistoryParsingForFailedAttempts");
    try {
      Configuration conf = new Configuration();
      conf.setClass(
          CommonConfigurationKeysPublic.NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY,
          MyResolver.class,
          DNSToSwitchMapping.class);
      RackResolver.init(conf);
      MRApp app =
          new MRAppWithHistoryWithFailedAttempt(2, 1, true, this.getClass().getName(), true);
      app.submit(conf);
      Job job = app.getContext().getAllJobs().values().iterator().next();
      JobId jobId = job.getID();
      app.waitForState(job, JobState.SUCCEEDED);

      // make sure all events are flushed
      app.waitForState(Service.STATE.STOPPED);

      String jobhistoryDir = JobHistoryUtils.getHistoryIntermediateDoneDirForUser(conf);
      JobHistory jobHistory = new JobHistory();
      jobHistory.init(conf);

      JobIndexInfo jobIndexInfo = jobHistory.getJobFileInfo(jobId).getJobIndexInfo();
      String jobhistoryFileName = FileNameIndexUtils.getDoneFileName(jobIndexInfo);

      Path historyFilePath = new Path(jobhistoryDir, jobhistoryFileName);
      FSDataInputStream in;
      try {
        FileContext fc = FileContext.getFileContext(conf);
        in = fc.open(fc.makeQualified(historyFilePath));
      } catch (IOException ioe) {
        LOG.info("Can not open history file: " + historyFilePath, ioe);
        // Keep the original failure as the cause so the test report shows why the open failed.
        throw new Exception("Can not open History File", ioe);
      }

      JobHistoryParser parser = new JobHistoryParser(in);
      JobInfo jobInfo = parser.parse();
      Exception parseException = parser.getParseException();
      Assert.assertNull("Caught an unexpected exception " + parseException, parseException);
      int noOffailedAttempts = 0;
      Map<TaskID, TaskInfo> allTasks = jobInfo.getAllTasks();
      for (Task task : job.getTasks().values()) {
        TaskInfo taskInfo = allTasks.get(TypeConverter.fromYarn(task.getID()));
        // Fail with a clear message rather than a NullPointerException if parsing lost a task.
        Assert.assertNotNull("TaskInfo not found", taskInfo);
        for (TaskAttempt taskAttempt : task.getAttempts().values()) {
          TaskAttemptInfo taskAttemptInfo =
              taskInfo.getAllTaskAttempts().get(TypeConverter.fromYarn((taskAttempt.getID())));
          Assert.assertNotNull("TaskAttemptInfo not found", taskAttemptInfo);
          // Verify rack-name for all task attempts (expected value first, then actual)
          Assert.assertEquals("rack-name is incorrect", RACK_NAME, taskAttemptInfo.getRackname());
          if (taskAttemptInfo.getTaskStatus().equals("FAILED")) {
            noOffailedAttempts++;
          }
        }
      }
      Assert.assertEquals("No of Failed tasks doesn't match.", 2, noOffailedAttempts);
    } finally {
      LOG.info("FINISHED testHistoryParsingForFailedAttempts");
    }
  }
  /**
   * Runs an {@code MRAppWithHistory} job and verifies that the job summary file and the job
   * history file it produces can be read back and agree with the in-memory job.
   *
   * <p>When {@code numMaps == numSuccessfulMaps} the summary file is checked field by field and
   * the history file is parsed with a real {@code EventReader}. Otherwise the reader is a mock
   * that delegates to the real one but throws an {@code IOException} once more than
   * {@code numSuccessfulMaps} {@code TaskFinishedEvent}s have been seen, and the summary and
   * host/port/rack checks are skipped.
   *
   * @param numMaps number of map tasks the application is created with
   * @param numReduces number of reduce tasks the application is created with
   * @param numSuccessfulMaps number of finished maps the parsed history is expected to report
   * @throws Exception if the job cannot be run, or a file context or the history file cannot be
   *     obtained
   */
  private void checkHistoryParsing(
      final int numMaps, final int numReduces, final int numSuccessfulMaps) throws Exception {
    Configuration conf = new Configuration();
    conf.set(MRJobConfig.USER_NAME, System.getProperty("user.name"));
    long amStartTimeEst = System.currentTimeMillis();
    conf.setClass(
        CommonConfigurationKeysPublic.NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY,
        MyResolver.class,
        DNSToSwitchMapping.class);
    RackResolver.init(conf);
    MRApp app = new MRAppWithHistory(numMaps, numReduces, true, this.getClass().getName(), true);
    app.submit(conf);
    Job job = app.getContext().getAllJobs().values().iterator().next();
    JobId jobId = job.getID();
    LOG.info("JOBID is " + TypeConverter.fromYarn(jobId).toString());
    app.waitForState(job, JobState.SUCCEEDED);

    // make sure all events are flushed
    app.waitForState(Service.STATE.STOPPED);

    String jobhistoryDir = JobHistoryUtils.getHistoryIntermediateDoneDirForUser(conf);

    FileContext fc = null;
    try {
      fc = FileContext.getFileContext(conf);
    } catch (IOException ioe) {
      LOG.info("Can not get FileContext", ioe);
      // Preserve the underlying failure as the cause.
      throw new Exception("Can not get File Context", ioe);
    }

    if (numMaps == numSuccessfulMaps) {
      String summaryFileName = JobHistoryUtils.getIntermediateSummaryFileName(jobId);
      Path summaryFile = new Path(jobhistoryDir, summaryFileName);
      String jobSummaryString = getJobSummary(fc, summaryFile);
      Assert.assertNotNull(jobSummaryString);
      Assert.assertTrue(jobSummaryString.contains("resourcesPerMap=100"));
      Assert.assertTrue(jobSummaryString.contains("resourcesPerReduce=100"));

      // The summary is a comma-separated list of key=value pairs.
      Map<String, String> jobSummaryElements = new HashMap<String, String>();
      StringTokenizer strToken = new StringTokenizer(jobSummaryString, ",");
      while (strToken.hasMoreTokens()) {
        String keypair = strToken.nextToken();
        String[] keyAndValue = keypair.split("=");
        jobSummaryElements.put(keyAndValue[0], keyAndValue[1]);
      }

      Assert.assertEquals(
          "JobId does not match", jobId.toString(), jobSummaryElements.get("jobId"));
      Assert.assertEquals("JobName does not match", "test", jobSummaryElements.get("jobName"));
      Assert.assertTrue(
          "submitTime should not be 0", Long.parseLong(jobSummaryElements.get("submitTime")) != 0);
      Assert.assertTrue(
          "launchTime should not be 0", Long.parseLong(jobSummaryElements.get("launchTime")) != 0);
      Assert.assertTrue(
          "firstMapTaskLaunchTime should not be 0",
          Long.parseLong(jobSummaryElements.get("firstMapTaskLaunchTime")) != 0);
      Assert.assertTrue(
          "firstReduceTaskLaunchTime should not be 0",
          Long.parseLong(jobSummaryElements.get("firstReduceTaskLaunchTime")) != 0);
      Assert.assertTrue(
          "finishTime should not be 0", Long.parseLong(jobSummaryElements.get("finishTime")) != 0);
      Assert.assertEquals(
          "Mismatch in num map slots",
          numSuccessfulMaps,
          Integer.parseInt(jobSummaryElements.get("numMaps")));
      Assert.assertEquals(
          "Mismatch in num reduce slots",
          numReduces,
          Integer.parseInt(jobSummaryElements.get("numReduces")));
      Assert.assertEquals(
          "User does not match", System.getProperty("user.name"), jobSummaryElements.get("user"));
      Assert.assertEquals("Queue does not match", "default", jobSummaryElements.get("queue"));
      Assert.assertEquals("Status does not match", "SUCCEEDED", jobSummaryElements.get("status"));
    }

    JobHistory jobHistory = new JobHistory();
    jobHistory.init(conf);
    HistoryFileInfo fileInfo = jobHistory.getJobFileInfo(jobId);
    JobInfo jobInfo;
    long numFinishedMaps;

    synchronized (fileInfo) {
      Path historyFilePath = fileInfo.getHistoryFile();
      FSDataInputStream in = null;
      LOG.info("JobHistoryFile is: " + historyFilePath);
      try {
        in = fc.open(fc.makeQualified(historyFilePath));
      } catch (IOException ioe) {
        LOG.info("Can not open history file: " + historyFilePath, ioe);
        // Preserve the underlying failure as the cause.
        throw new Exception("Can not open History File", ioe);
      }

      JobHistoryParser parser = new JobHistoryParser(in);
      final EventReader realReader = new EventReader(in);
      EventReader reader = Mockito.mock(EventReader.class);
      if (numMaps == numSuccessfulMaps) {
        reader = realReader;
      } else {
        // Simulate a truncated history: pass events through until more than
        // numSuccessfulMaps TaskFinishedEvents have been read, then fail.
        final AtomicInteger numFinishedEvents = new AtomicInteger(0); // Hack!
        Mockito.when(reader.getNextEvent())
            .thenAnswer(
                new Answer<HistoryEvent>() {
                  @Override
                  public HistoryEvent answer(InvocationOnMock invocation) throws IOException {
                    HistoryEvent event = realReader.getNextEvent();
                    if (event instanceof TaskFinishedEvent) {
                      numFinishedEvents.incrementAndGet();
                    }

                    if (numFinishedEvents.get() <= numSuccessfulMaps) {
                      return event;
                    } else {
                      throw new IOException("test");
                    }
                  }
                });
      }

      jobInfo = parser.parse(reader);

      numFinishedMaps = computeFinishedMaps(jobInfo, numMaps, numSuccessfulMaps);

      if (numFinishedMaps != numMaps) {
        Exception parseException = parser.getParseException();
        Assert.assertNotNull("Didn't get expected parse exception", parseException);
      }
    }

    Assert.assertEquals(
        "Incorrect username ", System.getProperty("user.name"), jobInfo.getUsername());
    Assert.assertEquals("Incorrect jobName ", "test", jobInfo.getJobname());
    Assert.assertEquals("Incorrect queuename ", "default", jobInfo.getJobQueueName());
    Assert.assertEquals("incorrect conf path", "test", jobInfo.getJobConfPath());
    Assert.assertEquals("incorrect finishedMap ", numSuccessfulMaps, numFinishedMaps);
    Assert.assertEquals("incorrect finishedReduces ", numReduces, jobInfo.getFinishedReduces());
    Assert.assertEquals("incorrect uberized ", job.isUber(), jobInfo.getUberized());
    Map<TaskID, TaskInfo> allTasks = jobInfo.getAllTasks();
    int totalTasks = allTasks.size();
    Assert.assertEquals("total number of tasks is incorrect  ", (numMaps + numReduces), totalTasks);

    // Verify aminfo
    Assert.assertEquals(1, jobInfo.getAMInfos().size());
    Assert.assertEquals(MRApp.NM_HOST, jobInfo.getAMInfos().get(0).getNodeManagerHost());
    AMInfo amInfo = jobInfo.getAMInfos().get(0);
    Assert.assertEquals(MRApp.NM_PORT, amInfo.getNodeManagerPort());
    Assert.assertEquals(MRApp.NM_HTTP_PORT, amInfo.getNodeManagerHttpPort());
    Assert.assertEquals(1, amInfo.getAppAttemptId().getAttemptId());
    Assert.assertEquals(
        amInfo.getAppAttemptId(), amInfo.getContainerId().getApplicationAttemptId());
    Assert.assertTrue(
        amInfo.getStartTime() <= System.currentTimeMillis()
            && amInfo.getStartTime() >= amStartTimeEst);

    ContainerId fakeCid = BuilderUtils.newContainerId(-1, -1, -1, -1);
    // Assert at taskAttempt level
    for (TaskInfo taskInfo : allTasks.values()) {
      int taskAttemptCount = taskInfo.getAllTaskAttempts().size();
      Assert.assertEquals("total number of task attempts ", 1, taskAttemptCount);
      TaskAttemptInfo taInfo = taskInfo.getAllTaskAttempts().values().iterator().next();
      Assert.assertNotNull(taInfo.getContainerId());
      // Verify the wrong ctor is not being used. Remove after mrv1 is removed.
      Assert.assertFalse(taInfo.getContainerId().equals(fakeCid));
    }

    // Deep compare Job and JobInfo
    for (Task task : job.getTasks().values()) {
      TaskInfo taskInfo = allTasks.get(TypeConverter.fromYarn(task.getID()));
      Assert.assertNotNull("TaskInfo not found", taskInfo);
      for (TaskAttempt taskAttempt : task.getAttempts().values()) {
        TaskAttemptInfo taskAttemptInfo =
            taskInfo.getAllTaskAttempts().get(TypeConverter.fromYarn((taskAttempt.getID())));
        Assert.assertNotNull("TaskAttemptInfo not found", taskAttemptInfo);
        Assert.assertEquals(
            "Incorrect shuffle port for task attempt",
            taskAttempt.getShufflePort(),
            taskAttemptInfo.getShufflePort());
        if (numMaps == numSuccessfulMaps) {
          Assert.assertEquals(MRApp.NM_HOST, taskAttemptInfo.getHostname());
          Assert.assertEquals(MRApp.NM_PORT, taskAttemptInfo.getPort());

          // Verify rack-name (expected value first, then actual)
          Assert.assertEquals("rack-name is incorrect", RACK_NAME, taskAttemptInfo.getRackname());
        }
      }
    }
  }
 /**
  * Recursively deletes the given directory through the done-directory file context.
  *
  * @param serialDir status of the directory to remove
  * @return the result reported by the file context's {@code delete} call
  * @throws IOException (or one of the listed subclasses) if the delete fails
  */
 protected boolean deleteDir(FileStatus serialDir)
     throws AccessControlException, FileNotFoundException, UnsupportedFileSystemException,
         IOException {
   final Path qualifiedDir = doneDirFc.makeQualified(serialDir.getPath());
   final boolean recursive = true;
   return doneDirFc.delete(qualifiedDir, recursive);
 }
  /**
   * Prepares the configuration for the mini cluster before delegating to the superclass:
   * selects the Tez framework, uploads the Tez application jar, creates the staging and done
   * directories, and wires up the shuffle auxiliary service and container executor.
   *
   * @param conf configuration to populate; it is modified in place
   * @throws Exception if the superclass initialization fails; a missing application jar or a
   *     failure to create the staging directory is reported as {@code TezUncheckedException}
   */
  @Override
  public void serviceInit(Configuration conf) throws Exception {
    conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_TEZ_FRAMEWORK_NAME);
    // Node blacklisting is switched off to prevent scheduling issues.
    conf.setBoolean(TezConfiguration.TEZ_AM_NODE_BLACKLISTING_ENABLED, false);

    // Fall back to a staging directory under the test work dir when none is configured.
    if (conf.get(MRJobConfig.MR_AM_STAGING_DIR) == null) {
      File defaultStagingDir = new File(getTestWorkDir(), "apps_staging_dir" + Path.SEPARATOR);
      conf.set(MRJobConfig.MR_AM_STAGING_DIR, defaultStagingDir.getAbsolutePath());
    }

    // No delete delay configured: use zero.
    if (conf.get(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC) == null) {
      conf.setLong(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC, 0L);
    }

    // The application jar must exist locally before it can be uploaded.
    File localAppJar = new File(MiniTezCluster.APPJAR);
    if (!localAppJar.exists()) {
      String message = "TezAppJar " + MiniTezCluster.APPJAR + " not found. Exiting.";
      LOG.info(message);
      throw new TezUncheckedException(message);
    }

    // Copy the application jar into the test root directory and open up its permissions.
    FileSystem fs = FileSystem.get(conf);
    Path testRootDir = fs.makeQualified(new Path("target", getName() + "-tmpDir"));
    Path remoteAppJar = new Path(testRootDir, "TezAppJar.jar");
    Path localAppJarPath = new Path(MiniTezCluster.APPJAR);
    fs.copyFromLocalFile(localAppJarPath, remoteAppJar);
    fs.setPermission(remoteAppJar, new FsPermission("777"));

    conf.set(TezConfiguration.TEZ_LIB_URIS, remoteAppJar.toUri().toString());
    LOG.info("Set TEZ-LIB-URI to: " + conf.get(TezConfiguration.TEZ_LIB_URIS));

    // Both the physical- and virtual-memory checks are disabled.
    conf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, false);
    conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false);

    conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "000");

    try {
      FileContext defaultFc = FileContext.getFileContext(conf);
      Path stagingPath =
          defaultFc.makeQualified(new Path(conf.get(MRJobConfig.MR_AM_STAGING_DIR)));
      /*
       * On Windows with a local file system, re-configure the staging path as an
       * absolute path that contains the drive letter. The unit test could run on a
       * different drive than the AM; job files would then be localized to the drive
       * the test runs on while the AM starts on another drive and fails to find the
       * job metafiles. An absolute path avoids this ambiguity.
       */
      if (Path.WINDOWS && stagingPath.getFileSystem(conf) instanceof LocalFileSystem) {
        File configuredStagingDir = new File(conf.get(MRJobConfig.MR_AM_STAGING_DIR));
        conf.set(MRJobConfig.MR_AM_STAGING_DIR, configuredStagingDir.getAbsolutePath());
      }

      // Start from an empty staging directory.
      FileContext fc = FileContext.getFileContext(stagingPath.toUri(), conf);
      if (fc.util().exists(stagingPath)) {
        LOG.info(stagingPath + " exists! deleting...");
        fc.delete(stagingPath, true);
      }
      LOG.info("mkdir: " + stagingPath);
      fc.mkdir(stagingPath, null, true);

      // Create the history server done directory as well.
      String doneDir = JobHistoryUtils.getConfiguredHistoryServerDoneDirPrefix(conf);
      fc.mkdir(fc.makeQualified(new Path(doneDir)), null, true);
    } catch (IOException e) {
      throw new TezUncheckedException("Could not create staging directory. ", e);
    }
    conf.set(MRConfig.MASTER_ADDRESS, "test");

    // Register the shuffle handler as a NodeManager auxiliary service.
    String[] auxServices = new String[] {ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID};
    conf.setStrings(YarnConfiguration.NM_AUX_SERVICES, auxServices);
    String shuffleServiceKey =
        String.format(
            YarnConfiguration.NM_AUX_SERVICE_FMT, ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID);
    conf.setClass(shuffleServiceKey, ShuffleHandler.class, Service.class);

    // Non-standard shuffle port
    conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0);

    conf.setClass(
        YarnConfiguration.NM_CONTAINER_EXECUTOR,
        DefaultContainerExecutor.class,
        ContainerExecutor.class);

    // Uber-task mode is turned off; this cluster runs jobs non-uberized.
    conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
    super.serviceInit(conf);
  }
 /**
  * Resolves a path string under {@code ROOT_DIR} and qualifies it with either {@code mfc} or
  * {@code mfs}, depending on {@code useFCOption}.
  *
  * @param pathString path relative to {@code ROOT_DIR}
  * @return the qualified path
  */
 private Path getFullyQualifiedPath(String pathString) {
   final Path underRoot = new Path(ROOT_DIR, pathString);
   if (useFCOption) {
     return mfc.makeQualified(underRoot);
   }
   return mfs.makeQualified(underRoot);
 }