/**
 * Reads the job summary string stored in the given file.
 *
 * <p>The path is qualified against {@code fc}, opened, and a single value is read from it
 * with {@code readUTF()}.
 *
 * @param fc file context used to qualify and open the path
 * @param path location of the summary file
 * @return the string read from the file
 * @throws IOException if the file cannot be opened, read, or closed
 */
private String getJobSummary(FileContext fc, Path path) throws IOException {
  Path qPath = fc.makeQualified(path);
  FSDataInputStream in = fc.open(qPath);
  try {
    return in.readUTF();
  } finally {
    // Close on every exit path so a failed read does not leak the stream.
    in.close();
  }
}
/**
 * Builds the fully qualified path of the staging job history file written by the attempt
 * that precedes {@code applicationAttemptId}.
 *
 * @param conf configuration used to resolve the history staging directory and file contexts
 * @param applicationAttemptId the current attempt; the returned path is for its attempt id
 *     minus one
 * @return the qualified staging history file path of the previous attempt
 * @throws IOException if a file context cannot be obtained
 */
public static Path getPreviousJobHistoryPath(
    Configuration conf, ApplicationAttemptId applicationAttemptId) throws IOException {
  String jobIdText = TypeConverter.fromYarn(applicationAttemptId.getApplicationId()).toString();
  String stagingDirPrefix = JobHistoryUtils.getConfiguredHistoryStagingDirPrefix(conf, jobIdText);

  // Qualify against the default file context first, then switch to the context that
  // actually owns the staging directory's URI.
  FileContext defaultContext = FileContext.getFileContext(conf);
  Path stagingDir = defaultContext.makeQualified(new Path(stagingDirPrefix));
  FileContext stagingContext = FileContext.getFileContext(stagingDir.toUri(), conf);

  int previousAttempt = applicationAttemptId.getAttemptId() - 1;
  Path previousHistoryFile =
      JobHistoryUtils.getStagingJobHistoryFile(stagingDir, jobIdText, previousAttempt);
  return stagingContext.makeQualified(previousHistoryFile);
}
/**
 * Lists the regular files directly under {@code path} that are accepted by
 * {@code pathFilter}.
 *
 * @param path directory to list; it is qualified against {@code fc} before listing
 * @param fc file context used to qualify and list the directory
 * @param pathFilter filter applied to the path of every regular file
 * @return the statuses of the accepted files, in listing order
 * @throws IOException if the directory cannot be listed
 */
private static List<FileStatus> scanDirectory(Path path, FileContext fc, PathFilter pathFilter)
    throws IOException {
  Path qualifiedDir = fc.makeQualified(path);
  List<FileStatus> accepted = new ArrayList<FileStatus>();
  for (RemoteIterator<FileStatus> entries = fc.listStatus(qualifiedDir); entries.hasNext(); ) {
    FileStatus entry = entries.next();
    Path entryPath = entry.getPath();
    // Skip non-files and anything the filter rejects.
    if (!entry.isFile() || !pathFilter.accept(entryPath)) {
      continue;
    }
    accepted.add(entry);
  }
  return accepted;
}
/** * Clean up older history files. * * @throws IOException on any error trying to remove the entries. */ @SuppressWarnings("unchecked") void clean() throws IOException { // TODO this should be replaced by something that knows about the directory // structure and will put less of a load on HDFS. long cutoff = System.currentTimeMillis() - maxHistoryAge; boolean halted = false; // TODO Delete YYYY/MM/DD directories. List<FileStatus> serialDirList = findTimestampedDirectories(); // Sort in ascending order. Relies on YYYY/MM/DD/Serial Collections.sort(serialDirList); for (FileStatus serialDir : serialDirList) { List<FileStatus> historyFileList = scanDirectoryForHistoryFiles(serialDir.getPath(), doneDirFc); for (FileStatus historyFile : historyFileList) { JobIndexInfo jobIndexInfo = FileNameIndexUtils.getIndexInfo(historyFile.getPath().getName()); long effectiveTimestamp = getEffectiveTimestamp(jobIndexInfo.getFinishTime(), historyFile); if (effectiveTimestamp <= cutoff) { HistoryFileInfo fileInfo = this.jobListCache.get(jobIndexInfo.getJobId()); if (fileInfo == null) { String confFileName = JobHistoryUtils.getIntermediateConfFileName(jobIndexInfo.getJobId()); fileInfo = new HistoryFileInfo( historyFile.getPath(), new Path(historyFile.getPath().getParent(), confFileName), null, jobIndexInfo, true); } deleteJobFromDone(fileInfo); } else { halted = true; break; } } if (!halted) { doneDirFc.delete(doneDirFc.makeQualified(serialDir.getPath()), true); removeDirectoryFromSerialNumberIndex(serialDir.getPath()); existingDoneSubdirs.remove(serialDir.getPath()); } else { break; // Don't scan any more directories. } } }
@Test public void testHistoryParsingForFailedAttempts() throws Exception { LOG.info("STARTING testHistoryParsingForFailedAttempts"); try { Configuration conf = new Configuration(); conf.setClass( CommonConfigurationKeysPublic.NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY, MyResolver.class, DNSToSwitchMapping.class); RackResolver.init(conf); MRApp app = new MRAppWithHistoryWithFailedAttempt(2, 1, true, this.getClass().getName(), true); app.submit(conf); Job job = app.getContext().getAllJobs().values().iterator().next(); JobId jobId = job.getID(); app.waitForState(job, JobState.SUCCEEDED); // make sure all events are flushed app.waitForState(Service.STATE.STOPPED); String jobhistoryDir = JobHistoryUtils.getHistoryIntermediateDoneDirForUser(conf); JobHistory jobHistory = new JobHistory(); jobHistory.init(conf); JobIndexInfo jobIndexInfo = jobHistory.getJobFileInfo(jobId).getJobIndexInfo(); String jobhistoryFileName = FileNameIndexUtils.getDoneFileName(jobIndexInfo); Path historyFilePath = new Path(jobhistoryDir, jobhistoryFileName); FSDataInputStream in = null; FileContext fc = null; try { fc = FileContext.getFileContext(conf); in = fc.open(fc.makeQualified(historyFilePath)); } catch (IOException ioe) { LOG.info("Can not open history file: " + historyFilePath, ioe); throw (new Exception("Can not open History File")); } JobHistoryParser parser = new JobHistoryParser(in); JobInfo jobInfo = parser.parse(); Exception parseException = parser.getParseException(); Assert.assertNull("Caught an expected exception " + parseException, parseException); int noOffailedAttempts = 0; Map<TaskID, TaskInfo> allTasks = jobInfo.getAllTasks(); for (Task task : job.getTasks().values()) { TaskInfo taskInfo = allTasks.get(TypeConverter.fromYarn(task.getID())); for (TaskAttempt taskAttempt : task.getAttempts().values()) { TaskAttemptInfo taskAttemptInfo = taskInfo.getAllTaskAttempts().get(TypeConverter.fromYarn((taskAttempt.getID()))); // Verify rack-name for all task attempts 
Assert.assertEquals("rack-name is incorrect", taskAttemptInfo.getRackname(), RACK_NAME); if (taskAttemptInfo.getTaskStatus().equals("FAILED")) { noOffailedAttempts++; } } } Assert.assertEquals("No of Failed tasks doesn't match.", 2, noOffailedAttempts); } finally { LOG.info("FINISHED testHistoryParsingForFailedAttempts"); } }
private void checkHistoryParsing( final int numMaps, final int numReduces, final int numSuccessfulMaps) throws Exception { Configuration conf = new Configuration(); conf.set(MRJobConfig.USER_NAME, System.getProperty("user.name")); long amStartTimeEst = System.currentTimeMillis(); conf.setClass( CommonConfigurationKeysPublic.NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY, MyResolver.class, DNSToSwitchMapping.class); RackResolver.init(conf); MRApp app = new MRAppWithHistory(numMaps, numReduces, true, this.getClass().getName(), true); app.submit(conf); Job job = app.getContext().getAllJobs().values().iterator().next(); JobId jobId = job.getID(); LOG.info("JOBID is " + TypeConverter.fromYarn(jobId).toString()); app.waitForState(job, JobState.SUCCEEDED); // make sure all events are flushed app.waitForState(Service.STATE.STOPPED); String jobhistoryDir = JobHistoryUtils.getHistoryIntermediateDoneDirForUser(conf); FileContext fc = null; try { fc = FileContext.getFileContext(conf); } catch (IOException ioe) { LOG.info("Can not get FileContext", ioe); throw (new Exception("Can not get File Context")); } if (numMaps == numSuccessfulMaps) { String summaryFileName = JobHistoryUtils.getIntermediateSummaryFileName(jobId); Path summaryFile = new Path(jobhistoryDir, summaryFileName); String jobSummaryString = getJobSummary(fc, summaryFile); Assert.assertNotNull(jobSummaryString); Assert.assertTrue(jobSummaryString.contains("resourcesPerMap=100")); Assert.assertTrue(jobSummaryString.contains("resourcesPerReduce=100")); Map<String, String> jobSummaryElements = new HashMap<String, String>(); StringTokenizer strToken = new StringTokenizer(jobSummaryString, ","); while (strToken.hasMoreTokens()) { String keypair = strToken.nextToken(); jobSummaryElements.put(keypair.split("=")[0], keypair.split("=")[1]); } Assert.assertEquals( "JobId does not match", jobId.toString(), jobSummaryElements.get("jobId")); Assert.assertEquals("JobName does not match", "test", jobSummaryElements.get("jobName")); 
Assert.assertTrue( "submitTime should not be 0", Long.parseLong(jobSummaryElements.get("submitTime")) != 0); Assert.assertTrue( "launchTime should not be 0", Long.parseLong(jobSummaryElements.get("launchTime")) != 0); Assert.assertTrue( "firstMapTaskLaunchTime should not be 0", Long.parseLong(jobSummaryElements.get("firstMapTaskLaunchTime")) != 0); Assert.assertTrue( "firstReduceTaskLaunchTime should not be 0", Long.parseLong(jobSummaryElements.get("firstReduceTaskLaunchTime")) != 0); Assert.assertTrue( "finishTime should not be 0", Long.parseLong(jobSummaryElements.get("finishTime")) != 0); Assert.assertEquals( "Mismatch in num map slots", numSuccessfulMaps, Integer.parseInt(jobSummaryElements.get("numMaps"))); Assert.assertEquals( "Mismatch in num reduce slots", numReduces, Integer.parseInt(jobSummaryElements.get("numReduces"))); Assert.assertEquals( "User does not match", System.getProperty("user.name"), jobSummaryElements.get("user")); Assert.assertEquals("Queue does not match", "default", jobSummaryElements.get("queue")); Assert.assertEquals("Status does not match", "SUCCEEDED", jobSummaryElements.get("status")); } JobHistory jobHistory = new JobHistory(); jobHistory.init(conf); HistoryFileInfo fileInfo = jobHistory.getJobFileInfo(jobId); JobInfo jobInfo; long numFinishedMaps; synchronized (fileInfo) { Path historyFilePath = fileInfo.getHistoryFile(); FSDataInputStream in = null; LOG.info("JobHistoryFile is: " + historyFilePath); try { in = fc.open(fc.makeQualified(historyFilePath)); } catch (IOException ioe) { LOG.info("Can not open history file: " + historyFilePath, ioe); throw (new Exception("Can not open History File")); } JobHistoryParser parser = new JobHistoryParser(in); final EventReader realReader = new EventReader(in); EventReader reader = Mockito.mock(EventReader.class); if (numMaps == numSuccessfulMaps) { reader = realReader; } else { final AtomicInteger numFinishedEvents = new AtomicInteger(0); // Hack! 
Mockito.when(reader.getNextEvent()) .thenAnswer( new Answer<HistoryEvent>() { public HistoryEvent answer(InvocationOnMock invocation) throws IOException { HistoryEvent event = realReader.getNextEvent(); if (event instanceof TaskFinishedEvent) { numFinishedEvents.incrementAndGet(); } if (numFinishedEvents.get() <= numSuccessfulMaps) { return event; } else { throw new IOException("test"); } } }); } jobInfo = parser.parse(reader); numFinishedMaps = computeFinishedMaps(jobInfo, numMaps, numSuccessfulMaps); if (numFinishedMaps != numMaps) { Exception parseException = parser.getParseException(); Assert.assertNotNull("Didn't get expected parse exception", parseException); } } Assert.assertEquals( "Incorrect username ", System.getProperty("user.name"), jobInfo.getUsername()); Assert.assertEquals("Incorrect jobName ", "test", jobInfo.getJobname()); Assert.assertEquals("Incorrect queuename ", "default", jobInfo.getJobQueueName()); Assert.assertEquals("incorrect conf path", "test", jobInfo.getJobConfPath()); Assert.assertEquals("incorrect finishedMap ", numSuccessfulMaps, numFinishedMaps); Assert.assertEquals("incorrect finishedReduces ", numReduces, jobInfo.getFinishedReduces()); Assert.assertEquals("incorrect uberized ", job.isUber(), jobInfo.getUberized()); Map<TaskID, TaskInfo> allTasks = jobInfo.getAllTasks(); int totalTasks = allTasks.size(); Assert.assertEquals("total number of tasks is incorrect ", (numMaps + numReduces), totalTasks); // Verify aminfo Assert.assertEquals(1, jobInfo.getAMInfos().size()); Assert.assertEquals(MRApp.NM_HOST, jobInfo.getAMInfos().get(0).getNodeManagerHost()); AMInfo amInfo = jobInfo.getAMInfos().get(0); Assert.assertEquals(MRApp.NM_PORT, amInfo.getNodeManagerPort()); Assert.assertEquals(MRApp.NM_HTTP_PORT, amInfo.getNodeManagerHttpPort()); Assert.assertEquals(1, amInfo.getAppAttemptId().getAttemptId()); Assert.assertEquals( amInfo.getAppAttemptId(), amInfo.getContainerId().getApplicationAttemptId()); Assert.assertTrue( 
amInfo.getStartTime() <= System.currentTimeMillis() && amInfo.getStartTime() >= amStartTimeEst); ContainerId fakeCid = BuilderUtils.newContainerId(-1, -1, -1, -1); // Assert at taskAttempt level for (TaskInfo taskInfo : allTasks.values()) { int taskAttemptCount = taskInfo.getAllTaskAttempts().size(); Assert.assertEquals("total number of task attempts ", 1, taskAttemptCount); TaskAttemptInfo taInfo = taskInfo.getAllTaskAttempts().values().iterator().next(); Assert.assertNotNull(taInfo.getContainerId()); // Verify the wrong ctor is not being used. Remove after mrv1 is removed. Assert.assertFalse(taInfo.getContainerId().equals(fakeCid)); } // Deep compare Job and JobInfo for (Task task : job.getTasks().values()) { TaskInfo taskInfo = allTasks.get(TypeConverter.fromYarn(task.getID())); Assert.assertNotNull("TaskInfo not found", taskInfo); for (TaskAttempt taskAttempt : task.getAttempts().values()) { TaskAttemptInfo taskAttemptInfo = taskInfo.getAllTaskAttempts().get(TypeConverter.fromYarn((taskAttempt.getID()))); Assert.assertNotNull("TaskAttemptInfo not found", taskAttemptInfo); Assert.assertEquals( "Incorrect shuffle port for task attempt", taskAttempt.getShufflePort(), taskAttemptInfo.getShufflePort()); if (numMaps == numSuccessfulMaps) { Assert.assertEquals(MRApp.NM_HOST, taskAttemptInfo.getHostname()); Assert.assertEquals(MRApp.NM_PORT, taskAttemptInfo.getPort()); // Verify rack-name Assert.assertEquals("rack-name is incorrect", taskAttemptInfo.getRackname(), RACK_NAME); } } } }
/**
 * Recursively deletes the directory described by {@code serialDir} through the done-directory
 * file context.
 *
 * @param serialDir status of the directory to remove
 * @return the result reported by the file context's {@code delete} call
 * @throws AccessControlException propagated from the file context
 * @throws FileNotFoundException propagated from the file context
 * @throws UnsupportedFileSystemException propagated from the file context
 * @throws IOException on any other I/O failure
 */
protected boolean deleteDir(FileStatus serialDir)
    throws AccessControlException,
        FileNotFoundException,
        UnsupportedFileSystemException,
        IOException {
  Path qualifiedDir = doneDirFc.makeQualified(serialDir.getPath());
  boolean recursive = true;
  return doneDirFc.delete(qualifiedDir, recursive);
}
@Override public void serviceInit(Configuration conf) throws Exception { conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_TEZ_FRAMEWORK_NAME); // blacklisting disabled to prevent scheduling issues conf.setBoolean(TezConfiguration.TEZ_AM_NODE_BLACKLISTING_ENABLED, false); if (conf.get(MRJobConfig.MR_AM_STAGING_DIR) == null) { conf.set( MRJobConfig.MR_AM_STAGING_DIR, new File(getTestWorkDir(), "apps_staging_dir" + Path.SEPARATOR).getAbsolutePath()); } if (conf.get(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC) == null) { // nothing defined. set quick delete value conf.setLong(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC, 0l); } File appJarLocalFile = new File(MiniTezCluster.APPJAR); if (!appJarLocalFile.exists()) { String message = "TezAppJar " + MiniTezCluster.APPJAR + " not found. Exiting."; LOG.info(message); throw new TezUncheckedException(message); } FileSystem fs = FileSystem.get(conf); Path testRootDir = fs.makeQualified(new Path("target", getName() + "-tmpDir")); Path appRemoteJar = new Path(testRootDir, "TezAppJar.jar"); // Copy AppJar and make it public. Path appMasterJar = new Path(MiniTezCluster.APPJAR); fs.copyFromLocalFile(appMasterJar, appRemoteJar); fs.setPermission(appRemoteJar, new FsPermission("777")); conf.set(TezConfiguration.TEZ_LIB_URIS, appRemoteJar.toUri().toString()); LOG.info("Set TEZ-LIB-URI to: " + conf.get(TezConfiguration.TEZ_LIB_URIS)); // VMEM monitoring disabled, PMEM monitoring enabled. conf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, false); conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false); conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "000"); try { Path stagingPath = FileContext.getFileContext(conf) .makeQualified(new Path(conf.get(MRJobConfig.MR_AM_STAGING_DIR))); /* * Re-configure the staging path on Windows if the file system is localFs. * We need to use a absolute path that contains the drive letter. The unit * test could run on a different drive than the AM. 
We can run into the * issue that job files are localized to the drive where the test runs on, * while the AM starts on a different drive and fails to find the job * metafiles. Using absolute path can avoid this ambiguity. */ if (Path.WINDOWS) { if (LocalFileSystem.class.isInstance(stagingPath.getFileSystem(conf))) { conf.set( MRJobConfig.MR_AM_STAGING_DIR, new File(conf.get(MRJobConfig.MR_AM_STAGING_DIR)).getAbsolutePath()); } } FileContext fc = FileContext.getFileContext(stagingPath.toUri(), conf); if (fc.util().exists(stagingPath)) { LOG.info(stagingPath + " exists! deleting..."); fc.delete(stagingPath, true); } LOG.info("mkdir: " + stagingPath); fc.mkdir(stagingPath, null, true); // mkdir done directory as well String doneDir = JobHistoryUtils.getConfiguredHistoryServerDoneDirPrefix(conf); Path doneDirPath = fc.makeQualified(new Path(doneDir)); fc.mkdir(doneDirPath, null, true); } catch (IOException e) { throw new TezUncheckedException("Could not create staging directory. ", e); } conf.set(MRConfig.MASTER_ADDRESS, "test"); // configure the shuffle service in NM conf.setStrings( YarnConfiguration.NM_AUX_SERVICES, new String[] {ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID}); conf.setClass( String.format( YarnConfiguration.NM_AUX_SERVICE_FMT, ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID), ShuffleHandler.class, Service.class); // Non-standard shuffle port conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0); conf.setClass( YarnConfiguration.NM_CONTAINER_EXECUTOR, DefaultContainerExecutor.class, ContainerExecutor.class); // TestMRJobs is for testing non-uberized operation only; see TestUberAM // for corresponding uberized tests. conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false); super.serviceInit(conf); }
/**
 * Resolves {@code pathString} under {@code ROOT_DIR} and qualifies the result.
 *
 * <p>The path is qualified through {@code mfc} when {@code useFCOption} is set, and through
 * {@code mfs} otherwise.
 *
 * @param pathString path relative to {@code ROOT_DIR}
 * @return the qualified path
 */
private Path getFullyQualifiedPath(String pathString) {
  Path underRoot = new Path(ROOT_DIR, pathString);
  if (useFCOption) {
    return mfc.makeQualified(underRoot);
  }
  return mfs.makeQualified(underRoot);
}