public HistoryFileInfo addIfAbsent(HistoryFileInfo fileInfo) {
  JobId jobId = fileInfo.getJobId();
  if (LogGlobal.isDebugEnabled()) {
    /* LOG.debug("Adding "+jobId+" to job list cache with "+fileInfo.getJobIndexInfo()) */
    LOG.adding_job_list_cache_with(jobId.toString(),
        String.valueOf(fileInfo.getJobIndexInfo()))
        .tag("methodCall")
        .debug();
  }
  HistoryFileInfo old = cache.putIfAbsent(jobId, fileInfo);
  if (cache.size() > maxSize) {
    // There is a race here, where more than one thread could be trying to
    // remove entries. This could result in too many entries being removed
    // from the cache. This is considered OK as the size of the cache
    // should be rather large, and we would rather have performance over
    // keeping the cache size exactly at the maximum.
    Iterator<JobId> keys = cache.navigableKeySet().iterator();
    long cutoff = System.currentTimeMillis() - maxAge;
    while (cache.size() > maxSize && keys.hasNext()) {
      JobId key = keys.next();
      HistoryFileInfo firstValue = cache.get(key);
      if (firstValue != null) {
        synchronized (firstValue) {
          if (firstValue.isMovePending()) {
            if (firstValue.didMoveFail()
                && firstValue.jobIndexInfo.getFinishTime() <= cutoff) {
              cache.remove(key);
              // Now let's try to delete it
              try {
                firstValue.delete();
              } catch (IOException e) {
                /* LOG.error("Error while trying to delete history files"+" that could not be moved to done.",e) */
                LOG.error_while_trying_delete_history_files_(e.toString()).error();
              }
            } else {
              /* LOG.warn("Waiting to remove "+key+" from JobListCache because it is not in done yet.") */
              LOG.waiting_remove_from_joblistcache_because(key.toString()).warn();
            }
          } else {
            cache.remove(key);
          }
        }
      }
    }
  }
  return old;
}
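// A minimal, self-contained sketch (not part of HistoryFileManager) of the
// eviction pattern addIfAbsent relies on: putIfAbsent into a sorted concurrent
// map, then, once the map grows past maxSize, walk the key set and drop entries
// whose finish time is older than a maxAge cutoff. The Entry type and the
// maxSize/maxAge values below are assumptions chosen only for illustration.
import java.util.Iterator;
import java.util.concurrent.ConcurrentSkipListMap;

class BoundedCacheSketch {
  static final class Entry {
    final long finishTime;
    Entry(long finishTime) { this.finishTime = finishTime; }
  }

  private final ConcurrentSkipListMap<Integer, Entry> cache = new ConcurrentSkipListMap<>();
  private final int maxSize = 5;        // assumed small bound for the demo
  private final long maxAge = 60_000L;  // assumed one-minute retention

  Entry addIfAbsent(int id, Entry e) {
    Entry old = cache.putIfAbsent(id, e);
    if (cache.size() > maxSize) {
      // Like the real code, this tolerates races between concurrent evictors:
      // over-eviction is accepted in exchange for not locking the whole cache.
      long cutoff = System.currentTimeMillis() - maxAge;
      Iterator<Integer> keys = cache.navigableKeySet().iterator();
      while (cache.size() > maxSize && keys.hasNext()) {
        Integer key = keys.next();
        Entry value = cache.get(key);
        if (value != null && value.finishTime <= cutoff) {
          cache.remove(key);  // old enough: safe to drop
        }
      }
    }
    return old;
  }

  public static void main(String[] args) {
    BoundedCacheSketch c = new BoundedCacheSketch();
    long stale = System.currentTimeMillis() - 120_000L;  // older than maxAge
    for (int i = 0; i < 8; i++) {
      c.addIfAbsent(i, new Entry(stale));
    }
    System.out.println("cache size after inserts: " + c.cache.size());  // prints 5
  }
}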
private void checkHistoryParsing(final int numMaps, final int numReduces,
    final int numSuccessfulMaps) throws Exception {
  Configuration conf = new Configuration();
  conf.set(MRJobConfig.USER_NAME, System.getProperty("user.name"));
  long amStartTimeEst = System.currentTimeMillis();
  conf.setClass(
      CommonConfigurationKeysPublic.NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY,
      MyResolver.class, DNSToSwitchMapping.class);
  RackResolver.init(conf);
  MRApp app =
      new MRAppWithHistory(numMaps, numReduces, true, this.getClass().getName(), true);
  app.submit(conf);
  Job job = app.getContext().getAllJobs().values().iterator().next();
  JobId jobId = job.getID();
  LOG.info("JOBID is " + TypeConverter.fromYarn(jobId).toString());
  app.waitForState(job, JobState.SUCCEEDED);

  // make sure all events are flushed
  app.waitForState(Service.STATE.STOPPED);

  String jobhistoryDir = JobHistoryUtils.getHistoryIntermediateDoneDirForUser(conf);

  FileContext fc = null;
  try {
    fc = FileContext.getFileContext(conf);
  } catch (IOException ioe) {
    LOG.info("Can not get FileContext", ioe);
    throw (new Exception("Can not get File Context"));
  }

  if (numMaps == numSuccessfulMaps) {
    String summaryFileName = JobHistoryUtils.getIntermediateSummaryFileName(jobId);
    Path summaryFile = new Path(jobhistoryDir, summaryFileName);
    String jobSummaryString = getJobSummary(fc, summaryFile);
    Assert.assertNotNull(jobSummaryString);
    Assert.assertTrue(jobSummaryString.contains("resourcesPerMap=100"));
    Assert.assertTrue(jobSummaryString.contains("resourcesPerReduce=100"));

    Map<String, String> jobSummaryElements = new HashMap<String, String>();
    StringTokenizer strToken = new StringTokenizer(jobSummaryString, ",");
    while (strToken.hasMoreTokens()) {
      String keypair = strToken.nextToken();
      jobSummaryElements.put(keypair.split("=")[0], keypair.split("=")[1]);
    }

    Assert.assertEquals("JobId does not match", jobId.toString(),
        jobSummaryElements.get("jobId"));
    Assert.assertEquals("JobName does not match", "test",
        jobSummaryElements.get("jobName"));
    Assert.assertTrue("submitTime should not be 0",
        Long.parseLong(jobSummaryElements.get("submitTime")) != 0);
    Assert.assertTrue("launchTime should not be 0",
        Long.parseLong(jobSummaryElements.get("launchTime")) != 0);
    Assert.assertTrue("firstMapTaskLaunchTime should not be 0",
        Long.parseLong(jobSummaryElements.get("firstMapTaskLaunchTime")) != 0);
    Assert.assertTrue("firstReduceTaskLaunchTime should not be 0",
        Long.parseLong(jobSummaryElements.get("firstReduceTaskLaunchTime")) != 0);
    Assert.assertTrue("finishTime should not be 0",
        Long.parseLong(jobSummaryElements.get("finishTime")) != 0);
    Assert.assertEquals("Mismatch in num map slots", numSuccessfulMaps,
        Integer.parseInt(jobSummaryElements.get("numMaps")));
    Assert.assertEquals("Mismatch in num reduce slots", numReduces,
        Integer.parseInt(jobSummaryElements.get("numReduces")));
    Assert.assertEquals("User does not match",
        System.getProperty("user.name"), jobSummaryElements.get("user"));
    Assert.assertEquals("Queue does not match", "default",
        jobSummaryElements.get("queue"));
    Assert.assertEquals("Status does not match", "SUCCEEDED",
        jobSummaryElements.get("status"));
  }

  JobHistory jobHistory = new JobHistory();
  jobHistory.init(conf);
  HistoryFileInfo fileInfo = jobHistory.getJobFileInfo(jobId);
  JobInfo jobInfo;
  long numFinishedMaps;

  synchronized (fileInfo) {
    Path historyFilePath = fileInfo.getHistoryFile();
    FSDataInputStream in = null;
    LOG.info("JobHistoryFile is: " + historyFilePath);
    try {
      in = fc.open(fc.makeQualified(historyFilePath));
    } catch (IOException ioe) {
      LOG.info("Can not open history file: " + historyFilePath, ioe);
      throw (new Exception("Can not open History File"));
    }

    JobHistoryParser parser = new JobHistoryParser(in);
    final EventReader realReader = new EventReader(in);
    EventReader reader = Mockito.mock(EventReader.class);
    if (numMaps == numSuccessfulMaps) {
      reader = realReader;
    } else {
      final AtomicInteger numFinishedEvents = new AtomicInteger(0);
      // Hack!
      Mockito.when(reader.getNextEvent()).thenAnswer(
          new Answer<HistoryEvent>() {
            public HistoryEvent answer(InvocationOnMock invocation)
                throws IOException {
              HistoryEvent event = realReader.getNextEvent();
              if (event instanceof TaskFinishedEvent) {
                numFinishedEvents.incrementAndGet();
              }
              if (numFinishedEvents.get() <= numSuccessfulMaps) {
                return event;
              } else {
                throw new IOException("test");
              }
            }
          });
    }

    jobInfo = parser.parse(reader);

    numFinishedMaps = computeFinishedMaps(jobInfo, numMaps, numSuccessfulMaps);

    if (numFinishedMaps != numMaps) {
      Exception parseException = parser.getParseException();
      Assert.assertNotNull("Didn't get expected parse exception", parseException);
    }
  }

  Assert.assertEquals("Incorrect username ", System.getProperty("user.name"),
      jobInfo.getUsername());
  Assert.assertEquals("Incorrect jobName ", "test", jobInfo.getJobname());
  Assert.assertEquals("Incorrect queuename ", "default", jobInfo.getJobQueueName());
  Assert.assertEquals("incorrect conf path", "test", jobInfo.getJobConfPath());
  Assert.assertEquals("incorrect finishedMap ", numSuccessfulMaps, numFinishedMaps);
  Assert.assertEquals("incorrect finishedReduces ", numReduces,
      jobInfo.getFinishedReduces());
  Assert.assertEquals("incorrect uberized ", job.isUber(), jobInfo.getUberized());
  Map<TaskID, TaskInfo> allTasks = jobInfo.getAllTasks();
  int totalTasks = allTasks.size();
  Assert.assertEquals("total number of tasks is incorrect ",
      (numMaps + numReduces), totalTasks);

  // Verify aminfo
  Assert.assertEquals(1, jobInfo.getAMInfos().size());
  Assert.assertEquals(MRApp.NM_HOST, jobInfo.getAMInfos().get(0).getNodeManagerHost());
  AMInfo amInfo = jobInfo.getAMInfos().get(0);
  Assert.assertEquals(MRApp.NM_PORT, amInfo.getNodeManagerPort());
  Assert.assertEquals(MRApp.NM_HTTP_PORT, amInfo.getNodeManagerHttpPort());
  Assert.assertEquals(1, amInfo.getAppAttemptId().getAttemptId());
  Assert.assertEquals(amInfo.getAppAttemptId(),
      amInfo.getContainerId().getApplicationAttemptId());
  Assert.assertTrue(amInfo.getStartTime() <= System.currentTimeMillis()
      && amInfo.getStartTime() >= amStartTimeEst);

  ContainerId fakeCid = BuilderUtils.newContainerId(-1, -1, -1, -1);
  // Assert at taskAttempt level
  for (TaskInfo taskInfo : allTasks.values()) {
    int taskAttemptCount = taskInfo.getAllTaskAttempts().size();
    Assert.assertEquals("total number of task attempts ", 1, taskAttemptCount);
    TaskAttemptInfo taInfo = taskInfo.getAllTaskAttempts().values().iterator().next();
    Assert.assertNotNull(taInfo.getContainerId());
    // Verify the wrong ctor is not being used. Remove after mrv1 is removed.
    Assert.assertFalse(taInfo.getContainerId().equals(fakeCid));
  }

  // Deep compare Job and JobInfo
  for (Task task : job.getTasks().values()) {
    TaskInfo taskInfo = allTasks.get(TypeConverter.fromYarn(task.getID()));
    Assert.assertNotNull("TaskInfo not found", taskInfo);
    for (TaskAttempt taskAttempt : task.getAttempts().values()) {
      TaskAttemptInfo taskAttemptInfo =
          taskInfo.getAllTaskAttempts().get(TypeConverter.fromYarn((taskAttempt.getID())));
      Assert.assertNotNull("TaskAttemptInfo not found", taskAttemptInfo);
      Assert.assertEquals("Incorrect shuffle port for task attempt",
          taskAttempt.getShufflePort(), taskAttemptInfo.getShufflePort());
      if (numMaps == numSuccessfulMaps) {
        Assert.assertEquals(MRApp.NM_HOST, taskAttemptInfo.getHostname());
        Assert.assertEquals(MRApp.NM_PORT, taskAttemptInfo.getPort());

        // Verify rack-name
        Assert.assertEquals("rack-name is incorrect",
            taskAttemptInfo.getRackname(), RACK_NAME);
      }
    }
  }
}
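// A standalone sketch of how the test above splits the job-summary line into
// key/value pairs before asserting on individual fields. The sample summary
// string below is made up for illustration; real summary lines carry more
// fields (submitTime, launchTime, finishTime, resourcesPerMap, and so on).
import java.util.HashMap;
import java.util.Map;
import java.util.StringTokenizer;

class JobSummaryParseSketch {
  public static void main(String[] args) {
    String jobSummaryString =
        "jobId=job_1234_0001,jobName=test,queue=default,status=SUCCEEDED,numMaps=2,numReduces=1";
    Map<String, String> jobSummaryElements = new HashMap<String, String>();
    StringTokenizer strToken = new StringTokenizer(jobSummaryString, ",");
    while (strToken.hasMoreTokens()) {
      // Each token looks like "key=value"; the key is everything before '='.
      String keypair = strToken.nextToken();
      jobSummaryElements.put(keypair.split("=")[0], keypair.split("=")[1]);
    }
    System.out.println(jobSummaryElements.get("status"));   // SUCCEEDED
    System.out.println(jobSummaryElements.get("numMaps"));  // 2
  }
}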
private synchronized void moveToDone() throws IOException {
  if (LogGlobal.isDebugEnabled()) {
    /* LOG.debug("moveToDone: "+historyFile) */
    LOG.movetodone(historyFile.toString()).debug();
  }
  if (!isMovePending()) {
    // It was either deleted or is already in done. Either way do nothing
    if (LogGlobal.isDebugEnabled()) {
      /* LOG.debug("Move no longer pending") */
      LOG.move_longer_pending().debug();
    }
    return;
  }
  try {
    long completeTime = jobIndexInfo.getFinishTime();
    if (completeTime == 0) {
      completeTime = System.currentTimeMillis();
    }
    JobId jobId = jobIndexInfo.getJobId();

    List<Path> paths = new ArrayList<Path>(2);
    if (historyFile == null) {
      /* LOG.info("No file for job-history with "+jobId+" found in cache!") */
      LOG.file_for_job_history_with_found(jobId.toString()).info();
    } else {
      paths.add(historyFile);
    }

    if (confFile == null) {
      /* LOG.info("No file for jobConf with "+jobId+" found in cache!") */
      LOG.file_for_jobconf_with_found_cache(jobId.toString()).info();
    } else {
      paths.add(confFile);
    }

    if (summaryFile == null) {
      /* LOG.info("No summary file for job: "+jobId) */
      LOG.summary_file_for_job(jobId.toString()).info();
    } else {
      String jobSummaryString = getJobSummary(intermediateDoneDirFc, summaryFile);
      LOG.summary_file_for_job(jobSummaryString).info();
      /* LOG.info("Deleting JobSummary file: ["+summaryFile+"]") */
      LOG.deleting_jobsummary_file(summaryFile.toString()).info();
      intermediateDoneDirFc.delete(summaryFile, false);
      summaryFile = null;
    }

    Path targetDir = canonicalHistoryLogPath(jobId, completeTime);
    addDirectoryToSerialNumberIndex(targetDir);
    makeDoneSubdir(targetDir);
    if (historyFile != null) {
      Path toPath = doneDirFc.makeQualified(new Path(targetDir, historyFile.getName()));
      if (!toPath.equals(historyFile)) {
        moveToDoneNow(historyFile, toPath);
        historyFile = toPath;
      }
    }
    if (confFile != null) {
      Path toPath = doneDirFc.makeQualified(new Path(targetDir, confFile.getName()));
      if (!toPath.equals(confFile)) {
        moveToDoneNow(confFile, toPath);
        confFile = toPath;
      }
    }
    state = HistoryInfoState.IN_DONE;
  } catch (Throwable t) {
    /* LOG.error("Error while trying to move a job to done",t) */
    LOG.error_while_trying_move_job_done(t.toString()).error();
    this.state = HistoryInfoState.MOVE_FAILED;
  }
}
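// A minimal sketch (separate from HistoryFileInfo) of the state handling that
// moveToDone follows: skip the move unless it is still pending, flip to IN_DONE
// on success, and fall back to MOVE_FAILED if anything goes wrong so a later
// attempt (see didMoveFail() in the cache code above) can retry or delete.
// The IN_INTERMEDIATE constant and the doMove() hook are assumptions made for
// this illustration and are not the real HistoryFileManager API.
class MoveStateSketch {
  enum HistoryInfoState { IN_INTERMEDIATE, IN_DONE, MOVE_FAILED }

  private HistoryInfoState state = HistoryInfoState.IN_INTERMEDIATE;

  private boolean isMovePending() {
    return state == HistoryInfoState.IN_INTERMEDIATE
        || state == HistoryInfoState.MOVE_FAILED;
  }

  synchronized void moveToDone(Runnable doMove) {
    if (!isMovePending()) {
      return;  // already moved (or deleted): nothing to do
    }
    try {
      doMove.run();  // stand-in for the real history/conf file moves
      state = HistoryInfoState.IN_DONE;
    } catch (Throwable t) {
      state = HistoryInfoState.MOVE_FAILED;  // leave it eligible for retry
    }
  }

  public static void main(String[] args) {
    MoveStateSketch info = new MoveStateSketch();
    info.moveToDone(() -> { throw new RuntimeException("simulated move failure"); });
    System.out.println(info.state);  // MOVE_FAILED
    info.moveToDone(() -> { /* succeeds this time */ });
    System.out.println(info.state);  // IN_DONE
  }
}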