@SuppressWarnings("unchecked") @Override public int run(String[] args) throws Exception { MyOptions options = new MyOptions(args, getConf()); traceWriter = options.clazzTraceOutputter.newInstance(); traceWriter.init(options.traceOutput, getConf()); topologyWriter = new DefaultOutputter<LoggedNetworkTopology>(); topologyWriter.init(options.topologyOutput, getConf()); try { JobBuilder jobBuilder = null; for (Path p : options.inputs) { InputDemuxer inputDemuxer = options.inputDemuxerClass.newInstance(); try { inputDemuxer.bindTo(p, getConf()); } catch (IOException e) { LOG.warn("Unable to bind Path " + p + " . Skipping...", e); continue; } Pair<String, InputStream> filePair = null; try { while ((filePair = inputDemuxer.getNext()) != null) { RewindableInputStream ris = new RewindableInputStream(filePair.second()); JobHistoryParser parser = null; try { String jobID = JobHistoryUtils.extractJobID(filePair.first()); if (jobID == null) { LOG.warn("File skipped: Invalid file name: " + filePair.first()); continue; } if ((jobBuilder == null) || (!jobBuilder.getJobID().equals(jobID))) { if (jobBuilder != null) { traceWriter.output(jobBuilder.build()); } jobBuilder = new JobBuilder(jobID); } if (JobHistoryUtils.isJobConfXml(filePair.first())) { processJobConf(JobConfigurationParser.parse(ris.rewind()), jobBuilder); } else { parser = JobHistoryParserFactory.getParser(ris); if (parser == null) { LOG.warn("File skipped: Cannot find suitable parser: " + filePair.first()); } else { processJobHistory(parser, jobBuilder); } } } finally { if (parser == null) { ris.close(); } else { parser.close(); parser = null; } } } } catch (Throwable t) { if (filePair != null) { LOG.warn( "TraceBuilder got an error while processing the [possibly virtual] file " + filePair.first() + " within Path " + p, t); } } finally { inputDemuxer.close(); } } if (jobBuilder != null) { traceWriter.output(jobBuilder.build()); jobBuilder = null; } else { LOG.warn("No job found in traces: "); } topologyWriter.output(topologyBuilder.build()); } finally { traceWriter.close(); topologyWriter.close(); } return 0; }
/**
 * Test if {@link CurrentJHParser} can read events from current JH files.
 */
@Test
public void testCurrentJHParser() throws Exception {
  final Configuration conf = new Configuration();
  final FileSystem lfs = FileSystem.getLocal(conf);

  final Path rootTempDir =
      new Path(System.getProperty("test.build.data", "/tmp")).makeQualified(lfs);

  final Path tempDir = new Path(rootTempDir, "TestCurrentJHParser");
  lfs.delete(tempDir, true);

  String queueName = "testQueue";

  // Run a MR job: first create a mini MR cluster.
  conf.setInt("mapred.tasktracker.map.tasks.maximum", 1);
  conf.setInt("mapred.tasktracker.reduce.tasks.maximum", 1);
  conf.set("mapred.queue.names", queueName);
  MiniMRCluster mrCluster =
      new MiniMRCluster(1, "file:///", 1, null, null, new JobConf(conf));

  // Run a job.
  Path inDir = new Path(tempDir, "input");
  Path outDir = new Path(tempDir, "output");

  JobHistoryParser parser = null;
  RewindableInputStream ris = null;
  ArrayList<String> seenEvents = new ArrayList<String>(10);
  RunningJob rJob = null;

  try {
    JobConf jobConf = mrCluster.createJobConf();
    jobConf.setQueueName(queueName);
    // Construct a job with 1 map and 1 reduce task.
    rJob = UtilsForTests.runJob(jobConf, inDir, outDir, 1, 1);
    rJob.waitForCompletion();
    assertTrue("Job failed", rJob.isSuccessful());

    JobID id = rJob.getID();

    // Get the jobhistory filepath.
    Path inputPath = new Path(JobHistory.getHistoryFilePath(
        org.apache.hadoop.mapred.JobID.downgrade(id)));
    // Wait up to 10 secs (100 polls of 100 ms) for the jobhistory file to
    // move into the done folder.
    for (int i = 0; i < 100; ++i) {
      if (lfs.exists(inputPath)) {
        break;
      }
      TimeUnit.MILLISECONDS.sleep(100);
    }

    assertTrue("Missing job history file", lfs.exists(inputPath));

    InputDemuxer inputDemuxer = new DefaultInputDemuxer();
    inputDemuxer.bindTo(inputPath, conf);

    Pair<String, InputStream> filePair = inputDemuxer.getNext();
    assertNotNull(filePair);

    ris = new RewindableInputStream(filePair.second());

    // Test if the JobHistoryParserFactory can detect the parser correctly.
    parser = JobHistoryParserFactory.getParser(ris);
    assertNotNull("No suitable parser found", parser);

    // Get ParsedJob.
    String jobId = TraceBuilder.extractJobID(filePair.first());
    JobBuilder builder = new JobBuilder(jobId);

    HistoryEvent e;
    while ((e = parser.nextEvent()) != null) {
      String eventString = e.getEventType().toString();
      System.out.println(eventString);
      seenEvents.add(eventString);
      builder.process(e);
    }

    ParsedJob parsedJob = builder.build();

    // Validate the obtainXXX api of ParsedJob, ParsedTask and
    // ParsedTaskAttempt.
    validateParsedJob(parsedJob, 1, 1, queueName);
  } finally {
    // Stop the MR cluster.
    mrCluster.shutdown();

    if (ris != null) {
      ris.close();
    }
    if (parser != null) {
      parser.close();
    }

    // Cleanup the filesystem.
    lfs.delete(tempDir, true);
  }

  // Check against the gold standard.
  System.out.println("testCurrentJHParser validating using gold std");
  String[] goldLines = new String[] {
      "JOB_SUBMITTED", "JOB_PRIORITY_CHANGED", "JOB_STATUS_CHANGED",
      "JOB_INITED", "JOB_INFO_CHANGED", "TASK_STARTED",
      "MAP_ATTEMPT_STARTED", "MAP_ATTEMPT_FINISHED", "MAP_ATTEMPT_FINISHED",
      "TASK_UPDATED", "TASK_FINISHED", "JOB_STATUS_CHANGED", "TASK_STARTED",
      "MAP_ATTEMPT_STARTED", "MAP_ATTEMPT_FINISHED", "MAP_ATTEMPT_FINISHED",
      "TASK_UPDATED", "TASK_FINISHED", "TASK_STARTED", "MAP_ATTEMPT_STARTED",
      "MAP_ATTEMPT_FINISHED", "REDUCE_ATTEMPT_FINISHED", "TASK_UPDATED",
      "TASK_FINISHED", "TASK_STARTED", "MAP_ATTEMPT_STARTED",
      "MAP_ATTEMPT_FINISHED", "MAP_ATTEMPT_FINISHED", "TASK_UPDATED",
      "TASK_FINISHED", "JOB_STATUS_CHANGED", "JOB_FINISHED" };

  assertEquals("Size mismatch", goldLines.length, seenEvents.size());
  int index = 0;
  for (String goldLine : goldLines) {
    assertEquals("Content mismatch", goldLine, seenEvents.get(index++));
  }
}
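The validateParsedJob helper called above is not part of this excerpt. A condensed sketch follows; it is hypothetical, assuming Rumen's LoggedJob/ParsedJob-style accessors (obtainMapTasks, obtainReduceTasks, getQueue), and the real helper would also validate tasks, attempts, and counters.

// Condensed, hypothetical sketch of the helper referenced above.
private void validateParsedJob(ParsedJob parsedJob, int numMaps,
    int numReduces, String queueName) {
  // Task counts recorded in the history should match the submitted job.
  assertEquals("Unexpected number of map tasks", numMaps,
      parsedJob.obtainMapTasks().size());
  assertEquals("Unexpected number of reduce tasks", numReduces,
      parsedJob.obtainReduceTasks().size());
  // Queue check; getQueue() is an assumed LoggedJob-style accessor here.
  assertEquals("Queue name mismatch", queueName,
      String.valueOf(parsedJob.getQueue()));
}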