/** * Checks if the map-reduce job has completed. * * @return true if the job completed, false otherwise. * @throws IOException */ public boolean checkComplete() throws IOException { JobID jobID = runningJob.getID(); if (runningJob.isComplete()) { // delete job directory final String jobdir = jobconf.get(JOB_DIR_LABEL); if (jobdir != null) { final Path jobpath = new Path(jobdir); jobpath.getFileSystem(jobconf).delete(jobpath, true); } if (runningJob.isSuccessful()) { LOG.info("Job Complete(Succeeded): " + jobID); } else { LOG.info("Job Complete(Failed): " + jobID); } raidPolicyPathPairList.clear(); Counters ctrs = runningJob.getCounters(); if (ctrs != null) { RaidNodeMetrics metrics = RaidNodeMetrics.getInstance(RaidNodeMetrics.DEFAULT_NAMESPACE_ID); if (ctrs.findCounter(Counter.FILES_FAILED) != null) { long filesFailed = ctrs.findCounter(Counter.FILES_FAILED).getValue(); metrics.raidFailures.inc(filesFailed); } long slotSeconds = ctrs.findCounter(JobInProgress.Counter.SLOTS_MILLIS_MAPS).getValue() / 1000; metrics.raidSlotSeconds.inc(slotSeconds); } return true; } else { String report = (" job " + jobID + " map " + StringUtils.formatPercent(runningJob.mapProgress(), 0) + " reduce " + StringUtils.formatPercent(runningJob.reduceProgress(), 0)); if (!report.equals(lastReport)) { LOG.info(report); lastReport = report; } TaskCompletionEvent[] events = runningJob.getTaskCompletionEvents(jobEventCounter); jobEventCounter += events.length; for (TaskCompletionEvent event : events) { if (event.getTaskStatus() == TaskCompletionEvent.Status.FAILED) { LOG.info(" Job " + jobID + " " + event.toString()); } } return false; } }
private void getTaskInfos() throws IOException, MalformedURLException { int startIndex = 0; while (true) { TaskCompletionEvent[] taskCompletions = rj.getTaskCompletionEvents(startIndex); if (taskCompletions == null || taskCompletions.length == 0) { break; } boolean more = true; boolean firstError = true; for (TaskCompletionEvent t : taskCompletions) { // For each task completion event, get the associated task id, job id // and the logs String taskId = t.getTaskAttemptId().getTaskID().toString(); String jobId = t.getTaskAttemptId().getJobID().toString(); if (firstError) { console.printError("Examining task ID: " + taskId + " (and more) from job " + jobId); firstError = false; } TaskInfo ti = taskIdToInfo.get(taskId); if (ti == null) { ti = new TaskInfo(jobId); taskIdToInfo.put(taskId, ti); } // These tasks should have come from the same job. assert (ti.getJobId() != null && ti.getJobId().equals(jobId)); String taskAttemptLogUrl = ShimLoader.getHadoopShims() .getTaskAttemptLogUrl(conf, t.getTaskTrackerHttp(), t.getTaskId()); if (taskAttemptLogUrl != null) { ti.getLogUrls().add(taskAttemptLogUrl); } // If a task failed, fetch its error code (if available). // Also keep track of the total number of failures for that // task (typically, a task gets re-run up to 4 times if it fails. if (t.getTaskStatus() != TaskCompletionEvent.Status.SUCCEEDED) { String[] diags = rj.getTaskDiagnostics(t.getTaskAttemptId()); ti.setDiagnosticMesgs(diags); if (ti.getErrorCode() == 0) { ti.setErrorCode(extractErrorCode(diags)); } Integer failAttempts = failures.get(taskId); if (failAttempts == null) { failAttempts = Integer.valueOf(0); } failAttempts = Integer.valueOf(failAttempts.intValue() + 1); failures.put(taskId, failAttempts); } else { successes.add(taskId); } } if (!more) { break; } startIndex += taskCompletions.length; } }