Beispiel #1
0
 /**
  * Checks if the map-reduce job has completed.
  *
  * <p>When the job is complete this method deletes the job directory named by the
  * {@code JOB_DIR_LABEL} configuration entry (if one is set), logs whether the job succeeded,
  * clears {@code raidPolicyPathPairList} and adds the job's counter values to the raid node
  * metrics. When the job is still running it logs a progress line (only when the text differs
  * from the previously logged one) and logs every newly reported task completion event whose
  * status is {@code FAILED}.
  *
  * @return true if the job completed, false otherwise.
  * @throws IOException propagated from the underlying job or file-system calls.
  */
 public boolean checkComplete() throws IOException {
   JobID jobID = runningJob.getID();
   if (runningJob.isComplete()) {
     // delete job directory
     final String jobdir = jobconf.get(JOB_DIR_LABEL);
     if (jobdir != null) {
       final Path jobpath = new Path(jobdir);
       jobpath.getFileSystem(jobconf).delete(jobpath, true);
     }
     if (runningJob.isSuccessful()) {
       LOG.info("Job Complete(Succeeded): " + jobID);
     } else {
       LOG.info("Job Complete(Failed): " + jobID);
     }
     raidPolicyPathPairList.clear();
     Counters ctrs = runningJob.getCounters();
     if (ctrs != null) {
       RaidNodeMetrics metrics = RaidNodeMetrics.getInstance(RaidNodeMetrics.DEFAULT_NAMESPACE_ID);
       if (ctrs.findCounter(Counter.FILES_FAILED) != null) {
         long filesFailed = ctrs.findCounter(Counter.FILES_FAILED).getValue();
         metrics.raidFailures.inc(filesFailed);
       }
       // Guard this lookup the same way as FILES_FAILED above: a missing counter must not
       // turn an already-completed job into a NullPointerException.
       if (ctrs.findCounter(JobInProgress.Counter.SLOTS_MILLIS_MAPS) != null) {
         // The counter is reported in milliseconds; the metric is kept in seconds.
         long slotSeconds =
             ctrs.findCounter(JobInProgress.Counter.SLOTS_MILLIS_MAPS).getValue() / 1000;
         metrics.raidSlotSeconds.inc(slotSeconds);
       }
     }
     return true;
   } else {
     String report =
         (" job "
             + jobID
             + " map "
             + StringUtils.formatPercent(runningJob.mapProgress(), 0)
             + " reduce "
             + StringUtils.formatPercent(runningJob.reduceProgress(), 0));
     // Only log when the progress text changed, to avoid repeating identical lines.
     if (!report.equals(lastReport)) {
       LOG.info(report);
       lastReport = report;
     }
     // jobEventCounter is the offset of the first event not yet seen, so each call only
     // receives events reported since the previous call.
     TaskCompletionEvent[] events = runningJob.getTaskCompletionEvents(jobEventCounter);
     if (events != null) {
       jobEventCounter += events.length;
       for (TaskCompletionEvent event : events) {
         if (event.getTaskStatus() == TaskCompletionEvent.Status.FAILED) {
           LOG.info(" Job " + jobID + " " + event.toString());
         }
       }
     }
     return false;
   }
 }
Beispiel #2
0
    /**
     * Walks all task completion events of the job {@code rj} and records per-task information.
     *
     * <p>Events are fetched in batches starting at offset 0 until a batch is {@code null} or
     * empty. For every event the method creates or looks up the {@code TaskInfo} in
     * {@code taskIdToInfo}, adds the task attempt log URL when one is available, and then either
     * records the task id in {@code successes} or, for any status other than {@code SUCCEEDED},
     * stores the diagnostics, sets the error code if none was set yet, and increments the task's
     * entry in {@code failures}.
     *
     * @throws IOException propagated from the job client calls.
     * @throws MalformedURLException declared for the log URL lookup.
     */
    private void getTaskInfos() throws IOException, MalformedURLException {
      int startIndex = 0;
      while (true) {
        TaskCompletionEvent[] taskCompletions = rj.getTaskCompletionEvents(startIndex);

        if (taskCompletions == null || taskCompletions.length == 0) {
          break;
        }

        // Print a single "Examining ..." line per batch, for the first event in it.
        boolean firstEvent = true;
        for (TaskCompletionEvent t : taskCompletions) {
          // For each task completion event, get the associated task id, job id
          // and the logs
          String taskId = t.getTaskAttemptId().getTaskID().toString();
          String jobId = t.getTaskAttemptId().getJobID().toString();
          if (firstEvent) {
            console.printError("Examining task ID: " + taskId + " (and more) from job " + jobId);
            firstEvent = false;
          }

          TaskInfo ti = taskIdToInfo.get(taskId);
          if (ti == null) {
            ti = new TaskInfo(jobId);
            taskIdToInfo.put(taskId, ti);
          }
          // These tasks should have come from the same job.
          assert (ti.getJobId() != null && ti.getJobId().equals(jobId));
          String taskAttemptLogUrl =
              ShimLoader.getHadoopShims()
                  .getTaskAttemptLogUrl(conf, t.getTaskTrackerHttp(), t.getTaskId());
          if (taskAttemptLogUrl != null) {
            ti.getLogUrls().add(taskAttemptLogUrl);
          }

          // If a task failed, fetch its error code (if available).
          // Also keep track of the total number of failures for that
          // task (typically, a task gets re-run up to 4 times if it fails).
          if (t.getTaskStatus() != TaskCompletionEvent.Status.SUCCEEDED) {
            String[] diags = rj.getTaskDiagnostics(t.getTaskAttemptId());
            ti.setDiagnosticMesgs(diags);
            // Keep the first non-zero error code seen for this task.
            if (ti.getErrorCode() == 0) {
              ti.setErrorCode(extractErrorCode(diags));
            }

            Integer previousFailures = failures.get(taskId);
            int failAttempts = (previousFailures == null) ? 1 : previousFailures.intValue() + 1;
            failures.put(taskId, Integer.valueOf(failAttempts));
          } else {
            successes.add(taskId);
          }
        }
        // Advance past the events just processed; the loop ends on the first empty batch.
        startIndex += taskCompletions.length;
      }
    }