/**
   * Polls every job tracked in {@code jobIndex} and finalizes the ones that have completed.
   *
   * <p>For each completed job the map slot time (the {@code SLOTS_MILLIS_MAPS} counter, converted
   * to whole seconds) is added to the {@code blockFixSlotSeconds} metric. The job is handed to
   * {@code succeedJob} only if it reports success and its {@code FILES_SUCCEEDED},
   * {@code FILES_FAILED} and {@code FILES_NOACTION} counters add up to the number of files
   * recorded for it in {@code jobIndex}; otherwise it is handed to {@code failJob}. Either way the
   * job is then removed from {@code jobIndex}. Jobs that are still running are only logged.
   *
   * <p>If inspecting a job throws, the exception is logged, the job is passed to {@code failJob},
   * a best-effort {@code killJob()} is attempted, and the job is removed from {@code jobIndex}.
   *
   * <p>This method returns nothing; once all jobs have been examined it calls
   * {@code purgeFileIndex()}.
   *
   * @throws IOException if the recovery path or {@code purgeFileIndex()} throws it; exceptions
   *     raised while inspecting a job are caught and handled per job
   */
  void checkJobs() throws IOException {
    Iterator<Job> jobIter = jobIndex.keySet().iterator();
    while (jobIter.hasNext()) {
      Job job = jobIter.next();

      try {
        if (!job.isComplete()) {
          LOG.info("Job " + job.getID() + "(" + job.getJobName() + ") still running");
          continue;
        }

        // Account for the map slot time consumed by this job (milliseconds -> whole seconds).
        long slotSeconds =
            job.getCounters().findCounter(JobInProgress.Counter.SLOTS_MILLIS_MAPS).getValue()
                / 1000;
        RaidNodeMetrics.getInstance().blockFixSlotSeconds.inc(slotSeconds);

        // A counter that cannot be found is treated as 0.
        // NOTE(review): getCounters() is invoked repeatedly here; if it is a remote call in this
        // code base, consider fetching the counters once per job.
        long filesSucceeded =
            job.getCounters().findCounter(Counter.FILES_SUCCEEDED) != null
                ? job.getCounters().findCounter(Counter.FILES_SUCCEEDED).getValue()
                : 0;
        long filesFailed =
            job.getCounters().findCounter(Counter.FILES_FAILED) != null
                ? job.getCounters().findCounter(Counter.FILES_FAILED).getValue()
                : 0;
        long filesNoAction =
            job.getCounters().findCounter(Counter.FILES_NOACTION) != null
                ? job.getCounters().findCounter(Counter.FILES_NOACTION).getValue()
                : 0;

        int files = jobIndex.get(job).size();
        long filesAccountedFor = filesSucceeded + filesFailed + filesNoAction;
        if (job.isSuccessful() && filesAccountedFor == (long) files) {
          // job has processed all files
          succeedJob(job, filesSucceeded, filesFailed);
        } else {
          // Either the job itself failed or some files were never accounted for.
          failJob(job);
        }
        // Remove through the iterator so the key set can be modified while iterating.
        jobIter.remove();
      } catch (Exception e) {
        // Inspecting the job failed: treat it as failed and stop tracking it.
        // NOTE(review): if the exception came from succeedJob/failJob above, failJob runs a
        // second time for this job here - confirm that this is harmless.
        LOG.error(StringUtils.stringifyException(e));
        failJob(job);
        try {
          // Best effort only; a failure to kill must not prevent the job from being dropped.
          job.killJob();
        } catch (Exception ee) {
          LOG.error(StringUtils.stringifyException(ee));
        }
        jobIter.remove();
      }
    }
    purgeFileIndex();
  }