private void addDirectoryToJobListCache(Path path) throws IOException {
   if (LogGlobal.isDebugEnabled()) {
     /* LOG.debug("Adding "+path+" to job list cache.") */
     LOG.adding_job_list_cache(path.toString()).debug();
   }
   List<FileStatus> historyFileList = scanDirectoryForHistoryFiles(path, doneDirFc);
   for (FileStatus fs : historyFileList) {
     if (LogGlobal.isDebugEnabled()) {
       /* LOG.debug("Adding in history for "+fs.getPath()) */
       LOG.adding_history_for(String.valueOf(fs.getPath())).tag("methodCall").debug();
     }
     JobIndexInfo jobIndexInfo = FileNameIndexUtils.getIndexInfo(fs.getPath().getName());
     String confFileName = JobHistoryUtils.getIntermediateConfFileName(jobIndexInfo.getJobId());
     String summaryFileName =
         JobHistoryUtils.getIntermediateSummaryFileName(jobIndexInfo.getJobId());
     HistoryFileInfo fileInfo =
         new HistoryFileInfo(
             fs.getPath(),
             new Path(fs.getPath().getParent(), confFileName),
             new Path(fs.getPath().getParent(), summaryFileName),
             jobIndexInfo,
             true);
     jobListCache.addIfAbsent(fileInfo);
   }
 }
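  /**
   * Ensures that the given done-dir subdirectory exists, creating it with the configured
   * HISTORY_DONE_DIR_PERMISSION if it is missing, and records it in existingDoneSubdirs.
   */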
 private void makeDoneSubdir(Path path) throws IOException {
   try {
     doneDirFc.getFileStatus(path);
     existingDoneSubdirs.add(path);
   } catch (FileNotFoundException fnfE) {
     try {
       FsPermission fsp = new FsPermission(JobHistoryUtils.HISTORY_DONE_DIR_PERMISSION);
       doneDirFc.mkdir(path, fsp, true);
       FileStatus fsStatus = doneDirFc.getFileStatus(path);
       /* LOG.info("Perms after creating "+fsStatus.getPermission().toShort()+", Expected: "+fsp.toShort()) */
       LOG.perms_after_creating_expected(
               String.valueOf(fsStatus.getPermission().toShort()), String.valueOf(fsp.toShort()))
           .tag("methodCall")
           .info();
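        // The done-dir file context's umask may mask off bits from the requested permission,
        // so compare the actual permission with the expected one and reset it explicitly.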
       if (fsStatus.getPermission().toShort() != fsp.toShort()) {
         /* LOG.info("Explicitly setting permissions to : "+fsp.toShort()+", "+fsp) */
         LOG.explicitly_setting_permissions(String.valueOf(fsp.toShort()), fsp.toString())
             .tag("methodCall")
             .info();
         doneDirFc.setPermission(path, fsp);
       }
       existingDoneSubdirs.add(path);
     } catch (FileAlreadyExistsException faeE) { // Nothing to do.
     }
   }
 }
  /**
   * List input directories. Subclasses may override to, e.g., select only files matching a regular
   * expression.
   *
   * @param job the job to list input paths for
    * @return a list of FileStatus objects for the input files
    * @throws IOException if no input paths are specified for the job
   */
  protected List<FileStatus> listStatus(JobContext job) throws IOException {
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
      throw new IOException("No input paths specified in job");
    }

     // Get tokens for all of the required FileSystems.
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job.getConfiguration());

     // Whether we need to recursively look into the directory structure
    boolean recursive = getInputDirRecursive(job);

     // Create a MultiPathFilter from the hiddenFileFilter and the
     // user-provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(hiddenFileFilter);
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
      filters.add(jobFilter);
    }
    PathFilter inputFilter = new MultiPathFilter(filters);

    List<FileStatus> result = null;

    int numThreads =
        job.getConfiguration().getInt(LIST_STATUS_NUM_THREADS, DEFAULT_LIST_STATUS_NUM_THREADS);
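     // With a single thread, scan the directories sequentially; otherwise use
     // LocatedFileStatusFetcher to list the input directories in parallel.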
    Stopwatch sw = new Stopwatch().start();
    if (numThreads == 1) {
      result = singleThreadedListStatus(job, dirs, inputFilter, recursive);
    } else {
      Iterable<FileStatus> locatedFiles = null;
      try {
        LocatedFileStatusFetcher locatedFileStatusFetcher =
            new LocatedFileStatusFetcher(
                job.getConfiguration(), dirs, recursive, inputFilter, true);
        locatedFiles = locatedFileStatusFetcher.getFileStatuses();
      } catch (InterruptedException e) {
        throw new IOException("Interrupted while getting file statuses");
      }
      result = Lists.newArrayList(locatedFiles);
    }

    sw.stop();
    if (LogGlobal.isDebugEnabled()) {
      /* LOG.debug("Time taken to get FileStatuses: "+sw.elapsedMillis()) */
      LOG.time_taken_get_filestatuses(String.valueOf(sw.elapsedMillis())).tag("methodCall").debug();
    }
    /* LOG.info("Total input paths to process : "+result.size()) */
    LOG.total_input_paths_process(String.valueOf(result.size())).tag("methodCall").info();
    return result;
  }
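  /** Moves (renames) the given history file from src to target. */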
 private void moveToDoneNow(final Path src, final Path target) throws IOException {
   /* LOG.info("Moving "+src.toString()+" to "+target.toString()) */
   LOG.moving(String.valueOf(src.toString()), String.valueOf(target.toString()))
       .tag("methodCall")
       .info();
   intermediateDoneDirFc.rename(src, target, Options.Rename.NONE);
 }
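  /** Removes the entry for the given file info's job from the cache. */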
 public void delete(HistoryFileInfo fileInfo) {
   if (LogGlobal.isDebugEnabled()) {
     /* LOG.debug("Removing from cache "+fileInfo) */
     LOG.removing_from_cache(fileInfo.toString()).debug();
   }
   cache.remove(fileInfo.getJobId());
 }
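  /**
   * Adds the given file info to the cache if no entry already exists for its job id. If the
   * cache has grown past maxSize, attempts to evict the oldest entries that are safe to drop.
   *
   * @return the existing entry for the job id, or null if there was none
   */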
 public HistoryFileInfo addIfAbsent(HistoryFileInfo fileInfo) {
   JobId jobId = fileInfo.getJobId();
   if (LogGlobal.isDebugEnabled()) {
     /* LOG.debug("Adding "+jobId+" to job list cache with "+fileInfo.getJobIndexInfo()) */
     LOG.adding_job_list_cache_with(jobId.toString(), String.valueOf(fileInfo.getJobIndexInfo()))
         .tag("methodCall")
         .debug();
   }
   HistoryFileInfo old = cache.putIfAbsent(jobId, fileInfo);
   if (cache.size() > maxSize) {
      // There is a race here, where more than one thread could be trying to
     // remove entries.  This could result in too many entries being removed
     // from the cache.  This is considered OK as the size of the cache
     // should be rather large, and we would rather have performance over
     // keeping the cache size exactly at the maximum.
     Iterator<JobId> keys = cache.navigableKeySet().iterator();
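      // The navigable key set iterates job ids in ascending order, so the oldest
      // entries are considered for eviction first.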
     long cutoff = System.currentTimeMillis() - maxAge;
     while (cache.size() > maxSize && keys.hasNext()) {
       JobId key = keys.next();
       HistoryFileInfo firstValue = cache.get(key);
       if (firstValue != null) {
         synchronized (firstValue) {
           if (firstValue.isMovePending()) {
             if (firstValue.didMoveFail() && firstValue.jobIndexInfo.getFinishTime() <= cutoff) {
               cache.remove(key);
                // Now let's try to delete it
               try {
                 firstValue.delete();
               } catch (IOException e) {
                 /* LOG.error("Error while trying to delete history files"+" that could not be moved to done.",e) */
                 LOG.error_while_trying_delete_history_files_(e.toString()).error();
               }
             } else {
               /* LOG.warn("Waiting to remove "+key+" from JobListCache because it is not in done yet.") */
               LOG.waiting_remove_from_joblistcache_because(key.toString()).warn();
             }
           } else {
             cache.remove(key);
           }
         }
       }
     }
   }
   return old;
 }
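  /** Removes the serial number entry for the given timestamped directory from the index. */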
 private void removeDirectoryFromSerialNumberIndex(Path serialDirPath) {
   String serialPart = serialDirPath.getName();
   String timeStampPart = JobHistoryUtils.getTimestampPartFromPath(serialDirPath.toString());
   if (timeStampPart == null) {
     /* LOG.warn("Could not find timestamp portion from path: "+serialDirPath.toString()+". Continuing with next") */
     LOG.could_not_find_timestamp_portion_from_co(String.valueOf(serialDirPath.toString()))
         .tag("methodCall")
         .warn();
     return;
   }
   if (serialPart == null) {
     /* LOG.warn("Could not find serial portion from path: "+serialDirPath.toString()+". Continuing with next") */
     LOG.could_not_find_serial_portion_from_conti(String.valueOf(serialDirPath.toString()))
         .tag("methodCall")
         .warn();
     return;
   }
   serialNumberIndex.remove(serialPart, timeStampPart);
 }
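  /** Adds the serial number entry for the given timestamped directory to the index. */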
 private void addDirectoryToSerialNumberIndex(Path serialDirPath) {
   if (LogGlobal.isDebugEnabled()) {
     /* LOG.debug("Adding "+serialDirPath+" to serial index") */
     LOG.adding_serial_index(serialDirPath.toString()).debug();
   }
   String serialPart = serialDirPath.getName();
   String timestampPart = JobHistoryUtils.getTimestampPartFromPath(serialDirPath.toString());
   if (timestampPart == null) {
     /* LOG.warn("Could not find timestamp portion from path: "+serialDirPath+". Continuing with next") */
     LOG.could_not_find_timestamp_portion_from_co(serialDirPath.toString()).warn();
     return;
   }
   if (serialPart == null) {
     /* LOG.warn("Could not find serial portion from path: "+serialDirPath.toString()+". Continuing with next") */
     LOG.could_not_find_serial_portion_from_conti(String.valueOf(serialDirPath.toString()))
         .tag("methodCall")
         .warn();
   } else {
     serialNumberIndex.add(serialPart, timestampPart);
   }
 }
  /** Populates index data structures. Should only be called at initialization time. */
 @SuppressWarnings("unchecked")
 void initExisting() throws IOException {
   /* LOG.info("Initializing Existing Jobs...") */
   LOG.initializing_existing_jobs().info();
   List<FileStatus> timestampedDirList = findTimestampedDirectories();
   // Sort first just so insertion is in a consistent order
   Collections.sort(timestampedDirList);
   for (FileStatus fs : timestampedDirList) {
     // TODO Could verify the correct format for these directories.
     addDirectoryToSerialNumberIndex(fs.getPath());
     addDirectoryToJobListCache(fs.getPath());
   }
 }
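   /**
    * Creates the given path with the requested permissions if it does not already exist,
    * explicitly resetting the permissions if they do not match the requested ones.
    */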
  private void mkdir(FileContext fc, Path path, FsPermission fsp) throws IOException {
    if (!fc.util().exists(path)) {
      try {
        fc.mkdir(path, fsp, true);

        FileStatus fsStatus = fc.getFileStatus(path);
        /* LOG.info("Perms after creating "+fsStatus.getPermission().toShort()+", Expected: "+fsp.toShort()) */
        LOG.perms_after_creating_expected(
                String.valueOf(fsStatus.getPermission().toShort()), String.valueOf(fsp.toShort()))
            .tag("methodCall")
            .info();
        if (fsStatus.getPermission().toShort() != fsp.toShort()) {
          /* LOG.info("Explicitly setting permissions to : "+fsp.toShort()+", "+fsp) */
          LOG.explicitly_setting_permissions(String.valueOf(fsp.toShort()), fsp.toString())
              .tag("methodCall")
              .info();
          fc.setPermission(path, fsp);
        }
      } catch (FileAlreadyExistsException e) {
        /* LOG.info("Directory: ["+path+"] already exists.") */
        LOG.directory_already_exists(path.toString()).info();
      }
    }
  }
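  /**
   * Records the timestamp part under the given serial part, dropping the first (lowest)
   * serial entry when the index grows beyond maxSize.
   */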
 public synchronized void add(String serialPart, String timestampPart) {
   if (!cache.containsKey(serialPart)) {
     cache.put(serialPart, new HashSet<String>());
     if (cache.size() > maxSize) {
       String key = cache.firstKey();
       /* LOG.error("Dropping "+key+" from the SerialNumberIndex. We will no "+"longer be able to see jobs that are in that serial index for "+cache.get(key)) */
       LOG.dropping_from_serialnumberindex_will_lon(key, String.valueOf(cache.get(key)))
           .tag("methodCall")
           .error();
       cache.remove(key);
     }
   }
   Set<String> datePartSet = cache.get(serialPart);
   datePartSet.add(timestampPart);
 }
 /**
   * Scans the intermediate directory to find user directories. Scans these for history files if
   * the directory's modification time has changed. Once it finds history files it starts the
   * process of moving them to the done directory.
   *
   * @throws IOException if there was an error while scanning
  */
 void scanIntermediateDirectory() throws IOException {
   // TODO it would be great to limit how often this happens, except in the
   // case where we are looking for a particular job.
   List<FileStatus> userDirList =
       JobHistoryUtils.localGlobber(intermediateDoneDirFc, intermediateDoneDirPath, "");
   /* LOG.debug("Scanning intermediate dirs") */
   LOG.scanning_intermediate_dirs().debug();
   for (FileStatus userDir : userDirList) {
     String name = userDir.getPath().getName();
     UserLogDir dir = userDirModificationTimeMap.get(name);
     if (dir == null) {
       dir = new UserLogDir();
       UserLogDir old = userDirModificationTimeMap.putIfAbsent(name, dir);
       if (old != null) {
         dir = old;
       }
     }
     dir.scanIfNeeded(userDir);
   }
 }
  /**
   * Generate the list of files and make them into FileSplits.
   *
   * @param job the job context
    * @return the list of input splits for the job
    * @throws IOException if the input files cannot be listed
   */
  public List<InputSplit> getSplits(JobContext job) throws IOException {
    Stopwatch sw = new Stopwatch().start();
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);

    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> files = listStatus(job);
    for (FileStatus file : files) {
      Path path = file.getPath();
      long length = file.getLen();
      if (length != 0) {
        BlockLocation[] blkLocations;
        if (file instanceof LocatedFileStatus) {
          blkLocations = ((LocatedFileStatus) file).getBlockLocations();
        } else {
          FileSystem fs = path.getFileSystem(job.getConfiguration());
          blkLocations = fs.getFileBlockLocations(file, 0, length);
        }
        if (isSplitable(job, path)) {
          long blockSize = file.getBlockSize();
          long splitSize = computeSplitSize(blockSize, minSize, maxSize);
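           // In the stock FileInputFormat, computeSplitSize returns
           // Math.max(minSize, Math.min(maxSize, blockSize)), so with the default min/max
           // settings the split size equals the block size. For example, with 128 MB blocks
           // a 300 MB file yields splits of 128 MB, 128 MB and 44 MB, while a 130 MB file
           // stays a single split because the SPLIT_SLOP check below (10% slop in stock
           // Hadoop) avoids creating a tiny trailing split.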

          long bytesRemaining = length;
          while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
            int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
            splits.add(
                makeSplit(
                    path, length - bytesRemaining, splitSize, blkLocations[blkIndex].getHosts()));
            bytesRemaining -= splitSize;
          }

          if (bytesRemaining != 0) {
            int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
            splits.add(
                makeSplit(
                    path,
                    length - bytesRemaining,
                    bytesRemaining,
                    blkLocations[blkIndex].getHosts()));
          }
        } else { // not splitable
          splits.add(makeSplit(path, 0, length, blkLocations[0].getHosts()));
        }
      } else {
         // Create an empty hosts array for zero-length files
        splits.add(makeSplit(path, 0, length, new String[0]));
      }
    }
    // Save the number of input files for metrics/loadgen
    job.getConfiguration().setLong(NUM_INPUT_FILES, files.size());
    sw.stop();
    if (LogGlobal.isDebugEnabled()) {
      /* LOG.debug("Total # of splits generated by getSplits: "+splits.size()+", TimeTaken: "+sw.elapsedMillis()) */
      LOG.total_splits_generated_getsplits_timetak(
              String.valueOf(splits.size()), String.valueOf(sw.elapsedMillis()))
          .tag("methodCall")
          .debug();
    }
    return splits;
  }
  /**
   * Scans the specified path and populates the intermediate cache.
   *
    * @param absPath the absolute path to scan
    * @throws IOException if the directory cannot be scanned
   */
  private void scanIntermediateDirectory(final Path absPath) throws IOException {
    if (LogGlobal.isDebugEnabled()) {
      /* LOG.debug("Scanning intermediate dir "+absPath) */
      LOG.scanning_intermediate_dir(absPath.toString()).debug();
    }
    List<FileStatus> fileStatusList = scanDirectoryForHistoryFiles(absPath, intermediateDoneDirFc);
    if (LogGlobal.isDebugEnabled()) {
      /* LOG.debug("Found "+fileStatusList.size()+" files") */
      LOG.found_files(String.valueOf(fileStatusList.size())).tag("methodCall").debug();
    }
    for (FileStatus fs : fileStatusList) {
      if (LogGlobal.isDebugEnabled()) {
        /* LOG.debug("scanning file: "+fs.getPath()) */
        LOG.scanning_file(String.valueOf(fs.getPath())).tag("methodCall").debug();
      }
      JobIndexInfo jobIndexInfo = FileNameIndexUtils.getIndexInfo(fs.getPath().getName());
      String confFileName = JobHistoryUtils.getIntermediateConfFileName(jobIndexInfo.getJobId());
      String summaryFileName =
          JobHistoryUtils.getIntermediateSummaryFileName(jobIndexInfo.getJobId());
      HistoryFileInfo fileInfo =
          new HistoryFileInfo(
              fs.getPath(),
              new Path(fs.getPath().getParent(), confFileName),
              new Path(fs.getPath().getParent(), summaryFileName),
              jobIndexInfo,
              false);

      final HistoryFileInfo old = jobListCache.addIfAbsent(fileInfo);
      if (old == null || old.didMoveFail()) {
        final HistoryFileInfo found = (old == null) ? fileInfo : old;
        long cutoff = System.currentTimeMillis() - maxHistoryAge;
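         // Anything that finished before the cutoff (now - maxHistoryAge) is past its
         // retention window and is deleted instead of being scheduled for a move to done.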
        if (found.getJobIndexInfo().getFinishTime() <= cutoff) {
          try {
            found.delete();
          } catch (IOException e) {
            /* LOG.warn("Error cleaning up a HistoryFile that is out of date.",e) */
            LOG.error_cleaning_historyfile_that_out_date(e.toString()).warn();
          }
        } else {
          if (LogGlobal.isDebugEnabled()) {
            /* LOG.debug("Scheduling move to done of "+found) */
            LOG.scheduling_move_done(found.toString()).debug();
          }
          moveToDoneExecutor.execute(
              new Runnable() {
                @Override
                public void run() {
                  try {
                    found.moveToDone();
                  } catch (IOException e) {
                    /* LOG.info("Failed to process fileInfo for job: "+found.getJobId(),e) */
                    LOG.failed_process_fileinfo_for_job(
                            String.valueOf(found.getJobId()), e.toString())
                        .tag("methodCall")
                        .info();
                  }
                }
              });
        }
      } else if (old != null && !old.isMovePending()) {
        // This is a duplicate so just delete it
        if (LogGlobal.isDebugEnabled()) {
          /* LOG.debug("Duplicate: deleting") */
          LOG.duplicate_deleting().debug();
        }
        fileInfo.delete();
      }
    }
  }
 /**
   * Returns TRUE if the history dirs were created, FALSE if they could not be created because
   * the FileSystem is not reachable or is in safe mode, and throws an exception otherwise.
  */
 @VisibleForTesting
 boolean tryCreatingHistoryDirs(boolean logWait) throws IOException {
   boolean succeeded = true;
   String doneDirPrefix = JobHistoryUtils.getConfiguredHistoryServerDoneDirPrefix(conf);
   try {
     doneDirPrefixPath = FileContext.getFileContext(conf).makeQualified(new Path(doneDirPrefix));
     doneDirFc = FileContext.getFileContext(doneDirPrefixPath.toUri(), conf);
     doneDirFc.setUMask(JobHistoryUtils.HISTORY_DONE_DIR_UMASK);
     mkdir(
         doneDirFc,
         doneDirPrefixPath,
         new FsPermission(JobHistoryUtils.HISTORY_DONE_DIR_PERMISSION));
   } catch (ConnectException ex) {
     if (logWait) {
       /* LOG.info("Waiting for FileSystem at "+doneDirPrefixPath.toUri().getAuthority()+"to be available") */
       LOG.waiting_for_filesystem_available(
               String.valueOf(doneDirPrefixPath.toUri().getAuthority()))
           .tag("methodCall")
           .info();
     }
     succeeded = false;
   } catch (IOException e) {
     if (isBecauseSafeMode(e)) {
       succeeded = false;
       if (logWait) {
         /* LOG.info("Waiting for FileSystem at "+doneDirPrefixPath.toUri().getAuthority()+"to be out of safe mode") */
         LOG.waiting_for_filesystem_out_safe_mode(
                 String.valueOf(doneDirPrefixPath.toUri().getAuthority()))
             .tag("methodCall")
             .info();
       }
     } else {
       throw new YarnRuntimeException(
           "Error creating done directory: [" + doneDirPrefixPath + "]", e);
     }
   }
   if (succeeded) {
     String intermediateDoneDirPrefix =
         JobHistoryUtils.getConfiguredHistoryIntermediateDoneDirPrefix(conf);
     try {
       intermediateDoneDirPath =
           FileContext.getFileContext(conf).makeQualified(new Path(intermediateDoneDirPrefix));
       intermediateDoneDirFc = FileContext.getFileContext(intermediateDoneDirPath.toUri(), conf);
       mkdir(
           intermediateDoneDirFc,
           intermediateDoneDirPath,
           new FsPermission(JobHistoryUtils.HISTORY_INTERMEDIATE_DONE_DIR_PERMISSIONS.toShort()));
     } catch (ConnectException ex) {
       succeeded = false;
       if (logWait) {
         /* LOG.info("Waiting for FileSystem at "+intermediateDoneDirPath.toUri().getAuthority()+"to be available") */
         LOG.waiting_for_filesystem_available(
                 String.valueOf(intermediateDoneDirPath.toUri().getAuthority()))
             .tag("methodCall")
             .info();
       }
     } catch (IOException e) {
       if (isBecauseSafeMode(e)) {
         succeeded = false;
         if (logWait) {
           /* LOG.info("Waiting for FileSystem at "+intermediateDoneDirPath.toUri().getAuthority()+"to be out of safe mode") */
           LOG.waiting_for_filesystem_out_safe_mode(
                   String.valueOf(intermediateDoneDirPath.toUri().getAuthority()))
               .tag("methodCall")
               .info();
         }
       } else {
         throw new YarnRuntimeException(
             "Error creating intermediate done directory: [" + intermediateDoneDirPath + "]", e);
       }
     }
   }
   return succeeded;
 }