public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
      FileSplit split = (FileSplit) genericSplit;
      Configuration job = context.getConfiguration();
      m_Start = split.getStart();
      m_End = m_Start + split.getLength();
      final Path file = split.getPath();
      compressionCodecs = new CompressionCodecFactory(job);
      final CompressionCodec codec = compressionCodecs.getCodec(file);

      // open the file and seek to the m_Start of the split
      FileSystem fs = file.getFileSystem(job);
      //  getFileStatus fileStatus = fs.getFileStatus(split.getPath());
      //noinspection deprecation
      @SuppressWarnings(value = "deprecated")
      long length = fs.getLength(file);
      FSDataInputStream fileIn =;
      if (m_Start > 0);
      if (codec != null) {
        CompressionInputStream inputStream = codec.createInputStream(fileIn);
        m_Input = new BufferedReader(new InputStreamReader(inputStream));
        m_End = length;
      } else {
        m_Input = new BufferedReader(new InputStreamReader(fileIn));
      m_Current = m_Start;
      m_Key = split.getPath().getName();
  // Generate the list of files and make them into FileSplits. This method needs to be copied
  // (rather than reused as-is) so that a filter on acceptable data can be inserted.
  public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);
    long desiredMappers =
        job.getConfiguration().getLong("org.systemsbiology.jxtandem.DesiredXMLInputMappers", 0);

    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> fileStatuses = listStatus(job);
    boolean forceNumberMappers = fileStatuses.size() == 1;
    for (FileStatus file : fileStatuses) {
      Path path = file.getPath();
      if (!isPathAcceptable(path)) // filter acceptable data
      FileSystem fs = path.getFileSystem(job.getConfiguration());
      long length = file.getLen();
      BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
      if ((length != 0) && isSplitable(job, path)) {
        long blockSize = file.getBlockSize();
        // use desired mappers to force more splits
        if (forceNumberMappers && desiredMappers > 0)
          maxSize = Math.min(maxSize, (length / desiredMappers));

        long splitSize = computeSplitSize(blockSize, minSize, maxSize);

        long bytesRemaining = length;
        while (withinSlop(splitSize, bytesRemaining)) {
          int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
              new FileSplit(
                  path, length - bytesRemaining, splitSize, blkLocations[blkIndex].getHosts()));
          bytesRemaining -= splitSize;

        if (bytesRemaining != 0) {
              new FileSplit(
                  length - bytesRemaining,
                  blkLocations[blkLocations.length - 1].getHosts()));
      } else if (length != 0) {
        splits.add(new FileSplit(path, 0, length, blkLocations[0].getHosts()));
      } else {
        // Create empty hosts array for zero length files
        splits.add(new FileSplit(path, 0, length, new String[0]));
    System.out.println("Total # of splits: " + splits.size());
    //     LOG.debug("Total # of splits: " + splits.size());
    return splits;
 private Job configureJob(Path secretsPath, Path saltFilePath, Path inputPath, Path outputPath)
     throws Exception {
   Job job = Job.getInstance(getConf());
   FileInputFormat.setInputPaths(job, inputPath);
   FileOutputFormat.setOutputPath(job, outputPath);
   job.getConfiguration().set(ObfuscateMapper.SECRET_WORDS_FILE_KEY, secretsPath.toString());
   job.getConfiguration().set(ObfuscateMapper.SALT_FILE_KEY, saltFilePath.toString());
   FileSystem.get(outputPath.toUri(), getConf()).delete(outputPath, true);
   return job;
 protected boolean isSplitable(JobContext context, Path file) {
   String fname = file.getName().toLowerCase();
   //noinspection SimplifiableIfStatementf,RedundantIfStatement
   if (fname.endsWith(".gz")) return false;
   return true;
 protected boolean isPathAcceptable(final Path pPath1) {
   String path = pPath1.toString().toLowerCase();
   if (path.startsWith("part-r-")) return true;
   String extension = getExtension();
   if (extension != null && path.endsWith(extension.toLowerCase())) return true;
   if (extension != null && path.endsWith(extension.toLowerCase() + ".gz")) return true;
   //noinspection SimplifiableIfStatement,RedundantIfStatement
   if (extension == null) return true;
   return false;
 private int getNumSecrets(Path secretsPath) throws Exception {
   FileSystem fileSystem = FileSystem.get(secretsPath.toUri(), getConf());
   FSDataInputStream inputStream =;
   BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream));
   String currentLine;
   int numLines = 0;
   while ((currentLine = reader.readLine()) != null) {
     if (!currentLine.isEmpty()) {
   return numLines;
 private void generateSaltIfNeeded(Path saltFilePath, Path secretsPath) throws Exception {
   FileSystem fileSystem = FileSystem.get(saltFilePath.toUri(), getConf());
   if (!fileSystem.exists(saltFilePath)) {
     FSDataOutputStream outputStream = fileSystem.create(saltFilePath);
     BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(outputStream));
     int numSaltsToGenerate = getNumSecrets(secretsPath);
     System.out.printf("Generating %d salts\n", numSaltsToGenerate);
     for (int i = 0; i < numSaltsToGenerate; i++) {