コード例 #1
0
    /**
     * Splits the input files into tasks handled by a single node. The input files have to be read
     * to do this, because the splits are based on the number of items in a sequence file.
     *
     * <p>Every input path must be a directory. Within each directory only a non-directory entry
     * named {@code job.getJobName() + IN_FILE_SUFFIX} is read. One split is emitted for every
     * {@code filesPerTask} records of that file, plus a final split covering any remaining
     * records.
     *
     * @param job the job context supplying the configuration, the job name and the input paths
     * @return the input splits created from all matching input files
     * @throws IOException if an input path is not a directory, or if an underlying file system or
     *     sequence file call fails
     */
    @Override
    public List<InputSplit> getSplits(JobContext job) throws IOException {
      // NOTE(review): a filesPerTask of 0 would make the modulo below throw ArithmeticException;
      // presumably the configuration always yields a positive value -- confirm.
      long filesPerTask = DistBlockFixer.filesPerTask(job.getConfiguration());

      Path[] inPaths = getInputPaths(job);

      // the only file name that is considered inside each input directory
      String expectedFileName = job.getJobName() + IN_FILE_SUFFIX;

      List<InputSplit> splits = new ArrayList<InputSplit>();

      long fileCounter = 0;

      for (Path inPath : inPaths) {

        FileSystem fs = inPath.getFileSystem(job.getConfiguration());

        if (!fs.getFileStatus(inPath).isDir()) {
          throw new IOException(inPath.toString() + " is not a directory");
        }

        FileStatus[] inFiles = fs.listStatus(inPath);

        for (FileStatus inFileStatus : inFiles) {
          Path inFile = inFileStatus.getPath();

          if (inFileStatus.isDir() || !inFile.getName().equals(expectedFileName)) {
            continue;
          }

          fileCounter++;
          SequenceFile.Reader inFileReader =
              new SequenceFile.Reader(fs, inFile, job.getConfiguration());

          // everything that touches the open reader happens inside the try block so the reader
          // is closed even if the very first position lookup fails
          try {
            long startPos = inFileReader.getPosition();
            long counter = 0;

            // create an input split every filesPerTask items in the sequence
            LongWritable key = new LongWritable();
            Text value = new Text();
            while (inFileReader.next(key, value)) {
              if (counter % filesPerTask == filesPerTask - 1L) {
                splits.add(
                    new FileSplit(inFile, startPos, inFileReader.getPosition() - startPos, null));
                startPos = inFileReader.getPosition();
              }
              counter++;
            }

            // create input split for remaining items if necessary
            // this includes the case where no splits were created by the loop
            if (startPos != inFileReader.getPosition()) {
              splits.add(
                  new FileSplit(inFile, startPos, inFileReader.getPosition() - startPos, null));
            }
          } finally {
            inFileReader.close();
          }
        }
      }

      LOG.info("created " + splits.size() + " input splits from " + fileCounter + " files");

      return splits;
    }
コード例 #2
0
 public DistBlockFixer(Configuration conf) {
   super(conf);
   filesPerTask = DistBlockFixer.filesPerTask(getConf());
   maxPendingJobs = DistBlockFixer.maxPendingJobs(getConf());
   maxFixTimeForFile = DistBlockFixer.maxFixTimeForFile(getConf());
 }