Example #1
0
    public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
      FileSplit split = (FileSplit) genericSplit;
      Configuration job = context.getConfiguration();
      m_Sb.setLength(0);
      m_Start = split.getStart();
      m_End = m_Start + split.getLength();
      final Path file = split.getPath();
      compressionCodecs = new CompressionCodecFactory(job);
      final CompressionCodec codec = compressionCodecs.getCodec(file);

      // open the file and seek to the m_Start of the split
      FileSystem fs = file.getFileSystem(job);
      //  getFileStatus fileStatus = fs.getFileStatus(split.getPath());
      //noinspection deprecation
      @SuppressWarnings(value = "deprecated")
      long length = fs.getLength(file);
      FSDataInputStream fileIn = fs.open(split.getPath());
      if (m_Start > 0) fileIn.seek(m_Start);
      if (codec != null) {
        CompressionInputStream inputStream = codec.createInputStream(fileIn);
        m_Input = new BufferedReader(new InputStreamReader(inputStream));
        m_End = length;
      } else {
        m_Input = new BufferedReader(new InputStreamReader(fileIn));
      }
      m_Current = m_Start;
      m_Key = split.getPath().getName();
    }
  /** Get the record reader for the next chunk in this CombineFileSplit. */
  protected boolean initNextRecordReader() throws IOException {

    if (curReader != null) {
      curReader.close();
      curReader = null;
      if (idx > 0) {
        progress += split.getLength(idx - 1); // done processing so far
      }
    }

    // if all chunks have been processed, nothing more to do.
    if (idx == split.getNumPaths()) {
      return false;
    }

    // get a record reader for the idx-th chunk
    try {
      Configuration conf = context.getConfiguration();
      // setup some helper config variables.
      conf.set(MRJobConfig.MAP_INPUT_FILE, split.getPath(idx).toString());
      conf.setLong(MRJobConfig.MAP_INPUT_START, split.getOffset(idx));
      conf.setLong(MRJobConfig.MAP_INPUT_PATH, split.getLength(idx));

      curReader = rrConstructor.newInstance(new Object[] {split, context, Integer.valueOf(idx)});

      if (idx > 0) {
        // initialize() for the first RecordReader will be called by MapTask;
        // we're responsible for initializing subsequent RecordReaders.
        curReader.initialize(split, context);
      }
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
    idx++;
    return true;
  }