public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException { FileSplit split = (FileSplit) genericSplit; Configuration job = context.getConfiguration(); m_Sb.setLength(0); m_Start = split.getStart(); m_End = m_Start + split.getLength(); final Path file = split.getPath(); compressionCodecs = new CompressionCodecFactory(job); final CompressionCodec codec = compressionCodecs.getCodec(file); // open the file and seek to the m_Start of the split FileSystem fs = file.getFileSystem(job); // getFileStatus fileStatus = fs.getFileStatus(split.getPath()); //noinspection deprecation @SuppressWarnings(value = "deprecated") long length = fs.getLength(file); FSDataInputStream fileIn = fs.open(split.getPath()); if (m_Start > 0) fileIn.seek(m_Start); if (codec != null) { CompressionInputStream inputStream = codec.createInputStream(fileIn); m_Input = new BufferedReader(new InputStreamReader(inputStream)); m_End = length; } else { m_Input = new BufferedReader(new InputStreamReader(fileIn)); } m_Current = m_Start; m_Key = split.getPath().getName(); }
/** Get the record reader for the next chunk in this CombineFileSplit. */ protected boolean initNextRecordReader() throws IOException { if (curReader != null) { curReader.close(); curReader = null; if (idx > 0) { progress += split.getLength(idx - 1); // done processing so far } } // if all chunks have been processed, nothing more to do. if (idx == split.getNumPaths()) { return false; } // get a record reader for the idx-th chunk try { Configuration conf = context.getConfiguration(); // setup some helper config variables. conf.set(MRJobConfig.MAP_INPUT_FILE, split.getPath(idx).toString()); conf.setLong(MRJobConfig.MAP_INPUT_START, split.getOffset(idx)); conf.setLong(MRJobConfig.MAP_INPUT_PATH, split.getLength(idx)); curReader = rrConstructor.newInstance(new Object[] {split, context, Integer.valueOf(idx)}); if (idx > 0) { // initialize() for the first RecordReader will be called by MapTask; // we're responsible for initializing subsequent RecordReaders. curReader.initialize(split, context); } } catch (Exception e) { throw new RuntimeException(e); } idx++; return true; }