public RecordReader<Text, Text> getRecordReader(
    InputSplit genericSplit, JobConf job, Reporter reporter) throws IOException {
  reporter.setStatus(genericSplit.toString());
  FileSplit split = (FileSplit) genericSplit;
  final Path file = split.getPath();
  FileSystem fs = file.getFileSystem(job);
  FSDataInputStream fileIn = fs.open(split.getPath());
  if (compressionCodecs != null && compressionCodecs.getCodec(file) != null) {
    throw new RuntimeException("Not handling compression!");
  }
  return new StreamXmlRecordReader(fileIn, split, reporter, job, FileSystem.get(job));
}
@SuppressWarnings("unchecked") @Override /** * Instantiates a FileCollectionRecordReader using the specified spit (which is assumed to be a * CombineFileSplit. * * @param genericSplit contains files to be processed, assumed to be a CombineFileSplit * @param job JobConf of this job * @param reported To report progress */ public RecordReader<Text, SplitAwareWrapper<Document>> getRecordReader( InputSplit genericSplit, JobConf job, Reporter reporter) throws IOException { reporter.setStatus(genericSplit.toString()); return new FileCollectionRecordReader(job, (PositionAwareSplit<CombineFileSplit>) genericSplit); }
@Override
public RecordReader getRecordReader(InputSplit split, JobConf job, Reporter reporter) {
  // try { compatible with hadoop-0.14 TODO MC
  reporter.setStatus(split.toString());
  /*
  } catch (IOException e) {
    throw new RuntimeException("Cannot set status for reported:", e);
  }
  */
  // find part name
  SegmentPart segmentPart;
  final String spString;
  try {
    segmentPart = SegmentPart.get((FileSplit) split);
    spString = segmentPart.toString();
  } catch (IOException e) {
    throw new RuntimeException("Cannot identify segment:", e);
  }
  try {
    return new SequenceFileRecordReader(job, (FileSplit) split) {
      @Override
      public synchronized boolean next(Writable key, Writable value) throws IOException {
        LOG.debug("Running OIF.next()");
        MetaWrapper wrapper = (MetaWrapper) value;
        try {
          wrapper.set(getValueClass().newInstance());
        } catch (Exception e) {
          throw new IOException(e.toString());
        }
        boolean res = super.next(key, (Writable) wrapper.get());
        wrapper.setMeta(SEGMENT_PART_KEY, spString);
        return res;
      }

      @Override
      public Writable createValue() {
        return new MetaWrapper();
      }
    };
  } catch (IOException e) {
    throw new RuntimeException("Cannot create RecordReader: ", e);
  }
}
@Override
public RecordReader<LongWritable, Text> getRecordReader(
    InputSplit split, JobConf job, Reporter reporter) throws IOException {
  reporter.setStatus(split.toString());
  return new ExampleRecordReader(job, (FileSplit) split);
}
// constructor used by the old API
ESRecordReader(
    org.apache.hadoop.mapred.InputSplit split, Configuration job, Reporter reporter) {
  reporter.setStatus(split.toString());
  init((ESInputSplit) split, job);
}
@Override
public RecordReader<LongWritable, Text> getRecordReader(
    InputSplit split, JobConf conf, Reporter reporter) throws IOException {
  reporter.setStatus(split.toString());
  return new DeprecatedLzoLineRecordReader(conf, (FileSplit) split);
}
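Each snippet above implements the old-API (org.apache.hadoop.mapred) InputFormat.getRecordReader contract: the framework calls it once per InputSplit and pumps the returned reader's key/value pairs into the mapper. The driver below is a minimal sketch of how such a format is wired into a job; it assumes the last reader is served by hadoop-lzo's DeprecatedLzoTextInputFormat, and the class name, input, and output paths are illustrative rather than taken from the snippets.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;
import com.hadoop.mapred.DeprecatedLzoTextInputFormat; // assumed hadoop-lzo format wrapping the reader above

public class RecordReaderDriver {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf(RecordReaderDriver.class);
    job.setJobName("record-reader-demo");

    // The framework invokes getRecordReader(split, job, reporter) once per split of this format.
    job.setInputFormat(DeprecatedLzoTextInputFormat.class);
    job.setMapperClass(IdentityMapper.class);
    job.setReducerClass(IdentityReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    JobClient.runJob(job);
  }
}

Any of the other formats shown earlier can be substituted via setInputFormat, as long as the mapper's key/value types match what the corresponding RecordReader emits.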