public RecordReader<Text, Text> getRecordReader(
    InputSplit genericSplit, JobConf job, Reporter reporter) throws IOException {
  reporter.setStatus(genericSplit.toString());
  FileSplit split = (FileSplit) genericSplit;
  final Path file = split.getPath();
  FileSystem fs = file.getFileSystem(job);
  FSDataInputStream fileIn = fs.open(file);
  // Compressed inputs are not handled by this reader.
  if (compressionCodecs != null && compressionCodecs.getCodec(file) != null) {
    throw new RuntimeException("Not handling compression!");
  }
  return new StreamXmlRecordReader(fileIn, split, reporter, job, FileSystem.get(job));
}
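For context, a minimal driver sketch showing how an old-API input format such as the one above is typically wired into a job. MyXmlInputFormat is a hypothetical name standing in for whatever class declares the getRecordReader method shown; everything else is the standard org.apache.hadoop.mapred API.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class StreamXmlDriver {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf(StreamXmlDriver.class);
    job.setJobName("stream-xml-example");
    // MyXmlInputFormat is hypothetical: substitute the InputFormat that owns
    // the getRecordReader(...) implementation shown above.
    job.setInputFormat(MyXmlInputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    JobClient.runJob(job);
  }
}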
 /**
  * Instantiates a FileCollectionRecordReader using the specified split (which is assumed to be a
  * CombineFileSplit).
  *
  * @param genericSplit contains files to be processed, assumed to be a CombineFileSplit
  * @param job JobConf of this job
  * @param reporter to report progress
  */
 @SuppressWarnings("unchecked")
 @Override
 public RecordReader<Text, SplitAwareWrapper<Document>> getRecordReader(
     InputSplit genericSplit, JobConf job, Reporter reporter) throws IOException {
   reporter.setStatus(genericSplit.toString());
   return new FileCollectionRecordReader(job, (PositionAwareSplit<CombineFileSplit>) genericSplit);
 }
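Since the split handed to FileCollectionRecordReader is assumed to wrap a CombineFileSplit, it may help to recall that class's shape. The helper below is illustrative only (not part of the code above) and simply walks the per-file entries packed into one combined split.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.lib.CombineFileSplit;

// Illustrative helper: list the files, offsets, and lengths inside one CombineFileSplit.
public final class CombineSplitDump {
  public static void dump(CombineFileSplit split) {
    for (int i = 0; i < split.getNumPaths(); i++) {
      Path p = split.getPath(i);        // i-th file in the combined split
      long off = split.getOffset(i);    // start offset within that file
      long len = split.getLength(i);    // bytes assigned from that file
      System.out.println(p + " offset=" + off + " length=" + len);
    }
  }
}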
    @Override
    public RecordReader getRecordReader(InputSplit split, JobConf job, Reporter reporter) {

      reporter.setStatus(split.toString());

      // Identify which segment part this split belongs to.
      SegmentPart segmentPart;
      final String spString;
      try {
        segmentPart = SegmentPart.get((FileSplit) split);
        spString = segmentPart.toString();
      } catch (IOException e) {
        throw new RuntimeException("Cannot identify segment:", e);
      }

      try {
        return new SequenceFileRecordReader(job, (FileSplit) split) {

          @Override
          public synchronized boolean next(Writable key, Writable value) throws IOException {
            LOG.debug("Running OIF.next()");

            // Wrap the value in a MetaWrapper, allocating a fresh instance of the
            // underlying value class on every call.
            MetaWrapper wrapper = (MetaWrapper) value;
            try {
              wrapper.set(getValueClass().newInstance());
            } catch (Exception e) {
              throw new IOException(e);
            }

            boolean res = super.next(key, (Writable) wrapper.get());
            // Tag the record with the segment part it was read from.
            wrapper.setMeta(SEGMENT_PART_KEY, spString);
            return res;
          }

          @Override
          public Writable createValue() {
            return new MetaWrapper();
          }
        };
      } catch (IOException e) {
        throw new RuntimeException("Cannot create RecordReader: ", e);
      }
    }
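The anonymous reader above leans on the old mapred contract: the framework obtains the key and value objects once via createKey()/createValue() and reuses them across next() calls, which is why createValue() must return a MetaWrapper and next() must refresh the wrapped value each time. The loop below is an illustrative sketch of that contract, not code from the original source.

import java.io.IOException;
import org.apache.hadoop.mapred.RecordReader;

// Illustrative only: the loop the framework (MapRunner) effectively performs.
public final class ReaderLoop {
  public static <K, V> void drain(RecordReader<K, V> reader) throws IOException {
    K key = reader.createKey();        // created once...
    V value = reader.createValue();    // ...and reused on every next() call
    try {
      while (reader.next(key, value)) {
        // process (key, value); with the reader above, value is the MetaWrapper
        // carrying the SEGMENT_PART_KEY metadata set in next()
      }
    } finally {
      reader.close();
    }
  }
}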
 @Override
 public RecordReader<LongWritable, Text> getRecordReader(
     InputSplit split, JobConf job, Reporter reporter) throws IOException {
   reporter.setStatus(split.toString());
   return new ExampleRecordReader(job, (FileSplit) split);
 }
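The body of ExampleRecordReader is not shown above. Assuming it reads plain text lines, a minimal stand-in could simply extend org.apache.hadoop.mapred.LineRecordReader, which already implements RecordReader<LongWritable, Text>; the class below is hypothetical.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.LineRecordReader;

// Hypothetical stand-in for ExampleRecordReader; the real class is not shown above.
public class ExampleRecordReader extends LineRecordReader {
  public ExampleRecordReader(Configuration job, FileSplit split) throws IOException {
    super(job, split);
  }
}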
 // constructor used by the old API
 ESRecordReader(
     org.apache.hadoop.mapred.InputSplit split, Configuration job, Reporter reporter) {
   reporter.setStatus(split.toString());
   init((ESInputSplit) split, job);
 }
 @Override
 public RecordReader<LongWritable, Text> getRecordReader(
     InputSplit split, JobConf conf, Reporter reporter) throws IOException {
   reporter.setStatus(split.toString());
   return new DeprecatedLzoLineRecordReader(conf, (FileSplit) split);
 }