@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
  JobConf jobConf = (JobConf) HadoopCompat.getConfiguration(context);
  initInputFormat(jobConf);

  // Ask the wrapped old-API (mapred) format for its splits, then re-express them as
  // new-API (mapreduce) splits: plain FileSplits are converted field by field, anything
  // else is carried through an InputSplitWrapper.
  org.apache.hadoop.mapred.InputSplit[] splits =
      realInputFormat.getSplits(jobConf, jobConf.getNumMapTasks());

  if (splits == null) {
    return null;
  }

  List<InputSplit> resultSplits = new ArrayList<InputSplit>(splits.length);

  for (org.apache.hadoop.mapred.InputSplit split : splits) {
    if (split.getClass() == org.apache.hadoop.mapred.FileSplit.class) {
      org.apache.hadoop.mapred.FileSplit mapredFileSplit =
          ((org.apache.hadoop.mapred.FileSplit) split);
      resultSplits.add(
          new FileSplit(
              mapredFileSplit.getPath(),
              mapredFileSplit.getStart(),
              mapredFileSplit.getLength(),
              mapredFileSplit.getLocations()));
    } else {
      resultSplits.add(new InputSplitWrapper(split));
    }
  }

  return resultSplits;
}
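// For illustration only: a minimal sketch of the opposite-direction conversion, turning a
// new-API FileSplit back into an old-API one with the same field-by-field copy used above.
// The helper name `toMapredFileSplit` is hypothetical and not part of the code in this file.
private static org.apache.hadoop.mapred.FileSplit toMapredFileSplit(
    org.apache.hadoop.mapreduce.lib.input.FileSplit split)
    throws IOException, InterruptedException {
  return new org.apache.hadoop.mapred.FileSplit(
      split.getPath(), split.getStart(), split.getLength(), split.getLocations());
}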
private long[] getInputSizes(InputFormat[] inputFormats, JobConf[] jobConfs) throws IOException {
  long[] inputSizes = new long[inputFormats.length];
  for (int i = 0; i < inputFormats.length; i++) {
    InputFormat inputFormat = inputFormats[i];
    InputSplit[] splits = inputFormat.getSplits(jobConfs[i], 1);
    for (InputSplit split : splits) {
      inputSizes[i] += split.getLength();
    }
  }
  return inputSizes;
}
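// For illustration only: one hedged way the byte counts returned by getInputSizes() could be
// consumed, turning them into relative weights per input. The helper name `toWeights` is
// hypothetical; nothing here is implied by getInputSizes() itself.
private static double[] toWeights(long[] inputSizes) {
  long total = 0;
  for (long size : inputSizes) {
    total += size;
  }
  double[] weights = new double[inputSizes.length];
  for (int i = 0; i < inputSizes.length; i++) {
    weights[i] = total == 0 ? 0.0 : (double) inputSizes[i] / total;
  }
  return weights;
}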
public static InputSplit deserializeInputSplit(String base64, String className)
    throws IOException, ReflectiveOperationException {
  // Note: getDeclaredConstructor() throws NoSuchMethodException (a ReflectiveOperationException)
  // rather than returning null when no no-arg constructor exists, so the null check below is
  // defensive only.
  Constructor<?> constructor = Class.forName(className).getDeclaredConstructor();
  if (constructor == null) {
    throw new ReflectiveOperationException(
        "Class " + className + " does not implement a default constructor.");
  }
  constructor.setAccessible(true);

  // Instantiate the split through its no-arg constructor, then restore its state from the
  // Base64-encoded Writable bytes.
  InputSplit split = (InputSplit) constructor.newInstance();
  ByteArrayDataInput byteArrayDataInput = ByteStreams.newDataInput(Base64.decodeBase64(base64));
  split.readFields(byteArrayDataInput);
  return split;
}
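// For illustration only: a sketch of the matching encode step, under the assumption that splits
// are serialized by writing their Writable form and Base64-encoding the bytes, mirroring
// deserializeInputSplit() above. `serializeInputSplit` is a hypothetical name; it uses Guava's
// ByteArrayDataOutput and commons-codec's Base64.
public static String serializeInputSplit(InputSplit split) throws IOException {
  ByteArrayDataOutput byteArrayDataOutput = ByteStreams.newDataOutput();
  split.write(byteArrayDataOutput);
  return Base64.encodeBase64String(byteArrayDataOutput.toByteArray());
}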
public RecordReader<Text, Text> getRecordReader(
    InputSplit genericSplit, JobConf job, Reporter reporter) throws IOException {
  reporter.setStatus(genericSplit.toString());

  FileSplit split = (FileSplit) genericSplit;
  final Path file = split.getPath();
  FileSystem fs = file.getFileSystem(job);
  FSDataInputStream fileIn = fs.open(split.getPath());
  if (compressionCodecs != null && compressionCodecs.getCodec(file) != null) {
    throw new RuntimeException("Not handling compression!");
  }
  return new StreamXmlRecordReader(fileIn, split, reporter, job, FileSystem.get(job));
}
@SuppressWarnings("unchecked") @Override /** * Instantiates a FileCollectionRecordReader using the specified spit (which is assumed to be a * CombineFileSplit. * * @param genericSplit contains files to be processed, assumed to be a CombineFileSplit * @param job JobConf of this job * @param reported To report progress */ public RecordReader<Text, SplitAwareWrapper<Document>> getRecordReader( InputSplit genericSplit, JobConf job, Reporter reporter) throws IOException { reporter.setStatus(genericSplit.toString()); return new FileCollectionRecordReader(job, (PositionAwareSplit<CombineFileSplit>) genericSplit); }
@Override
public RecordReader getRecordReader(InputSplit split, JobConf job, Reporter reporter) {
  // try{ compatible with hadoop-0.14 TODO MC
  reporter.setStatus(split.toString());
  /* } catch (IOException e) {
    throw new RuntimeException("Cannot set status for reporter:", e);
  } */

  // find part name
  SegmentPart segmentPart;
  final String spString;
  try {
    segmentPart = SegmentPart.get((FileSplit) split);
    spString = segmentPart.toString();
  } catch (IOException e) {
    throw new RuntimeException("Cannot identify segment:", e);
  }

  try {
    // Wrap the plain SequenceFileRecordReader so every value is handed back inside a
    // MetaWrapper tagged with the segment part it came from.
    return new SequenceFileRecordReader(job, (FileSplit) split) {
      @Override
      public synchronized boolean next(Writable key, Writable value) throws IOException {
        LOG.debug("Running OIF.next()");

        MetaWrapper wrapper = (MetaWrapper) value;
        try {
          wrapper.set(getValueClass().newInstance());
        } catch (Exception e) {
          throw new IOException(e.toString());
        }

        boolean res = super.next(key, (Writable) wrapper.get());
        wrapper.setMeta(SEGMENT_PART_KEY, spString);
        return res;
      }

      @Override
      public Writable createValue() {
        return new MetaWrapper();
      }
    };
  } catch (IOException e) {
    throw new RuntimeException("Cannot create RecordReader: ", e);
  }
}
@Override
public void write(DataOutput out) throws IOException {
  // Record the concrete split class first so the reading side knows what to instantiate,
  // then let the wrapped split serialize its own fields.
  WritableUtils.writeString(out, realSplit.getClass().getName());
  ((Writable) realSplit).write(out);
}
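// For illustration only: a sketch of the matching readFields() side, assuming the layout
// written above (class name first, then the wrapped split's own fields). The instantiation
// strategy and error handling are guesses, not the wrapper's actual implementation.
@Override
public void readFields(DataInput in) throws IOException {
  String className = WritableUtils.readString(in);
  try {
    Constructor<?> constructor = Class.forName(className).getDeclaredConstructor();
    constructor.setAccessible(true);
    realSplit = (org.apache.hadoop.mapred.InputSplit) constructor.newInstance();
  } catch (Exception e) {
    throw new IOException("Cannot recreate split of class " + className, e);
  }
  ((Writable) realSplit).readFields(in);
}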
@Override
public String[] getLocations() throws IOException {
  return realSplit.getLocations();
}
@Override
public long getLength() throws IOException {
  return realSplit.getLength();
}
@Override
public RecordReader<LongWritable, Text> getRecordReader(
    InputSplit split, JobConf job, Reporter reporter) throws IOException {
  reporter.setStatus(split.toString());
  return new ExampleRecordReader(job, (FileSplit) split);
}
// constructor used by the old API
ESRecordReader(org.apache.hadoop.mapred.InputSplit split, Configuration job, Reporter reporter) {
  reporter.setStatus(split.toString());
  init((ESInputSplit) split, job);
}
@Override
public RecordReader<LongWritable, Text> getRecordReader(
    InputSplit split, JobConf conf, Reporter reporter) throws IOException {
  reporter.setStatus(split.toString());
  return new DeprecatedLzoLineRecordReader(conf, (FileSplit) split);
}