/**
 * Computes splits with the wrapped old-API (mapred) InputFormat and converts
 * them into new-API (mapreduce) splits.
 */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
  // At runtime the context's configuration is expected to be a JobConf.
  JobConf jobConf = (JobConf) HadoopCompat.getConfiguration(context);
  initInputFormat(jobConf);

  org.apache.hadoop.mapred.InputSplit[] splits =
      realInputFormat.getSplits(jobConf, jobConf.getNumMapTasks());

  if (splits == null) {
    return null;
  }

  List<InputSplit> resultSplits = new ArrayList<InputSplit>(splits.length);

  for (org.apache.hadoop.mapred.InputSplit split : splits) {
    if (split.getClass() == org.apache.hadoop.mapred.FileSplit.class) {
      // FileSplits are converted directly to their mapreduce equivalents;
      // some consumers special-case FileSplit, so the concrete type is
      // preserved rather than wrapped.
      org.apache.hadoop.mapred.FileSplit mapredFileSplit =
          (org.apache.hadoop.mapred.FileSplit) split;
      resultSplits.add(new FileSplit(
          mapredFileSplit.getPath(),
          mapredFileSplit.getStart(),
          mapredFileSplit.getLength(),
          mapredFileSplit.getLocations()));
    } else {
      // Any other old-API split is carried across the API boundary in a wrapper.
      resultSplits.add(new InputSplitWrapper(split));
    }
  }
  return resultSplits;
}
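// The InputSplitWrapper used above is not shown here. Below is a minimal
// sketch of how such a wrapper could adapt an old-API split to the new API;
// the class name OldApiSplitWrapper and its details are assumptions, not the
// actual implementation. It relies only on the fact that old-API splits are
// Writable, and records the concrete split class so the reading side can
// re-instantiate it before delegating readFields().

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.util.ReflectionUtils;

public class OldApiSplitWrapper extends InputSplit implements Writable {

  private org.apache.hadoop.mapred.InputSplit realSplit;

  public OldApiSplitWrapper() {}  // no-arg constructor required for deserialization

  public OldApiSplitWrapper(org.apache.hadoop.mapred.InputSplit realSplit) {
    this.realSplit = realSplit;
  }

  @Override
  public long getLength() throws IOException {
    return realSplit.getLength();  // delegate to the wrapped split
  }

  @Override
  public String[] getLocations() throws IOException {
    return realSplit.getLocations();
  }

  @Override
  public void write(DataOutput out) throws IOException {
    // Write the concrete class name first, then the split's own fields.
    Text.writeString(out, realSplit.getClass().getName());
    realSplit.write(out);
  }

  @Override
  public void readFields(DataInput in) throws IOException {
    String className = Text.readString(in);
    try {
      // A real implementation would propagate the job configuration here
      // (e.g. by implementing Configurable) instead of creating a fresh one.
      realSplit = (org.apache.hadoop.mapred.InputSplit) ReflectionUtils.newInstance(
          Class.forName(className), new Configuration());
    } catch (ClassNotFoundException e) {
      throw new IOException(e);
    }
    realSplit.readFields(in);
  }
}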
/**
 * Returns the total input size, in bytes, for each input format, computed
 * by summing the lengths of its splits.
 */
private long[] getInputSizes(InputFormat[] inputFormats, JobConf[] jobConfs) throws IOException {
  long[] inputSizes = new long[inputFormats.length];
  for (int i = 0; i < inputFormats.length; i++) {
    InputFormat inputFormat = inputFormats[i];
    // numSplits = 1: any split set whose lengths cover the input will do;
    // the actual number of splits returned is irrelevant here.
    InputSplit[] splits = inputFormat.getSplits(jobConfs[i], 1);
    for (InputSplit split : splits) {
      inputSizes[i] += split.getLength();
    }
  }
  return inputSizes;
}
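// One natural consumer of getInputSizes() is apportioning a global split
// budget across the formats in proportion to their input bytes. The helper
// below is purely illustrative and not part of the original code.
private int[] apportionSplits(long[] inputSizes, int totalSplits) {
  long totalBytes = 0;
  for (long size : inputSizes) {
    totalBytes += size;
  }
  int[] quotas = new int[inputSizes.length];
  for (int i = 0; i < inputSizes.length; i++) {
    // Every format gets at least one split, even when its share rounds to 0.
    quotas[i] = (totalBytes == 0)
        ? 1
        : Math.max(1, (int) (inputSizes[i] * totalSplits / totalBytes));
  }
  return quotas;
}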
/**
 * Returns the number of splits each input format produces when asked for
 * {@code numSplits} splits. Note that despite the name, the values are
 * split counts, not byte sizes.
 */
private long[] getInputSplitSizes(InputFormat[] inputFormats, JobConf[] jobConfs, int numSplits)
    throws IOException {
  long[] splitCounts = new long[inputFormats.length];
  for (int i = 0; i < inputFormats.length; i++) {
    InputFormat inputFormat = inputFormats[i];
    InputSplit[] splits = inputFormat.getSplits(jobConfs[i], numSplits);
    splitCounts[i] = splits.length;
  }
  return splitCounts;
}
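// numSplits is only a hint: an InputFormat may return more or fewer splits
// depending on block boundaries and configured minimum split sizes, which is
// why getInputSplitSizes() reports what getSplits() actually produced rather
// than echoing the hint. The method below is an illustrative check, not part
// of the original code; the input path is an assumption.
private void printSplitCount(JobConf conf) throws IOException {
  org.apache.hadoop.mapred.FileInputFormat.setInputPaths(conf, "/tmp/input");
  org.apache.hadoop.mapred.TextInputFormat textInput = new org.apache.hadoop.mapred.TextInputFormat();
  textInput.configure(conf);  // TextInputFormat is JobConfigurable
  InputSplit[] splits = textInput.getSplits(conf, 4);  // ask for 4 splits...
  System.out.println("requested 4 splits, got " + splits.length);  // ...may get more or fewer
}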
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  // Pure delegation: split computation is left entirely to the wrapped format.
  return baseInputFormat.getSplits(job, numSplits);
}