@Override @SuppressWarnings({"unchecked", "deprecation"}) public List<InputSplit> getSplits(JobContext jobContext) throws IOException { Configuration conf = Hadoop.JobContext.getConfiguration.invoke(jobContext); Job job = new Job(conf); Format format = dataset.getDescriptor().getFormat(); if (setInputPaths(jobContext, job)) { if (Formats.AVRO.equals(format)) { AvroJob.setInputKeySchema(job, dataset.getDescriptor().getSchema()); AvroKeyInputFormat<E> delegate = new AvroKeyInputFormat<E>(); return delegate.getSplits(jobContext); } else if (Formats.PARQUET.equals(format)) { // TODO: use later version of parquet (with https://github.com/Parquet/parquet-mr/pull/282) // so we can set the schema correctly // AvroParquetInputFormat.setReadSchema(job, view.getDescriptor().getSchema()); AvroParquetInputFormat delegate = new AvroParquetInputFormat(); return delegate.getSplits(jobContext); } else if (Formats.JSON.equals(format)) { return new JSONInputFormat().getSplits(jobContext); } else if (Formats.CSV.equals(format)) { // this generates an unchecked cast exception? return new CSVInputFormat().getSplits(jobContext); } else if (Formats.INPUTFORMAT.equals(format)) { return InputFormatUtil.newInputFormatInstance(dataset.getDescriptor()) .getSplits(jobContext); } else { throw new UnsupportedOperationException("Not a supported format: " + format); } } else { return ImmutableList.of(); } }
/**
 * Builds the delegate {@link RecordReader} for the dataset's storage format,
 * with no entity filtering applied. Avro and Parquet readers are wrapped so
 * they expose records as keys; JSON and CSV delegates are configured with the
 * dataset's descriptor and entity type before creating their reader.
 *
 * @param inputSplit the split the reader will consume
 * @param taskAttemptContext the Hadoop task attempt context
 * @return a format-specific record reader over the split
 * @throws IOException if the delegate fails to create its reader
 * @throws InterruptedException if reader creation is interrupted
 * @throws UnsupportedOperationException if the dataset's format is unknown
 */
@SuppressWarnings("unchecked")
private RecordReader<E, Void> createUnfilteredRecordReader(
    InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
    throws IOException, InterruptedException {
  Format format = dataset.getDescriptor().getFormat();

  if (Formats.AVRO.equals(format)) {
    return new AvroKeyReaderWrapper(new AvroKeyInputFormat<E>());
  }
  if (Formats.PARQUET.equals(format)) {
    return new ValueReaderWrapper(new AvroParquetInputFormat());
  }
  if (Formats.JSON.equals(format)) {
    JSONInputFormat<E> jsonDelegate = new JSONInputFormat<E>();
    jsonDelegate.setDescriptor(dataset.getDescriptor());
    jsonDelegate.setType(dataset.getType());
    return jsonDelegate.createRecordReader(inputSplit, taskAttemptContext);
  }
  if (Formats.CSV.equals(format)) {
    CSVInputFormat<E> csvDelegate = new CSVInputFormat<E>();
    csvDelegate.setDescriptor(dataset.getDescriptor());
    csvDelegate.setType(dataset.getType());
    return csvDelegate.createRecordReader(inputSplit, taskAttemptContext);
  }
  if (Formats.INPUTFORMAT.equals(format)) {
    return InputFormatUtil.newRecordReader(dataset.getDescriptor());
  }
  throw new UnsupportedOperationException("Not a supported format: " + format);
}