예제 #1
0
  /**
   * Computes the input splits for this dataset by delegating to the input
   * format that corresponds to the format in the dataset's descriptor.
   *
   * @param jobContext the job context; its configuration seeds a temporary
   *     {@code Job}, and the context itself is handed to the delegate
   * @return the splits produced by the delegate input format, or an empty
   *     list when {@code setInputPaths} returns {@code false}
   * @throws IOException propagated from the underlying calls
   * @throws UnsupportedOperationException if the descriptor's format is not
   *     one of the formats handled below
   */
  @Override
  @SuppressWarnings({"unchecked", "deprecation"})
  public List<InputSplit> getSplits(JobContext jobContext) throws IOException {
    Configuration conf = Hadoop.JobContext.getConfiguration.invoke(jobContext);
    // The Job(Configuration) constructor is deprecated, hence the suppression.
    // NOTE(review): presumably kept for compatibility with older Hadoop
    // releases -- confirm before replacing it.
    Job job = new Job(conf);
    Format format = dataset.getDescriptor().getFormat();

    if (!setInputPaths(jobContext, job)) {
      // NOTE(review): a false result presumably means there is nothing to
      // read -- verify against setInputPaths.
      return ImmutableList.of();
    }

    if (Formats.AVRO.equals(format)) {
      AvroJob.setInputKeySchema(job, dataset.getDescriptor().getSchema());
      AvroKeyInputFormat<E> delegate = new AvroKeyInputFormat<E>();
      return delegate.getSplits(jobContext);
    } else if (Formats.PARQUET.equals(format)) {
      // TODO: use later version of parquet (with https://github.com/Parquet/parquet-mr/pull/282)
      // so we can set the schema correctly
      // AvroParquetInputFormat.setReadSchema(job, view.getDescriptor().getSchema());
      AvroParquetInputFormat delegate = new AvroParquetInputFormat();
      return delegate.getSplits(jobContext);
    } else if (Formats.JSON.equals(format)) {
      // Parameterized (not raw) so the returned list keeps its element type
      // and no unchecked conversion is needed.
      JSONInputFormat<E> delegate = new JSONInputFormat<E>();
      return delegate.getSplits(jobContext);
    } else if (Formats.CSV.equals(format)) {
      // Parameterized (not raw): the raw type erased the return type of
      // getSplits, which is what produced the unchecked warning here.
      CSVInputFormat<E> delegate = new CSVInputFormat<E>();
      return delegate.getSplits(jobContext);
    } else if (Formats.INPUTFORMAT.equals(format)) {
      return InputFormatUtil.newInputFormatInstance(dataset.getDescriptor())
          .getSplits(jobContext);
    } else {
      throw new UnsupportedOperationException("Not a supported format: " + format);
    }
  }
예제 #2
0
 /**
  * Creates the record reader that matches the format in the dataset's
  * descriptor.
  *
  * <p>Only the JSON and CSV branches pass the split and task context to the
  * reader they create; the other branches return a reader built without them.
  * NOTE(review): presumably the caller initializes those readers afterwards --
  * confirm against the call site.
  *
  * @param inputSplit the split to read; used by the JSON and CSV branches
  * @param taskAttemptContext the task context; used by the JSON and CSV branches
  * @return a record reader for the dataset's format
  * @throws IOException propagated from the delegate input format
  * @throws InterruptedException propagated from the delegate input format
  * @throws UnsupportedOperationException if the descriptor's format is not
  *     one of the formats handled below
  */
 @SuppressWarnings("unchecked")
 private RecordReader<E, Void> createUnfilteredRecordReader(
     InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
     throws IOException, InterruptedException {
   final Format storageFormat = dataset.getDescriptor().getFormat();

   // Each supported format returns straight away; falling through every
   // check means the format is not handled.
   if (Formats.AVRO.equals(storageFormat)) {
     return new AvroKeyReaderWrapper(new AvroKeyInputFormat<E>());
   }

   if (Formats.PARQUET.equals(storageFormat)) {
     return new ValueReaderWrapper(new AvroParquetInputFormat());
   }

   if (Formats.JSON.equals(storageFormat)) {
     JSONInputFormat<E> jsonInput = new JSONInputFormat<E>();
     jsonInput.setDescriptor(dataset.getDescriptor());
     jsonInput.setType(dataset.getType());
     return jsonInput.createRecordReader(inputSplit, taskAttemptContext);
   }

   if (Formats.CSV.equals(storageFormat)) {
     CSVInputFormat<E> csvInput = new CSVInputFormat<E>();
     csvInput.setDescriptor(dataset.getDescriptor());
     csvInput.setType(dataset.getType());
     return csvInput.createRecordReader(inputSplit, taskAttemptContext);
   }

   if (Formats.INPUTFORMAT.equals(storageFormat)) {
     return InputFormatUtil.newRecordReader(dataset.getDescriptor());
   }

   throw new UnsupportedOperationException("Not a supported format: " + storageFormat);
 }