public static RecordReader<?, ?> createRecordReader( Configuration configuration, Path path, long start, long length, Properties schema, List<HiveColumnHandle> columns) { // determine which hive columns we will read List<HiveColumnHandle> readColumns = ImmutableList.copyOf(filter(columns, column -> column.getColumnType() == REGULAR)); List<Integer> readHiveColumnIndexes = ImmutableList.copyOf(transform(readColumns, HiveColumnHandle::getHiveColumnIndex)); // Tell hive the columns we would like to read, this lets hive optimize reading column oriented // files setReadColumns(configuration, readHiveColumnIndexes); InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, true); JobConf jobConf = new JobConf(configuration); FileSplit fileSplit = new FileSplit(path, start, length, (String[]) null); // propagate serialization configuration to getRecordReader schema .stringPropertyNames() .stream() .filter(name -> name.startsWith("serialization.")) .forEach(name -> jobConf.set(name, schema.getProperty(name))); try { return retry() .stopOnIllegalExceptions() .run( "createRecordReader", () -> inputFormat.getRecordReader(fileSplit, jobConf, Reporter.NULL)); } catch (Exception e) { throw new PrestoException( HIVE_CANNOT_OPEN_SPLIT, format( "Error opening Hive split %s (offset=%s, length=%s) using %s: %s", path, start, length, getInputFormatName(schema), e.getMessage()), e); } }
@Override public void initialize(InputSplit split, final TaskAttemptContext context) throws IOException, InterruptedException { org.apache.hadoop.mapred.InputSplit oldSplit; if (split.getClass() == FileSplit.class) { oldSplit = new org.apache.hadoop.mapred.FileSplit( ((FileSplit) split).getPath(), ((FileSplit) split).getStart(), ((FileSplit) split).getLength(), split.getLocations()); } else { oldSplit = ((InputSplitWrapper) split).realSplit; } @SuppressWarnings("unchecked") Reporter reporter = new Reporter() { // Reporter interface over ctx final TaskInputOutputContext ioCtx = context instanceof TaskInputOutputContext ? (TaskInputOutputContext) context : null; public void progress() { HadoopCompat.progress(context); } // @Override public float getProgress() { return (ioCtx != null) ? ioCtx.getProgress() : 0; } public void setStatus(String status) { if (ioCtx != null) HadoopCompat.setStatus(ioCtx, status); } public void incrCounter(String group, String counter, long amount) { if (ioCtx != null) HadoopCompat.incrementCounter(ioCtx.getCounter(group, counter), amount); } @SuppressWarnings("unchecked") public void incrCounter(Enum<?> key, long amount) { if (ioCtx != null) HadoopCompat.incrementCounter(ioCtx.getCounter(key), amount); } public org.apache.hadoop.mapred.InputSplit getInputSplit() throws UnsupportedOperationException { throw new UnsupportedOperationException(); } public Counter getCounter(String group, String name) { return ioCtx != null ? (Counter) HadoopCompat.getCounter(ioCtx, group, name) : null; } @SuppressWarnings("unchecked") public Counter getCounter(Enum<?> name) { return ioCtx != null ? (Counter) ioCtx.getCounter(name) : null; } }; realReader = realInputFormat.getRecordReader( oldSplit, (JobConf) HadoopCompat.getConfiguration(context), reporter); keyObj = realReader.createKey(); valueObj = realReader.createValue(); }
/**
 * Returns the record reader produced by {@code baseInputFormat} for the given split.
 *
 * <p>All three arguments are forwarded unchanged.
 *
 * @param split the split to read
 * @param job the job configuration
 * @param reporter the reporter passed through to the underlying format
 * @return the reader created by {@code baseInputFormat}
 * @throws IOException if the underlying format fails to create the reader
 */
@Override
public RecordReader<MapWritable, MapWritable> getRecordReader(
    InputSplit split, JobConf job, Reporter reporter) throws IOException {
  RecordReader<MapWritable, MapWritable> delegateReader =
      baseInputFormat.getRecordReader(split, job, reporter);
  return delegateReader;
}