private ParquetRecordReader<FakeParquetRecord> createParquetRecordReader(
        Configuration configuration,
        Path path,
        long start,
        long length,
        List<HiveColumnHandle> columns,
        boolean useParquetColumnNames)
{
    try {
        // Read the Parquet footer to obtain the row-group (block) and schema metadata
        ParquetMetadata parquetMetadata = ParquetFileReader.readFooter(configuration, path);
        List<BlockMetaData> blocks = parquetMetadata.getBlocks();
        FileMetaData fileMetaData = parquetMetadata.getFileMetaData();

        PrestoReadSupport readSupport = new PrestoReadSupport(useParquetColumnNames, columns, fileMetaData.getSchema());
        ReadContext readContext = readSupport.init(configuration, fileMetaData.getKeyValueMetaData(), fileMetaData.getSchema());

        // Keep only the row groups whose first data page falls inside this split's byte range
        List<BlockMetaData> splitGroup = new ArrayList<>();
        long splitStart = start;
        long splitLength = length;
        for (BlockMetaData block : blocks) {
            long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset();
            if (firstDataPage >= splitStart && firstDataPage < splitStart + splitLength) {
                splitGroup.add(block);
            }
        }

        ParquetInputSplit split = new ParquetInputSplit(
                path,
                splitStart,
                splitLength,
                null,
                splitGroup,
                readContext.getRequestedSchema().toString(),
                fileMetaData.getSchema().toString(),
                fileMetaData.getKeyValueMetaData(),
                readContext.getReadSupportMetadata());

        TaskAttemptContext taskContext = ContextUtil.newTaskAttemptContext(configuration, new TaskAttemptID());
        ParquetRecordReader<FakeParquetRecord> realReader = new PrestoParquetRecordReader(readSupport);
        realReader.initialize(split, taskContext);
        return realReader;
    }
    catch (IOException e) {
        throw Throwables.propagate(e);
    }
    catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw Throwables.propagate(e);
    }
}
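For reference, the row-group pruning above can be exercised on its own: a row group is assigned to the split only when the offset of its first data page falls inside the split's byte range, which keeps each row group from being read by more than one split. The sketch below isolates that check; RowGroupSelector and selectRowGroups are hypothetical names, and the import assumes the pre-Apache parquet.hadoop.metadata package layout used by the surrounding code (newer parquet-mr releases relocate it to org.apache.parquet.hadoop.metadata).

import java.util.ArrayList;
import java.util.List;

import parquet.hadoop.metadata.BlockMetaData;

public final class RowGroupSelector
{
    private RowGroupSelector() {}

    // Keep only the row groups whose first data page starts inside [splitStart, splitStart + splitLength).
    public static List<BlockMetaData> selectRowGroups(List<BlockMetaData> blocks, long splitStart, long splitLength)
    {
        List<BlockMetaData> selected = new ArrayList<>();
        for (BlockMetaData block : blocks) {
            long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset();
            if (firstDataPage >= splitStart && firstDataPage < splitStart + splitLength) {
                selected.add(block);
            }
        }
        return selected;
    }
}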
public static void setSchema(Job job, HAWQSchema schema)
{
    hawqSchema = schema;
    HAWQWriteSupport.setSchema(ContextUtil.getConfiguration(job), hawqSchema);
}
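A typical call site for this setter is driver-side job setup: the schema is cached in a static field and also written into the job's Configuration (via ContextUtil.getConfiguration) so the write support can recover it on the task side. The sketch below only illustrates that wiring; it assumes the setter is declared on HAWQParquetOutputFormat and that the caller supplies a HAWQSchema instance, and the HAWQ import paths are assumptions rather than something shown in the snippet above.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

import com.pivotal.hawq.mapreduce.schema.HAWQSchema;          // assumed package for HAWQSchema
import com.pivotal.hawq.mapreduce.parquet.HAWQParquetOutputFormat;  // assumed declaring class of setSchema(Job, HAWQSchema)

public final class HAWQWriteJobSetup
{
    private HAWQWriteJobSetup() {}

    // Driver-side wiring: register the target schema on the job before submission so that
    // the task-side write support can read it back from the Configuration.
    public static Job newWriteJob(Configuration conf, HAWQSchema schema) throws IOException
    {
        Job job = Job.getInstance(conf, "hawq-parquet-write");  // assumes the Hadoop 2 Job API
        HAWQParquetOutputFormat.setSchema(job, schema);         // the setter shown above
        return job;
    }
}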