Ejemplo n.º 1
0
  private ParquetRecordReader<FakeParquetRecord> createParquetRecordReader(
      Configuration configuration,
      Path path,
      long start,
      long length,
      List<HiveColumnHandle> columns,
      boolean useParquetColumnNames) {
    try {
      ParquetMetadata parquetMetadata = ParquetFileReader.readFooter(configuration, path);
      List<BlockMetaData> blocks = parquetMetadata.getBlocks();
      FileMetaData fileMetaData = parquetMetadata.getFileMetaData();

      PrestoReadSupport readSupport =
          new PrestoReadSupport(useParquetColumnNames, columns, fileMetaData.getSchema());
      ReadContext readContext =
          readSupport.init(
              configuration, fileMetaData.getKeyValueMetaData(), fileMetaData.getSchema());

      List<BlockMetaData> splitGroup = new ArrayList<>();
      long splitStart = start;
      long splitLength = length;
      for (BlockMetaData block : blocks) {
        long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset();
        if (firstDataPage >= splitStart && firstDataPage < splitStart + splitLength) {
          splitGroup.add(block);
        }
      }

      ParquetInputSplit split;

      split =
          new ParquetInputSplit(
              path,
              splitStart,
              splitLength,
              null,
              splitGroup,
              readContext.getRequestedSchema().toString(),
              fileMetaData.getSchema().toString(),
              fileMetaData.getKeyValueMetaData(),
              readContext.getReadSupportMetadata());

      TaskAttemptContext taskContext =
          ContextUtil.newTaskAttemptContext(configuration, new TaskAttemptID());
      ParquetRecordReader<FakeParquetRecord> realReader =
          new PrestoParquetRecordReader(readSupport);
      realReader.initialize(split, taskContext);
      return realReader;
    } catch (IOException e) {
      throw Throwables.propagate(e);
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      throw Throwables.propagate(e);
    }
  }
 public static void setSchema(Job job, HAWQSchema schema) {
   hawqSchema = schema;
   HAWQWriteSupport.setSchema(ContextUtil.getConfiguration(job), hawqSchema);
 }