Example #1
  public ParquetPageSource(
      ParquetReader parquetReader,
      ParquetDataSource dataSource,
      MessageType fileSchema,
      MessageType requestedSchema,
      long totalBytes,
      Properties splitSchema,
      List<HiveColumnHandle> columns,
      TupleDomain<HiveColumnHandle> effectivePredicate,
      TypeManager typeManager,
      boolean useParquetColumnNames) {
    checkArgument(totalBytes >= 0, "totalBytes is negative");
    requireNonNull(splitSchema, "splitSchema is null");
    requireNonNull(columns, "columns is null");
    requireNonNull(effectivePredicate, "effectivePredicate is null");

    this.parquetReader = requireNonNull(parquetReader, "parquetReader is null");
    this.dataSource = requireNonNull(dataSource, "dataSource is null");
    this.requestedSchema = requireNonNull(requestedSchema, "requestedSchema is null");
    this.totalBytes = totalBytes;

    int size = columns.size();
    this.constantBlocks = new Block[size];
    this.hiveColumnIndexes = new int[size];

    ImmutableList.Builder<String> namesBuilder = ImmutableList.builder();
    ImmutableList.Builder<Type> typesBuilder = ImmutableList.builder();
    for (int columnIndex = 0; columnIndex < size; columnIndex++) {
      HiveColumnHandle column = columns.get(columnIndex);
      checkState(column.getColumnType() == REGULAR, "column type must be regular");

      String name = column.getName();
      Type type = typeManager.getType(column.getTypeSignature());

      namesBuilder.add(name);
      typesBuilder.add(type);

      hiveColumnIndexes[columnIndex] = column.getHiveColumnIndex();

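      // The column does not exist in the Parquet file: substitute a constant block of nulls.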
      if (getParquetType(column, fileSchema, useParquetColumnNames) == null) {
        BlockBuilder blockBuilder =
            type.createBlockBuilder(new BlockBuilderStatus(), MAX_VECTOR_LENGTH);
        for (int i = 0; i < MAX_VECTOR_LENGTH; i++) {
          blockBuilder.appendNull();
        }
        constantBlocks[columnIndex] = blockBuilder.build();
      }
    }
    types = typesBuilder.build();
    columnNames = namesBuilder.build();
  }
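
The constructor above backfills columns that are missing from the Parquet file schema by building a block that is entirely null. That step reads more clearly in isolation; here is a minimal sketch reusing only the SPI calls already visible above (createBlockBuilder, appendNull, build). The helper name buildAllNullBlock and the import paths are assumptions, not part of the original class:

import com.facebook.presto.spi.block.Block;
import com.facebook.presto.spi.block.BlockBuilder;
import com.facebook.presto.spi.block.BlockBuilderStatus;
import com.facebook.presto.spi.type.Type;

// Hypothetical helper: build a reusable Block containing `positions` nulls
// for a column of the given type, to stand in for a column that is absent
// from the file being read.
static Block buildAllNullBlock(Type type, int positions) {
  BlockBuilder blockBuilder = type.createBlockBuilder(new BlockBuilderStatus(), positions);
  for (int i = 0; i < positions; i++) {
    blockBuilder.appendNull();
  }
  return blockBuilder.build();
}
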
  public static OrcPageSource createOrcPageSource(
      MetadataReader metadataReader,
      Configuration configuration,
      Path path,
      long start,
      long length,
      List<HiveColumnHandle> columns,
      List<HivePartitionKey> partitionKeys,
      TupleDomain<HiveColumnHandle> effectivePredicate,
      DateTimeZone hiveStorageTimeZone,
      TypeManager typeManager,
      DataSize maxMergeDistance,
      DataSize maxBufferSize,
      DataSize streamBufferSize) {
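    // Open the underlying file eagerly; failures here are mapped to HIVE_CANNOT_OPEN_SPLIT below.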
    OrcDataSource orcDataSource;
    try {
      FileSystem fileSystem = path.getFileSystem(configuration);
      long size = fileSystem.getFileStatus(path).getLen();
      FSDataInputStream inputStream = fileSystem.open(path);
      orcDataSource =
          new HdfsOrcDataSource(
              path.toString(),
              size,
              maxMergeDistance,
              maxBufferSize,
              streamBufferSize,
              inputStream);
    } catch (Exception e) {
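      // "Filesystem closed" and missing files are rethrown without split details;
      // any other failure has the split coordinates attached to its message.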
      if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed")
          || e instanceof FileNotFoundException) {
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, e);
      }
      throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e);
    }

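    // Collect the non-partition columns to read, with their Presto types, for the reader and the predicate.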
    ImmutableSet.Builder<Integer> includedColumns = ImmutableSet.builder();
    ImmutableList.Builder<ColumnReference<HiveColumnHandle>> columnReferences =
        ImmutableList.builder();
    for (HiveColumnHandle column : columns) {
      if (!column.isPartitionKey()) {
        includedColumns.add(column.getHiveColumnIndex());
        Type type = typeManager.getType(column.getTypeSignature());
        columnReferences.add(new ColumnReference<>(column, column.getHiveColumnIndex(), type));
      }
    }

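    // Turn the effective predicate into an OrcPredicate so the reader can skip stripes and row groups that cannot match.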
    OrcPredicate predicate =
        new TupleDomainOrcPredicate<>(effectivePredicate, columnReferences.build());

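    // From this point on, any failure must close orcDataSource so the open input stream is not leaked.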
    try {
      OrcReader reader = new OrcReader(orcDataSource, metadataReader);
      OrcRecordReader recordReader =
          reader.createRecordReader(
              includedColumns.build(), predicate, start, length, hiveStorageTimeZone);

      return new OrcPageSource(
          recordReader, orcDataSource, partitionKeys, columns, hiveStorageTimeZone, typeManager);
    } catch (Exception e) {
      try {
        orcDataSource.close();
      } catch (IOException ignored) {
        // Suppress close() failures; the original exception is the one worth reporting.
      }
      if (e instanceof PrestoException) {
        throw (PrestoException) e;
      }
      String message = splitError(e, path, start, length);
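      // Matched by simple name to avoid a compile-time dependency on HDFS's BlockMissingException.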
      if (e.getClass().getSimpleName().equals("BlockMissingException")) {
        throw new PrestoException(HIVE_MISSING_DATA, message, e);
      }
      throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
  }
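
createOrcPageSource follows a close-on-failure discipline: once orcDataSource is open, every later failure closes it before the exception propagates, while the success path hands the still-open source to OrcPageSource. Try-with-resources does not fit here, because the resource must outlive the method when it succeeds. The pattern can be written generically; a minimal sketch assuming only the JDK, with a hypothetical buildOrClose name:

import java.io.Closeable;
import java.io.IOException;
import java.util.concurrent.Callable;

// Hypothetical generic form of the pattern above: if constructing the
// consumer of an already-open resource fails, close the resource before
// rethrowing so the handle is not leaked. A secondary failure from close()
// is suppressed in favor of the original exception.
static <T> T buildOrClose(Closeable resource, Callable<T> builder) throws Exception {
  try {
    return builder.call();
  } catch (Exception e) {
    try {
      resource.close();
    } catch (IOException ignored) {
    }
    throw e;
  }
}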