public ParquetPageSource(
        ParquetReader parquetReader,
        ParquetDataSource dataSource,
        MessageType fileSchema,
        MessageType requestedSchema,
        long totalBytes,
        Properties splitSchema,
        List<HiveColumnHandle> columns,
        TupleDomain<HiveColumnHandle> effectivePredicate,
        TypeManager typeManager,
        boolean useParquetColumnNames)
{
    checkArgument(totalBytes >= 0, "totalBytes is negative");
    requireNonNull(splitSchema, "splitSchema is null");
    requireNonNull(columns, "columns is null");
    requireNonNull(effectivePredicate, "effectivePredicate is null");

    this.parquetReader = parquetReader;
    this.dataSource = dataSource;
    this.requestedSchema = requestedSchema;
    this.totalBytes = totalBytes;

    int size = columns.size();
    this.constantBlocks = new Block[size];
    this.hiveColumnIndexes = new int[size];

    ImmutableList.Builder<String> namesBuilder = ImmutableList.builder();
    ImmutableList.Builder<Type> typesBuilder = ImmutableList.builder();
    for (int columnIndex = 0; columnIndex < size; columnIndex++) {
        HiveColumnHandle column = columns.get(columnIndex);
        checkState(column.getColumnType() == REGULAR, "column type must be regular");

        String name = column.getName();
        Type type = typeManager.getType(column.getTypeSignature());

        namesBuilder.add(name);
        typesBuilder.add(type);
        hiveColumnIndexes[columnIndex] = column.getHiveColumnIndex();

        // Columns that are missing from the Parquet file schema are served as
        // constant all-null blocks instead of being read from disk
        if (getParquetType(column, fileSchema, useParquetColumnNames) == null) {
            BlockBuilder blockBuilder = type.createBlockBuilder(new BlockBuilderStatus(), MAX_VECTOR_LENGTH);
            for (int i = 0; i < MAX_VECTOR_LENGTH; i++) {
                blockBuilder.appendNull();
            }
            constantBlocks[columnIndex] = blockBuilder.build();
        }
    }
    types = typesBuilder.build();
    columnNames = namesBuilder.build();
}
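// Sketch (illustrative, not part of the original source): the constant-block
// technique from the constructor above, isolated into a helper. For a column
// absent from the file schema, a Block of `positions` nulls is built once and
// can be reused for every batch instead of decoding anything. Only the
// BlockBuilder calls already used above are assumed; the helper name is
// hypothetical.
private static Block buildNullBlock(Type type, int positions)
{
    BlockBuilder blockBuilder = type.createBlockBuilder(new BlockBuilderStatus(), positions);
    for (int i = 0; i < positions; i++) {
        blockBuilder.appendNull();
    }
    return blockBuilder.build();
}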
public static OrcPageSource createOrcPageSource(
        MetadataReader metadataReader,
        Configuration configuration,
        Path path,
        long start,
        long length,
        List<HiveColumnHandle> columns,
        List<HivePartitionKey> partitionKeys,
        TupleDomain<HiveColumnHandle> effectivePredicate,
        DateTimeZone hiveStorageTimeZone,
        TypeManager typeManager,
        DataSize maxMergeDistance,
        DataSize maxBufferSize,
        DataSize streamBufferSize)
{
    OrcDataSource orcDataSource;
    try {
        // Open the split's file and wrap it in an HDFS-backed ORC data source
        FileSystem fileSystem = path.getFileSystem(configuration);
        long size = fileSystem.getFileStatus(path).getLen();
        FSDataInputStream inputStream = fileSystem.open(path);
        orcDataSource = new HdfsOrcDataSource(
                path.toString(),
                size,
                maxMergeDistance,
                maxBufferSize,
                streamBufferSize,
                inputStream);
    }
    catch (Exception e) {
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") ||
                e instanceof FileNotFoundException) {
            throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e);
    }

    // Partition keys are not stored in the file, so only regular columns
    // are projected out of the ORC reader
    ImmutableSet.Builder<Integer> includedColumns = ImmutableSet.builder();
    ImmutableList.Builder<ColumnReference<HiveColumnHandle>> columnReferences = ImmutableList.builder();
    for (HiveColumnHandle column : columns) {
        if (!column.isPartitionKey()) {
            includedColumns.add(column.getHiveColumnIndex());
            Type type = typeManager.getType(column.getTypeSignature());
            columnReferences.add(new ColumnReference<>(column, column.getHiveColumnIndex(), type));
        }
    }

    // The predicate lets the record reader skip stripes and row groups whose
    // statistics cannot satisfy the effective predicate
    OrcPredicate predicate = new TupleDomainOrcPredicate<>(effectivePredicate, columnReferences.build());

    try {
        OrcReader reader = new OrcReader(orcDataSource, metadataReader);
        OrcRecordReader recordReader = reader.createRecordReader(
                includedColumns.build(),
                predicate,
                start,
                length,
                hiveStorageTimeZone);

        return new OrcPageSource(
                recordReader,
                orcDataSource,
                partitionKeys,
                columns,
                hiveStorageTimeZone,
                typeManager);
    }
    catch (Exception e) {
        // Close the data source before rethrowing so the input stream is not leaked
        try {
            orcDataSource.close();
        }
        catch (IOException ignored) {
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        String message = splitError(e, path, start, length);
        if (e.getClass().getSimpleName().equals("BlockMissingException")) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
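// Sketch (illustrative, not part of the original source): the exception-
// classification pattern used in the catch block above. Matching on the
// simple class name ("BlockMissingException") instead of an instanceof check
// avoids a compile-time dependency on the HDFS client classes while still
// mapping missing HDFS blocks to HIVE_MISSING_DATA. The helper name is
// hypothetical; only the JDK is used.
private static boolean isBlockMissingException(Throwable t)
{
    // Matches org.apache.hadoop.hdfs.BlockMissingException by name only,
    // so the class need not be on the classpath at compile time
    return t.getClass().getSimpleName().equals("BlockMissingException");
}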