// Code example #1
  @Override
  public Page getNextPage() {
    try {
      batchId++;
      long start = System.nanoTime();

      int batchSize = parquetReader.nextBatch();

      readTimeNanos += System.nanoTime() - start;

      // A non-positive batch size means the reader is exhausted (or we were
      // closed concurrently); release resources eagerly and signal end-of-data.
      if (closed || batchSize <= 0) {
        close();
        return null;
      }

      Block[] blocks = new Block[hiveColumnIndexes.length];
      for (int fieldId = 0; fieldId < blocks.length; fieldId++) {
        if (constantBlocks[fieldId] != null) {
          // Constant (e.g. partition-key) column: slice the pre-built block to
          // the batch size. No per-row decoding is needed.
          blocks[fieldId] = constantBlocks[fieldId].getRegion(0, batchSize);
        } else {
          // Only resolve the column type/descriptor for columns that actually
          // come from the Parquet file; the constant branch never uses them.
          Type type = types.get(fieldId);
          int fieldIndex = requestedSchema.getFieldIndex(columnNames.get(fieldId));
          ColumnDescriptor columnDescriptor = requestedSchema.getColumns().get(fieldIndex);
          // Defer materialization until the block is first accessed.
          blocks[fieldId] =
              new LazyBlock(batchSize, new ParquetBlockLoader(columnDescriptor, type));
        }
      }
      return new Page(batchSize, blocks);
    } catch (PrestoException e) {
      // Already the engine's exception type: close and rethrow unchanged.
      closeWithSuppression(e);
      throw e;
    } catch (InterruptedException e) {
      // Restore the interrupt flag so callers further up the stack can
      // observe the interruption, then surface it as a cursor error.
      Thread.currentThread().interrupt();
      closeWithSuppression(e);
      throw new PrestoException(HIVE_CURSOR_ERROR, e);
    } catch (IOException | RuntimeException e) {
      closeWithSuppression(e);
      throw new PrestoException(HIVE_CURSOR_ERROR, e);
    }
  }
// Code example #2
  /** {@inheritDoc} */
  @Override
  public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
      throws IOException, InterruptedException {
    Configuration conf = taskAttemptContext.getConfiguration();
    ParquetInputSplit split = (ParquetInputSplit) inputSplit;

    // Parse the projected (requested) schema that the split carries and
    // remember how many leaf columns we will read.
    this.requestedSchema = MessageTypeParser.parseMessageType(split.getRequestedSchema());
    this.columnCount = this.requestedSchema.getPaths().size();

    // Let the read support build the converter that materializes records
    // from the file schema into the requested projection.
    this.recordConverter =
        readSupport.prepareForRead(
            conf,
            split.getExtraMetadata(),
            MessageTypeParser.parseMessageType(split.getSchema()),
            new ReadSupport.ReadContext(requestedSchema));

    // Open the file reader over the split's row groups and sum their row
    // counts so progress reporting knows the total.
    List<BlockMetaData> rowGroups = split.getBlocks();
    reader =
        new ParquetFileReader(conf, split.getPath(), rowGroups, requestedSchema.getColumns());
    for (BlockMetaData rowGroup : rowGroups) {
      total += rowGroup.getRowCount();
    }
    LOG.info("RecordReader initialized will read a total of " + total + " records.");
  }