protected ColumnReader( ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor descriptor, ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, V v, SchemaElement schemaElement) throws ExecutionSetupException { this.parentReader = parentReader; this.columnDescriptor = descriptor; this.columnChunkMetaData = columnChunkMetaData; this.isFixedLength = fixedLength; this.schemaElement = schemaElement; this.valueVec = v; this.pageReader = new PageReader( this, parentReader.getFileSystem(), parentReader.getHadoopPath(), columnChunkMetaData); if (columnDescriptor.getType() != PrimitiveType.PrimitiveTypeName.BINARY) { if (columnDescriptor.getType() == PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) { dataTypeLengthInBits = columnDescriptor.getTypeLength() * 8; } else { dataTypeLengthInBits = ParquetRecordReader.getTypeLengthInBits(columnDescriptor.getType()); } } }
protected ColumnReader( ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor descriptor, ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, ValueVector v) throws ExecutionSetupException { this.parentReader = parentReader; this.columnDescriptor = descriptor; this.columnChunkMetaData = columnChunkMetaData; this.isFixedLength = fixedLength; if (allocateSize > 1) { valueVecHolder = new VectorHolder(allocateSize, v); } else { valueVecHolder = new VectorHolder(5000, v); } this.pageReadStatus = new PageReadStatus( this, parentReader.fileSystem, parentReader.hadoopPath, columnChunkMetaData); if (columnDescriptor.getType() != PrimitiveType.PrimitiveTypeName.BINARY) { dataTypeLengthInBits = ParquetRecordReader.getTypeLengthInBits(columnDescriptor.getType()); } }
/** * start a column inside a block * * @param descriptor the column descriptor * @param valueCount the value count in this column * @param statistics the statistics in this column * @param compressionCodecName * @throws IOException */ public void startColumn( ColumnDescriptor descriptor, long valueCount, CompressionCodecName compressionCodecName) throws IOException { state = state.startColumn(); if (DEBUG) LOG.debug(out.getPos() + ": start column: " + descriptor + " count=" + valueCount); currentEncodings = new HashSet<parquet.column.Encoding>(); currentChunkPath = ColumnPath.get(descriptor.getPath()); currentChunkType = descriptor.getType(); currentChunkCodec = compressionCodecName; currentChunkValueCount = valueCount; currentChunkFirstDataPage = out.getPos(); compressedLength = 0; uncompressedLength = 0; // need to know what type of stats to initialize to // better way to do this? currentStatistics = Statistics.getStatsBasedOnType(currentChunkType); }