private void readFixedLenByteArrayBatch(int rowId, int num, ColumnVector column, int arrayLen) throws IOException { VectorizedValuesReader data = (VectorizedValuesReader) dataColumn; // This is where we implement support for the valid type conversions. // TODO: implement remaining type conversions if (DecimalType.is32BitDecimalType(column.dataType())) { for (int i = 0; i < num; i++) { if (defColumn.readInteger() == maxDefLevel) { column.putInt( rowId + i, (int) ParquetRowConverter.binaryToUnscaledLong(data.readBinary(arrayLen))); } else { column.putNull(rowId + i); } } } else if (DecimalType.is64BitDecimalType(column.dataType())) { for (int i = 0; i < num; i++) { if (defColumn.readInteger() == maxDefLevel) { column.putLong( rowId + i, ParquetRowConverter.binaryToUnscaledLong(data.readBinary(arrayLen))); } else { column.putNull(rowId + i); } } } else if (DecimalType.isByteArrayDecimalType(column.dataType())) { for (int i = 0; i < num; i++) { if (defColumn.readInteger() == maxDefLevel) { column.putByteArray(rowId + i, data.readBinary(arrayLen).getBytes()); } else { column.putNull(rowId + i); } } } else { throw new UnsupportedOperationException("Unimplemented type: " + column.dataType()); } }
/** Reads `num` values into column, decoding the values from `dictionaryIds` and `dictionary`. */ private void decodeDictionaryIds( int rowId, int num, ColumnVector column, ColumnVector dictionaryIds) { switch (descriptor.getType()) { case INT32: if (column.dataType() == DataTypes.IntegerType || DecimalType.is32BitDecimalType(column.dataType())) { for (int i = rowId; i < rowId + num; ++i) { column.putInt(i, dictionary.decodeToInt(dictionaryIds.getInt(i))); } } else if (column.dataType() == DataTypes.ByteType) { for (int i = rowId; i < rowId + num; ++i) { column.putByte(i, (byte) dictionary.decodeToInt(dictionaryIds.getInt(i))); } } else if (column.dataType() == DataTypes.ShortType) { for (int i = rowId; i < rowId + num; ++i) { column.putShort(i, (short) dictionary.decodeToInt(dictionaryIds.getInt(i))); } } else { throw new UnsupportedOperationException("Unimplemented type: " + column.dataType()); } break; case INT64: if (column.dataType() == DataTypes.LongType || DecimalType.is64BitDecimalType(column.dataType())) { for (int i = rowId; i < rowId + num; ++i) { column.putLong(i, dictionary.decodeToLong(dictionaryIds.getInt(i))); } } else { throw new UnsupportedOperationException("Unimplemented type: " + column.dataType()); } break; case FLOAT: for (int i = rowId; i < rowId + num; ++i) { column.putFloat(i, dictionary.decodeToFloat(dictionaryIds.getInt(i))); } break; case DOUBLE: for (int i = rowId; i < rowId + num; ++i) { column.putDouble(i, dictionary.decodeToDouble(dictionaryIds.getInt(i))); } break; case INT96: if (column.dataType() == DataTypes.TimestampType) { for (int i = rowId; i < rowId + num; ++i) { // TODO: Convert dictionary of Binaries to dictionary of Longs Binary v = dictionary.decodeToBinary(dictionaryIds.getInt(i)); column.putLong(i, ParquetRowConverter.binaryToSQLTimestamp(v)); } } else { throw new UnsupportedOperationException(); } break; case BINARY: // TODO: this is incredibly inefficient as it blows up the dictionary right here. We // need to do this better. We should probably add the dictionary data to the ColumnVector // and reuse it across batches. This should mean adding a ByteArray would just update // the length and offset. for (int i = rowId; i < rowId + num; ++i) { Binary v = dictionary.decodeToBinary(dictionaryIds.getInt(i)); column.putByteArray(i, v.getBytes()); } break; case FIXED_LEN_BYTE_ARRAY: // DecimalType written in the legacy mode if (DecimalType.is32BitDecimalType(column.dataType())) { for (int i = rowId; i < rowId + num; ++i) { Binary v = dictionary.decodeToBinary(dictionaryIds.getInt(i)); column.putInt(i, (int) ParquetRowConverter.binaryToUnscaledLong(v)); } } else if (DecimalType.is64BitDecimalType(column.dataType())) { for (int i = rowId; i < rowId + num; ++i) { Binary v = dictionary.decodeToBinary(dictionaryIds.getInt(i)); column.putLong(i, ParquetRowConverter.binaryToUnscaledLong(v)); } } else if (DecimalType.isByteArrayDecimalType(column.dataType())) { for (int i = rowId; i < rowId + num; ++i) { Binary v = dictionary.decodeToBinary(dictionaryIds.getInt(i)); column.putByteArray(i, v.getBytes()); } } else { throw new UnsupportedOperationException(); } break; default: throw new UnsupportedOperationException("Unsupported type: " + descriptor.getType()); } }