private void readFixedLenByteArrayBatch(int rowId, int num, ColumnVector column,
    int arrayLen) throws IOException {
  VectorizedValuesReader data = (VectorizedValuesReader) dataColumn;
  // This is where we implement support for the valid type conversions.
  // TODO: implement remaining type conversions
  if (DecimalType.is32BitDecimalType(column.dataType())) {
    for (int i = 0; i < num; i++) {
      if (defColumn.readInteger() == maxDefLevel) {
        column.putInt(rowId + i,
            (int) ParquetRowConverter.binaryToUnscaledLong(data.readBinary(arrayLen)));
      } else {
        column.putNull(rowId + i);
      }
    }
  } else if (DecimalType.is64BitDecimalType(column.dataType())) {
    for (int i = 0; i < num; i++) {
      if (defColumn.readInteger() == maxDefLevel) {
        column.putLong(rowId + i,
            ParquetRowConverter.binaryToUnscaledLong(data.readBinary(arrayLen)));
      } else {
        column.putNull(rowId + i);
      }
    }
  } else if (DecimalType.isByteArrayDecimalType(column.dataType())) {
    for (int i = 0; i < num; i++) {
      if (defColumn.readInteger() == maxDefLevel) {
        column.putByteArray(rowId + i, data.readBinary(arrayLen).getBytes());
      } else {
        column.putNull(rowId + i);
      }
    }
  } else {
    throw new UnsupportedOperationException("Unimplemented type: " + column.dataType());
  }
}
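// The decimal branches above rely on ParquetRowConverter.binaryToUnscaledLong to turn a
// fixed-length binary into the decimal's unscaled value. Parquet stores such decimals as
// big-endian two's-complement bytes, so a conversion equivalent in spirit can be sketched
// as below. This is a minimal illustrative stand-in, not the actual Spark implementation;
// the class and method names are hypothetical.
final class UnscaledLongSketch {
  /** Interprets up to 8 big-endian two's-complement bytes as an unscaled long. */
  static long binaryToUnscaledLong(byte[] bytes) {
    long unscaled = 0L;
    for (byte b : bytes) {
      unscaled = (unscaled << 8) | (b & 0xffL);  // accumulate big-endian bytes
    }
    // Sign-extend from the width actually read (bytes.length <= 8 assumed here).
    int bits = 8 * bytes.length;
    return bits == 0 ? 0L : (unscaled << (64 - bits)) >> (64 - bits);
  }
}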
private void readLongBatch(int rowId, int num, ColumnVector column) throws IOException {
  // This is where we implement support for the valid type conversions.
  if (column.dataType() == DataTypes.LongType ||
      DecimalType.is64BitDecimalType(column.dataType())) {
    defColumn.readLongs(
        num, column, rowId, maxDefLevel, (VectorizedValuesReader) dataColumn);
  } else {
    throw new UnsupportedOperationException("Unsupported conversion to: " + column.dataType());
  }
}
@Override
public Object get(int ordinal, DataType dataType) {
  if (isNullAt(ordinal) || dataType instanceof NullType) {
    return null;
  } else if (dataType instanceof BooleanType) {
    return getBoolean(ordinal);
  } else if (dataType instanceof ByteType) {
    return getByte(ordinal);
  } else if (dataType instanceof ShortType) {
    return getShort(ordinal);
  } else if (dataType instanceof IntegerType) {
    return getInt(ordinal);
  } else if (dataType instanceof LongType) {
    return getLong(ordinal);
  } else if (dataType instanceof FloatType) {
    return getFloat(ordinal);
  } else if (dataType instanceof DoubleType) {
    return getDouble(ordinal);
  } else if (dataType instanceof DecimalType) {
    DecimalType dt = (DecimalType) dataType;
    return getDecimal(ordinal, dt.precision(), dt.scale());
  } else if (dataType instanceof DateType) {
    return getInt(ordinal);
  } else if (dataType instanceof TimestampType) {
    return getLong(ordinal);
  } else if (dataType instanceof BinaryType) {
    return getBinary(ordinal);
  } else if (dataType instanceof StringType) {
    return getUTF8String(ordinal);
  } else if (dataType instanceof CalendarIntervalType) {
    return getInterval(ordinal);
  } else if (dataType instanceof StructType) {
    return getStruct(ordinal, ((StructType) dataType).size());
  } else if (dataType instanceof ArrayType) {
    return getArray(ordinal);
  } else if (dataType instanceof MapType) {
    return getMap(ordinal);
  } else if (dataType instanceof UserDefinedType) {
    return get(ordinal, ((UserDefinedType) dataType).sqlType());
  } else {
    throw new UnsupportedOperationException("Unsupported data type " + dataType.simpleString());
  }
}
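// Hedged usage sketch for the type-dispatched get() above: callers typically walk a schema
// and pass each field's declared DataType, as below. InternalRow, StructType, and
// StructField are the real Spark classes; the helper class and method names here are
// illustrative only.
// (Imports assumed: org.apache.spark.sql.catalyst.InternalRow,
//  org.apache.spark.sql.types.StructType, org.apache.spark.sql.types.StructField)
final class RowDumpSketch {
  /** Materializes every column of a row into a plain Object[] via get(ordinal, dataType). */
  static Object[] toObjectArray(InternalRow row, StructType schema) {
    StructField[] fields = schema.fields();
    Object[] values = new Object[fields.length];
    for (int i = 0; i < fields.length; i++) {
      values[i] = row.get(i, fields[i].dataType());  // dispatches on the declared type
    }
    return values;
  }
}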
private void readIntBatch(int rowId, int num, ColumnVector column) throws IOException {
  // This is where we implement support for the valid type conversions.
  // TODO: implement remaining type conversions
  if (column.dataType() == DataTypes.IntegerType || column.dataType() == DataTypes.DateType ||
      DecimalType.is32BitDecimalType(column.dataType())) {
    defColumn.readIntegers(
        num, column, rowId, maxDefLevel, (VectorizedValuesReader) dataColumn);
  } else if (column.dataType() == DataTypes.ByteType) {
    defColumn.readBytes(
        num, column, rowId, maxDefLevel, (VectorizedValuesReader) dataColumn);
  } else if (column.dataType() == DataTypes.ShortType) {
    defColumn.readShorts(
        num, column, rowId, maxDefLevel, (VectorizedValuesReader) dataColumn);
  } else {
    throw new UnsupportedOperationException("Unimplemented type: " + column.dataType());
  }
}
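// Hedged sketch of what a call like defColumn.readBytes(...) does conceptually: Parquet has
// no 8-bit physical type, so ByteType columns arrive as INT32 values that are narrowed on
// the way into the vector, with the definition level deciding null vs. value (the same
// pattern the fixed-length decimal reader above spells out by hand). The class, method, and
// array names below are illustrative, not the Spark/Parquet API.
final class NarrowingReadSketch {
  static void readBytes(int rowId, int num, int maxDefLevel,
      int[] defLevels, int[] parquetInts, byte[] out, boolean[] isNull) {
    for (int i = 0; i < num; i++) {
      if (defLevels[rowId + i] == maxDefLevel) {
        out[rowId + i] = (byte) parquetInts[rowId + i];  // narrow INT32 -> byte
        isNull[rowId + i] = false;
      } else {
        isNull[rowId + i] = true;                        // value absent at this position
      }
    }
  }
}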
/**
 * Reads `num` values into column, decoding the values from `dictionaryIds` and `dictionary`.
 */
private void decodeDictionaryIds(
    int rowId, int num, ColumnVector column, ColumnVector dictionaryIds) {
  switch (descriptor.getType()) {
    case INT32:
      if (column.dataType() == DataTypes.IntegerType ||
          DecimalType.is32BitDecimalType(column.dataType())) {
        for (int i = rowId; i < rowId + num; ++i) {
          column.putInt(i, dictionary.decodeToInt(dictionaryIds.getInt(i)));
        }
      } else if (column.dataType() == DataTypes.ByteType) {
        for (int i = rowId; i < rowId + num; ++i) {
          column.putByte(i, (byte) dictionary.decodeToInt(dictionaryIds.getInt(i)));
        }
      } else if (column.dataType() == DataTypes.ShortType) {
        for (int i = rowId; i < rowId + num; ++i) {
          column.putShort(i, (short) dictionary.decodeToInt(dictionaryIds.getInt(i)));
        }
      } else {
        throw new UnsupportedOperationException("Unimplemented type: " + column.dataType());
      }
      break;

    case INT64:
      if (column.dataType() == DataTypes.LongType ||
          DecimalType.is64BitDecimalType(column.dataType())) {
        for (int i = rowId; i < rowId + num; ++i) {
          column.putLong(i, dictionary.decodeToLong(dictionaryIds.getInt(i)));
        }
      } else {
        throw new UnsupportedOperationException("Unimplemented type: " + column.dataType());
      }
      break;

    case FLOAT:
      for (int i = rowId; i < rowId + num; ++i) {
        column.putFloat(i, dictionary.decodeToFloat(dictionaryIds.getInt(i)));
      }
      break;

    case DOUBLE:
      for (int i = rowId; i < rowId + num; ++i) {
        column.putDouble(i, dictionary.decodeToDouble(dictionaryIds.getInt(i)));
      }
      break;

    case INT96:
      if (column.dataType() == DataTypes.TimestampType) {
        for (int i = rowId; i < rowId + num; ++i) {
          // TODO: Convert dictionary of Binaries to dictionary of Longs
          Binary v = dictionary.decodeToBinary(dictionaryIds.getInt(i));
          column.putLong(i, ParquetRowConverter.binaryToSQLTimestamp(v));
        }
      } else {
        throw new UnsupportedOperationException();
      }
      break;

    case BINARY:
      // TODO: this is incredibly inefficient as it blows up the dictionary right here. We
      // need to do this better. We should probably add the dictionary data to the ColumnVector
      // and reuse it across batches. This should mean adding a ByteArray would just update
      // the length and offset.
      for (int i = rowId; i < rowId + num; ++i) {
        Binary v = dictionary.decodeToBinary(dictionaryIds.getInt(i));
        column.putByteArray(i, v.getBytes());
      }
      break;

    case FIXED_LEN_BYTE_ARRAY:
      // DecimalType written in the legacy mode
      if (DecimalType.is32BitDecimalType(column.dataType())) {
        for (int i = rowId; i < rowId + num; ++i) {
          Binary v = dictionary.decodeToBinary(dictionaryIds.getInt(i));
          column.putInt(i, (int) ParquetRowConverter.binaryToUnscaledLong(v));
        }
      } else if (DecimalType.is64BitDecimalType(column.dataType())) {
        for (int i = rowId; i < rowId + num; ++i) {
          Binary v = dictionary.decodeToBinary(dictionaryIds.getInt(i));
          column.putLong(i, ParquetRowConverter.binaryToUnscaledLong(v));
        }
      } else if (DecimalType.isByteArrayDecimalType(column.dataType())) {
        for (int i = rowId; i < rowId + num; ++i) {
          Binary v = dictionary.decodeToBinary(dictionaryIds.getInt(i));
          column.putByteArray(i, v.getBytes());
        }
      } else {
        throw new UnsupportedOperationException();
      }
      break;

    default:
      throw new UnsupportedOperationException("Unsupported type: " + descriptor.getType());
  }
}
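// Hedged illustration of the idea behind decodeDictionaryIds: a dictionary-encoded page
// stores small integer ids plus a table of distinct values, and decoding replaces each id
// with its table entry. The plain arrays and names below are illustrative only, not the
// Parquet Dictionary or Spark ColumnVector API.
final class DictionaryDecodeSketch {
  /** Expands dictionary ids in [rowId, rowId + num) into their decoded long values. */
  static void decodeLongs(int rowId, int num, int[] ids, long[] dictionary, long[] out) {
    for (int i = rowId; i < rowId + num; ++i) {
      out[i] = dictionary[ids[i]];  // each id indexes the distinct-value table
    }
  }
}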