/**
 * Reads {@code num} fixed-length byte array values into {@code column}, filling rows
 * {@code rowId} to {@code rowId + num - 1}.
 *
 * For every row one definition level is read from {@code defColumn}. A level equal to
 * {@code maxDefLevel} means a value of {@code arrayLen} bytes is read from the data column;
 * any other level stores a null in that row.
 *
 * Only decimal targets are handled: 32-bit decimals are written with {@code putInt},
 * 64-bit decimals with {@code putLong}, and byte-array decimals with {@code putByteArray}.
 *
 * @param rowId first row of {@code column} to write
 * @param num number of rows to fill
 * @param column destination vector; its data type selects the conversion
 * @param arrayLen number of bytes to read for each present value
 * @throws UnsupportedOperationException if the column type is none of the decimal kinds above
 */
private void readFixedLenByteArrayBatch(int rowId, int num, ColumnVector column, int arrayLen)
     throws IOException {
   final VectorizedValuesReader valuesReader = (VectorizedValuesReader) dataColumn;
   // This is where we implement support for the valid type conversions.
   // TODO: implement remaining type conversions
   if (DecimalType.is32BitDecimalType(column.dataType())) {
     for (int offset = 0; offset < num; offset++) {
       final int row = rowId + offset;
       if (defColumn.readInteger() != maxDefLevel) {
         column.putNull(row);
         continue;
       }
       // The unscaled value is narrowed to int for the 32-bit representation.
       final long unscaled =
           ParquetRowConverter.binaryToUnscaledLong(valuesReader.readBinary(arrayLen));
       column.putInt(row, (int) unscaled);
     }
     return;
   }

   if (DecimalType.is64BitDecimalType(column.dataType())) {
     for (int offset = 0; offset < num; offset++) {
       final int row = rowId + offset;
       if (defColumn.readInteger() != maxDefLevel) {
         column.putNull(row);
         continue;
       }
       final long unscaled =
           ParquetRowConverter.binaryToUnscaledLong(valuesReader.readBinary(arrayLen));
       column.putLong(row, unscaled);
     }
     return;
   }

   if (DecimalType.isByteArrayDecimalType(column.dataType())) {
     for (int offset = 0; offset < num; offset++) {
       final int row = rowId + offset;
       if (defColumn.readInteger() != maxDefLevel) {
         column.putNull(row);
         continue;
       }
       // The bytes are stored as read, without conversion.
       column.putByteArray(row, valuesReader.readBinary(arrayLen).getBytes());
     }
     return;
   }

   throw new UnsupportedOperationException("Unimplemented type: " + column.dataType());
 }
 /**
  * Reads {@code num} long values into {@code column} starting at {@code rowId} by delegating
  * to {@code defColumn.readLongs}.
  *
  * The read is only permitted when the column type is {@code LongType} or a 64-bit decimal.
  *
  * @param rowId first row of {@code column} to write
  * @param num number of rows to fill
  * @param column destination vector
  * @throws UnsupportedOperationException if the column type is neither of the accepted types
  */
 private void readLongBatch(int rowId, int num, ColumnVector column) throws IOException {
   // This is where we implement support for the valid type conversions.
   final boolean unsupportedTarget =
       column.dataType() != DataTypes.LongType
           && !DecimalType.is64BitDecimalType(column.dataType());
   if (unsupportedTarget) {
     throw new UnsupportedOperationException("Unsupported conversion to: " + column.dataType());
   }

   final VectorizedValuesReader valuesReader = (VectorizedValuesReader) dataColumn;
   defColumn.readLongs(num, column, rowId, maxDefLevel, valuesReader);
 }
// Example #3
// 0
 /**
  * Returns the value stored at {@code ordinal} as an object, using the typed accessor that
  * corresponds to {@code dataType}.
  *
  * A null slot, or a {@code NullType} request, yields {@code null}. Dates are read through
  * {@code getInt} and timestamps through {@code getLong}. A {@code UserDefinedType} is
  * resolved by re-dispatching on its {@code sqlType()}.
  *
  * @param ordinal position of the value to read
  * @param dataType type that selects the accessor
  * @return the value (primitives are boxed), or {@code null}
  * @throws UnsupportedOperationException if {@code dataType} matches none of the handled types
  */
 @Override
 public Object get(int ordinal, DataType dataType) {
   if (isNullAt(ordinal) || dataType instanceof NullType) {
     return null;
   }

   // Fixed-width primitives.
   if (dataType instanceof BooleanType) {
     return getBoolean(ordinal);
   }
   if (dataType instanceof ByteType) {
     return getByte(ordinal);
   }
   if (dataType instanceof ShortType) {
     return getShort(ordinal);
   }
   if (dataType instanceof IntegerType) {
     return getInt(ordinal);
   }
   if (dataType instanceof LongType) {
     return getLong(ordinal);
   }
   if (dataType instanceof FloatType) {
     return getFloat(ordinal);
   }
   if (dataType instanceof DoubleType) {
     return getDouble(ordinal);
   }

   if (dataType instanceof DecimalType) {
     final DecimalType decimal = (DecimalType) dataType;
     return getDecimal(ordinal, decimal.precision(), decimal.scale());
   }

   // Date and timestamp values are read with the int and long accessors respectively.
   if (dataType instanceof DateType) {
     return getInt(ordinal);
   }
   if (dataType instanceof TimestampType) {
     return getLong(ordinal);
   }

   // Variable-length and interval values.
   if (dataType instanceof BinaryType) {
     return getBinary(ordinal);
   }
   if (dataType instanceof StringType) {
     return getUTF8String(ordinal);
   }
   if (dataType instanceof CalendarIntervalType) {
     return getInterval(ordinal);
   }

   // Nested values.
   if (dataType instanceof StructType) {
     final int fieldCount = ((StructType) dataType).size();
     return getStruct(ordinal, fieldCount);
   }
   if (dataType instanceof ArrayType) {
     return getArray(ordinal);
   }
   if (dataType instanceof MapType) {
     return getMap(ordinal);
   }

   // User-defined types are read as their underlying SQL type.
   if (dataType instanceof UserDefinedType) {
     return get(ordinal, ((UserDefinedType) dataType).sqlType());
   }

   throw new UnsupportedOperationException("Unsupported data type " + dataType.simpleString());
 }
 /**
  * Reads {@code num} values into {@code column} starting at {@code rowId}, choosing the
  * {@code defColumn} reader from the column's data type.
  *
  * {@code IntegerType}, {@code DateType} and 32-bit decimals go through {@code readIntegers};
  * {@code ByteType} goes through {@code readBytes}; {@code ShortType} goes through
  * {@code readShorts}.
  *
  * @param rowId first row of {@code column} to write
  * @param num number of rows to fill
  * @param column destination vector
  * @throws UnsupportedOperationException if the column type is none of the types above
  */
 private void readIntBatch(int rowId, int num, ColumnVector column) throws IOException {
   // This is where we implement support for the valid type conversions.
   // TODO: implement remaining type conversions
   final boolean storedAsInt =
       column.dataType() == DataTypes.IntegerType
           || column.dataType() == DataTypes.DateType
           || DecimalType.is32BitDecimalType(column.dataType());
   if (storedAsInt) {
     defColumn.readIntegers(num, column, rowId, maxDefLevel, (VectorizedValuesReader) dataColumn);
     return;
   }

   if (column.dataType() == DataTypes.ByteType) {
     defColumn.readBytes(num, column, rowId, maxDefLevel, (VectorizedValuesReader) dataColumn);
     return;
   }

   if (column.dataType() == DataTypes.ShortType) {
     defColumn.readShorts(num, column, rowId, maxDefLevel, (VectorizedValuesReader) dataColumn);
     return;
   }

   throw new UnsupportedOperationException("Unimplemented type: " + column.dataType());
 }
  /**
   * Reads `num` values into column, decoding the values from `dictionaryIds` and `dictionary`.
   *
   * Rows {@code rowId} to {@code rowId + num - 1} of {@code column} are written. For each row
   * the id stored at the same index of {@code dictionaryIds} is looked up in {@code dictionary}.
   * The physical type reported by {@code descriptor.getType()} selects the decoder, and the
   * column's data type selects how the decoded value is stored.
   *
   * Every row in the range is decoded; this method does not check whether a row is null.
   *
   * @param rowId first row to decode
   * @param num number of rows to decode
   * @param column destination vector
   * @param dictionaryIds vector holding one dictionary id per row, at the same row indices
   * @throws UnsupportedOperationException if the physical type, or the combination of physical
   *     type and column type, is not handled
   */
  private void decodeDictionaryIds(
      int rowId, int num, ColumnVector column, ColumnVector dictionaryIds) {
    final int end = rowId + num;
    switch (descriptor.getType()) {
      case INT32:
        // DateType is accepted here as well, so that dictionary-encoded pages support the same
        // int-backed targets as the non-dictionary path in readIntBatch.
        if (column.dataType() == DataTypes.IntegerType
            || column.dataType() == DataTypes.DateType
            || DecimalType.is32BitDecimalType(column.dataType())) {
          for (int i = rowId; i < end; ++i) {
            column.putInt(i, dictionary.decodeToInt(dictionaryIds.getInt(i)));
          }
        } else if (column.dataType() == DataTypes.ByteType) {
          for (int i = rowId; i < end; ++i) {
            column.putByte(i, (byte) dictionary.decodeToInt(dictionaryIds.getInt(i)));
          }
        } else if (column.dataType() == DataTypes.ShortType) {
          for (int i = rowId; i < end; ++i) {
            column.putShort(i, (short) dictionary.decodeToInt(dictionaryIds.getInt(i)));
          }
        } else {
          throw new UnsupportedOperationException("Unimplemented type: " + column.dataType());
        }
        break;

      case INT64:
        if (column.dataType() == DataTypes.LongType
            || DecimalType.is64BitDecimalType(column.dataType())) {
          for (int i = rowId; i < end; ++i) {
            column.putLong(i, dictionary.decodeToLong(dictionaryIds.getInt(i)));
          }
        } else {
          throw new UnsupportedOperationException("Unimplemented type: " + column.dataType());
        }
        break;

      case FLOAT:
        // The column type is not checked for FLOAT and DOUBLE.
        for (int i = rowId; i < end; ++i) {
          column.putFloat(i, dictionary.decodeToFloat(dictionaryIds.getInt(i)));
        }
        break;

      case DOUBLE:
        for (int i = rowId; i < end; ++i) {
          column.putDouble(i, dictionary.decodeToDouble(dictionaryIds.getInt(i)));
        }
        break;

      case INT96:
        if (column.dataType() == DataTypes.TimestampType) {
          for (int i = rowId; i < end; ++i) {
            // TODO: Convert dictionary of Binaries to dictionary of Longs
            Binary v = dictionary.decodeToBinary(dictionaryIds.getInt(i));
            column.putLong(i, ParquetRowConverter.binaryToSQLTimestamp(v));
          }
        } else {
          throw new UnsupportedOperationException("Unimplemented type: " + column.dataType());
        }
        break;

      case BINARY:
        // TODO: this is incredibly inefficient as it blows up the dictionary right here. We
        // need to do this better. We should probably add the dictionary data to the ColumnVector
        // and reuse it across batches. This should mean adding a ByteArray would just update
        // the length and offset.
        for (int i = rowId; i < end; ++i) {
          Binary v = dictionary.decodeToBinary(dictionaryIds.getInt(i));
          column.putByteArray(i, v.getBytes());
        }
        break;

      case FIXED_LEN_BYTE_ARRAY:
        // DecimalType written in the legacy mode
        if (DecimalType.is32BitDecimalType(column.dataType())) {
          for (int i = rowId; i < end; ++i) {
            Binary v = dictionary.decodeToBinary(dictionaryIds.getInt(i));
            // The unscaled value is narrowed to int for the 32-bit representation.
            column.putInt(i, (int) ParquetRowConverter.binaryToUnscaledLong(v));
          }
        } else if (DecimalType.is64BitDecimalType(column.dataType())) {
          for (int i = rowId; i < end; ++i) {
            Binary v = dictionary.decodeToBinary(dictionaryIds.getInt(i));
            column.putLong(i, ParquetRowConverter.binaryToUnscaledLong(v));
          }
        } else if (DecimalType.isByteArrayDecimalType(column.dataType())) {
          for (int i = rowId; i < end; ++i) {
            Binary v = dictionary.decodeToBinary(dictionaryIds.getInt(i));
            column.putByteArray(i, v.getBytes());
          }
        } else {
          throw new UnsupportedOperationException("Unimplemented type: " + column.dataType());
        }
        break;

      default:
        throw new UnsupportedOperationException("Unsupported type: " + descriptor.getType());
    }
  }