Ejemplo n.º 1
0
 /**
  * Reads {@code num} fixed-length byte array values from the current data column into
  * {@code column}, filling rows {@code rowId} through {@code rowId + num - 1}.
  *
  * <p>One definition level is read from {@code defColumn} per row. When it differs from
  * {@code maxDefLevel} the row is stored as null and no value is read for it; otherwise
  * {@code arrayLen} bytes are read and stored according to the decimal flavour of the column:
  * 32-bit decimals as an int (the unscaled long narrowed to int), 64-bit decimals as a long,
  * and byte-array decimals as the raw bytes.
  *
  * @param rowId first row of {@code column} to write
  * @param num number of rows to process
  * @param column destination vector; its data type selects the conversion
  * @param arrayLen length in bytes passed to each {@code readBinary} call
  * @throws IOException declared by this method; presumably raised by the underlying readers
  * @throws UnsupportedOperationException if the column's data type is none of the three
  *     supported decimal representations
  */
 private void readFixedLenByteArrayBatch(int rowId, int num, ColumnVector column, int arrayLen)
     throws IOException {
   VectorizedValuesReader valuesReader = (VectorizedValuesReader) dataColumn;
   // This is where we implement support for the valid type conversions.
   // TODO: implement remaining type conversions

   // Decimal that fits in an int.
   if (DecimalType.is32BitDecimalType(column.dataType())) {
     for (int offset = 0; offset < num; offset++) {
       final int row = rowId + offset;
       if (defColumn.readInteger() != maxDefLevel) {
         column.putNull(row);
         continue;
       }
       final long unscaled =
           ParquetRowConverter.binaryToUnscaledLong(valuesReader.readBinary(arrayLen));
       column.putInt(row, (int) unscaled);
     }
     return;
   }

   // Decimal that fits in a long.
   if (DecimalType.is64BitDecimalType(column.dataType())) {
     for (int offset = 0; offset < num; offset++) {
       final int row = rowId + offset;
       if (defColumn.readInteger() != maxDefLevel) {
         column.putNull(row);
         continue;
       }
       final long unscaled =
           ParquetRowConverter.binaryToUnscaledLong(valuesReader.readBinary(arrayLen));
       column.putLong(row, unscaled);
     }
     return;
   }

   // Decimal kept as its raw byte representation.
   if (DecimalType.isByteArrayDecimalType(column.dataType())) {
     for (int offset = 0; offset < num; offset++) {
       final int row = rowId + offset;
       if (defColumn.readInteger() != maxDefLevel) {
         column.putNull(row);
         continue;
       }
       final byte[] raw = valuesReader.readBinary(arrayLen).getBytes();
       column.putByteArray(row, raw);
     }
     return;
   }

   throw new UnsupportedOperationException("Unimplemented type: " + column.dataType());
 }
Ejemplo n.º 2
0
  /**
   * Reads `num` values into column, decoding the values from `dictionaryIds` and `dictionary`.
   *
   * <p>Rows {@code rowId} (inclusive) through {@code rowId + num} (exclusive) are processed. For
   * each row, the id stored in {@code dictionaryIds} at that same index is looked up in
   * {@code dictionary} and the decoded value is written to {@code column} at that index. The
   * type reported by {@code descriptor.getType()} selects the dictionary decoder, and, where
   * the branch checks it, {@code column.dataType()} selects how the value is stored.
   *
   * <p>The FLOAT, DOUBLE and BINARY branches write without inspecting the column's data type.
   *
   * @param rowId first row index to decode
   * @param num number of rows to decode
   * @param column destination vector
   * @param dictionaryIds vector holding one dictionary id per row, read with {@code getInt}
   * @throws UnsupportedOperationException if the descriptor type is not handled, or if the
   *     column's data type is not supported for the descriptor type; the message names the
   *     offending type
   */
  private void decodeDictionaryIds(
      int rowId, int num, ColumnVector column, ColumnVector dictionaryIds) {
    switch (descriptor.getType()) {
      case INT32:
        if (column.dataType() == DataTypes.IntegerType
            || DecimalType.is32BitDecimalType(column.dataType())) {
          for (int i = rowId; i < rowId + num; ++i) {
            column.putInt(i, dictionary.decodeToInt(dictionaryIds.getInt(i)));
          }
        } else if (column.dataType() == DataTypes.ByteType) {
          // Narrowing cast: the decoded int is truncated to a byte.
          for (int i = rowId; i < rowId + num; ++i) {
            column.putByte(i, (byte) dictionary.decodeToInt(dictionaryIds.getInt(i)));
          }
        } else if (column.dataType() == DataTypes.ShortType) {
          // Narrowing cast: the decoded int is truncated to a short.
          for (int i = rowId; i < rowId + num; ++i) {
            column.putShort(i, (short) dictionary.decodeToInt(dictionaryIds.getInt(i)));
          }
        } else {
          throw new UnsupportedOperationException("Unimplemented type: " + column.dataType());
        }
        break;

      case INT64:
        if (column.dataType() == DataTypes.LongType
            || DecimalType.is64BitDecimalType(column.dataType())) {
          for (int i = rowId; i < rowId + num; ++i) {
            column.putLong(i, dictionary.decodeToLong(dictionaryIds.getInt(i)));
          }
        } else {
          throw new UnsupportedOperationException("Unimplemented type: " + column.dataType());
        }
        break;

      case FLOAT:
        for (int i = rowId; i < rowId + num; ++i) {
          column.putFloat(i, dictionary.decodeToFloat(dictionaryIds.getInt(i)));
        }
        break;

      case DOUBLE:
        for (int i = rowId; i < rowId + num; ++i) {
          column.putDouble(i, dictionary.decodeToDouble(dictionaryIds.getInt(i)));
        }
        break;

      case INT96:
        if (column.dataType() == DataTypes.TimestampType) {
          for (int i = rowId; i < rowId + num; ++i) {
            // TODO: Convert dictionary of Binaries to dictionary of Longs
            Binary v = dictionary.decodeToBinary(dictionaryIds.getInt(i));
            column.putLong(i, ParquetRowConverter.binaryToSQLTimestamp(v));
          }
        } else {
          // Report the offending type, matching the INT32/INT64 branches.
          throw new UnsupportedOperationException("Unimplemented type: " + column.dataType());
        }
        break;

      case BINARY:
        // TODO: this is incredibly inefficient as it blows up the dictionary right here. We
        // need to do this better. We should probably add the dictionary data to the ColumnVector
        // and reuse it across batches. This should mean adding a ByteArray would just update
        // the length and offset.
        for (int i = rowId; i < rowId + num; ++i) {
          Binary v = dictionary.decodeToBinary(dictionaryIds.getInt(i));
          column.putByteArray(i, v.getBytes());
        }
        break;

      case FIXED_LEN_BYTE_ARRAY:
        // DecimalType written in the legacy mode
        if (DecimalType.is32BitDecimalType(column.dataType())) {
          for (int i = rowId; i < rowId + num; ++i) {
            Binary v = dictionary.decodeToBinary(dictionaryIds.getInt(i));
            column.putInt(i, (int) ParquetRowConverter.binaryToUnscaledLong(v));
          }
        } else if (DecimalType.is64BitDecimalType(column.dataType())) {
          for (int i = rowId; i < rowId + num; ++i) {
            Binary v = dictionary.decodeToBinary(dictionaryIds.getInt(i));
            column.putLong(i, ParquetRowConverter.binaryToUnscaledLong(v));
          }
        } else if (DecimalType.isByteArrayDecimalType(column.dataType())) {
          for (int i = rowId; i < rowId + num; ++i) {
            Binary v = dictionary.decodeToBinary(dictionaryIds.getInt(i));
            column.putByteArray(i, v.getBytes());
          }
        } else {
          // Report the offending type, matching the INT32/INT64 branches.
          throw new UnsupportedOperationException("Unimplemented type: " + column.dataType());
        }
        break;

      default:
        throw new UnsupportedOperationException("Unsupported type: " + descriptor.getType());
    }
  }