Esempi in Java per VectorizedRleValuesReader

Esempio n. 1

0

Mostra file

File: VectorizedColumnReader.java Progetto: ChrisYohann/spark

 /**
  * Reads {@code num} fixed-length byte array values into {@code column}, starting at row
  * {@code rowId}. Every value is read as {@code arrayLen} bytes. A row whose definition level
  * differs from {@code maxDefLevel} is written as null.
  *
  * @param rowId first row of {@code column} to fill
  * @param num number of rows to fill
  * @param column destination vector; its data type selects the conversion
  * @param arrayLen byte length of each value
  * @throws UnsupportedOperationException if the column's data type matches none of the
  *     {@code DecimalType} checks performed here
  */
 private void readFixedLenByteArrayBatch(int rowId, int num, ColumnVector column, int arrayLen)
     throws IOException {
   final VectorizedValuesReader values = (VectorizedValuesReader) dataColumn;
   // Each supported conversion below fills the rows and returns.
   // TODO: implement remaining type conversions
   if (DecimalType.is32BitDecimalType(column.dataType())) {
     for (int offset = 0; offset < num; offset++) {
       final int row = rowId + offset;
       if (defColumn.readInteger() != maxDefLevel) {
         column.putNull(row);
         continue;
       }
       final long unscaled =
           ParquetRowConverter.binaryToUnscaledLong(values.readBinary(arrayLen));
       column.putInt(row, (int) unscaled);
     }
     return;
   }

   if (DecimalType.is64BitDecimalType(column.dataType())) {
     for (int offset = 0; offset < num; offset++) {
       final int row = rowId + offset;
       if (defColumn.readInteger() != maxDefLevel) {
         column.putNull(row);
         continue;
       }
       final long unscaled =
           ParquetRowConverter.binaryToUnscaledLong(values.readBinary(arrayLen));
       column.putLong(row, unscaled);
     }
     return;
   }

   if (DecimalType.isByteArrayDecimalType(column.dataType())) {
     for (int offset = 0; offset < num; offset++) {
       final int row = rowId + offset;
       if (defColumn.readInteger() != maxDefLevel) {
         column.putNull(row);
         continue;
       }
       column.putByteArray(row, values.readBinary(arrayLen).getBytes());
     }
     return;
   }

   throw new UnsupportedOperationException("Unimplemented type: " + column.dataType());
 }

Esempio n. 2

0

Mostra file

File: VectorizedColumnReader.java Progetto: ChrisYohann/spark

 /**
  * Reads {@code num} values into {@code column}, starting at row {@code rowId}, picking the
  * {@code defColumn} read method from the column's data type.
  *
  * @throws UnsupportedOperationException if the column's data type is not handled here
  */
 private void readIntBatch(int rowId, int num, ColumnVector column) throws IOException {
   // TODO: implement remaining type conversions
   // IntegerType, DateType and 32-bit decimals all go through readIntegers.
   final boolean readAsInt =
       column.dataType() == DataTypes.IntegerType
           || column.dataType() == DataTypes.DateType
           || DecimalType.is32BitDecimalType(column.dataType());
   if (readAsInt) {
     defColumn.readIntegers(num, column, rowId, maxDefLevel, (VectorizedValuesReader) dataColumn);
     return;
   }
   if (column.dataType() == DataTypes.ByteType) {
     defColumn.readBytes(num, column, rowId, maxDefLevel, (VectorizedValuesReader) dataColumn);
     return;
   }
   if (column.dataType() == DataTypes.ShortType) {
     defColumn.readShorts(num, column, rowId, maxDefLevel, (VectorizedValuesReader) dataColumn);
     return;
   }
   throw new UnsupportedOperationException("Unimplemented type: " + column.dataType());
 }

Esempio n. 3

0

Mostra file

File: VectorizedColumnReader.java Progetto: ChrisYohann/spark

 /**
  * Reads {@code num} double values into {@code column}, starting at row {@code rowId}.
  *
  * @throws UnsupportedOperationException if the column's data type is not DoubleType
  */
 private void readDoubleBatch(int rowId, int num, ColumnVector column) throws IOException {
   // TODO: implement remaining type conversions
   // Reject anything other than DoubleType up front; no conversion is implemented yet.
   if (column.dataType() != DataTypes.DoubleType) {
     throw new UnsupportedOperationException("Unimplemented type: " + column.dataType());
   }
   defColumn.readDoubles(num, column, rowId, maxDefLevel, (VectorizedValuesReader) dataColumn);
 }

Esempio n. 4

0

Mostra file

File: VectorizedColumnReader.java Progetto: ChrisYohann/spark

 /**
  * Reads {@code num} float values into {@code column}, starting at row {@code rowId}.
  *
  * @throws UnsupportedOperationException if the column's data type is not FloatType
  */
 private void readFloatBatch(int rowId, int num, ColumnVector column) throws IOException {
   // TODO: support implicit cast to double?
   // Only FloatType is accepted; every other target type is rejected before reading.
   if (column.dataType() != DataTypes.FloatType) {
     throw new UnsupportedOperationException("Unsupported conversion to: " + column.dataType());
   }
   defColumn.readFloats(num, column, rowId, maxDefLevel, (VectorizedValuesReader) dataColumn);
 }

Esempio n. 5

0

Mostra file

File: VectorizedColumnReader.java Progetto: ChrisYohann/spark

 /**
  * Reads {@code num} long values into {@code column}, starting at row {@code rowId}.
  *
  * @throws UnsupportedOperationException if the column's data type is neither LongType nor a
  *     64-bit decimal
  */
 private void readLongBatch(int rowId, int num, ColumnVector column) throws IOException {
   // Accept LongType and 64-bit decimals; reject everything else before reading.
   if (column.dataType() != DataTypes.LongType
       && !DecimalType.is64BitDecimalType(column.dataType())) {
     throw new UnsupportedOperationException("Unsupported conversion to: " + column.dataType());
   }
   defColumn.readLongs(num, column, rowId, maxDefLevel, (VectorizedValuesReader) dataColumn);
 }

Esempio n. 6

0

Mostra file

File: VectorizedColumnReader.java Progetto: ChrisYohann/spark

 /**
  * Reads {@code num} binary values into {@code column}, starting at row {@code rowId}.
  *
  * <p>Array columns are filled through {@code defColumn.readBinarys}. TimestampType columns are
  * filled row by row: 12 bytes are read per value and converted with
  * {@code ParquetRowConverter.binaryToSQLTimestamp}; a row whose definition level differs from
  * {@code maxDefLevel} is written as null.
  *
  * @throws UnsupportedOperationException if the column is neither an array nor TimestampType
  */
 private void readBinaryBatch(int rowId, int num, ColumnVector column) throws IOException {
   // TODO: implement remaining type conversions
   final VectorizedValuesReader values = (VectorizedValuesReader) dataColumn;
   if (column.isArray()) {
     defColumn.readBinarys(num, column, rowId, maxDefLevel, values);
     return;
   }
   if (column.dataType() != DataTypes.TimestampType) {
     throw new UnsupportedOperationException("Unimplemented type: " + column.dataType());
   }

   // An INT96 value is read as 12 bytes.
   final int int96Length = 12;
   for (int offset = 0; offset < num; offset++) {
     final int row = rowId + offset;
     if (defColumn.readInteger() != maxDefLevel) {
       column.putNull(row);
       continue;
     }
     column.putLong(row, ParquetRowConverter.binaryToSQLTimestamp(values.readBinary(int96Length)));
   }
 }

Esempio n. 7

0

Mostra file

File: VectorizedColumnReader.java Progetto: ChrisYohann/spark

 /**
  * Reads {@code num} boolean values from this column reader into {@code column}, starting at row
  * {@code rowId}.
  *
  * <p>This contract is shared by all the read*Batch functions: the caller guarantees that
  * {@code num} does not exceed the number of values left in the current page.
  */
 private void readBooleanBatch(int rowId, int num, ColumnVector column) throws IOException {
   assert column.dataType() == DataTypes.BooleanType;
   final VectorizedValuesReader values = (VectorizedValuesReader) dataColumn;
   defColumn.readBooleans(num, column, rowId, maxDefLevel, values);
 }

Esempio n. 8

0

Mostra file

File: VectorizedColumnReader.java Progetto: ChrisYohann/spark

  /**
   * Reads `total` values from this columnReader into column.
   *
   * <p>The values are consumed page by page: whenever the current page has no values left,
   * {@code readPage()} is called to advance to the next one, and at most the remainder of the
   * current page is read per loop iteration. Rows are written to {@code column} starting at row 0.
   *
   * <p>Dictionary-encoded pages are either left encoded (the dictionary is attached to the column
   * for lazy decoding) or decoded eagerly through {@code decodeDictionaryIds}. Pages that are not
   * dictionary encoded are dispatched on {@code descriptor.getType()} to the matching read*Batch
   * helper.
   *
   * @param total number of values to read
   * @param column destination vector
   * @throws IOException if {@code descriptor.getType()} is not one of the types handled by the
   *     switch below
   */
  void readBatch(int total, ColumnVector column) throws IOException {
    // Next row of `column` to fill; advanced by the number of values read from each page.
    int rowId = 0;
    ColumnVector dictionaryIds = null;
    if (dictionary != null) {
      // SPARK-16334: We only maintain a single dictionary per row batch, so that it can be used to
      // decode all previous dictionary encoded pages if we ever encounter a non-dictionary encoded
      // page.
      dictionaryIds = column.reserveDictionaryIds(total);
    }
    // NOTE(review): dictionaryIds stays null when dictionary == null; presumably a
    // dictionary-encoded page implies a non-null dictionary -- confirm against readPage().
    while (total > 0) {
      // Compute the number of values we want to read in this page.
      int leftInPage = (int) (endOfPageValueCount - valuesRead);
      if (leftInPage == 0) {
        // Current page is exhausted: load the next page, then recompute what it offers.
        readPage();
        leftInPage = (int) (endOfPageValueCount - valuesRead);
      }
      // Never read past the end of the current page in a single iteration.
      int num = Math.min(total, leftInPage);
      if (isCurrentPageDictionaryEncoded) {
        // Read and decode dictionary ids.
        defColumn.readIntegers(
            num, dictionaryIds, column, rowId, maxDefLevel, (VectorizedValuesReader) dataColumn);
        if (column.hasDictionary()
            || (rowId == 0
                && (descriptor.getType() == PrimitiveType.PrimitiveTypeName.INT32
                    || descriptor.getType() == PrimitiveType.PrimitiveTypeName.INT64
                    || descriptor.getType() == PrimitiveType.PrimitiveTypeName.FLOAT
                    || descriptor.getType() == PrimitiveType.PrimitiveTypeName.DOUBLE
                    || descriptor.getType() == PrimitiveType.PrimitiveTypeName.BINARY))) {
          // Column vector supports lazy decoding of dictionary values so just set the dictionary.
          // We can't do this if rowId != 0 AND the column doesn't have a dictionary (i.e. some
          // non-dictionary encoded values have already been added).
          column.setDictionary(dictionary);
        } else {
          // Lazy decoding is not possible here, so materialize the values for rows
          // [rowId, rowId + num) right away.
          decodeDictionaryIds(rowId, num, column, dictionaryIds);
        }
      } else {
        if (column.hasDictionary() && rowId != 0) {
          // This batch already has dictionary encoded values but this new page is not. The batch
          // does not support a mix of dictionary and not so we will decode the dictionary.
          decodeDictionaryIds(0, rowId, column, column.getDictionaryIds());
        }
        // From here on the column holds plain values only, so detach any dictionary.
        column.setDictionary(null);
        // Dispatch on the column's type to the matching batch reader. INT96 and
        // BINARY share readBinaryBatch.
        switch (descriptor.getType()) {
          case BOOLEAN:
            readBooleanBatch(rowId, num, column);
            break;
          case INT32:
            readIntBatch(rowId, num, column);
            break;
          case INT64:
            readLongBatch(rowId, num, column);
            break;
          case INT96:
            readBinaryBatch(rowId, num, column);
            break;
          case FLOAT:
            readFloatBatch(rowId, num, column);
            break;
          case DOUBLE:
            readDoubleBatch(rowId, num, column);
            break;
          case BINARY:
            readBinaryBatch(rowId, num, column);
            break;
          case FIXED_LEN_BYTE_ARRAY:
            readFixedLenByteArrayBatch(rowId, num, column, descriptor.getTypeLength());
            break;
          default:
            throw new IOException("Unsupported type: " + descriptor.getType());
        }
      }

      // Account for the values just consumed: page progress, output row, remaining work.
      valuesRead += num;
      rowId += num;
      total -= num;
    }
  }