public ParquetListConverter(Type prestoType, String columnName, GroupType listType)
{
    checkArgument(
            listType.getFieldCount() == 1,
            "Expected LIST column '%s' to only have one field, but has %s fields",
            columnName,
            listType.getFieldCount());
    checkArgument(ARRAY.equals(prestoType.getTypeSignature().getBase()));

    this.arrayType = prestoType;

    // The Parquet specification requires that the element value of a
    // LIST type be wrapped in an inner repeated group, like so:
    //
    // optional group listField (LIST) {
    //   repeated group list {
    //     optional int32 element;
    //   }
    // }
    //
    // However, some Parquet libraries don't follow this spec. The
    // compatibility rules used here are specified in the Parquet
    // documentation at http://git.io/vOpNz.
    parquet.schema.Type elementType = listType.getType(0);
    if (isElementType(elementType, listType.getName())) {
        elementConverter = createConverter(
                prestoType.getTypeParameters().get(0),
                columnName + ".element",
                elementType);
    }
    else {
        elementConverter = new ParquetListEntryConverter(
                prestoType.getTypeParameters().get(0),
                columnName,
                elementType.asGroupType());
    }
}
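// For reference, a minimal sketch of the isElementType compatibility check
// called above, following the backward-compatibility rules in the Parquet
// documentation linked in the comment. The method body is an assumption about
// the surrounding class, not a verbatim excerpt: a repeated field is itself
// the element (rather than the spec's inner "list" wrapper) when it is
// primitive, has more than one field, or carries a legacy writer name.
private static boolean isElementType(parquet.schema.Type repeatedType, String parentName)
{
    if (repeatedType.isPrimitive() || (repeatedType.asGroupType().getFieldCount() > 1)) {
        // A primitive or multi-field repeated group cannot be the spec's
        // single-field "list" wrapper, so it must be the element itself
        return true;
    }
    if (repeatedType.getName().equals("array")) {
        // Legacy name emitted by Hive-written files
        return true;
    }
    if (repeatedType.getName().equals(parentName + "_tuple")) {
        // Legacy name emitted by thrift-written files
        return true;
    }
    return false;
}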
private static Object nativeContainerToOrcValue(Type type, Object nativeValue)
{
    if (nativeValue == null) {
        return null;
    }
    if (type instanceof DecimalType) {
        BigInteger unscaledValue;
        DecimalType decimalType = (DecimalType) type;
        if (decimalType.isShort()) {
            unscaledValue = BigInteger.valueOf((long) nativeValue);
        }
        else {
            unscaledValue = Decimals.decodeUnscaledValue((Slice) nativeValue);
        }
        return HiveDecimal.create(unscaledValue, decimalType.getScale());
    }
    if (type.getJavaType() == boolean.class) {
        return nativeValue;
    }
    if (type.getJavaType() == long.class) {
        return nativeValue;
    }
    if (type.getJavaType() == double.class) {
        return nativeValue;
    }
    if (type.getJavaType() == Slice.class) {
        Slice slice = (Slice) nativeValue;
        return type instanceof VarcharType ? slice.toStringUtf8() : slice.getBytes();
    }
    if (isArrayType(type)) {
        Block arrayBlock = (Block) nativeValue;
        Type elementType = type.getTypeParameters().get(0);
        List<Object> list = new ArrayList<>();
        for (int i = 0; i < arrayBlock.getPositionCount(); i++) {
            list.add(nativeContainerToOrcValue(
                    elementType,
                    getNativeContainerValue(elementType, arrayBlock, i)));
        }
        return list;
    }
    if (isMapType(type)) {
        Block mapBlock = (Block) nativeValue;
        Type keyType = type.getTypeParameters().get(0);
        Type valueType = type.getTypeParameters().get(1);
        Map<Object, Object> map = new HashMap<>();
        // Map blocks interleave keys and values: even positions hold keys,
        // odd positions hold the corresponding values
        for (int i = 0; i < mapBlock.getPositionCount(); i += 2) {
            Object key = nativeContainerToOrcValue(keyType, getNativeContainerValue(keyType, mapBlock, i));
            Object value = nativeContainerToOrcValue(valueType, getNativeContainerValue(valueType, mapBlock, i + 1));
            map.put(key, value);
        }
        return map;
    }
    throw new PrestoException(INTERNAL_ERROR, "Unimplemented type: " + type);
}
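// A minimal sketch of the getNativeContainerValue helper referenced above,
// assuming the Type accessors of this Presto version (getBoolean, getLong,
// getDouble, getSlice, getObject on a block position). It reads one position
// of a block back out as the native Java value that nativeContainerToOrcValue
// expects; treat the exact body as an assumption about the surrounding class.
private static Object getNativeContainerValue(Type type, Block block, int position)
{
    if (block.isNull(position)) {
        return null;
    }
    if (type.getJavaType() == boolean.class) {
        return type.getBoolean(block, position);
    }
    if (type.getJavaType() == long.class) {
        return type.getLong(block, position);
    }
    if (type.getJavaType() == double.class) {
        return type.getDouble(block, position);
    }
    if (type.getJavaType() == Slice.class) {
        return type.getSlice(block, position);
    }
    if (type.getJavaType() == Block.class) {
        // Nested ARRAY/MAP/ROW values are themselves blocks
        return type.getObject(block, position);
    }
    throw new PrestoException(INTERNAL_ERROR, "Unimplemented type: " + type);
}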
private static Block serializeList(Type type, BlockBuilder builder, Object object, ListObjectInspector inspector)
{
    List<?> list = inspector.getList(object);
    if (list == null) {
        requireNonNull(builder, "parent builder is null").appendNull();
        return null;
    }

    List<Type> typeParameters = type.getTypeParameters();
    checkArgument(typeParameters.size() == 1, "list must have exactly 1 type parameter");
    Type elementType = typeParameters.get(0);
    ObjectInspector elementInspector = inspector.getListElementObjectInspector();

    // With a parent builder, append in place; otherwise build a standalone block
    BlockBuilder currentBuilder;
    if (builder != null) {
        currentBuilder = builder.beginBlockEntry();
    }
    else {
        currentBuilder = elementType.createBlockBuilder(new BlockBuilderStatus(), list.size());
    }

    for (Object element : list) {
        serializeObject(elementType, currentBuilder, element, elementInspector);
    }

    if (builder != null) {
        builder.closeEntry();
        return null;
    }
    return currentBuilder.build();
}
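// The serializeObject call above dispatches on the Hive inspector category. A
// minimal sketch of that dispatcher, assuming the categories handled in this
// file; serializePrimitive is an assumption about the surrounding class.
// Primitives append directly and return null, while container types may build
// and return a standalone block when no parent builder is supplied.
private static Block serializeObject(Type type, BlockBuilder builder, Object object, ObjectInspector inspector)
{
    switch (inspector.getCategory()) {
        case PRIMITIVE:
            serializePrimitive(type, builder, object, (PrimitiveObjectInspector) inspector);
            return null;
        case LIST:
            return serializeList(type, builder, object, (ListObjectInspector) inspector);
        case MAP:
            return serializeMap(type, builder, object, (MapObjectInspector) inspector);
        case STRUCT:
            return serializeStruct(type, builder, object, (StructObjectInspector) inspector);
        default:
            throw new IllegalArgumentException("Unsupported object inspector category: " + inspector.getCategory());
    }
}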
public ParquetMapEntryConverter(Type prestoType, String columnName, GroupType entryType)
{
    checkArgument(StandardTypes.MAP.equals(prestoType.getTypeSignature().getBase()));
    // The original version of Parquet used null for the entry annotation due to a bug
    if (entryType.getOriginalType() != null) {
        checkArgument(
                entryType.getOriginalType() == MAP_KEY_VALUE,
                "Expected MAP column '%s' field to be type %s, but is %s",
                columnName,
                MAP_KEY_VALUE,
                entryType);
    }

    GroupType entryGroupType = entryType.asGroupType();
    checkArgument(
            entryGroupType.getFieldCount() == 2,
            "Expected MAP column '%s' entry to have two fields, but has %s fields",
            columnName,
            entryGroupType.getFieldCount());
    checkArgument(
            entryGroupType.getFieldName(0).equals("key"),
            "Expected MAP column '%s' entry field 0 to be named 'key', but is named %s",
            columnName,
            entryGroupType.getFieldName(0));
    checkArgument(
            entryGroupType.getFieldName(1).equals("value"),
            "Expected MAP column '%s' entry field 1 to be named 'value', but is named %s",
            columnName,
            entryGroupType.getFieldName(1));
    checkArgument(
            entryGroupType.getType(0).isPrimitive(),
            "Expected MAP column '%s' entry field 0 to be primitive, but is %s",
            columnName,
            entryGroupType.getType(0));

    keyConverter = createConverter(
            prestoType.getTypeParameters().get(0),
            columnName + ".key",
            entryGroupType.getFields().get(0));
    valueConverter = createConverter(
            prestoType.getTypeParameters().get(1),
            columnName + ".value",
            entryGroupType.getFields().get(1));
}
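// For comparison with the LIST layout shown earlier, the Parquet
// specification represents a MAP as a repeated key/value group, which is
// exactly the shape the checks above enforce:
//
//   optional group mapField (MAP) {
//     repeated group map (MAP_KEY_VALUE) {
//       required binary key (UTF8);
//       optional int32 value;
//     }
//   }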
public ParquetStructConverter(Type prestoType, String columnName, GroupType entryType)
{
    checkArgument(ROW.equals(prestoType.getTypeSignature().getBase()));

    List<Type> prestoTypeParameters = prestoType.getTypeParameters();
    List<parquet.schema.Type> fieldTypes = entryType.getFields();
    checkArgument(prestoTypeParameters.size() == fieldTypes.size());

    this.rowType = prestoType;

    ImmutableList.Builder<BlockConverter> converters = ImmutableList.builder();
    for (int i = 0; i < prestoTypeParameters.size(); i++) {
        parquet.schema.Type fieldType = fieldTypes.get(i);
        converters.add(createConverter(
                prestoTypeParameters.get(i),
                columnName + "." + fieldType.getName(),
                fieldType));
    }
    this.converters = converters.build();
}
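// A minimal sketch of the createConverter dispatcher that the constructors in
// this section call. ParquetPrimitiveConverter and ParquetMapConverter, and
// their constructor shapes, are assumptions about the surrounding class; the
// group converters are dispatched on the Parquet original-type annotation.
private static BlockConverter createConverter(Type prestoType, String columnName, parquet.schema.Type parquetType)
{
    if (parquetType.isPrimitive()) {
        // Hypothetical leaf converter for primitive Parquet fields
        return new ParquetPrimitiveConverter(prestoType, columnName, parquetType);
    }
    GroupType groupType = parquetType.asGroupType();
    if (groupType.getOriginalType() == OriginalType.LIST) {
        return new ParquetListConverter(prestoType, columnName, groupType);
    }
    if (groupType.getOriginalType() == OriginalType.MAP) {
        // Hypothetical map converter wrapping ParquetMapEntryConverter
        return new ParquetMapConverter(prestoType, columnName, groupType);
    }
    // Groups without a LIST/MAP annotation are treated as ROW structs
    return new ParquetStructConverter(prestoType, columnName, groupType);
}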
private static Block serializeMap(Type type, BlockBuilder builder, Object object, MapObjectInspector inspector)
{
    Map<?, ?> map = inspector.getMap(object);
    if (map == null) {
        requireNonNull(builder, "parent builder is null").appendNull();
        return null;
    }

    List<Type> typeParameters = type.getTypeParameters();
    checkArgument(typeParameters.size() == 2, "map must have exactly 2 type parameters");
    Type keyType = typeParameters.get(0);
    Type valueType = typeParameters.get(1);
    ObjectInspector keyInspector = inspector.getMapKeyObjectInspector();
    ObjectInspector valueInspector = inspector.getMapValueObjectInspector();

    BlockBuilder currentBuilder;
    if (builder != null) {
        currentBuilder = builder.beginBlockEntry();
    }
    else {
        currentBuilder = new InterleavedBlockBuilder(typeParameters, new BlockBuilderStatus(), map.size());
    }

    for (Map.Entry<?, ?> entry : map.entrySet()) {
        // Hive skips map entries with null keys
        if (entry.getKey() != null) {
            serializeObject(keyType, currentBuilder, entry.getKey(), keyInspector);
            serializeObject(valueType, currentBuilder, entry.getValue(), valueInspector);
        }
    }

    if (builder != null) {
        builder.closeEntry();
        return null;
    }
    return currentBuilder.build();
}
private static Block serializeStruct(Type type, BlockBuilder builder, Object object, StructObjectInspector inspector)
{
    if (object == null) {
        requireNonNull(builder, "parent builder is null").appendNull();
        return null;
    }

    List<Type> typeParameters = type.getTypeParameters();
    List<? extends StructField> allStructFieldRefs = inspector.getAllStructFieldRefs();
    checkArgument(typeParameters.size() == allStructFieldRefs.size());

    BlockBuilder currentBuilder;
    if (builder != null) {
        currentBuilder = builder.beginBlockEntry();
    }
    else {
        currentBuilder = new InterleavedBlockBuilder(typeParameters, new BlockBuilderStatus(), typeParameters.size());
    }

    for (int i = 0; i < typeParameters.size(); i++) {
        StructField field = allStructFieldRefs.get(i);
        serializeObject(
                typeParameters.get(i),
                currentBuilder,
                inspector.getStructFieldData(object, field),
                field.getFieldObjectInspector());
    }

    if (builder != null) {
        builder.closeEntry();
        return null;
    }
    return currentBuilder.build();
}
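// Usage sketch for the three serializers above (the row and inspector values
// named here are hypothetical): with a parent builder the value is appended
// in place and null is returned; with a null builder a standalone Block is
// built and returned.
//
//   Block standalone = serializeStruct(rowType, null, hiveStruct, structInspector);
//   serializeStruct(rowType, parentBuilder, hiveStruct, structInspector); // appends, returns null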
@Override
public Block readBlock(Type type)
        throws IOException
{
    if (!rowGroupOpen) {
        openRowGroup();
    }

    if (readOffset > 0) {
        if (presentStream != null) {
            // skip ahead in the present bit reader, but count the set bits
            // and use this as the skip size for the field readers
            readOffset = presentStream.countBitsSet(readOffset);
        }
        for (StreamReader structField : structFields) {
            structField.prepareNextRead(readOffset);
        }
    }

    List<Type> typeParameters = type.getTypeParameters();

    boolean[] nullVector = new boolean[nextBatchSize];
    Block[] blocks = new Block[typeParameters.size()];
    if (presentStream == null) {
        for (int i = 0; i < typeParameters.size(); i++) {
            StreamReader structField = structFields[i];
            structField.prepareNextRead(nextBatchSize);
            blocks[i] = structField.readBlock(typeParameters.get(i));
        }
    }
    else {
        int nullValues = presentStream.getUnsetBits(nextBatchSize, nullVector);
        if (nullValues != nextBatchSize) {
            for (int i = 0; i < typeParameters.size(); i++) {
                StreamReader structField = structFields[i];
                structField.prepareNextRead(nextBatchSize - nullValues);
                blocks[i] = structField.readBlock(typeParameters.get(i));
            }
        }
        else {
            // Entire batch is null: substitute an empty block for every field
            for (int i = 0; i < typeParameters.size(); i++) {
                blocks[i] = typeParameters.get(i).createBlockBuilder(new BlockBuilderStatus(), 0).build();
            }
        }
    }

    // Build offsets for the array block (null values have no positions)
    int[] offsets = new int[nextBatchSize];
    offsets[0] = (nullVector[0] ? 0 : typeParameters.size());
    for (int i = 1; i < nextBatchSize; i++) {
        offsets[i] = offsets[i - 1] + (nullVector[i] ? 0 : typeParameters.size());
    }

    // Struct is represented as an array block holding an interleaved block
    InterleavedBlock interleavedBlock = new InterleavedBlock(blocks);
    ArrayBlock arrayBlock = new ArrayBlock(
            interleavedBlock,
            Slices.wrappedIntArray(offsets),
            0,
            Slices.wrappedBooleanArray(nullVector));

    readOffset = 0;
    nextBatchSize = 0;

    return arrayBlock;
}
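// Worked example of the offset computation above: with nextBatchSize = 4, two
// struct fields, and a null at position 2, the cumulative per-position
// offsets come out as
//
//   nullVector: [false, false, true, false]
//   offsets:    [2,     4,     4,    6]
//
// so the null row contributes no positions to the interleaved block, and each
// non-null row spans exactly typeParameters.size() positions.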