public ParquetListConverter(Type prestoType, String columnName, GroupType listType) {
      // A Parquet LIST group must contain exactly one (repeated) field.
      checkArgument(
          listType.getFieldCount() == 1,
          "Expected LIST column '%s' to only have one field, but has %s fields",
          columnName,
          listType.getFieldCount());
      // Consistent with the other checks: report column name and actual type on failure
      // (the original bare checkArgument produced a message-less IllegalArgumentException).
      checkArgument(
          ARRAY.equals(prestoType.getTypeSignature().getBase()),
          "Expected column '%s' to be of type ARRAY, but is %s",
          columnName,
          prestoType);

      this.arrayType = prestoType;

      // The Parquet specification requires that the element value of a
      // LIST type be wrapped in an inner repeated group, like so:
      //
      // optional group listField (LIST) {
      //   repeated group list {
      //     optional int element
      //   }
      // }
      //
      // However, some parquet libraries don't follow this spec. The
      // compatibility rules used here are specified in the Parquet
      // documentation at http://git.io/vOpNz.
      parquet.schema.Type elementType = listType.getType(0);
      if (isElementType(elementType, listType.getName())) {
        // Non-spec layout: the repeated field IS the element itself.
        elementConverter =
            createConverter(
                prestoType.getTypeParameters().get(0), columnName + ".element", elementType);
      } else {
        // Spec-compliant layout: unwrap the inner repeated group around the element.
        elementConverter =
            new ParquetListEntryConverter(
                prestoType.getTypeParameters().get(0), columnName, elementType.asGroupType());
      }
    }
// Example #2
 /**
  * Converts a Presto native container value into the equivalent ORC writer value.
  * Returns null for null input; throws PrestoException for unsupported types.
  */
 private static Object nativeContainerToOrcValue(Type type, Object nativeValue) {
   if (nativeValue == null) {
     return null;
   }
   // Decimals must be handled before the raw java-type checks below: a short
   // decimal travels as a long and a long decimal as a Slice.
   if (type instanceof DecimalType) {
     DecimalType decimalType = (DecimalType) type;
     BigInteger unscaled =
         decimalType.isShort()
             ? BigInteger.valueOf((long) nativeValue)
             : Decimals.decodeUnscaledValue((Slice) nativeValue);
     return HiveDecimal.create(unscaled, decimalType.getScale());
   }
   Class<?> javaType = type.getJavaType();
   // Primitive-backed values are already in the form ORC expects.
   if (javaType == boolean.class || javaType == long.class || javaType == double.class) {
     return nativeValue;
   }
   if (javaType == Slice.class) {
     Slice slice = (Slice) nativeValue;
     // Varchar becomes a Java String; everything else stays raw bytes.
     return type instanceof VarcharType ? slice.toStringUtf8() : slice.getBytes();
   }
   if (isArrayType(type)) {
     Block arrayBlock = (Block) nativeValue;
     Type elementType = type.getTypeParameters().get(0);
     int positions = arrayBlock.getPositionCount();
     List<Object> elements = new ArrayList<>(positions);
     for (int position = 0; position < positions; position++) {
       elements.add(
           nativeContainerToOrcValue(
               elementType, getNativeContainerValue(elementType, arrayBlock, position)));
     }
     return elements;
   }
   if (isMapType(type)) {
     Block mapBlock = (Block) nativeValue;
     Type keyType = type.getTypeParameters().get(0);
     Type valueType = type.getTypeParameters().get(1);
     Map<Object, Object> entries = new HashMap<>();
     // Map blocks interleave key/value pairs, hence the step of 2.
     for (int position = 0; position < mapBlock.getPositionCount(); position += 2) {
       Object key =
           nativeContainerToOrcValue(
               keyType, getNativeContainerValue(keyType, mapBlock, position));
       Object value =
           nativeContainerToOrcValue(
               valueType, getNativeContainerValue(valueType, mapBlock, position + 1));
       entries.put(key, value);
     }
     return entries;
   }
   throw new PrestoException(INTERNAL_ERROR, "Unimplemented type: " + type);
 }
// Example #3
  /**
   * Serializes a Hive list into a Presto block. With a parent builder, the list is
   * appended as one entry and null is returned; without one, a standalone block
   * containing the elements is returned. A null list appends a null to the parent.
   */
  private static Block serializeList(
      Type type, BlockBuilder builder, Object object, ListObjectInspector inspector) {
    List<?> values = inspector.getList(object);
    if (values == null) {
      requireNonNull(builder, "parent builder is null").appendNull();
      return null;
    }

    List<Type> typeParameters = type.getTypeParameters();
    checkArgument(typeParameters.size() == 1, "list must have exactly 1 type parameter");
    Type elementType = typeParameters.get(0);
    ObjectInspector elementInspector = inspector.getListElementObjectInspector();

    // Nested: write into the parent's entry. Top-level: build a fresh block.
    boolean nested = builder != null;
    BlockBuilder elementBuilder =
        nested
            ? builder.beginBlockEntry()
            : elementType.createBlockBuilder(new BlockBuilderStatus(), values.size());

    for (Object value : values) {
      serializeObject(elementType, elementBuilder, value, elementInspector);
    }

    if (!nested) {
      return elementBuilder.build();
    }
    builder.closeEntry();
    return null;
  }
    public ParquetMapEntryConverter(Type prestoType, String columnName, GroupType entryType) {
      // Consistent with the other checks: include the column name and actual type
      // (the original bare checkArgument produced a message-less exception).
      checkArgument(
          StandardTypes.MAP.equals(prestoType.getTypeSignature().getBase()),
          "Expected column '%s' to be of type MAP, but is %s",
          columnName,
          prestoType);
      // original version of parquet used null for entry due to a bug
      if (entryType.getOriginalType() != null) {
        checkArgument(
            entryType.getOriginalType() == MAP_KEY_VALUE,
            "Expected MAP column '%s' field to be type %s, but is %s",
            columnName,
            MAP_KEY_VALUE,
            entryType);
      }

      // A Parquet map entry group must be exactly {key, value}, with a primitive key.
      GroupType entryGroupType = entryType.asGroupType();
      checkArgument(
          entryGroupType.getFieldCount() == 2,
          "Expected MAP column '%s' entry to have two fields, but has %s fields",
          columnName,
          entryGroupType.getFieldCount());
      checkArgument(
          entryGroupType.getFieldName(0).equals("key"),
          "Expected MAP column '%s' entry field 0 to be named 'key', but is named %s",
          columnName,
          entryGroupType.getFieldName(0));
      checkArgument(
          entryGroupType.getFieldName(1).equals("value"),
          "Expected MAP column '%s' entry field 1 to be named 'value', but is named %s",
          columnName,
          entryGroupType.getFieldName(1));
      // Fixed message: this check reports the field's type, not its name.
      checkArgument(
          entryGroupType.getType(0).isPrimitive(),
          "Expected MAP column '%s' entry field 0 to be primitive, but is %s",
          columnName,
          entryGroupType.getType(0));

      keyConverter =
          createConverter(
              prestoType.getTypeParameters().get(0),
              columnName + ".key",
              entryGroupType.getFields().get(0));
      valueConverter =
          createConverter(
              prestoType.getTypeParameters().get(1),
              columnName + ".value",
              entryGroupType.getFields().get(1));
    }
    public ParquetStructConverter(Type prestoType, String columnName, GroupType entryType) {
      // Consistent with the rest of the file: message-bearing checkArguments so
      // a failure identifies the offending column (originals were message-less).
      checkArgument(
          ROW.equals(prestoType.getTypeSignature().getBase()),
          "Expected column '%s' to be of type ROW, but is %s",
          columnName,
          prestoType);
      List<Type> prestoTypeParameters = prestoType.getTypeParameters();
      List<parquet.schema.Type> fieldTypes = entryType.getFields();
      checkArgument(
          prestoTypeParameters.size() == fieldTypes.size(),
          "Expected ROW column '%s' to have %s fields, but has %s fields",
          columnName,
          prestoTypeParameters.size(),
          fieldTypes.size());

      this.rowType = prestoType;

      // Build one converter per struct field, pairing Presto type parameters with
      // Parquet fields positionally.
      ImmutableList.Builder<BlockConverter> converters = ImmutableList.builder();
      for (int i = 0; i < prestoTypeParameters.size(); i++) {
        parquet.schema.Type fieldType = fieldTypes.get(i);
        converters.add(
            createConverter(
                prestoTypeParameters.get(i), columnName + "." + fieldType.getName(), fieldType));
      }
      this.converters = converters.build();
    }
// Example #6
  /**
   * Serializes a Hive map into a Presto block of interleaved key/value pairs.
   * With a parent builder the map is appended as one entry and null is returned;
   * without one, a standalone block is returned. A null map appends a null to the
   * parent. Entries with null keys are skipped, matching Hive semantics.
   */
  private static Block serializeMap(
      Type type, BlockBuilder builder, Object object, MapObjectInspector inspector) {
    Map<?, ?> map = inspector.getMap(object);
    if (map == null) {
      requireNonNull(builder, "parent builder is null").appendNull();
      return null;
    }

    List<Type> typeParameters = type.getTypeParameters();
    // Fixed grammar in the error message ("parameter" -> "parameters").
    checkArgument(typeParameters.size() == 2, "map must have exactly 2 type parameters");
    Type keyType = typeParameters.get(0);
    Type valueType = typeParameters.get(1);
    ObjectInspector keyInspector = inspector.getMapKeyObjectInspector();
    ObjectInspector valueInspector = inspector.getMapValueObjectInspector();
    BlockBuilder currentBuilder;
    if (builder != null) {
      currentBuilder = builder.beginBlockEntry();
    } else {
      currentBuilder =
          new InterleavedBlockBuilder(typeParameters, new BlockBuilderStatus(), map.size());
    }

    for (Map.Entry<?, ?> entry : map.entrySet()) {
      // Hive skips map entries with null keys
      if (entry.getKey() != null) {
        serializeObject(keyType, currentBuilder, entry.getKey(), keyInspector);
        serializeObject(valueType, currentBuilder, entry.getValue(), valueInspector);
      }
    }

    if (builder != null) {
      builder.closeEntry();
      return null;
    } else {
      return currentBuilder.build();
    }
  }
// Example #7
  /**
   * Serializes a Hive struct into a Presto block, one position per field. With a
   * parent builder the struct is appended as one entry and null is returned;
   * without one, a standalone block is returned. A null struct appends a null to
   * the parent.
   */
  private static Block serializeStruct(
      Type type, BlockBuilder builder, Object object, StructObjectInspector inspector) {
    if (object == null) {
      requireNonNull(builder, "parent builder is null").appendNull();
      return null;
    }

    List<Type> typeParameters = type.getTypeParameters();
    List<? extends StructField> allStructFieldRefs = inspector.getAllStructFieldRefs();
    // Consistent with sibling checks: explain the mismatch instead of a bare failure.
    checkArgument(
        typeParameters.size() == allStructFieldRefs.size(),
        "row type has %s fields, but struct inspector has %s fields",
        typeParameters.size(),
        allStructFieldRefs.size());
    BlockBuilder currentBuilder;
    if (builder != null) {
      currentBuilder = builder.beginBlockEntry();
    } else {
      currentBuilder =
          new InterleavedBlockBuilder(
              typeParameters, new BlockBuilderStatus(), typeParameters.size());
    }

    // Fields are serialized positionally: Presto type parameter i pairs with
    // Hive struct field i.
    for (int i = 0; i < typeParameters.size(); i++) {
      StructField field = allStructFieldRefs.get(i);
      serializeObject(
          typeParameters.get(i),
          currentBuilder,
          inspector.getStructFieldData(object, field),
          field.getFieldObjectInspector());
    }

    if (builder != null) {
      builder.closeEntry();
      return null;
    } else {
      return currentBuilder.build();
    }
  }
  /**
   * Reads the next batch of this ORC struct column as an array block wrapping an
   * interleaved block of the field columns. Null rows contribute no positions.
   */
  @Override
  public Block readBlock(Type type) throws IOException {
    if (!rowGroupOpen) {
      openRowGroup();
    }

    if (readOffset > 0) {
      if (presentStream != null) {
        // skip ahead the present bit reader, but count the set bits
        // and use this as the skip size for the field readers
        readOffset = presentStream.countBitsSet(readOffset);
      }
      for (StreamReader structField : structFields) {
        structField.prepareNextRead(readOffset);
      }
    }

    List<Type> typeParameters = type.getTypeParameters();

    boolean[] nullVector = new boolean[nextBatchSize];
    Block[] blocks = new Block[typeParameters.size()];
    if (presentStream == null) {
      // No present stream: every row is non-null, read full batches.
      for (int i = 0; i < typeParameters.size(); i++) {
        StreamReader structField = structFields[i];
        structField.prepareNextRead(nextBatchSize);
        blocks[i] = structField.readBlock(typeParameters.get(i));
      }
    } else {
      int nullValues = presentStream.getUnsetBits(nextBatchSize, nullVector);
      if (nullValues != nextBatchSize) {
        // Field readers only hold values for the non-null rows.
        for (int i = 0; i < typeParameters.size(); i++) {
          StreamReader structField = structFields[i];
          structField.prepareNextRead(nextBatchSize - nullValues);
          blocks[i] = structField.readBlock(typeParameters.get(i));
        }
      } else {
        // Entire batch is null: emit empty blocks for each field.
        for (int i = 0; i < typeParameters.size(); i++) {
          blocks[i] = typeParameters.get(i).createBlockBuilder(new BlockBuilderStatus(), 0).build();
        }
      }
    }

    // Build offsets for array block (null valued rows have no positions).
    // Accumulating from zero also handles nextBatchSize == 0 safely; the
    // previous direct write to offsets[0] threw ArrayIndexOutOfBoundsException
    // on an empty batch.
    int[] offsets = new int[nextBatchSize];
    int totalPositions = 0;
    for (int i = 0; i < nextBatchSize; i++) {
      if (!nullVector[i]) {
        totalPositions += typeParameters.size();
      }
      offsets[i] = totalPositions;
    }

    // Struct is represented as an array block holding an interleaved block
    InterleavedBlock interleavedBlock = new InterleavedBlock(blocks);
    ArrayBlock arrayBlock =
        new ArrayBlock(
            interleavedBlock,
            Slices.wrappedIntArray(offsets),
            0,
            Slices.wrappedBooleanArray(nullVector));

    readOffset = 0;
    nextBatchSize = 0;

    return arrayBlock;
  }