private void testRoundTrip(HiveColumnHandle expected)
{
    String json = codec.toJson(expected);
    HiveColumnHandle actual = codec.fromJson(json);

    assertEquals(actual.getClientId(), expected.getClientId());
    assertEquals(actual.getName(), expected.getName());
    assertEquals(actual.getHiveType(), expected.getHiveType());
    assertEquals(actual.getHiveColumnIndex(), expected.getHiveColumnIndex());
    assertEquals(actual.isPartitionKey(), expected.isPartitionKey());
}
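// A minimal sketch of how the round-trip helper might be driven. The codec field
// comes from Airlift's JsonCodec factory; the HiveColumnHandle constructor shown
// here is an assumption -- its exact signature varies across Presto versions.
private final JsonCodec<HiveColumnHandle> codec = JsonCodec.jsonCodec(HiveColumnHandle.class);

@Test
public void testJsonRoundTrip()
{
    // hypothetical handle: an ordinary (non-partition) bigint column at index 0
    testRoundTrip(new HiveColumnHandle("client", "col", HiveType.HIVE_LONG, BIGINT.getTypeSignature(), 0, false));
}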
public PrestoReadSupport(boolean useParquetColumnNames, List<HiveColumnHandle> columns, MessageType messageType)
{
    this.columns = columns;
    this.useParquetColumnNames = useParquetColumnNames;

    ImmutableList.Builder<Converter> converters = ImmutableList.builder();
    for (int i = 0; i < columns.size(); i++) {
        HiveColumnHandle column = columns.get(i);
        if (!column.isPartitionKey()) {
            parquet.schema.Type parquetType = getParquetType(column, messageType);
            if (parquetType == null) {
                continue;
            }
            if (parquetType.isPrimitive()) {
                converters.add(new ParquetPrimitiveColumnConverter(i));
            }
            else {
                GroupType groupType = parquetType.asGroupType();
                switch (column.getTypeSignature().getBase()) {
                    case StandardTypes.ARRAY:
                        ParquetColumnConverter listConverter = new ParquetColumnConverter(
                                new ParquetListConverter(types[i], groupType.getName(), groupType), i);
                        converters.add(listConverter);
                        break;
                    case StandardTypes.MAP:
                        ParquetColumnConverter mapConverter = new ParquetColumnConverter(
                                new ParquetMapConverter(types[i], groupType.getName(), groupType), i);
                        converters.add(mapConverter);
                        break;
                    case StandardTypes.ROW:
                        ParquetColumnConverter rowConverter = new ParquetColumnConverter(
                                new ParquetStructConverter(types[i], groupType.getName(), groupType), i);
                        converters.add(rowConverter);
                        break;
                    default:
                        throw new IllegalArgumentException("Group column " + groupType.getName() + " type " + groupType.getOriginalType() + " not supported");
                }
            }
        }
    }
    this.converters = converters.build();
}
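// The converter wiring above hinges on getParquetType resolving each requested
// column against the file schema. A sketch of that lookup, assuming name-based
// resolution when useParquetColumnNames is set and ordinal (hiveColumnIndex)
// resolution otherwise; this body is illustrative, not the verbatim Presto code.
private parquet.schema.Type getParquetType(HiveColumnHandle column, MessageType messageType)
{
    if (useParquetColumnNames) {
        // match by name, so files whose columns were reordered still resolve correctly
        return messageType.containsField(column.getName()) ? messageType.getType(column.getName()) : null;
    }
    // match by position; a column past the end of the file schema does not exist in this file
    if (column.getHiveColumnIndex() < messageType.getFieldCount()) {
        return messageType.getType(column.getHiveColumnIndex());
    }
    return null;
}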
public GenericHiveRecordCursor(
        RecordReader<K, V> recordReader,
        long totalBytes,
        Properties splitSchema,
        List<HivePartitionKey> partitionKeys,
        List<HiveColumnHandle> columns,
        DateTimeZone hiveStorageTimeZone,
        DateTimeZone sessionTimeZone)
{
    checkNotNull(recordReader, "recordReader is null");
    checkArgument(totalBytes >= 0, "totalBytes is negative");
    checkNotNull(splitSchema, "splitSchema is null");
    checkNotNull(partitionKeys, "partitionKeys is null");
    checkNotNull(columns, "columns is null");
    checkArgument(!columns.isEmpty(), "columns is empty");
    checkNotNull(hiveStorageTimeZone, "hiveStorageTimeZone is null");
    checkNotNull(sessionTimeZone, "sessionTimeZone is null");

    this.recordReader = recordReader;
    this.totalBytes = totalBytes;
    this.key = recordReader.createKey();
    this.value = recordReader.createValue();
    this.hiveStorageTimeZone = hiveStorageTimeZone;
    this.sessionTimeZone = sessionTimeZone;

    this.deserializer = getDeserializer(splitSchema);
    this.rowInspector = getTableObjectInspector(deserializer);

    int size = columns.size();
    String[] names = new String[size];
    this.types = new Type[size];
    this.hiveTypes = new HiveType[size];
    this.structFields = new StructField[size];
    this.fieldInspectors = new ObjectInspector[size];
    this.isPartitionColumn = new boolean[size];
    this.loaded = new boolean[size];
    this.booleans = new boolean[size];
    this.longs = new long[size];
    this.doubles = new double[size];
    this.slices = new Slice[size];
    this.nulls = new boolean[size];

    // initialize data columns
    for (int i = 0; i < columns.size(); i++) {
        HiveColumnHandle column = columns.get(i);

        names[i] = column.getName();
        types[i] = column.getType();
        hiveTypes[i] = column.getHiveType();

        if (!column.isPartitionKey()) {
            StructField field = rowInspector.getStructFieldRef(column.getName());
            structFields[i] = field;
            fieldInspectors[i] = field.getFieldObjectInspector();
        }

        isPartitionColumn[i] = column.isPartitionKey();
    }

    // parse requested partition columns
    Map<String, HivePartitionKey> partitionKeysByName = uniqueIndex(partitionKeys, HivePartitionKey.nameGetter());
    for (int columnIndex = 0; columnIndex < columns.size(); columnIndex++) {
        HiveColumnHandle column = columns.get(columnIndex);
        if (column.isPartitionKey()) {
            HivePartitionKey partitionKey = partitionKeysByName.get(column.getName());
            checkArgument(partitionKey != null, "Unknown partition key %s", column.getName());

            byte[] bytes = partitionKey.getValue().getBytes(Charsets.UTF_8);

            Type type = types[columnIndex];
            if (BOOLEAN.equals(type)) {
                if (isTrue(bytes, 0, bytes.length)) {
                    booleans[columnIndex] = true;
                }
                else if (isFalse(bytes, 0, bytes.length)) {
                    booleans[columnIndex] = false;
                }
                else {
                    String valueString = new String(bytes, Charsets.UTF_8);
                    throw new IllegalArgumentException(String.format("Invalid partition value '%s' for BOOLEAN partition key %s", valueString, names[columnIndex]));
                }
            }
            else if (BIGINT.equals(type)) {
                if (bytes.length == 0) {
                    throw new IllegalArgumentException(String.format("Invalid partition value '' for BIGINT partition key %s", names[columnIndex]));
                }
                longs[columnIndex] = parseLong(bytes, 0, bytes.length);
            }
            else if (DOUBLE.equals(type)) {
                if (bytes.length == 0) {
                    throw new IllegalArgumentException(String.format("Invalid partition value '' for DOUBLE partition key %s", names[columnIndex]));
                }
                doubles[columnIndex] = parseDouble(bytes, 0, bytes.length);
            }
            else if (VARCHAR.equals(type)) {
                slices[columnIndex] = Slices.wrappedBuffer(Arrays.copyOf(bytes, bytes.length));
            }
            else {
                throw new UnsupportedOperationException("Unsupported column type: " + type);
            }
        }
    }
}
public GenericHiveRecordCursor(
        RecordReader<K, V> recordReader,
        long totalBytes,
        Properties splitSchema,
        List<HivePartitionKey> partitionKeys,
        List<HiveColumnHandle> columns,
        DateTimeZone hiveStorageTimeZone,
        TypeManager typeManager)
{
    requireNonNull(recordReader, "recordReader is null");
    checkArgument(totalBytes >= 0, "totalBytes is negative");
    requireNonNull(splitSchema, "splitSchema is null");
    requireNonNull(partitionKeys, "partitionKeys is null");
    requireNonNull(columns, "columns is null");
    requireNonNull(hiveStorageTimeZone, "hiveStorageTimeZone is null");

    this.recordReader = recordReader;
    this.totalBytes = totalBytes;
    this.key = recordReader.createKey();
    this.value = recordReader.createValue();
    this.hiveStorageTimeZone = hiveStorageTimeZone;

    this.deserializer = getDeserializer(splitSchema);
    this.rowInspector = getTableObjectInspector(deserializer);

    int size = columns.size();
    String[] names = new String[size];
    this.types = new Type[size];
    this.hiveTypes = new HiveType[size];
    this.structFields = new StructField[size];
    this.fieldInspectors = new ObjectInspector[size];
    this.isPartitionColumn = new boolean[size];
    this.loaded = new boolean[size];
    this.booleans = new boolean[size];
    this.longs = new long[size];
    this.doubles = new double[size];
    this.slices = new Slice[size];
    this.objects = new Object[size];
    this.nulls = new boolean[size];

    // initialize data columns
    for (int i = 0; i < columns.size(); i++) {
        HiveColumnHandle column = columns.get(i);

        names[i] = column.getName();
        types[i] = typeManager.getType(column.getTypeSignature());
        hiveTypes[i] = column.getHiveType();

        if (!column.isPartitionKey()) {
            StructField field = rowInspector.getStructFieldRef(column.getName());
            structFields[i] = field;
            fieldInspectors[i] = field.getFieldObjectInspector();
        }

        isPartitionColumn[i] = column.isPartitionKey();
    }

    // parse requested partition columns
    Map<String, HivePartitionKey> partitionKeysByName = uniqueIndex(partitionKeys, HivePartitionKey::getName);
    for (int columnIndex = 0; columnIndex < columns.size(); columnIndex++) {
        HiveColumnHandle column = columns.get(columnIndex);
        if (column.isPartitionKey()) {
            HivePartitionKey partitionKey = partitionKeysByName.get(column.getName());
            checkArgument(partitionKey != null, "Unknown partition key %s", column.getName());

            byte[] bytes = partitionKey.getValue().getBytes(UTF_8);

            String name = names[columnIndex];
            Type type = types[columnIndex];
            if (HiveUtil.isHiveNull(bytes)) {
                nulls[columnIndex] = true;
            }
            else if (BOOLEAN.equals(type)) {
                booleans[columnIndex] = booleanPartitionKey(partitionKey.getValue(), name);
            }
            else if (BIGINT.equals(type)) {
                longs[columnIndex] = bigintPartitionKey(partitionKey.getValue(), name);
            }
            else if (INTEGER.equals(type)) {
                longs[columnIndex] = integerPartitionKey(partitionKey.getValue(), name);
            }
            else if (SMALLINT.equals(type)) {
                longs[columnIndex] = smallintPartitionKey(partitionKey.getValue(), name);
            }
            else if (TINYINT.equals(type)) {
                longs[columnIndex] = tinyintPartitionKey(partitionKey.getValue(), name);
            }
            else if (DOUBLE.equals(type)) {
                doubles[columnIndex] = doublePartitionKey(partitionKey.getValue(), name);
            }
            else if (isVarcharType(type)) {
                slices[columnIndex] = varcharPartitionKey(partitionKey.getValue(), name, type);
            }
            else if (DATE.equals(type)) {
                longs[columnIndex] = datePartitionKey(partitionKey.getValue(), name);
            }
            else if (TIMESTAMP.equals(type)) {
                longs[columnIndex] = timestampPartitionKey(partitionKey.getValue(), hiveStorageTimeZone, name);
            }
            else if (isShortDecimal(type)) {
                longs[columnIndex] = shortDecimalPartitionKey(partitionKey.getValue(), (DecimalType) type, name);
            }
            else if (isLongDecimal(type)) {
                slices[columnIndex] = longDecimalPartitionKey(partitionKey.getValue(), (DecimalType) type, name);
            }
            else {
                throw new PrestoException(NOT_SUPPORTED, format("Unsupported column type %s for partition key: %s", type.getDisplayName(), name));
            }
        }
    }
}
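// The refactored constructor above delegates parsing and validation to per-type
// helpers in HiveUtil. A sketch of the pattern for the boolean variant; the real
// helper may differ in detail, but the shape -- parse the string, and raise a
// PrestoException naming the offending key on bad input -- is the point.
public static boolean booleanPartitionKey(String value, String name)
{
    if (value.equalsIgnoreCase("true")) {
        return true;
    }
    if (value.equalsIgnoreCase("false")) {
        return false;
    }
    throw new PrestoException(HIVE_INVALID_PARTITION_VALUE, format("Invalid partition value '%s' for BOOLEAN partition key: %s", value, name));
}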
public ParquetHiveRecordCursor(
        Configuration configuration,
        Path path,
        long start,
        long length,
        Properties splitSchema,
        List<HivePartitionKey> partitionKeys,
        List<HiveColumnHandle> columns,
        boolean useParquetColumnNames,
        TypeManager typeManager)
{
    requireNonNull(path, "path is null");
    checkArgument(length >= 0, "totalBytes is negative");
    requireNonNull(splitSchema, "splitSchema is null");
    requireNonNull(partitionKeys, "partitionKeys is null");
    requireNonNull(columns, "columns is null");

    this.totalBytes = length;

    int size = columns.size();
    this.names = new String[size];
    this.types = new Type[size];
    this.isPartitionColumn = new boolean[size];
    this.booleans = new boolean[size];
    this.longs = new long[size];
    this.doubles = new double[size];
    this.slices = new Slice[size];
    this.objects = new Object[size];
    this.nulls = new boolean[size];
    this.nullsRowDefault = new boolean[size];

    for (int i = 0; i < columns.size(); i++) {
        HiveColumnHandle column = columns.get(i);

        names[i] = column.getName();
        types[i] = typeManager.getType(column.getTypeSignature());

        isPartitionColumn[i] = column.isPartitionKey();
        nullsRowDefault[i] = !column.isPartitionKey();
    }

    this.recordReader = createParquetRecordReader(configuration, path, start, length, columns, useParquetColumnNames);

    // parse requested partition columns
    Map<String, HivePartitionKey> partitionKeysByName = uniqueIndex(partitionKeys, HivePartitionKey::getName);
    for (int columnIndex = 0; columnIndex < columns.size(); columnIndex++) {
        HiveColumnHandle column = columns.get(columnIndex);
        if (column.isPartitionKey()) {
            HivePartitionKey partitionKey = partitionKeysByName.get(column.getName());
            checkArgument(partitionKey != null, "Unknown partition key %s", column.getName());

            byte[] bytes = partitionKey.getValue().getBytes(UTF_8);

            String name = names[columnIndex];
            Type type = types[columnIndex];
            if (HiveUtil.isHiveNull(bytes)) {
                nullsRowDefault[columnIndex] = true;
            }
            else if (type.equals(BOOLEAN)) {
                booleans[columnIndex] = booleanPartitionKey(partitionKey.getValue(), name);
            }
            else if (type.equals(BIGINT)) {
                longs[columnIndex] = bigintPartitionKey(partitionKey.getValue(), name);
            }
            else if (type.equals(DOUBLE)) {
                doubles[columnIndex] = doublePartitionKey(partitionKey.getValue(), name);
            }
            else if (type.equals(VARCHAR)) {
                slices[columnIndex] = Slices.wrappedBuffer(bytes);
            }
            else {
                throw new PrestoException(NOT_SUPPORTED, format("Unsupported column type %s for partition key: %s", type.getDisplayName(), name));
            }
        }
    }
}
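// nullsRowDefault captures per-column defaults: partition columns are already
// materialized here (non-null unless the partition value was the Hive null
// marker), while data columns start out null until the Parquet converters fill
// them in. A sketch of how per-row state would typically be reset from it,
// assuming an advanceNextPosition-style read loop; this is illustrative, not
// the verbatim cursor code.
@Override
public boolean advanceNextPosition()
{
    try {
        if (!recordReader.nextKeyValue()) {
            return false;
        }
        // restore the per-row defaults before the converters overwrite them
        System.arraycopy(nullsRowDefault, 0, nulls, 0, nulls.length);
        return true;
    }
    catch (IOException | InterruptedException e) {
        throw new RuntimeException(e);
    }
}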