private static StorageDescriptor makeStorageDescriptor(
        String tableName, List<Column> columns, Storage storage)
{
    if (storage.isSorted() || storage.isSkewed()) {
        throw new IllegalArgumentException(
                "Writing to sorted and/or skewed table/partition is not supported");
    }

    SerDeInfo serdeInfo = new SerDeInfo();
    serdeInfo.setName(tableName);
    serdeInfo.setSerializationLib(storage.getStorageFormat().getSerDeNullable());
    serdeInfo.setParameters(storage.getSerdeParameters());

    StorageDescriptor sd = new StorageDescriptor();
    sd.setLocation(emptyToNull(storage.getLocation()));
    sd.setCols(columns.stream().map(MetastoreUtil::toMetastoreApiFieldSchema).collect(toList()));
    sd.setSerdeInfo(serdeInfo);
    sd.setInputFormat(storage.getStorageFormat().getInputFormatNullable());
    sd.setOutputFormat(storage.getStorageFormat().getOutputFormatNullable());
    sd.setParameters(ImmutableMap.of());

    Optional<HiveBucketProperty> bucketProperty = storage.getBucketProperty();
    if (bucketProperty.isPresent()) {
        sd.setNumBuckets(bucketProperty.get().getBucketCount());
        sd.setBucketCols(bucketProperty.get().getBucketedBy());
    }

    return sd;
}
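// Illustration only: a minimal sketch of the Thrift StorageDescriptor that
// makeStorageDescriptor produces for a hypothetical unbucketed Parquet table.
// The table name, location, columns, and Parquet class names below are
// assumptions made up for the example, not values taken from this codebase;
// FieldSchema is the Thrift type from org.apache.hadoop.hive.metastore.api
// and is referenced fully qualified to avoid assuming an import.
private static StorageDescriptor exampleParquetStorageDescriptor()
{
    // SerDe block: named after the table and pointing at the SerDe class
    SerDeInfo serdeInfo = new SerDeInfo();
    serdeInfo.setName("orders");
    serdeInfo.setSerializationLib("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe");
    serdeInfo.setParameters(ImmutableMap.of());

    // Descriptor block: location, columns, and the input/output format classes
    StorageDescriptor sd = new StorageDescriptor();
    sd.setLocation("s3://example-bucket/warehouse/orders");
    sd.setCols(List.of(
            new org.apache.hadoop.hive.metastore.api.FieldSchema("orderkey", "bigint", null),
            new org.apache.hadoop.hive.metastore.api.FieldSchema("comment", "string", null)));
    sd.setSerdeInfo(serdeInfo);
    sd.setInputFormat("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat");
    sd.setOutputFormat("org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat");
    sd.setParameters(ImmutableMap.of());
    // No bucket property, so numBuckets and bucketCols stay unset, mirroring
    // the empty-Optional branch in makeStorageDescriptor

    return sd;
}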
private static Properties getHiveSchema(
        Storage sd,
        List<Column> dataColumns,
        List<Column> tableDataColumns,
        Map<String, String> parameters,
        String databaseName,
        String tableName,
        List<Column> partitionKeys)
{
    // Mimics function in Hive:
    // MetaStoreUtils.getSchema(StorageDescriptor, StorageDescriptor, Map<String, String>, String,
    // String, List<FieldSchema>)
    Properties schema = new Properties();

    schema.setProperty(FILE_INPUT_FORMAT, sd.getStorageFormat().getInputFormat());
    schema.setProperty(FILE_OUTPUT_FORMAT, sd.getStorageFormat().getOutputFormat());

    schema.setProperty(META_TABLE_NAME, databaseName + "." + tableName);
    schema.setProperty(META_TABLE_LOCATION, sd.getLocation());

    if (sd.getBucketProperty().isPresent()) {
        schema.setProperty(BUCKET_FIELD_NAME, sd.getBucketProperty().get().getBucketedBy().get(0));
        schema.setProperty(BUCKET_COUNT, Integer.toString(sd.getBucketProperty().get().getBucketCount()));
    }
    else {
        schema.setProperty(BUCKET_COUNT, "0");
    }

    for (Map.Entry<String, String> param : sd.getSerdeParameters().entrySet()) {
        schema.setProperty(param.getKey(), (param.getValue() != null) ? param.getValue() : "");
    }
    schema.setProperty(SERIALIZATION_LIB, sd.getStorageFormat().getSerDe());

    // Column names are joined with ',', types with ':', and comments with the NUL character
    StringBuilder columnNameBuilder = new StringBuilder();
    StringBuilder columnTypeBuilder = new StringBuilder();
    StringBuilder columnCommentBuilder = new StringBuilder();
    boolean first = true;
    for (Column column : tableDataColumns) {
        if (!first) {
            columnNameBuilder.append(",");
            columnTypeBuilder.append(":");
            columnCommentBuilder.append('\0');
        }
        columnNameBuilder.append(column.getName());
        columnTypeBuilder.append(column.getType());
        columnCommentBuilder.append(column.getComment().orElse(""));
        first = false;
    }
    String columnNames = columnNameBuilder.toString();
    String columnTypes = columnTypeBuilder.toString();
    schema.setProperty(META_TABLE_COLUMNS, columnNames);
    schema.setProperty(META_TABLE_COLUMN_TYPES, columnTypes);
    schema.setProperty("columns.comments", columnCommentBuilder.toString());

    schema.setProperty(SERIALIZATION_DDL, toThriftDdl(tableName, dataColumns));

    // Partition column names are joined with '/' and their types with ':'
    String partString = "";
    String partStringSep = "";
    String partTypesString = "";
    String partTypesStringSep = "";
    for (Column partKey : partitionKeys) {
        partString += partStringSep;
        partString += partKey.getName();
        partTypesString += partTypesStringSep;
        partTypesString += partKey.getType().getHiveTypeName();

        if (partStringSep.length() == 0) {
            partStringSep = "/";
            partTypesStringSep = ":";
        }
    }
    if (partString.length() > 0) {
        schema.setProperty(META_TABLE_PARTITION_COLUMNS, partString);
        schema.setProperty(META_TABLE_PARTITION_COLUMN_TYPES, partTypesString);
    }

    if (parameters != null) {
        for (Map.Entry<String, String> entry : parameters.entrySet()) {
            // add non-null parameters to the schema
            if (entry.getValue() != null) {
                schema.setProperty(entry.getKey(), entry.getValue());
            }
        }
    }

    return schema;
}
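// Illustration only: roughly what getHiveSchema emits for a hypothetical table with
// data columns (orderkey bigint, comment string) partitioned by (ds string). The
// literal property keys below ("columns", "columns.types", "partition_columns",
// "partition_columns.types", "bucket_count") are the usual Hive schema keys written
// out as assumptions rather than read from the imported constants; the format,
// SerDe, and SERIALIZATION_DDL entries are omitted to keep the sketch short.
private static Properties exampleHiveSchema()
{
    Properties schema = new Properties();
    // Column names joined with ',', types with ':', comments with the NUL character
    schema.setProperty("columns", "orderkey,comment");
    schema.setProperty("columns.types", "bigint:string");
    schema.setProperty("columns.comments", "\0");
    // Partition column names joined with '/', partition types with ':'
    schema.setProperty("partition_columns", "ds");
    schema.setProperty("partition_columns.types", "string");
    // Unbucketed tables get bucket_count = 0
    schema.setProperty("bucket_count", "0");
    return schema;
}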