@Override
public void createTable(ConnectorSession session, ConnectorTableMetadata tableMetadata) {
    checkArgument(!isNullOrEmpty(tableMetadata.getOwner()), "Table owner is null or empty");

    SchemaTableName schemaTableName = tableMetadata.getTable();
    String schemaName = schemaTableName.getSchemaName();
    String tableName = schemaTableName.getTableName();

    ImmutableList.Builder<String> columnNames = ImmutableList.builder();
    ImmutableList.Builder<Type> columnTypes = ImmutableList.builder();
    buildColumnInfo(tableMetadata, columnNames, columnTypes);

    ImmutableList.Builder<FieldSchema> partitionKeys = ImmutableList.builder();
    ImmutableList.Builder<FieldSchema> columns = ImmutableList.builder();

    List<String> names = columnNames.build();
    List<String> typeNames = columnTypes.build().stream()
            .map(HiveType::toHiveType)
            .map(HiveType::getHiveTypeName)
            .collect(toList());

    // split the columns into partition keys and regular data columns
    for (int i = 0; i < names.size(); i++) {
        if (tableMetadata.getColumns().get(i).isPartitionKey()) {
            partitionKeys.add(new FieldSchema(names.get(i), typeNames.get(i), null));
        }
        else {
            columns.add(new FieldSchema(names.get(i), typeNames.get(i), null));
        }
    }

    Path targetPath = getTargetPath(schemaName, tableName, schemaTableName);

    HiveStorageFormat hiveStorageFormat = getHiveStorageFormat(session, this.hiveStorageFormat);

    SerDeInfo serdeInfo = new SerDeInfo();
    serdeInfo.setName(tableName);
    serdeInfo.setSerializationLib(hiveStorageFormat.getSerDe());

    StorageDescriptor sd = new StorageDescriptor();
    sd.setLocation(targetPath.toString());
    sd.setCols(columns.build());
    sd.setSerdeInfo(serdeInfo);
    sd.setInputFormat(hiveStorageFormat.getInputFormat());
    sd.setOutputFormat(hiveStorageFormat.getOutputFormat());

    Table table = new Table();
    table.setDbName(schemaName);
    table.setTableName(tableName);
    table.setOwner(tableMetadata.getOwner());
    table.setTableType(TableType.MANAGED_TABLE.toString());
    String tableComment = "Created by Presto";
    table.setParameters(ImmutableMap.of("comment", tableComment));
    table.setPartitionKeys(partitionKeys.build());
    table.setSd(sd);

    metastore.createTable(table);
}
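The buildColumnInfo helper called above is referenced but not shown. A minimal sketch of what it plausibly does, assuming it just copies name/type pairs out of the connector column metadata (hypothetical, not the actual source):

// Hypothetical sketch of the buildColumnInfo helper referenced in createTable;
// it fills the two parallel builders from the connector column metadata.
private static void buildColumnInfo(
        ConnectorTableMetadata tableMetadata,
        ImmutableList.Builder<String> names,
        ImmutableList.Builder<Type> types) {
    for (ColumnMetadata column : tableMetadata.getColumns()) {
        names.add(column.getName());
        types.add(column.getType());
    }
}

Whatever its exact shape, it must keep names and typeNames index-aligned with tableMetadata.getColumns(), because the partition-key loop above indexes all three by the same i.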
private static void fromMetastoreApiStorageDescriptor(
        StorageDescriptor storageDescriptor,
        Storage.Builder builder,
        String tablePartitionName) {
    SerDeInfo serdeInfo = storageDescriptor.getSerdeInfo();
    if (serdeInfo == null) {
        throw new PrestoException(HIVE_INVALID_METADATA, "Table storage descriptor is missing SerDe info");
    }

    builder.setStorageFormat(StorageFormat.createNullable(
                    serdeInfo.getSerializationLib(),
                    storageDescriptor.getInputFormat(),
                    storageDescriptor.getOutputFormat()))
            .setLocation(nullToEmpty(storageDescriptor.getLocation()))
            .setBucketProperty(HiveBucketProperty.fromStorageDescriptor(storageDescriptor, tablePartitionName))
            .setSorted(storageDescriptor.isSetSortCols() && !storageDescriptor.getSortCols().isEmpty())
            .setSkewed(storageDescriptor.isSetSkewedInfo()
                    && storageDescriptor.getSkewedInfo().isSetSkewedColNames()
                    && !storageDescriptor.getSkewedInfo().getSkewedColNames().isEmpty())
            .setSerdeParameters(serdeInfo.getParameters() == null ? ImmutableMap.of() : serdeInfo.getParameters());
}
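For illustration only (not from the source): Thrift-generated beans leave unset struct fields as null, so a descriptor that never had its SerDe info populated trips the guard at the top of the converter rather than failing with an NPE later:

// Illustrative: a bare Thrift descriptor has no SerDe info, so the converter
// above rejects it with HIVE_INVALID_METADATA. The location is an example value.
StorageDescriptor bare = new StorageDescriptor();
bare.setLocation("hdfs://namenode:8020/warehouse/db.db/tbl");
assert bare.getSerdeInfo() == null;
// fromMetastoreApiStorageDescriptor(bare, builder, "db.tbl") -> PrestoException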
private static StorageDescriptor makeStorageDescriptor(String tableName, List<Column> columns, Storage storage) {
    if (storage.isSorted() || storage.isSkewed()) {
        throw new IllegalArgumentException("Writing to sorted and/or skewed table/partition is not supported");
    }

    SerDeInfo serdeInfo = new SerDeInfo();
    serdeInfo.setName(tableName);
    serdeInfo.setSerializationLib(storage.getStorageFormat().getSerDeNullable());
    serdeInfo.setParameters(storage.getSerdeParameters());

    StorageDescriptor sd = new StorageDescriptor();
    sd.setLocation(emptyToNull(storage.getLocation()));
    sd.setCols(columns.stream()
            .map(MetastoreUtil::toMetastoreApiFieldSchema)
            .collect(toList()));
    sd.setSerdeInfo(serdeInfo);
    sd.setInputFormat(storage.getStorageFormat().getInputFormatNullable());
    sd.setOutputFormat(storage.getStorageFormat().getOutputFormatNullable());
    sd.setParameters(ImmutableMap.of());

    Optional<HiveBucketProperty> bucketProperty = storage.getBucketProperty();
    if (bucketProperty.isPresent()) {
        sd.setNumBuckets(bucketProperty.get().getBucketCount());
        sd.setBucketCols(bucketProperty.get().getBucketedBy());
    }

    return sd;
}
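MetastoreUtil::toMetastoreApiFieldSchema is referenced above but not shown. A plausible sketch, assuming a Column carries a name, a HiveType, and an optional comment (the accessor names here are assumptions, not the actual source):

// Hypothetical sketch of the referenced field-schema conversion; the Column
// accessors are assumed for illustration.
public static FieldSchema toMetastoreApiFieldSchema(Column column) {
    return new FieldSchema(
            column.getName(),                   // column name
            column.getType().getHiveTypeName(), // Hive type string, e.g. "bigint"
            column.getComment().orElse(null));  // Thrift uses null for "no comment"
}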
/**
 * Tests that Impala is able to create an HdfsStorageDescriptor using all combinations of
 * Parquet SerDe class name + input/output format class name (2 x 4 x 4 = 32 combinations).
 */
@Test
public void testParquetFileFormat()
        throws DatabaseNotFoundException, InvalidStorageDescriptorException {
    String[] parquetSerDe = new String[] {
            "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe",
            "parquet.hive.serde.ParquetHiveSerDe"};
    String[] inputFormats = new String[] {
            "com.cloudera.impala.hive.serde.ParquetInputFormat",
            "parquet.hive.DeprecatedParquetInputFormat",
            "parquet.hive.MapredParquetInputFormat",
            "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"};
    String[] outputFormats = new String[] {
            "com.cloudera.impala.hive.serde.ParquetOutputFormat",
            "parquet.hive.DeprecatedParquetOutputFormat",
            "parquet.hive.MapredParquetOutputFormat",
            "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"};

    for (String serDe : parquetSerDe) {
        SerDeInfo serDeInfo = new SerDeInfo();
        serDeInfo.setSerializationLib(serDe);
        serDeInfo.setParameters(new HashMap<String, String>());
        for (String inputFormat : inputFormats) {
            for (String outputFormat : outputFormats) {
                StorageDescriptor sd = new StorageDescriptor();
                sd.setSerdeInfo(serDeInfo);
                sd.setInputFormat(inputFormat);
                sd.setOutputFormat(outputFormat);
                assertNotNull(HdfsStorageDescriptor.fromStorageDescriptor("fakeTblName", sd));
            }
        }
    }
}
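A companion negative case (illustrative, not in the source) would pin down the failure mode for unknown formats; the exception type is an assumption based on the throws clause above:

// Illustrative negative case: an unrecognized SerDe/format combination should
// be rejected. InvalidStorageDescriptorException is assumed from the throws clause.
SerDeInfo bogusSerDe = new SerDeInfo();
bogusSerDe.setSerializationLib("com.example.UnknownSerDe");
bogusSerDe.setParameters(new HashMap<String, String>());
StorageDescriptor bogusSd = new StorageDescriptor();
bogusSd.setSerdeInfo(bogusSerDe);
bogusSd.setInputFormat("com.example.UnknownInputFormat");
bogusSd.setOutputFormat("com.example.UnknownOutputFormat");
try {
    HdfsStorageDescriptor.fromStorageDescriptor("fakeTblName", bogusSd);
    fail("Expected InvalidStorageDescriptorException for unknown file format");
} catch (InvalidStorageDescriptorException e) {
    // expected
}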
@Override
public void commitCreateTable(ConnectorOutputTableHandle tableHandle, Collection<Slice> fragments) {
    HiveOutputTableHandle handle = checkType(tableHandle, HiveOutputTableHandle.class, "tableHandle");

    // verify no one raced us to create the target directory
    Path targetPath = new Path(handle.getTargetPath());

    // rename if using a temporary directory
    if (handle.hasTemporaryPath()) {
        if (pathExists(targetPath)) {
            SchemaTableName table = new SchemaTableName(handle.getSchemaName(), handle.getTableName());
            throw new PrestoException(HIVE_PATH_ALREADY_EXISTS,
                    format("Unable to commit creation of table '%s': target directory already exists: %s", table, targetPath));
        }
        // rename the temporary directory to the target
        rename(new Path(handle.getTemporaryPath()), targetPath);
    }

    // create the table in the metastore
    List<String> types = handle.getColumnTypes().stream()
            .map(HiveType::toHiveType)
            .map(HiveType::getHiveTypeName)
            .collect(toList());

    boolean sampled = false;
    ImmutableList.Builder<FieldSchema> columns = ImmutableList.builder();
    for (int i = 0; i < handle.getColumnNames().size(); i++) {
        String name = handle.getColumnNames().get(i);
        String type = types.get(i);
        if (name.equals(SAMPLE_WEIGHT_COLUMN_NAME)) {
            columns.add(new FieldSchema(name, type, "Presto sample weight column"));
            sampled = true;
        }
        else {
            columns.add(new FieldSchema(name, type, null));
        }
    }

    HiveStorageFormat hiveStorageFormat = handle.getHiveStorageFormat();

    SerDeInfo serdeInfo = new SerDeInfo();
    serdeInfo.setName(handle.getTableName());
    serdeInfo.setSerializationLib(hiveStorageFormat.getSerDe());
    serdeInfo.setParameters(ImmutableMap.<String, String>of());

    StorageDescriptor sd = new StorageDescriptor();
    sd.setLocation(targetPath.toString());
    sd.setCols(columns.build());
    sd.setSerdeInfo(serdeInfo);
    sd.setInputFormat(hiveStorageFormat.getInputFormat());
    sd.setOutputFormat(hiveStorageFormat.getOutputFormat());
    sd.setParameters(ImmutableMap.<String, String>of());

    Table table = new Table();
    table.setDbName(handle.getSchemaName());
    table.setTableName(handle.getTableName());
    table.setOwner(handle.getTableOwner());
    table.setTableType(TableType.MANAGED_TABLE.toString());
    String tableComment = "Created by Presto";
    if (sampled) {
        tableComment = "Sampled table created by Presto. Only query this table from Hive if you understand how Presto implements sampling.";
    }
    table.setParameters(ImmutableMap.of("comment", tableComment));
    table.setPartitionKeys(ImmutableList.<FieldSchema>of());
    table.setSd(sd);

    metastore.createTable(table);
}
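The pathExists and rename helpers used above are not shown. A minimal sketch of their likely shape on top of the Hadoop FileSystem API (an assumption, not the Presto source; hadoopConfiguration is a hypothetical field, and the real code presumably resolves the FileSystem through the connector's HDFS environment):

// Hypothetical sketches of the pathExists/rename helpers, built directly on
// org.apache.hadoop.fs.FileSystem.
private boolean pathExists(Path path) {
    try {
        return path.getFileSystem(hadoopConfiguration).exists(path);
    } catch (IOException e) {
        throw new RuntimeException("Failed checking path: " + path, e);
    }
}

private void rename(Path source, Path target) {
    try {
        // FileSystem.rename signals some failures by returning false rather than throwing
        if (!source.getFileSystem(hadoopConfiguration).rename(source, target)) {
            throw new IOException("rename returned false");
        }
    } catch (IOException e) {
        throw new RuntimeException(format("Failed to rename %s to %s", source, target), e);
    }
}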