/**
   * Creates a managed table in the metastore from the given table metadata.
   *
   * <p>Columns flagged as partition keys in {@code tableMetadata} become the table's partition
   * keys; every other column is placed in the storage descriptor. The storage format is obtained
   * from {@code getHiveStorageFormat(session, this.hiveStorageFormat)} and the table location from
   * {@code getTargetPath}.
   *
   * @param session session passed to {@code getHiveStorageFormat}
   * @param tableMetadata supplies the schema/table name, owner and columns of the new table
   * @throws IllegalArgumentException if the table owner is null or empty
   */
  @Override
  public void createTable(ConnectorSession session, ConnectorTableMetadata tableMetadata) {
    checkArgument(!isNullOrEmpty(tableMetadata.getOwner()), "Table owner is null or empty");

    SchemaTableName schemaTableName = tableMetadata.getTable();
    String schemaName = schemaTableName.getSchemaName();
    String tableName = schemaTableName.getTableName();

    ImmutableList.Builder<String> columnNames = ImmutableList.builder();
    ImmutableList.Builder<Type> columnTypes = ImmutableList.builder();

    buildColumnInfo(tableMetadata, columnNames, columnTypes);

    List<String> names = columnNames.build();
    List<String> typeNames =
        columnTypes
            .build()
            .stream()
            .map(HiveType::toHiveType)
            .map(HiveType::getHiveTypeName)
            .collect(toList());

    // Split the columns into partition keys and regular data columns.
    // NOTE(review): tableMetadata.getColumns() is indexed by position in the list filled by
    // buildColumnInfo; this assumes both have the same order and length -- confirm against
    // buildColumnInfo.
    ImmutableList.Builder<FieldSchema> partitionKeys = ImmutableList.builder();
    ImmutableList.Builder<FieldSchema> columns = ImmutableList.builder();
    for (int i = 0; i < names.size(); i++) {
      FieldSchema field = new FieldSchema(names.get(i), typeNames.get(i), null);
      if (tableMetadata.getColumns().get(i).isPartitionKey()) {
        partitionKeys.add(field);
      } else {
        columns.add(field);
      }
    }

    Path targetPath = getTargetPath(schemaName, tableName, schemaTableName);

    HiveStorageFormat hiveStorageFormat = getHiveStorageFormat(session, this.hiveStorageFormat);

    SerDeInfo serdeInfo = new SerDeInfo();
    serdeInfo.setName(tableName);
    serdeInfo.setSerializationLib(hiveStorageFormat.getSerDe());
    // Set an explicit empty map so the SerDe info never carries an unset parameter map.
    serdeInfo.setParameters(ImmutableMap.<String, String>of());

    StorageDescriptor sd = new StorageDescriptor();
    sd.setLocation(targetPath.toString());
    sd.setCols(columns.build());
    sd.setSerdeInfo(serdeInfo);
    sd.setInputFormat(hiveStorageFormat.getInputFormat());
    sd.setOutputFormat(hiveStorageFormat.getOutputFormat());
    // Same reasoning as for the SerDe parameters above.
    sd.setParameters(ImmutableMap.<String, String>of());

    Table table = new Table();
    table.setDbName(schemaName);
    table.setTableName(tableName);
    table.setOwner(tableMetadata.getOwner());
    table.setTableType(TableType.MANAGED_TABLE.toString());
    table.setParameters(ImmutableMap.of("comment", "Created by Presto"));
    table.setPartitionKeys(partitionKeys.build());
    table.setSd(sd);

    metastore.createTable(table);
  }
  // Example #2
  // 0
  /**
   * Copies storage settings from a metastore {@code StorageDescriptor} into a
   * {@code Storage.Builder}.
   *
   * <p>The builder receives the storage format, the location (a null location becomes the empty
   * string), the bucket property, the sorted and skewed flags, and the SerDe parameters (null
   * parameters become an empty map).
   *
   * @param storageDescriptor descriptor to read from
   * @param builder builder that receives the values
   * @param tablePartitionName passed through to {@code HiveBucketProperty.fromStorageDescriptor}
   * @throws PrestoException with {@code HIVE_INVALID_METADATA} if the descriptor has no SerDe info
   */
  private static void fromMetastoreApiStorageDescriptor(
      StorageDescriptor storageDescriptor, Storage.Builder builder, String tablePartitionName) {
    SerDeInfo serde = storageDescriptor.getSerdeInfo();
    if (serde == null) {
      throw new PrestoException(
          HIVE_INVALID_METADATA, "Table storage descriptor is missing SerDe info");
    }

    StorageFormat format =
        StorageFormat.createNullable(
            serde.getSerializationLib(),
            storageDescriptor.getInputFormat(),
            storageDescriptor.getOutputFormat());
    String location = nullToEmpty(storageDescriptor.getLocation());

    // Sorted: sort columns are set and the list is non-empty.
    boolean sorted = false;
    if (storageDescriptor.isSetSortCols()) {
      sorted = !storageDescriptor.getSortCols().isEmpty();
    }

    // Skewed: skewed info with a non-empty list of skewed column names is set.
    boolean skewed = false;
    if (storageDescriptor.isSetSkewedInfo()
        && storageDescriptor.getSkewedInfo().isSetSkewedColNames()) {
      skewed = !storageDescriptor.getSkewedInfo().getSkewedColNames().isEmpty();
    }

    builder
        .setStorageFormat(format)
        .setLocation(location)
        .setBucketProperty(
            HiveBucketProperty.fromStorageDescriptor(storageDescriptor, tablePartitionName))
        .setSorted(sorted)
        .setSkewed(skewed)
        .setSerdeParameters(
            serde.getParameters() == null ? ImmutableMap.of() : serde.getParameters());
  }
  // Example #3
  // 0
  /**
   * Builds a metastore {@code StorageDescriptor} from the given columns and storage settings.
   *
   * <p>The SerDe info is named after {@code tableName}. An empty storage location is stored as
   * {@code null}, and the descriptor-level parameters are set to an empty map. Bucket count and
   * bucketing columns are copied only when the storage has a bucket property.
   *
   * @param tableName name assigned to the SerDe info
   * @param columns columns converted with {@code MetastoreUtil.toMetastoreApiFieldSchema}
   * @param storage source of the location, formats, SerDe parameters and bucketing
   * @return the populated storage descriptor
   * @throws IllegalArgumentException if the storage is sorted or skewed
   */
  private static StorageDescriptor makeStorageDescriptor(
      String tableName, List<Column> columns, Storage storage) {
    boolean sortedOrSkewed = storage.isSorted() || storage.isSkewed();
    if (sortedOrSkewed) {
      throw new IllegalArgumentException(
          "Writing to sorted and/or skewed table/partition is not supported");
    }

    SerDeInfo serde = new SerDeInfo();
    serde.setName(tableName);
    serde.setSerializationLib(storage.getStorageFormat().getSerDeNullable());
    serde.setParameters(storage.getSerdeParameters());

    StorageDescriptor descriptor = new StorageDescriptor();
    descriptor.setLocation(emptyToNull(storage.getLocation()));
    descriptor.setCols(
        columns.stream().map(MetastoreUtil::toMetastoreApiFieldSchema).collect(toList()));
    descriptor.setSerdeInfo(serde);
    descriptor.setInputFormat(storage.getStorageFormat().getInputFormatNullable());
    descriptor.setOutputFormat(storage.getStorageFormat().getOutputFormatNullable());
    descriptor.setParameters(ImmutableMap.of());

    // Bucketing is optional; copy it only when the storage declares it.
    storage
        .getBucketProperty()
        .ifPresent(
            bucketing -> {
              descriptor.setNumBuckets(bucketing.getBucketCount());
              descriptor.setBucketCols(bucketing.getBucketedBy());
            });

    return descriptor;
  }
  /**
   * Checks that {@code HdfsStorageDescriptor.fromStorageDescriptor} returns a non-null descriptor
   * for every combination of the Parquet SerDe, input format and output format class names listed
   * in this test.
   */
  @Test
  public void testParquetFileFormat()
      throws DatabaseNotFoundException, InvalidStorageDescriptorException {
    final String[] serDeClasses = {
      "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe",
      "parquet.hive.serde.ParquetHiveSerDe"
    };
    final String[] inputFormatClasses = {
      "com.cloudera.impala.hive.serde.ParquetInputFormat",
      "parquet.hive.DeprecatedParquetInputFormat",
      "parquet.hive.MapredParquetInputFormat",
      "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"
    };
    final String[] outputFormatClasses = {
      "com.cloudera.impala.hive.serde.ParquetOutputFormat",
      "parquet.hive.DeprecatedParquetOutputFormat",
      "parquet.hive.MapredParquetOutputFormat",
      "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"
    };

    for (String serDeClass : serDeClasses) {
      // One SerDeInfo per SerDe class, shared by every format combination below.
      SerDeInfo serDeInfo = new SerDeInfo();
      serDeInfo.setSerializationLib(serDeClass);
      serDeInfo.setParameters(new HashMap<String, String>());

      for (String inputFormatClass : inputFormatClasses) {
        for (String outputFormatClass : outputFormatClasses) {
          StorageDescriptor descriptor = new StorageDescriptor();
          descriptor.setSerdeInfo(serDeInfo);
          descriptor.setInputFormat(inputFormatClass);
          descriptor.setOutputFormat(outputFormatClass);
          assertNotNull(HdfsStorageDescriptor.fromStorageDescriptor("fakeTblName", descriptor));
        }
      }
    }
  }
  /**
   * Commits a table creation: moves the written data into place when a temporary directory was
   * used, then registers the table in the metastore.
   *
   * <p>The table is created as a managed table with no partition keys. A column whose name equals
   * {@code SAMPLE_WEIGHT_COLUMN_NAME} gets a dedicated column comment and switches the table
   * comment to the "sampled table" text.
   *
   * @param tableHandle checked to be a {@code HiveOutputTableHandle}
   * @param fragments not read by this method
   * @throws PrestoException with {@code HIVE_PATH_ALREADY_EXISTS} if a temporary path is in use
   *     and the target directory already exists
   */
  @Override
  public void commitCreateTable(
      ConnectorOutputTableHandle tableHandle, Collection<Slice> fragments) {
    HiveOutputTableHandle outputHandle =
        checkType(tableHandle, HiveOutputTableHandle.class, "tableHandle");

    Path destination = new Path(outputHandle.getTargetPath());

    // data written to a temporary directory has to be moved to the target
    if (outputHandle.hasTemporaryPath()) {
      // verify no one raced us to create the target directory
      if (pathExists(destination)) {
        throw new PrestoException(
            HIVE_PATH_ALREADY_EXISTS,
            format(
                "Unable to commit creation of table '%s': target directory already exists: %s",
                new SchemaTableName(outputHandle.getSchemaName(), outputHandle.getTableName()),
                destination));
      }
      rename(new Path(outputHandle.getTemporaryPath()), destination);
    }

    // translate the column types into Hive type names for the metastore
    List<String> hiveTypeNames =
        outputHandle
            .getColumnTypes()
            .stream()
            .map(HiveType::toHiveType)
            .map(HiveType::getHiveTypeName)
            .collect(toList());

    List<String> columnNames = outputHandle.getColumnNames();
    ImmutableList.Builder<FieldSchema> fieldSchemas = ImmutableList.builder();
    boolean sampled = false;
    for (int index = 0; index < columnNames.size(); index++) {
      String columnName = columnNames.get(index);
      boolean sampleWeight = columnName.equals(SAMPLE_WEIGHT_COLUMN_NAME);
      String columnComment = sampleWeight ? "Presto sample weight column" : null;
      fieldSchemas.add(new FieldSchema(columnName, hiveTypeNames.get(index), columnComment));
      sampled = sampled || sampleWeight;
    }

    HiveStorageFormat storageFormat = outputHandle.getHiveStorageFormat();

    SerDeInfo serde = new SerDeInfo();
    serde.setName(outputHandle.getTableName());
    serde.setSerializationLib(storageFormat.getSerDe());
    serde.setParameters(ImmutableMap.<String, String>of());

    StorageDescriptor descriptor = new StorageDescriptor();
    descriptor.setLocation(destination.toString());
    descriptor.setCols(fieldSchemas.build());
    descriptor.setSerdeInfo(serde);
    descriptor.setInputFormat(storageFormat.getInputFormat());
    descriptor.setOutputFormat(storageFormat.getOutputFormat());
    descriptor.setParameters(ImmutableMap.<String, String>of());

    String tableComment =
        sampled
            ? "Sampled table created by Presto. Only query this table from Hive if you understand how Presto implements sampling."
            : "Created by Presto";

    // create the table in the metastore
    Table newTable = new Table();
    newTable.setDbName(outputHandle.getSchemaName());
    newTable.setTableName(outputHandle.getTableName());
    newTable.setOwner(outputHandle.getTableOwner());
    newTable.setTableType(TableType.MANAGED_TABLE.toString());
    newTable.setParameters(ImmutableMap.of("comment", tableComment));
    newTable.setPartitionKeys(ImmutableList.<FieldSchema>of());
    newTable.setSd(descriptor);

    metastore.createTable(newTable);
  }