private static StorageDescriptor makeStorageDescriptor(
    String tableName, List<Column> columns, Storage storage) {
  if (storage.isSorted() || storage.isSkewed()) {
    throw new IllegalArgumentException(
        "Writing to sorted and/or skewed table/partition is not supported");
  }

  SerDeInfo serdeInfo = new SerDeInfo();
  serdeInfo.setName(tableName);
  serdeInfo.setSerializationLib(storage.getStorageFormat().getSerDeNullable());
  serdeInfo.setParameters(storage.getSerdeParameters());

  StorageDescriptor sd = new StorageDescriptor();
  sd.setLocation(emptyToNull(storage.getLocation()));
  sd.setCols(
      columns.stream()
          .map(MetastoreUtil::toMetastoreApiFieldSchema)
          .collect(toList()));
  sd.setSerdeInfo(serdeInfo);
  sd.setInputFormat(storage.getStorageFormat().getInputFormatNullable());
  sd.setOutputFormat(storage.getStorageFormat().getOutputFormatNullable());
  sd.setParameters(ImmutableMap.of());

  Optional<HiveBucketProperty> bucketProperty = storage.getBucketProperty();
  if (bucketProperty.isPresent()) {
    sd.setNumBuckets(bucketProperty.get().getBucketCount());
    sd.setBucketCols(bucketProperty.get().getBucketedBy());
  }
  return sd;
}
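// Illustrative sketch, not from the source: the kind of StorageDescriptor the
// helper above produces for a plain, unbucketed text table. Every literal here
// (table name, location, column, serde and format class names) is an assumed
// example chosen for the demonstration.
private static StorageDescriptor exampleTextTableDescriptor() {
  SerDeInfo serdeInfo = new SerDeInfo();
  serdeInfo.setName("demo_table");
  serdeInfo.setSerializationLib("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe");
  serdeInfo.setParameters(ImmutableMap.of());

  StorageDescriptor sd = new StorageDescriptor();
  sd.setLocation("hdfs://namenode:8020/warehouse/demo_table");
  sd.setCols(ImmutableList.of(new FieldSchema("id", "int", null)));
  sd.setSerdeInfo(serdeInfo);
  sd.setInputFormat("org.apache.hadoop.mapred.TextInputFormat");
  sd.setOutputFormat("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat");
  sd.setParameters(ImmutableMap.of());
  // no bucket property, so numBuckets and bucketCols stay unset
  return sd;
}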
private static void createTable(String tableName, String tablePerm) throws Exception {
  Table tbl = new Table();
  tbl.setDbName(DATABASE);
  tbl.setTableName(tableName);

  // store the table as RCFile with the columnar serde
  StorageDescriptor sd = new StorageDescriptor();
  sd.setCols(ColumnHolder.colMapping.get(tableName));
  tbl.setSd(sd);
  sd.setParameters(new HashMap<String, String>());
  sd.setSerdeInfo(new SerDeInfo());
  sd.getSerdeInfo().setName(tbl.getTableName());
  sd.getSerdeInfo().setParameters(new HashMap<String, String>());
  sd.setInputFormat(org.apache.hadoop.hive.ql.io.RCFileInputFormat.class.getName());
  sd.setOutputFormat(org.apache.hadoop.hive.ql.io.RCFileOutputFormat.class.getName());
  sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1");
  sd.getSerdeInfo()
      .setSerializationLib(org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe.class.getName());
  tbl.setPartitionKeys(ColumnHolder.partitionCols);

  hmsc.createTable(tbl);

  // apply the requested permission to the table's warehouse directory
  FileSystem fs = FileSystem.get(mrConf);
  fs.setPermission(new Path(warehousedir, tableName), new FsPermission(tablePerm));
}
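// Hypothetical call site for the helper above (names are assumed examples):
// "test_table" must already have an entry in ColumnHolder.colMapping, and
// "750" is an arbitrary octal permission string, which FsPermission accepts.
createTable("test_table", "750");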
@Override
public void commitCreateTable(ConnectorOutputTableHandle tableHandle, Collection<Slice> fragments) {
  HiveOutputTableHandle handle = checkType(tableHandle, HiveOutputTableHandle.class, "tableHandle");

  Path targetPath = new Path(handle.getTargetPath());

  // rename if using a temporary directory
  if (handle.hasTemporaryPath()) {
    // verify no one raced us to create the target directory
    if (pathExists(targetPath)) {
      SchemaTableName table = new SchemaTableName(handle.getSchemaName(), handle.getTableName());
      throw new PrestoException(
          HIVE_PATH_ALREADY_EXISTS,
          format(
              "Unable to commit creation of table '%s': target directory already exists: %s",
              table, targetPath));
    }
    // rename the temporary directory to the target
    rename(new Path(handle.getTemporaryPath()), targetPath);
  }

  // create the table in the metastore
  List<String> types = handle.getColumnTypes().stream()
      .map(HiveType::toHiveType)
      .map(HiveType::getHiveTypeName)
      .collect(toList());

  boolean sampled = false;
  ImmutableList.Builder<FieldSchema> columns = ImmutableList.builder();
  for (int i = 0; i < handle.getColumnNames().size(); i++) {
    String name = handle.getColumnNames().get(i);
    String type = types.get(i);
    if (name.equals(SAMPLE_WEIGHT_COLUMN_NAME)) {
      columns.add(new FieldSchema(name, type, "Presto sample weight column"));
      sampled = true;
    } else {
      columns.add(new FieldSchema(name, type, null));
    }
  }

  HiveStorageFormat hiveStorageFormat = handle.getHiveStorageFormat();

  SerDeInfo serdeInfo = new SerDeInfo();
  serdeInfo.setName(handle.getTableName());
  serdeInfo.setSerializationLib(hiveStorageFormat.getSerDe());
  serdeInfo.setParameters(ImmutableMap.<String, String>of());

  StorageDescriptor sd = new StorageDescriptor();
  sd.setLocation(targetPath.toString());
  sd.setCols(columns.build());
  sd.setSerdeInfo(serdeInfo);
  sd.setInputFormat(hiveStorageFormat.getInputFormat());
  sd.setOutputFormat(hiveStorageFormat.getOutputFormat());
  sd.setParameters(ImmutableMap.<String, String>of());

  Table table = new Table();
  table.setDbName(handle.getSchemaName());
  table.setTableName(handle.getTableName());
  table.setOwner(handle.getTableOwner());
  table.setTableType(TableType.MANAGED_TABLE.toString());
  String tableComment = "Created by Presto";
  if (sampled) {
    tableComment = "Sampled table created by Presto. Only query this table "
        + "from Hive if you understand how Presto implements sampling.";
  }
  table.setParameters(ImmutableMap.of("comment", tableComment));
  table.setPartitionKeys(ImmutableList.<FieldSchema>of());
  table.setSd(sd);

  metastore.createTable(table);
}
public void setFieldValue(_Fields field, Object value) {
  switch (field) {
    case COLS:
      if (value == null) {
        unsetCols();
      } else {
        setCols((List<FieldSchema>) value);
      }
      break;

    case LOCATION:
      if (value == null) {
        unsetLocation();
      } else {
        setLocation((String) value);
      }
      break;

    case INPUT_FORMAT:
      if (value == null) {
        unsetInputFormat();
      } else {
        setInputFormat((String) value);
      }
      break;

    case OUTPUT_FORMAT:
      if (value == null) {
        unsetOutputFormat();
      } else {
        setOutputFormat((String) value);
      }
      break;

    case COMPRESSED:
      if (value == null) {
        unsetCompressed();
      } else {
        setCompressed((Boolean) value);
      }
      break;

    case NUM_BUCKETS:
      if (value == null) {
        unsetNumBuckets();
      } else {
        setNumBuckets((Integer) value);
      }
      break;

    case SERDE_INFO:
      if (value == null) {
        unsetSerdeInfo();
      } else {
        setSerdeInfo((SerDeInfo) value);
      }
      break;

    case BUCKET_COLS:
      if (value == null) {
        unsetBucketCols();
      } else {
        setBucketCols((List<String>) value);
      }
      break;

    case SORT_COLS:
      if (value == null) {
        unsetSortCols();
      } else {
        setSortCols((List<Order>) value);
      }
      break;

    case PARAMETERS:
      if (value == null) {
        unsetParameters();
      } else {
        setParameters((Map<String, String>) value);
      }
      break;
  }
}
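// Minimal sketch (assumed usage, not from the source) of driving the generated
// setter above; the _Fields enum is nested in the Thrift class, and passing null
// unsets a field. The location string is a made-up example.
StorageDescriptor sd = new StorageDescriptor();
sd.setFieldValue(StorageDescriptor._Fields.LOCATION, "/warehouse/demo_table");
sd.setFieldValue(StorageDescriptor._Fields.NUM_BUCKETS, 16);   // autoboxed to Integer
sd.setFieldValue(StorageDescriptor._Fields.COMPRESSED, Boolean.TRUE);
sd.setFieldValue(StorageDescriptor._Fields.NUM_BUCKETS, null); // calls unsetNumBuckets()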
/**
 * Verifies Impala is able to properly parse delimiters in supported formats.
 * See HdfsStorageDescriptor.parseDelim() for details.
 */
@Test
public void testDelimiters() throws InvalidStorageDescriptorException {
  StorageDescriptor sd =
      HiveStorageDescriptorFactory.createSd(THdfsFileFormat.TEXT, RowFormat.DEFAULT_ROW_FORMAT);
  sd.setParameters(new HashMap<String, String>());

  // Valid delimiters: a decimal byte value in [-128, 127] or a single character.
  sd.getSerdeInfo().setParameters(new HashMap<String, String>());
  sd.getSerdeInfo().putToParameters(serdeConstants.FIELD_DELIM, "-2");
  assertNotNull(HdfsStorageDescriptor.fromStorageDescriptor("fakeTbl", sd));

  sd.getSerdeInfo().setParameters(new HashMap<String, String>());
  sd.getSerdeInfo().putToParameters(serdeConstants.FIELD_DELIM, "-128");
  assertNotNull(HdfsStorageDescriptor.fromStorageDescriptor("fakeTbl", sd));

  sd.getSerdeInfo().setParameters(new HashMap<String, String>());
  sd.getSerdeInfo().putToParameters(serdeConstants.FIELD_DELIM, "127");
  assertNotNull(HdfsStorageDescriptor.fromStorageDescriptor("fakeTbl", sd));

  sd.getSerdeInfo().setParameters(new HashMap<String, String>());
  sd.getSerdeInfo().putToParameters(serdeConstants.LINE_DELIM, "\001");
  assertNotNull(HdfsStorageDescriptor.fromStorageDescriptor("fakeTbl", sd));

  sd.getSerdeInfo().setParameters(new HashMap<String, String>());
  sd.getSerdeInfo().putToParameters(serdeConstants.FIELD_DELIM, "|");
  assertNotNull(HdfsStorageDescriptor.fromStorageDescriptor("fakeTbl", sd));

  sd.getSerdeInfo().setParameters(new HashMap<String, String>());
  sd.getSerdeInfo().putToParameters(serdeConstants.FIELD_DELIM, "\t");
  assertNotNull(HdfsStorageDescriptor.fromStorageDescriptor("fakeTbl", sd));

  // Invalid delimiters: multi-character strings and out-of-range decimal values.
  sd.getSerdeInfo().setParameters(new HashMap<String, String>());
  sd.getSerdeInfo().putToParameters(serdeConstants.FIELD_DELIM, "ab");
  try {
    HdfsStorageDescriptor.fromStorageDescriptor("fake", sd);
    fail();
  } catch (HdfsStorageDescriptor.InvalidStorageDescriptorException e) {
    assertEquals(
        "Invalid delimiter: 'ab'. Delimiter must be specified as a "
            + "single character or as a decimal value in the range [-128:127]",
        e.getMessage());
  }

  sd.getSerdeInfo().setParameters(new HashMap<String, String>());
  sd.getSerdeInfo().putToParameters(serdeConstants.FIELD_DELIM, "128");
  try {
    HdfsStorageDescriptor.fromStorageDescriptor("fake", sd);
    fail();
  } catch (HdfsStorageDescriptor.InvalidStorageDescriptorException e) {
    assertEquals(
        "Invalid delimiter: '128'. Delimiter must be specified as a "
            + "single character or as a decimal value in the range [-128:127]",
        e.getMessage());
  }

  sd.getSerdeInfo().setParameters(new HashMap<String, String>());
  sd.getSerdeInfo().putToParameters(serdeConstants.FIELD_DELIM, "\128");
  try {
    HdfsStorageDescriptor.fromStorageDescriptor("fake", sd);
    fail();
  } catch (HdfsStorageDescriptor.InvalidStorageDescriptorException e) {
    assertEquals(
        "Invalid delimiter: '\128'. Delimiter must be specified as a "
            + "single character or as a decimal value in the range [-128:127]",
        e.getMessage());
  }

  sd.getSerdeInfo().setParameters(new HashMap<String, String>());
  sd.getSerdeInfo().putToParameters(serdeConstants.LINE_DELIM, "-129");
  try {
    HdfsStorageDescriptor.fromStorageDescriptor("fake", sd);
    fail();
  } catch (HdfsStorageDescriptor.InvalidStorageDescriptorException e) {
    assertEquals(
        "Invalid delimiter: '-129'. Delimiter must be specified as a "
            + "single character or as a decimal value in the range [-128:127]",
        e.getMessage());
  }
}
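// A minimal sketch, not Impala's actual parseDelim(): the validity rule the test
// above exercises, reconstructed from the error message it asserts on. The real
// method presumably also resolves the string to the delimiter byte; this version
// only answers whether the string is acceptable.
static boolean isValidDelimiter(String delim) {
  if (delim.length() == 1) {
    return true; // any single character is accepted verbatim
  }
  try {
    // the multi-character form must be a decimal byte value
    int value = Integer.parseInt(delim);
    return value >= -128 && value <= 127;
  } catch (NumberFormatException e) {
    return false; // e.g. "ab" is neither a single character nor a number
  }
}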
@Test
public void testHiveLocalMetaStore() {
  // Create a table and display it back
  try {
    HiveMetaStoreClient hiveClient = new HiveMetaStoreClient(hiveLocalMetaStore.getHiveConf());
    hiveClient.dropTable(
        propertyParser.getProperty(ConfigVars.HIVE_TEST_DATABASE_NAME_KEY),
        propertyParser.getProperty(ConfigVars.HIVE_TEST_TABLE_NAME_KEY),
        true,
        true);

    // Define the cols
    List<FieldSchema> cols = new ArrayList<FieldSchema>();
    cols.add(new FieldSchema("id", serdeConstants.INT_TYPE_NAME, ""));
    cols.add(new FieldSchema("msg", serdeConstants.STRING_TYPE_NAME, ""));

    // Values for the StorageDescriptor
    String location =
        new File(propertyParser.getProperty(ConfigVars.HIVE_TEST_TABLE_NAME_KEY))
            .getAbsolutePath();
    String inputFormat = "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat";
    String outputFormat = "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat";
    int numBuckets = 16;
    Map<String, String> orcProps = new HashMap<String, String>();
    orcProps.put("orc.compress", "NONE");
    SerDeInfo serDeInfo =
        new SerDeInfo(OrcSerde.class.getSimpleName(), OrcSerde.class.getName(), orcProps);
    List<String> bucketCols = new ArrayList<String>();
    bucketCols.add("id");

    // Build the StorageDescriptor
    StorageDescriptor sd = new StorageDescriptor();
    sd.setCols(cols);
    sd.setLocation(location);
    sd.setInputFormat(inputFormat);
    sd.setOutputFormat(outputFormat);
    sd.setNumBuckets(numBuckets);
    sd.setSerdeInfo(serDeInfo);
    sd.setBucketCols(bucketCols);
    sd.setSortCols(new ArrayList<Order>());
    sd.setParameters(new HashMap<String, String>());

    // Define the table
    Table tbl = new Table();
    tbl.setDbName(propertyParser.getProperty(ConfigVars.HIVE_TEST_DATABASE_NAME_KEY));
    tbl.setTableName(propertyParser.getProperty(ConfigVars.HIVE_TEST_TABLE_NAME_KEY));
    tbl.setSd(sd);
    tbl.setOwner(System.getProperty("user.name"));
    tbl.setParameters(new HashMap<String, String>());
    tbl.setViewOriginalText("");
    tbl.setViewExpandedText("");
    tbl.setTableType(TableType.EXTERNAL_TABLE.name());
    List<FieldSchema> partitions = new ArrayList<FieldSchema>();
    partitions.add(new FieldSchema("dt", serdeConstants.STRING_TYPE_NAME, ""));
    tbl.setPartitionKeys(partitions);

    // Create the table
    hiveClient.createTable(tbl);

    // Describe the table
    Table createdTable = hiveClient.getTable(
        propertyParser.getProperty(ConfigVars.HIVE_TEST_DATABASE_NAME_KEY),
        propertyParser.getProperty(ConfigVars.HIVE_TEST_TABLE_NAME_KEY));
    LOG.info("HIVE: Created Table: {}", createdTable.toString());
    assertThat(
        createdTable.toString(),
        containsString(propertyParser.getProperty(ConfigVars.HIVE_TEST_TABLE_NAME_KEY)));
  } catch (MetaException e) {
    e.printStackTrace();
  } catch (TException e) {
    e.printStackTrace();
  }
}