static Map<String, String> createPtnKeyValueMap(Table table, Partition ptn) throws IOException { List<String> values = ptn.getValues(); if (values.size() != table.getPartitionKeys().size()) { throw new IOException( "Partition values are inconsistent with the table definition: table " + table.getTableName() + " has " + table.getPartitionKeys().size() + " partition keys, partition has " + values.size() + " partition values"); } Map<String, String> ptnKeyValues = new HashMap<String, String>(); int i = 0; for (FieldSchema schema : table.getPartitionKeys()) { // CONCERN: the way this mapping goes, the order *needs* to be // preserved for table.getPartitionKeys() and ptn.getValues() ptnKeyValues.put(schema.getName().toLowerCase(), values.get(i)); i++; } return ptnKeyValues; }
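A minimal, self-contained sketch of the ordering concern noted in createPtnKeyValueMap above: the i-th partition key must line up with the i-th partition value, so the key/value zip only works if both lists preserve the table's partition-key order. The PartitionSpecSketch class and zipPartitionSpec helper are hypothetical illustrations, not HCatalog code.

import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hive.metastore.api.FieldSchema;

public class PartitionSpecSketch {
  static Map<String, String> zipPartitionSpec(List<FieldSchema> keys, List<String> values)
      throws IOException {
    if (keys.size() != values.size()) {
      throw new IOException(
          "Expected " + keys.size() + " partition values, got " + values.size());
    }
    // LinkedHashMap keeps the spec in the same order as the table's partition keys.
    Map<String, String> spec = new LinkedHashMap<String, String>();
    for (int i = 0; i < keys.size(); i++) {
      spec.put(keys.get(i).getName().toLowerCase(), values.get(i));
    }
    return spec;
  }

  public static void main(String[] args) throws IOException {
    List<FieldSchema> keys = new ArrayList<FieldSchema>();
    keys.add(new FieldSchema("ds", "string", null));
    keys.add(new FieldSchema("hr", "string", null));
    List<String> values = new ArrayList<String>();
    values.add("2011-01-02");
    values.add("13");
    System.out.println(zipPartitionSpec(keys, values)); // {ds=2011-01-02, hr=13}
  }
}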
private Referenceable replaceTableQFName( HiveEventContext event, Table oldTable, Table newTable, final Referenceable tableEntity, final String oldTableQFName, final String newTableQFName) throws HiveException { tableEntity.set(AtlasClient.NAME, oldTable.getTableName().toLowerCase()); tableEntity.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, oldTableQFName); // Replace table entity with new name final Referenceable newEntity = new Referenceable(HiveDataTypes.HIVE_TABLE.getName()); newEntity.set(AtlasClient.NAME, newTable.getTableName().toLowerCase()); newEntity.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, newTableQFName); ArrayList<String> alias_list = new ArrayList<>(); alias_list.add(oldTable.getTableName().toLowerCase()); newEntity.set(HiveDataModelGenerator.TABLE_ALIAS_LIST, alias_list); event.addMessage( new HookNotification.EntityPartialUpdateRequest( event.getUser(), HiveDataTypes.HIVE_TABLE.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, oldTableQFName, newEntity)); return newEntity; }
/** * Sets the data storage location of a table by altering its metastore definition. * * @param db database name * @param table table name * @param filter not used by this method * @param path new data location for the table * @param jobConf job configuration used to obtain the metastore client * @return true if the table location was updated, false if an error occurred */ public static boolean setDataStorageLocation( String db, String table, String filter, String path, JobConf jobConf) { Preconditions.checkNotNull(table, "Table name must not be null"); HiveMetaStoreClient client = null; boolean updated = false; try { client = getHiveMetaStoreClient(jobConf); Table hiveTable = HCatUtil.getTable(client, db, table); hiveTable.setDataLocation(new URI(path)); client.alter_table(db, table, hiveTable.getTTable()); updated = true; } catch (IOException e) { logError("Error occurred when getting hiveconf", e); } catch (URISyntaxException e) { logError("Error occurred when converting path to URI", e); } catch (MetaException e) { logError("Error occurred when getting HiveMetaStoreClient", e); } catch (NoSuchObjectException e) { logError("Table doesn't exist in HCatalog: " + table, e); } catch (TException e) { logError("Error occurred when getting or altering the table", e); } finally { HCatUtil.closeHiveClientQuietly(client); } return updated; }
/** * Method to fetch table data * * @param table table name * @param database database name * @return list of rows, with column values separated by commas * @throws Exception if any error occurs */ private List<String> getTableData(String table, String database) throws Exception { HiveConf conf = new HiveConf(); conf.addResource("hive-site.xml"); ArrayList<String> results = new ArrayList<String>(); ArrayList<String> temp = new ArrayList<String>(); Hive hive = Hive.get(conf); org.apache.hadoop.hive.ql.metadata.Table tbl = hive.getTable(database, table); FetchWork work; if (!tbl.getPartCols().isEmpty()) { List<Partition> partitions = hive.getPartitions(tbl); List<PartitionDesc> partDesc = new ArrayList<PartitionDesc>(); List<String> partLocs = new ArrayList<String>(); for (Partition part : partitions) { partLocs.add(part.getLocation()); partDesc.add(Utilities.getPartitionDesc(part)); } work = new FetchWork(partLocs, partDesc, Utilities.getTableDesc(tbl)); work.setLimit(100); } else { work = new FetchWork(tbl.getDataLocation().toString(), Utilities.getTableDesc(tbl)); } FetchTask task = new FetchTask(); task.setWork(work); task.initialize(conf, null, null); task.fetch(temp); for (String str : temp) { results.add(str.replace("\t", ",")); } return results; }
@Override public LongWatermark getPreviousHighWatermark(Table table) { if (this.tableWatermarks.containsKey(table.getCompleteName())) { return this.tableWatermarks.get(table.getCompleteName()); } return new LongWatermark(0); }
/** * Retrieve the skewed column names for a table. * * @param alias table alias * @return list of skewed column names, or null if the table metadata is unavailable */ public List<String> getSkewedColumnNames(String alias) { List<String> skewedColNames = null; if (null != qbm && null != qbm.getAliasToTable() && qbm.getAliasToTable().size() > 0) { Table tbl = getMetaData().getTableForAlias(alias); skewedColNames = tbl.getSkewedColNames(); } return skewedColNames; }
public static void createHiveTable(String db, String table, List<FieldSchema> columns) throws Exception { Table tbl1 = new Table(db, table); tbl1.setFields(columns); Hive.get().createTable(tbl1); Assert.assertNotNull(Hive.get().getTable(db, table)); log.info("Created table : " + table); }
/** Clear out any side effects of running tests */ public void clearTablesCreatedDuringTests() throws Exception { if (System.getenv(QTEST_LEAVE_FILES) != null) { return; } // Delete any tables other than the source tables // and any databases other than the default database. for (String dbName : db.getAllDatabases()) { SessionState.get().setCurrentDatabase(dbName); for (String tblName : db.getAllTables()) { if (!DEFAULT_DATABASE_NAME.equals(dbName)) { Table tblObj = db.getTable(tblName); // index tables cannot be dropped directly; dropping the base // table will automatically drop all its index tables if (tblObj.isIndexTable()) { continue; } db.dropTable(dbName, tblName); } else { // this table is defined in srcTables, drop all indexes on it List<Index> indexes = db.getIndexes(dbName, tblName, (short) -1); if (indexes != null && indexes.size() > 0) { for (Index index : indexes) { db.dropIndex(dbName, tblName, index.getIndexName(), true, true); } } } } if (!DEFAULT_DATABASE_NAME.equals(dbName)) { // Drop cascade, may need to drop functions db.dropDatabase(dbName, true, true, true); } } // delete remaining directories for external tables (can affect stats for following tests) try { Path p = new Path(testWarehouse); FileSystem fileSystem = p.getFileSystem(conf); if (fileSystem.exists(p)) { for (FileStatus status : fileSystem.listStatus(p)) { if (status.isDir()) { fileSystem.delete(status.getPath(), true); } } } } catch (IllegalArgumentException e) { // ignore: an invalid URL is sometimes provided intentionally } SessionState.get().setCurrentDatabase(DEFAULT_DATABASE_NAME); List<String> roleNames = db.getAllRoleNames(); for (String roleName : roleNames) { if (!"PUBLIC".equalsIgnoreCase(roleName) && !"ADMIN".equalsIgnoreCase(roleName)) { db.dropRole(roleName); } } }
/** * Return the partition columns from a table instance. * * @param table the instance to extract partition columns from * @return HCatSchema instance which contains the partition columns * @throws IOException if a partition column cannot be converted to an HCatFieldSchema */ public static HCatSchema getPartitionColumns(Table table) throws IOException { HCatSchema cols = new HCatSchema(new LinkedList<HCatFieldSchema>()); if (table.getPartitionKeys().size() != 0) { for (FieldSchema fs : table.getPartitionKeys()) { cols.append(HCatSchemaUtils.getHCatFieldSchema(fs)); } } return cols; }
/** * @param db database name * @param table table name * @param filter optional partition filter, e.g. ds >= 1234 * @param jobConf job configuration used to obtain the metastore client * @return A list of data file locations */ public static List<String> getDataStorageLocation( String db, String table, String filter, JobConf jobConf) { Preconditions.checkNotNull(table, "Table name must not be null"); HiveMetaStoreClient client = null; List<String> locations = new ArrayList<String>(); try { client = getHiveMetaStoreClient(jobConf); Table hiveTable = HCatUtil.getTable(client, db, table); if (hiveTable.isPartitioned()) { List<Partition> parts = null; if (null != StringUtils.stripToNull(filter)) { parts = client.listPartitionsByFilter(db, table, filter, (short) -1); } else { parts = client.listPartitions(db, table, (short) -1); } if (parts.size() > 0) { // A filter such as ds >= 1234 can match // more than one partition for (Partition part : parts) { locations.addAll(getFilesInHivePartition(part, jobConf)); } } else { logError( "Table " + hiveTable.getTableName() + " doesn't have the specified partition: " + filter, null); } } else { locations.add(hiveTable.getTTable().getSd().getLocation()); } } catch (IOException e) { logError("Error occurred when getting hiveconf", e); } catch (MetaException e) { logError("Error occurred when getting HiveMetaStoreClient", e); } catch (NoSuchObjectException e) { logError("Table doesn't exist in HCatalog: " + table, e); } catch (TException e) { logError("Error occurred when getting Table", e); } finally { HCatUtil.closeHiveClientQuietly(client); } return locations; }
public static HCatSchema getTableSchemaWithPtnCols(Table table) throws IOException { HCatSchema tableSchema = new HCatSchema(HCatUtil.getHCatFieldSchemaList(table.getCols())); if (table.getPartitionKeys().size() != 0) { // add partition keys to table schema // NOTE : this assumes that we do not ever have ptn keys as columns // inside the table schema as well! for (FieldSchema fs : table.getPartitionKeys()) { tableSchema.append(HCatSchemaUtils.getHCatFieldSchema(fs)); } } return tableSchema; }
/** * Determines whether one can insert into the given partition(s), or whether there is a conflict * with an archive. A conflict can arise because the partition is itself archived or because it is * to be created inside an existing archive. The second case occurs when the partition doesn't * exist yet but would lie inside an archive if it did. This one is quite tricky to check: we need * to find at least one partition inside the parent directory; if it is archived and its archiving * level shows that the archive covers the directory the partition would be in, we cannot insert, * otherwise we can. This method works both for full specifications and partial ones - in the * second case it checks whether any partition that could possibly match such a specification is * inside an archive. * * @param db - Hive object * @param tbl - table where the partition is * @param partSpec - partition specification with possible nulls in case of dynamic partition * inserts * @return null if the partition can be inserted, string with the colliding archive name when it * can't * @throws HiveException */ public static String conflictingArchiveNameOrNull( Hive db, Table tbl, LinkedHashMap<String, String> partSpec) throws HiveException { List<FieldSchema> partKeys = tbl.getPartitionKeys(); int partSpecLevel = 0; for (FieldSchema partKey : partKeys) { if (!partSpec.containsKey(partKey.getName())) { break; } partSpecLevel++; } if (partSpecLevel != partSpec.size()) { throw new HiveException("partspec " + partSpec + " is wrong for table " + tbl.getTableName()); } Map<String, String> spec = new HashMap<String, String>(partSpec); List<String> reversedKeys = new LinkedList<String>(); for (FieldSchema fs : tbl.getPartCols()) { if (spec.containsKey(fs.getName())) { reversedKeys.add(0, fs.getName()); } } for (String rk : reversedKeys) { List<Partition> parts = db.getPartitions(tbl, spec, (short) 1); if (parts.size() != 0) { Partition p = parts.get(0); if (!isArchived(p)) { // if archiving was done at this or at an upper level, every matched // partition would be archived, so the fact that it is not archived means // no archiving was done at this or any upper level return null; } else if (getArchivingLevel(p) > spec.size()) { // if archiving had been done at this or an upper level, its level // would be less than or equal to the specification size; // it is not, which means no archiving at this or any upper level return null; } else { return getPartialName(p, getArchivingLevel(p)); } } spec.remove(rk); } return null; }
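A hedged sketch of the decision rule conflictingArchiveNameOrNull applies once it has found a sample partition: an existing partition archived at level L blocks an insert into a (possibly partial) spec of size S whenever L <= S, because the archive then covers the directory the insert would have to write into. The helper below is illustrative only.

// Returns true when the insert conflicts with an existing archive.
static boolean conflictsWithArchive(boolean isArchived, int archivingLevel, int specSize) {
  if (!isArchived) {
    return false; // no archiving at this or any upper level
  }
  return archivingLevel <= specSize; // the archive covers the target directory
}
// conflictsWithArchive(true, 1, 2) == true  : archived at the ds level, inserting into ds/hr
// conflictsWithArchive(true, 3, 2) == false : archiving happened below the insert level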
private void handleExternalTables( final HiveMetaStoreBridge dgiBridge, final HiveEventContext event, final LinkedHashMap<Type, Referenceable> tables) throws HiveException, MalformedURLException { List<Referenceable> entities = new ArrayList<>(); final Entity hiveEntity = getEntityByType(event.getOutputs(), Type.TABLE); Table hiveTable = hiveEntity.getTable(); // Refresh to get the correct location hiveTable = dgiBridge.hiveClient.getTable(hiveTable.getDbName(), hiveTable.getTableName()); if (hiveTable != null && TableType.EXTERNAL_TABLE.equals(hiveTable.getTableType())) { // Dereference the data location only after the null check final String location = lower(hiveTable.getDataLocation().toString()); LOG.info("Registering external table process {} ", event.getQueryStr()); final ReadEntity dfsEntity = new ReadEntity(); dfsEntity.setTyp(Type.DFS_DIR); dfsEntity.setName(location); SortedMap<Entity, Referenceable> inputs = new TreeMap<Entity, Referenceable>(entityComparator) { { put(dfsEntity, dgiBridge.fillHDFSDataSet(location)); } }; SortedMap<Entity, Referenceable> outputs = new TreeMap<Entity, Referenceable>(entityComparator) { { put(hiveEntity, tables.get(Type.TABLE)); } }; Referenceable processReferenceable = getProcessReferenceable(dgiBridge, event, inputs, outputs); String tableQualifiedName = dgiBridge.getTableQualifiedName(dgiBridge.getClusterName(), hiveTable); if (isCreateOp(event)) { processReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, tableQualifiedName); } entities.addAll(tables.values()); entities.add(processReferenceable); event.addMessage(new HookNotification.EntityUpdateRequest(event.getUser(), entities)); } }
/* * Builds a TDescribeTableResult that contains the result of a DESCRIBE FORMATTED * <table> command. For the formatted describe output the goal is to be exactly the * same as what Hive (via HiveServer2) outputs, for compatibility reasons. To do this, * Hive's MetadataFormatUtils class is used to build the results. */ private static TDescribeTableResult describeTableFormatted(Table table) { TDescribeTableResult descResult = new TDescribeTableResult(); descResult.results = Lists.newArrayList(); org.apache.hadoop.hive.metastore.api.Table msTable = table.getMetaStoreTable().deepCopy(); // Fixup the metastore table so the output of DESCRIBE FORMATTED matches Hive's. // This is to distinguish between empty comments and no comments (value is null). for (FieldSchema fs : msTable.getSd().getCols()) fs.setComment(table.getColumn(fs.getName()).getComment()); for (FieldSchema fs : msTable.getPartitionKeys()) { fs.setComment(table.getColumn(fs.getName()).getComment()); } // To avoid initializing any of the SerDe classes in the metastore table Thrift // struct, create the ql.metadata.Table object by calling the empty c'tor and // then calling setTTable(). org.apache.hadoop.hive.ql.metadata.Table hiveTable = new org.apache.hadoop.hive.ql.metadata.Table(); hiveTable.setTTable(msTable); StringBuilder sb = new StringBuilder(); // First add all the columns (includes partition columns). sb.append( MetaDataFormatUtils.getAllColumnsInformation( msTable.getSd().getCols(), msTable.getPartitionKeys())); // Add the extended table metadata information. sb.append(MetaDataFormatUtils.getTableInformation(hiveTable)); for (String line : sb.toString().split("\n")) { // To match Hive's HiveServer2 output, split each line into multiple column // values based on the field delimiter. String[] columns = line.split(MetaDataFormatUtils.FIELD_DELIM); TResultRow resultRow = new TResultRow(); for (int i = 0; i < NUM_DESC_FORMATTED_RESULT_COLS; ++i) { TColumnValue colVal = new TColumnValue(); colVal.setString_val(null); if (columns.length > i) { // Add the column value. colVal.setString_val(columns[i]); } resultRow.addToColVals(colVal); } descResult.results.add(resultRow); } return descResult; }
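An illustrative sketch (hypothetical padRow helper, not Impala or Hive code) of the row-shaping step above: each DESCRIBE FORMATTED line is split on the field delimiter and padded with nulls so every result row carries the same number of columns.

static java.util.List<String> padRow(String line, String fieldDelim, int numCols) {
  String[] columns = line.split(fieldDelim);
  java.util.List<String> row = new java.util.ArrayList<String>(numCols);
  for (int i = 0; i < numCols; i++) {
    row.add(columns.length > i ? columns[i] : null); // missing cells become null
  }
  return row;
}
// padRow("col_name\tdata_type\tcomment", "\t", 3) -> [col_name, data_type, comment]
// padRow("# Detailed Table Information", "\t", 3) -> [# Detailed Table Information, null, null]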
/** * Validate alter table description. * * @throws SemanticException */ public void validate() throws SemanticException { if (null != table) { /* Validate skewed information. */ ValidationUtility.validateSkewedInformation( ParseUtils.validateColumnNameUniqueness(table.getCols()), this.getSkewedColNames(), this.getSkewedColValues()); } }
/** * This block of code iterates over the topToTable map from ParseContext to determine if the query * has a scan over multiple tables. * * @return true if the query scans more than one table, false otherwise */ boolean ifQueryHasMultipleTables() { Map<TableScanOperator, Table> topToTable = parseContext.getTopToTable(); Iterator<Table> valuesItr = topToTable.values().iterator(); Set<String> tableNameSet = new HashSet<String>(); while (valuesItr.hasNext()) { Table table = valuesItr.next(); tableNameSet.add(table.getTableName()); } if (tableNameSet.size() > 1) { LOG.debug( "Query has more than one table, " + "which is not supported by the " + getName() + " optimization."); return true; } return false; }
/** * This code block iterates over indexes on the table and populates the indexToKeys map for all * the indexes that satisfy the rewrite criteria. * * @param indexTables list of indexes defined on the base table * @return map from each usable index to the set of its key column names * @throws SemanticException */ Map<Index, Set<String>> getIndexToKeysMap(List<Index> indexTables) throws SemanticException { Index index = null; Hive hiveInstance = hiveDb; Map<Index, Set<String>> indexToKeysMap = new LinkedHashMap<Index, Set<String>>(); for (int idxCtr = 0; idxCtr < indexTables.size(); idxCtr++) { final Set<String> indexKeyNames = new LinkedHashSet<String>(); index = indexTables.get(idxCtr); // Getting index key columns StorageDescriptor sd = index.getSd(); List<FieldSchema> idxColList = sd.getCols(); for (FieldSchema fieldSchema : idxColList) { indexKeyNames.add(fieldSchema.getName()); } assert indexKeyNames.size() == 1; // Check that the index schema is as expected. This code block should // catch problems of this rewrite breaking when the AggregateIndexHandler // index is changed. List<String> idxTblColNames = new ArrayList<String>(); try { Table idxTbl = hiveInstance.getTable(index.getDbName(), index.getIndexTableName()); for (FieldSchema idxTblCol : idxTbl.getCols()) { idxTblColNames.add(idxTblCol.getName()); } } catch (HiveException e) { LOG.error( "Got exception while locating index table, " + "skipping " + getName() + " optimization"); LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e)); throw new SemanticException(e.getMessage(), e); } assert (idxTblColNames.contains(IDX_BUCKET_COL)); assert (idxTblColNames.contains(IDX_OFFSETS_ARRAY_COL)); // we add all index tables which can be used for rewrite // and defer the decision of using a particular index for later; // this allows choosing an index if a better mechanism for // selecting a rewrite is designed later indexToKeysMap.put(index, indexKeyNames); } return indexToKeysMap; }
// Return true if the table is bucketed/sorted by the specified positions. // The number of buckets and the sort order must also match, along with the // columns that are bucketed/sorted private boolean checkTable( Table table, List<Integer> bucketPositionsDest, List<Integer> sortPositionsDest, List<Integer> sortOrderDest, int numBucketsDest) { // The bucketing and sorting positions should exactly match int numBuckets = table.getNumBuckets(); if (numBucketsDest != numBuckets) { return false; } List<Integer> tableBucketPositions = getBucketPositions(table.getBucketCols(), table.getCols()); ObjectPair<List<Integer>, List<Integer>> tableSortPositionsOrder = getSortPositionsOrder(table.getSortCols(), table.getCols()); return bucketPositionsDest.equals(tableBucketPositions) && sortPositionsDest.equals(tableSortPositionsOrder.getFirst()) && sortOrderDest.equals(tableSortPositionsOrder.getSecond()); }
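A small sketch of what a getBucketPositions-style helper is assumed to do in checkTable (the helper below is illustrative, not the actual implementation): map each bucketed or sorted column name to its ordinal position in the table's column list so two tables can be compared positionally.

static java.util.List<Integer> columnPositions(
    java.util.List<String> wantedCols, java.util.List<String> allCols) {
  java.util.List<Integer> positions = new java.util.ArrayList<Integer>();
  for (String col : wantedCols) {
    positions.add(allCols.indexOf(col)); // -1 if the column is missing
  }
  return positions;
}
// columnPositions(["b", "a"], ["a", "b", "c"]) -> [1, 0]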
/** * Creates the path where partitions matching the prefix should lie in the filesystem * * @param tbl table in which the partition is * @return expected location of partitions matching the prefix in the filesystem */ public Path createPath(Table tbl) throws HiveException { String prefixSubdir; try { prefixSubdir = Warehouse.makePartName(fields, values); } catch (MetaException e) { throw new HiveException("Unable to get partitions directories prefix", e); } Path tableDir = tbl.getDataLocation(); if (tableDir == null) { throw new HiveException("Table has no location set"); } return new Path(tableDir, prefixSubdir); }
@BeforeTest @Override public void beforeTest() throws Exception { super.beforeTest(); List<FieldSchema> factColumns = new ArrayList<>(); factColumns.add(new FieldSchema("continent", "int", "")); factColumns.add(new FieldSchema("country", "int", "")); factColumns.add(new FieldSchema("region", "int", "")); factColumns.add(new FieldSchema("city", "int", "")); factColumns.add(new FieldSchema("count", "double", "")); factColumns.add(new FieldSchema("added", "double", "")); try { HiveConf hiveConf = new HiveConf(); SessionState.start(hiveConf); Hive.get().dropTable("default.wikipedia"); createHiveTable("default", "wikipedia", factColumns); Table tbl = CubeMetastoreClient.getInstance(hiveConf).getHiveTable("wikipedia"); tbl.setProperty("druid.table.time.dimension", "time"); } catch (HiveException e) { log.error("Exception while creating hive table", e); } }
private void renameTable(HiveMetaStoreBridge dgiBridge, HiveEventContext event) throws Exception { // crappy, no easy way of getting the new name assert event.getInputs() != null && event.getInputs().size() == 1; assert event.getOutputs() != null && event.getOutputs().size() > 0; // Update the entity if it does not exist ReadEntity oldEntity = event.getInputs().iterator().next(); Table oldTable = oldEntity.getTable(); for (WriteEntity writeEntity : event.getOutputs()) { if (writeEntity.getType() == Entity.Type.TABLE) { Table newTable = writeEntity.getTable(); // Hive sends both the old and new table names in the outputs, which is odd, so we skip // the old one with the check below if (!newTable.getDbName().equals(oldTable.getDbName()) || !newTable.getTableName().equals(oldTable.getTableName())) { final String oldQualifiedName = dgiBridge.getTableQualifiedName(dgiBridge.getClusterName(), oldTable); final String newQualifiedName = dgiBridge.getTableQualifiedName(dgiBridge.getClusterName(), newTable); // Create/update the old table entity - create an entity with oldQFName and old tableName if it // doesn't exist. If it exists, it will be updated // We always use the new entity while creating the table since some flags, attributes of // the table are not set in inputEntity and Hive.getTable(oldTableName) also fails since // the table doesn't exist in Hive anymore final LinkedHashMap<Type, Referenceable> tables = createOrUpdateEntities(dgiBridge, event, writeEntity, true); Referenceable tableEntity = tables.get(Type.TABLE); // Reset regular column QF Name to old Name and create a new partial notification request // to replace old column QFName to newName to retain any existing traits replaceColumnQFName( event, (List<Referenceable>) tableEntity.get(HiveDataModelGenerator.COLUMNS), oldQualifiedName, newQualifiedName); // Reset partition key column QF Name to old Name and create a new partial notification // request to replace old column QFName to newName to retain any existing traits replaceColumnQFName( event, (List<Referenceable>) tableEntity.get(HiveDataModelGenerator.PART_COLS), oldQualifiedName, newQualifiedName); // Reset SD QF Name to old Name and create a new partial notification request to replace // old SD QFName to newName to retain any existing traits replaceSDQFName(event, tableEntity, oldQualifiedName, newQualifiedName); // Reset Table QF Name to old Name and create a new partial notification request to // replace old Table QFName to newName replaceTableQFName( event, oldTable, newTable, tableEntity, oldQualifiedName, newQualifiedName); } } } }
/** * Extract a partial prefix specification from the table and a key-value map * * @param tbl table in which the partition is * @param partSpec specification of the partition * @return extracted specification */ public static PartSpecInfo create(Table tbl, Map<String, String> partSpec) throws HiveException { // we have to check whether we received a prefix of the partition keys, so in a table // layout like table/ds=2011-01-02/hr=13/ // ARCHIVE PARTITION (ds='2011-01-02') will work and // ARCHIVE PARTITION(hr='13') won't List<FieldSchema> prefixFields = new ArrayList<FieldSchema>(); List<String> prefixValues = new ArrayList<String>(); List<FieldSchema> partCols = tbl.getPartCols(); Iterator<String> itrPsKeys = partSpec.keySet().iterator(); for (FieldSchema fs : partCols) { if (!itrPsKeys.hasNext()) { break; } if (!itrPsKeys.next().toLowerCase().equals(fs.getName().toLowerCase())) { throw new HiveException("Invalid partition specification: " + partSpec); } prefixFields.add(fs); prefixValues.add(partSpec.get(fs.getName())); } return new PartSpecInfo(prefixFields, prefixValues); }
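A minimal sketch of the prefix rule enforced above, using plain strings instead of FieldSchema (the isKeyPrefix helper is hypothetical): the supplied spec keys must match a prefix of the table's partition columns, in order, for the specification to be valid.

static boolean isKeyPrefix(java.util.List<String> specKeys, java.util.List<String> partCols) {
  if (specKeys.size() > partCols.size()) {
    return false;
  }
  for (int i = 0; i < specKeys.size(); i++) {
    if (!specKeys.get(i).equalsIgnoreCase(partCols.get(i))) {
      return false; // the spec skips or reorders a partition column
    }
  }
  return true;
}
// isKeyPrefix(["ds"], ["ds", "hr"]) -> true  (ARCHIVE PARTITION (ds='2011-01-02') works)
// isKeyPrefix(["hr"], ["ds", "hr"]) -> false (ARCHIVE PARTITION (hr='13') does not)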
private void renameColumn(HiveMetaStoreBridge dgiBridge, HiveEventContext event) throws Exception { assert event.getInputs() != null && event.getInputs().size() == 1; assert event.getOutputs() != null && event.getOutputs().size() > 0; Table oldTable = event.getInputs().iterator().next().getTable(); List<FieldSchema> oldColList = oldTable.getAllCols(); Table outputTbl = event.getOutputs().iterator().next().getTable(); outputTbl = dgiBridge.hiveClient.getTable(outputTbl.getDbName(), outputTbl.getTableName()); List<FieldSchema> newColList = outputTbl.getAllCols(); assert oldColList.size() == newColList.size(); Pair<String, String> changedColNamePair = findChangedColNames(oldColList, newColList); String oldColName = changedColNamePair.getLeft(); String newColName = changedColNamePair.getRight(); for (WriteEntity writeEntity : event.getOutputs()) { if (writeEntity.getType() == Type.TABLE) { Table newTable = writeEntity.getTable(); createOrUpdateEntities(dgiBridge, event, writeEntity, true, oldTable); final String newQualifiedTableName = dgiBridge.getTableQualifiedName(dgiBridge.getClusterName(), newTable); String oldColumnQFName = HiveMetaStoreBridge.getColumnQualifiedName(newQualifiedTableName, oldColName); String newColumnQFName = HiveMetaStoreBridge.getColumnQualifiedName(newQualifiedTableName, newColName); Referenceable newColEntity = new Referenceable(HiveDataTypes.HIVE_COLUMN.getName()); newColEntity.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, newColumnQFName); event.addMessage( new HookNotification.EntityPartialUpdateRequest( event.getUser(), HiveDataTypes.HIVE_COLUMN.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, oldColumnQFName, newColEntity)); } } handleEventOutputs(dgiBridge, event, Type.TABLE); }
/** * Validate partition schema, checks if the column types match between the partition and the * existing table schema. Returns the list of columns present in the partition but not in the * table. * * @param table the table * @param partitionSchema the partition schema * @return the list of newly added fields * @throws IOException Signals that an I/O exception has occurred. */ public static List<FieldSchema> validatePartitionSchema(Table table, HCatSchema partitionSchema) throws IOException { Map<String, FieldSchema> partitionKeyMap = new HashMap<String, FieldSchema>(); for (FieldSchema field : table.getPartitionKeys()) { partitionKeyMap.put(field.getName().toLowerCase(), field); } List<FieldSchema> tableCols = table.getCols(); List<FieldSchema> newFields = new ArrayList<FieldSchema>(); for (int i = 0; i < partitionSchema.getFields().size(); i++) { FieldSchema field = HCatSchemaUtils.getFieldSchema(partitionSchema.getFields().get(i)); FieldSchema tableField; if (i < tableCols.size()) { tableField = tableCols.get(i); if (!tableField.getName().equalsIgnoreCase(field.getName())) { throw new HCatException( ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH, "Expected column <" + tableField.getName() + "> at position " + (i + 1) + ", found column <" + field.getName() + ">"); } } else { tableField = partitionKeyMap.get(field.getName().toLowerCase()); if (tableField != null) { throw new HCatException( ErrorType.ERROR_SCHEMA_PARTITION_KEY, "Key <" + field.getName() + ">"); } } if (tableField == null) { // field present in partition but not in table newFields.add(field); } else { // field present in both. validate type has not changed TypeInfo partitionType = TypeInfoUtils.getTypeInfoFromTypeString(field.getType()); TypeInfo tableType = TypeInfoUtils.getTypeInfoFromTypeString(tableField.getType()); if (!partitionType.equals(tableType)) { throw new HCatException( ErrorType.ERROR_SCHEMA_TYPE_MISMATCH, "Column <" + field.getName() + ">, expected <" + tableType.getTypeName() + ">, got <" + partitionType.getTypeName() + ">"); } } } return newFields; }
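A hedged sketch of the column-type comparison used above: both type strings are parsed with TypeInfoUtils.getTypeInfoFromTypeString so equivalent spellings compare equal. The sameColumnType helper name is illustrative only.

static boolean sameColumnType(String partitionTypeStr, String tableTypeStr) {
  org.apache.hadoop.hive.serde2.typeinfo.TypeInfo partType =
      org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfoFromTypeString(partitionTypeStr);
  org.apache.hadoop.hive.serde2.typeinfo.TypeInfo tblType =
      org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfoFromTypeString(tableTypeStr);
  return partType.equals(tblType); // e.g. ("string", "string") -> true, ("int", "string") -> false
}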
/** called from {@link SemanticAnalyzer#genTablePlan} */ public static void genTableName(RowResolver rwsch, Table tab) { rwsch.tableOriginalName = tab.getTableName(); System.out.println("======Gen Table Name===== " + rwsch.tableOriginalName); }
private LinkedHashMap<Type, Referenceable> createOrUpdateEntities( HiveMetaStoreBridge dgiBridge, HiveEventContext event, Entity entity, boolean skipTempTables, Table existTable) throws Exception { Database db = null; Table table = null; Partition partition = null; LinkedHashMap<Type, Referenceable> result = new LinkedHashMap<>(); List<Referenceable> entities = new ArrayList<>(); switch (entity.getType()) { case DATABASE: db = entity.getDatabase(); break; case TABLE: table = entity.getTable(); db = dgiBridge.hiveClient.getDatabase(table.getDbName()); break; case PARTITION: partition = entity.getPartition(); table = partition.getTable(); db = dgiBridge.hiveClient.getDatabase(table.getDbName()); break; } db = dgiBridge.hiveClient.getDatabase(db.getName()); Referenceable dbEntity = dgiBridge.createDBInstance(db); entities.add(dbEntity); result.put(Type.DATABASE, dbEntity); Referenceable tableEntity = null; if (table != null) { if (existTable != null) { table = existTable; } else { table = dgiBridge.hiveClient.getTable(table.getDbName(), table.getTableName()); } // If it's an external table, even though the temp table skip flag is on, // we create the table since we need the HDFS path for temp table lineage. if (skipTempTables && table.isTemporary() && !TableType.EXTERNAL_TABLE.equals(table.getTableType())) { LOG.debug( "Skipping temporary table registration {} since it is not an external table {} ", table.getTableName(), table.getTableType().name()); } else { tableEntity = dgiBridge.createTableInstance(dbEntity, table); entities.add(tableEntity); result.put(Type.TABLE, tableEntity); } } event.addMessage(new HookNotification.EntityUpdateRequest(event.getUser(), entities)); return result; }
/** * We traverse the current operator tree to check for conditions in which the optimization cannot * be applied. * * <p>At the end, we check if all conditions have passed for rewrite. If yes, we determine if the * index is usable for rewrite. Else, we log the condition which did not meet the rewrite * criterion. * * @return true if the rewrite optimization should be applied * @throws SemanticException */ boolean shouldApplyOptimization() throws SemanticException { boolean canApply = false; if (ifQueryHasMultipleTables()) { // We do not apply this optimization for this case as of now. return false; } else { /* * This code iterates over each TableScanOperator from the topOps map from ParseContext. * For each operator tree originating from this top TableScanOperator, we determine * if the optimization can be applied. If yes, we add the name of the top table to * the tsOpToProcess to apply rewrite later on. * */ Map<TableScanOperator, Table> topToTable = parseContext.getTopToTable(); Iterator<TableScanOperator> topOpItr = topToTable.keySet().iterator(); while (topOpItr.hasNext()) { TableScanOperator topOp = topOpItr.next(); Table table = topToTable.get(topOp); baseTableName = table.getTableName(); Map<Table, List<Index>> indexes = getIndexesForRewrite(); if (indexes == null) { LOG.debug( "Error getting valid indexes for rewrite, " + "skipping " + getName() + " optimization"); return false; } if (indexes.size() == 0) { LOG.debug( "No valid index found to apply rewrite, " + "skipping " + getName() + " optimization"); return false; } else { // we need to check if the base table has confirmed or unknown partitions if (parseContext.getOpToPartList() != null && parseContext.getOpToPartList().size() > 0) { // if the base table has partitions, we need to check if the index is built for // all partitions. If not, then we do not apply the optimization if (checkIfIndexBuiltOnAllTablePartitions(topOp, indexes)) { // check if rewrite can be applied for operator tree // if partitions condition returns true canApply = checkIfRewriteCanBeApplied(topOp, table, indexes); } else { LOG.debug( "Index is not built for all table partitions, " + "skipping " + getName() + " optimization"); return false; } } else { // check if rewrite can be applied for operator tree // if there are no partitions on base table canApply = checkIfRewriteCanBeApplied(topOp, table, indexes); } } } } return canApply; }
/** * Returns the skewed values in all the tables which are going to be scanned. If the join is on * columns c1, c2 and c3 on tables T1 and T2, T1 is skewed on c1 and c4 with the skew values * ((1,2),(3,4)), whereas T2 is skewed on c1, c2 with skew values ((5,6),(7,8)), the resulting * map would be: <(c1) -> ((1), (3)), (c1,c2) -> ((5,6),(7,8))> * * @param op The join operator being optimized * @param tableScanOpsForJoin table scan operators which are parents of the join operator * @return map<join keys intersection skewedkeys, list of skewed values>. */ private Map<List<ExprNodeDesc>, List<List<String>>> getSkewedValues( Operator<? extends OperatorDesc> op, List<TableScanOperator> tableScanOpsForJoin) { Map<List<ExprNodeDesc>, List<List<String>>> skewDataReturn = new HashMap<List<ExprNodeDesc>, List<List<String>>>(); Map<List<ExprNodeDescEqualityWrapper>, List<List<String>>> skewData = new HashMap<List<ExprNodeDescEqualityWrapper>, List<List<String>>>(); // The join keys are available in the reduceSinkOperators before join for (Operator<? extends OperatorDesc> reduceSinkOp : op.getParentOperators()) { ReduceSinkDesc rsDesc = ((ReduceSinkOperator) reduceSinkOp).getConf(); if (rsDesc.getKeyCols() != null) { Table table = null; // Find the skew information corresponding to the table List<String> skewedColumns = null; List<List<String>> skewedValueList = null; // The join columns which are also skewed List<ExprNodeDescEqualityWrapper> joinKeysSkewedCols = new ArrayList<ExprNodeDescEqualityWrapper>(); // skewed keys which intersect with join keys List<Integer> positionSkewedKeys = new ArrayList<Integer>(); // Update the joinKeys appropriately. for (ExprNodeDesc keyColDesc : rsDesc.getKeyCols()) { ExprNodeColumnDesc keyCol = null; // If the key column is not a column, then don't apply this optimization. // This will be fixed as part of https://issues.apache.org/jira/browse/HIVE-3445 // for type conversion UDFs. if (keyColDesc instanceof ExprNodeColumnDesc) { keyCol = (ExprNodeColumnDesc) keyColDesc; if (table == null) { table = getTable(parseContext, reduceSinkOp, tableScanOpsForJoin); skewedColumns = table == null ? null : table.getSkewedColNames(); // No skew on the table to take care of if ((skewedColumns == null) || (skewedColumns.isEmpty())) { continue; } skewedValueList = table == null ? null : table.getSkewedColValues(); } int pos = skewedColumns.indexOf(keyCol.getColumn()); if ((pos >= 0) && (!positionSkewedKeys.contains(pos))) { positionSkewedKeys.add(pos); ExprNodeColumnDesc keyColClone = (ExprNodeColumnDesc) keyCol.clone(); keyColClone.setTabAlias(null); joinKeysSkewedCols.add(new ExprNodeDescEqualityWrapper(keyColClone)); } } } // If the skew keys match the join keys, then add it to the list if ((skewedColumns != null) && (!skewedColumns.isEmpty())) { if (!joinKeysSkewedCols.isEmpty()) { // If the join keys match the skewed keys, use the table skewed keys List<List<String>> skewedJoinValues; if (skewedColumns.size() == positionSkewedKeys.size()) { skewedJoinValues = skewedValueList; } else { skewedJoinValues = getSkewedJoinValues(skewedValueList, positionSkewedKeys); } List<List<String>> oldSkewedJoinValues = skewData.get(joinKeysSkewedCols); if (oldSkewedJoinValues == null) { oldSkewedJoinValues = new ArrayList<List<String>>(); } for (List<String> skewValue : skewedJoinValues) { if (!oldSkewedJoinValues.contains(skewValue)) { oldSkewedJoinValues.add(skewValue); } } skewData.put(joinKeysSkewedCols, oldSkewedJoinValues); } } } } // convert skewData to contain ExprNodeDesc in the keys for (Map.Entry<List<ExprNodeDescEqualityWrapper>, List<List<String>>> mapEntry : skewData.entrySet()) { List<ExprNodeDesc> skewedKeyJoinCols = new ArrayList<ExprNodeDesc>(); for (ExprNodeDescEqualityWrapper key : mapEntry.getKey()) { skewedKeyJoinCols.add(key.getExprNodeDesc()); } skewDataReturn.put(skewedKeyJoinCols, mapEntry.getValue()); } return skewDataReturn; }
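A plain-collections sketch of the result shape described in the Javadoc above, with column names standing in for the ExprNodeDesc keys; the exampleSkewMap method is purely illustrative.

static java.util.Map<java.util.List<String>, java.util.List<java.util.List<String>>> exampleSkewMap() {
  java.util.Map<java.util.List<String>, java.util.List<java.util.List<String>>> skewed =
      new java.util.HashMap<>();
  // T1 skewed on c1: only the c1 component of its skew values survives the join-key intersection.
  skewed.put(java.util.Arrays.asList("c1"),
      java.util.Arrays.asList(java.util.Arrays.asList("1"), java.util.Arrays.asList("3")));
  // T2 skewed on (c1, c2): both columns are join keys, so the full value tuples are kept.
  skewed.put(java.util.Arrays.asList("c1", "c2"),
      java.util.Arrays.asList(
          java.util.Arrays.asList("5", "6"), java.util.Arrays.asList("7", "8")));
  return skewed; // {[c1]=[[1], [3]], [c1, c2]=[[5, 6], [7, 8]]}
}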
private List<HiveLockObj> getLockObjects( QueryPlan plan, Database db, Table t, Partition p, HiveLockMode mode) throws LockException { List<HiveLockObj> locks = new LinkedList<HiveLockObj>(); HiveLockObject.HiveLockObjectData lockData = new HiveLockObject.HiveLockObjectData( plan.getQueryId(), String.valueOf(System.currentTimeMillis()), "IMPLICIT", plan.getQueryStr()); if (db != null) { locks.add(new HiveLockObj(new HiveLockObject(db.getName(), lockData), mode)); return locks; } if (t != null) { locks.add(new HiveLockObj(new HiveLockObject(t, lockData), mode)); mode = HiveLockMode.SHARED; locks.add(new HiveLockObj(new HiveLockObject(t.getDbName(), lockData), mode)); return locks; } if (p != null) { if (!(p instanceof DummyPartition)) { locks.add(new HiveLockObj(new HiveLockObject(p, lockData), mode)); } // All the parents are locked in shared mode mode = HiveLockMode.SHARED; // For dummy partitions, only partition name is needed String name = p.getName(); if (p instanceof DummyPartition) { name = p.getName().split("@")[2]; } String partialName = ""; String[] partns = name.split("/"); int len = p instanceof DummyPartition ? partns.length : partns.length - 1; Map<String, String> partialSpec = new LinkedHashMap<String, String>(); for (int idx = 0; idx < len; idx++) { String partn = partns[idx]; partialName += partn; String[] nameValue = partn.split("="); assert (nameValue.length == 2); partialSpec.put(nameValue[0], nameValue[1]); try { locks.add( new HiveLockObj( new HiveLockObject( new DummyPartition( p.getTable(), p.getTable().getDbName() + "/" + MetaStoreUtils.encodeTableName(p.getTable().getTableName()) + "/" + partialName, partialSpec), lockData), mode)); partialName += "/"; } catch (HiveException e) { throw new LockException(e.getMessage()); } } locks.add(new HiveLockObj(new HiveLockObject(p.getTable(), lockData), mode)); locks.add(new HiveLockObj(new HiveLockObject(p.getTable().getDbName(), lockData), mode)); } return locks; }
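A sketch of the partial-name expansion behind the shared parent locks above (hypothetical helper): for a partition name like "ds=2011-01-02/hr=13", getLockObjects takes shared locks on progressively longer prefixes; for real (non-dummy) partitions only the proper prefixes are used, since the partition itself is locked separately.

static java.util.List<String> partialPartitionNames(String partName) {
  java.util.List<String> names = new java.util.ArrayList<String>();
  String partial = "";
  for (String component : partName.split("/")) {
    partial = partial.isEmpty() ? component : partial + "/" + component;
    names.add(partial);
  }
  return names;
}
// partialPartitionNames("ds=2011-01-02/hr=13") -> [ds=2011-01-02, ds=2011-01-02/hr=13]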
@Override protected void authorizeDDLWork(HiveSemanticAnalyzerHookContext cntxt, Hive hive, DDLWork work) throws HiveException { // DB operations; none of them are enforced by Hive right now. ShowDatabasesDesc showDatabases = work.getShowDatabasesDesc(); if (showDatabases != null) { authorize( HiveOperation.SHOWDATABASES.getInputRequiredPrivileges(), HiveOperation.SHOWDATABASES.getOutputRequiredPrivileges()); } DropDatabaseDesc dropDb = work.getDropDatabaseDesc(); if (dropDb != null) { Database db = cntxt.getHive().getDatabase(dropDb.getDatabaseName()); authorize(db, Privilege.DROP); } DescDatabaseDesc descDb = work.getDescDatabaseDesc(); if (descDb != null) { Database db = cntxt.getHive().getDatabase(descDb.getDatabaseName()); authorize(db, Privilege.SELECT); } SwitchDatabaseDesc switchDb = work.getSwitchDatabaseDesc(); if (switchDb != null) { Database db = cntxt.getHive().getDatabase(switchDb.getDatabaseName()); authorize(db, Privilege.SELECT); } ShowTablesDesc showTables = work.getShowTblsDesc(); if (showTables != null) { String dbName = showTables.getDbName() == null ? SessionState.get().getCurrentDatabase() : showTables.getDbName(); authorize(cntxt.getHive().getDatabase(dbName), Privilege.SELECT); } ShowTableStatusDesc showTableStatus = work.getShowTblStatusDesc(); if (showTableStatus != null) { String dbName = showTableStatus.getDbName() == null ? SessionState.get().getCurrentDatabase() : showTableStatus.getDbName(); authorize(cntxt.getHive().getDatabase(dbName), Privilege.SELECT); } // TODO: add alter database support in HCat // Table operations. DropTableDesc dropTable = work.getDropTblDesc(); if (dropTable != null) { if (dropTable.getPartSpecs() == null) { // drop table is already enforced by Hive. We only check for table-level location even if // the table is partitioned. } else { // this is actually an ALTER TABLE DROP PARTITION statement for (DropTableDesc.PartSpec partSpec : dropTable.getPartSpecs()) { // partitions are not added as write entries in drop partitions in Hive Table table = hive.getTable(SessionState.get().getCurrentDatabase(), dropTable.getTableName()); List<Partition> partitions = null; try { partitions = hive.getPartitionsByFilter(table, partSpec.getPartSpec().getExprString()); } catch (Exception e) { throw new HiveException(e); } for (Partition part : partitions) { authorize(part, Privilege.DROP); } } } } AlterTableDesc alterTable = work.getAlterTblDesc(); if (alterTable != null) { Table table = hive.getTable(SessionState.get().getCurrentDatabase(), alterTable.getOldName(), false); Partition part = null; if (alterTable.getPartSpec() != null) { part = hive.getPartition(table, alterTable.getPartSpec(), false); } String newLocation = alterTable.getNewLocation(); /* HCat requires ALTER_DATA privileges for ALTER TABLE LOCATION statements * for the old table/partition location and the new location. */ if (alterTable.getOp() == AlterTableDesc.AlterTableTypes.ALTERLOCATION) { if (part != null) { authorize(part, Privilege.ALTER_DATA); // authorize for the old // location, and new location part.setLocation(newLocation); authorize(part, Privilege.ALTER_DATA); } else { authorize(table, Privilege.ALTER_DATA); // authorize for the old // location, and new location table.getTTable().getSd().setLocation(newLocation); authorize(table, Privilege.ALTER_DATA); } } // other alter operations are already supported by Hive } // we should be careful when authorizing a table based on just the // table name. If columns have a separate authorization domain, it // must be honored DescTableDesc descTable = work.getDescTblDesc(); if (descTable != null) { String tableName = extractTableName(descTable.getTableName()); authorizeTable(cntxt.getHive(), tableName, Privilege.SELECT); } ShowPartitionsDesc showParts = work.getShowPartsDesc(); if (showParts != null) { String tableName = extractTableName(showParts.getTabName()); authorizeTable(cntxt.getHive(), tableName, Privilege.SELECT); } }