  /**
   * Handles an ALTER TABLE ... RENAME event: re-keys the table, its columns, partition-key
   * columns and storage descriptor from their old qualified names to the new ones via partial
   * update notifications, so existing traits on those entities are retained.
   *
   * @param dgiBridge bridge used to build cluster-scoped qualified names
   * @param event the Hive hook event; expected to carry exactly one input (the old table)
   *     and at least one output
   * @throws Exception propagated from entity creation / notification helpers
   */
  private void renameTable(HiveMetaStoreBridge dgiBridge, HiveEventContext event) throws Exception {
    // Hive gives no direct handle on the new name; it must be derived from the outputs.
    assert event.getInputs() != null && event.getInputs().size() == 1;
    assert event.getOutputs() != null && event.getOutputs().size() > 0;

    // Update entity if not exists
    ReadEntity oldEntity = event.getInputs().iterator().next();
    Table oldTable = oldEntity.getTable();

    for (WriteEntity writeEntity : event.getOutputs()) {
      if (writeEntity.getType() == Entity.Type.TABLE) {
        Table newTable = writeEntity.getTable();
        // Hive sends both the old and the new table name in the outputs; skip the output
        // entry that still carries the old name.
        if (!newTable.getDbName().equals(oldTable.getDbName())
            || !newTable.getTableName().equals(oldTable.getTableName())) {
          final String oldQualifiedName =
              dgiBridge.getTableQualifiedName(dgiBridge.getClusterName(), oldTable);
          final String newQualifiedName =
              dgiBridge.getTableQualifiedName(dgiBridge.getClusterName(), newTable);

          // Create/update the old table entity - creates the entity with the old qualified name
          // and old table name if it doesn't exist; if it exists, it will be updated.
          // We always use the new (output) entity while creating the table since some
          // flags/attributes of the table are not set in inputEntity, and
          // Hive.getTable(oldTableName) also fails since the table no longer exists in Hive.
          final LinkedHashMap<Type, Referenceable> tables =
              createOrUpdateEntities(dgiBridge, event, writeEntity, true);
          Referenceable tableEntity = tables.get(Type.TABLE);

          // Reset regular column QF names to the old name and emit a partial notification
          // request replacing the old column QFName with the new one, retaining existing traits.
          replaceColumnQFName(
              event,
              (List<Referenceable>) tableEntity.get(HiveDataModelGenerator.COLUMNS),
              oldQualifiedName,
              newQualifiedName);

          // Same re-keying for the partition-key columns.
          replaceColumnQFName(
              event,
              (List<Referenceable>) tableEntity.get(HiveDataModelGenerator.PART_COLS),
              oldQualifiedName,
              newQualifiedName);

          // Same re-keying for the storage descriptor.
          replaceSDQFName(event, tableEntity, oldQualifiedName, newQualifiedName);

          // Finally re-key the table entity itself from the old to the new qualified name.
          replaceTableQFName(
              event, oldTable, newTable, tableEntity, oldQualifiedName, newQualifiedName);
        }
      }
    }
  }
@Test public void testCreateTable() throws Exception { String tableName = tableName(); String dbName = createDatabase(); String colName = "col" + random(); runCommand("create table " + dbName + "." + tableName + "(" + colName + " int, name string)"); assertTableIsRegistered(dbName, tableName); // there is only one instance of column registered String colId = assertColumnIsRegistered(colName); Referenceable colEntity = dgiCLient.getEntity(colId); Assert.assertEquals( colEntity.get("qualifiedName"), String.format( "%s.%s.%s@%s", dbName.toLowerCase(), tableName.toLowerCase(), colName.toLowerCase(), CLUSTER_NAME)); tableName = createTable(); String tableId = assertTableIsRegistered(DEFAULT_DB, tableName); Referenceable tableRef = dgiCLient.getEntity(tableId); Assert.assertEquals(tableRef.get("tableType"), TableType.MANAGED_TABLE.name()); Assert.assertEquals(tableRef.get(HiveDataModelGenerator.COMMENT), "table comment"); String entityName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName); Assert.assertEquals(tableRef.get(HiveDataModelGenerator.NAME), entityName); Assert.assertEquals( tableRef.get("name"), "default." + tableName.toLowerCase() + "@" + CLUSTER_NAME); final Referenceable sdRef = (Referenceable) tableRef.get("sd"); Assert.assertEquals(sdRef.get(HiveDataModelGenerator.STORAGE_IS_STORED_AS_SUB_DIRS), false); // Create table where database doesn't exist, will create database instance as well assertDatabaseIsRegistered(DEFAULT_DB); }
private void processHiveEntity( HiveMetaStoreBridge dgiBridge, HiveEventContext event, Entity entity, Set<String> dataSetsProcessed, SortedMap<Entity, Referenceable> dataSets, Set<Referenceable> entities) throws Exception { if (entity.getType() == Type.TABLE || entity.getType() == Type.PARTITION) { final String tblQFName = dgiBridge.getTableQualifiedName(dgiBridge.getClusterName(), entity.getTable()); if (!dataSetsProcessed.contains(tblQFName)) { LinkedHashMap<Type, Referenceable> result = createOrUpdateEntities(dgiBridge, event, entity, false); dataSets.put(entity, result.get(Type.TABLE)); dataSetsProcessed.add(tblQFName); entities.addAll(result.values()); } } else if (entity.getType() == Type.DFS_DIR) { final String pathUri = lower(new Path(entity.getLocation()).toString()); LOG.info("Registering DFS Path {} ", pathUri); if (!dataSetsProcessed.contains(pathUri)) { Referenceable hdfsPath = dgiBridge.fillHDFSDataSet(pathUri); dataSets.put(entity, hdfsPath); dataSetsProcessed.add(pathUri); entities.add(hdfsPath); } } }
private void deleteTable( HiveMetaStoreBridge dgiBridge, HiveEventContext event, WriteEntity output) { final String tblQualifiedName = HiveMetaStoreBridge.getTableQualifiedName(dgiBridge.getClusterName(), output.getTable()); LOG.info("Deleting table {} ", tblQualifiedName); event.addMessage( new HookNotification.EntityDeleteRequest( event.getUser(), HiveDataTypes.HIVE_TABLE.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, tblQualifiedName)); }
@Test public void testLineage() throws Exception { String table1 = createTable(false); String db2 = createDatabase(); String table2 = tableName(); String query = String.format("create table %s.%s as select * from %s", db2, table2, table1); runCommand(query); String table1Id = assertTableIsRegistered(DEFAULT_DB, table1); String table2Id = assertTableIsRegistered(db2, table2); String datasetName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, db2, table2); JSONObject response = dgiCLient.getInputGraph(datasetName); JSONObject vertices = response.getJSONObject("values").getJSONObject("vertices"); Assert.assertTrue(vertices.has(table1Id)); Assert.assertTrue(vertices.has(table2Id)); datasetName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, table1); response = dgiCLient.getOutputGraph(datasetName); vertices = response.getJSONObject("values").getJSONObject("vertices"); Assert.assertTrue(vertices.has(table1Id)); Assert.assertTrue(vertices.has(table2Id)); }
private void handleExternalTables( final HiveMetaStoreBridge dgiBridge, final HiveEventContext event, final LinkedHashMap<Type, Referenceable> tables) throws HiveException, MalformedURLException { List<Referenceable> entities = new ArrayList<>(); final Entity hiveEntity = getEntityByType(event.getOutputs(), Type.TABLE); Table hiveTable = hiveEntity.getTable(); // Refresh to get the correct location hiveTable = dgiBridge.hiveClient.getTable(hiveTable.getDbName(), hiveTable.getTableName()); final String location = lower(hiveTable.getDataLocation().toString()); if (hiveTable != null && TableType.EXTERNAL_TABLE.equals(hiveTable.getTableType())) { LOG.info("Registering external table process {} ", event.getQueryStr()); final ReadEntity dfsEntity = new ReadEntity(); dfsEntity.setTyp(Type.DFS_DIR); dfsEntity.setName(location); SortedMap<Entity, Referenceable> inputs = new TreeMap<Entity, Referenceable>(entityComparator) { { put(dfsEntity, dgiBridge.fillHDFSDataSet(location)); } }; SortedMap<Entity, Referenceable> outputs = new TreeMap<Entity, Referenceable>(entityComparator) { { put(hiveEntity, tables.get(Type.TABLE)); } }; Referenceable processReferenceable = getProcessReferenceable(dgiBridge, event, inputs, outputs); String tableQualifiedName = dgiBridge.getTableQualifiedName(dgiBridge.getClusterName(), hiveTable); if (isCreateOp(event)) { processReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, tableQualifiedName); } entities.addAll(tables.values()); entities.add(processReferenceable); event.addMessage(new HookNotification.EntityUpdateRequest(event.getUser(), entities)); } }
  /**
   * Handles an ALTER TABLE ... CHANGE COLUMN rename: diffs the old and new column lists to find
   * the renamed column, then emits a partial-update notification that re-keys the column entity
   * from its old qualified name to the new one so existing traits carry over.
   *
   * @param dgiBridge bridge used for cluster-scoped qualified names and metastore access
   * @param event the Hive hook event; expected to carry exactly one input (the old table)
   *     and at least one output
   * @throws Exception propagated from entity creation / notification helpers
   */
  private void renameColumn(HiveMetaStoreBridge dgiBridge, HiveEventContext event)
      throws Exception {
    assert event.getInputs() != null && event.getInputs().size() == 1;
    assert event.getOutputs() != null && event.getOutputs().size() > 0;

    Table oldTable = event.getInputs().iterator().next().getTable();
    List<FieldSchema> oldColList = oldTable.getAllCols();
    Table outputTbl = event.getOutputs().iterator().next().getTable();
    // Re-fetch from the metastore so the column list reflects the post-rename state.
    outputTbl = dgiBridge.hiveClient.getTable(outputTbl.getDbName(), outputTbl.getTableName());
    List<FieldSchema> newColList = outputTbl.getAllCols();
    // A rename changes a column name, never the number of columns.
    assert oldColList.size() == newColList.size();

    Pair<String, String> changedColNamePair = findChangedColNames(oldColList, newColList);
    String oldColName = changedColNamePair.getLeft();
    String newColName = changedColNamePair.getRight();
    for (WriteEntity writeEntity : event.getOutputs()) {
      if (writeEntity.getType() == Type.TABLE) {
        Table newTable = writeEntity.getTable();
        // Ensure the table entity exists/is current before re-keying its column.
        createOrUpdateEntities(dgiBridge, event, writeEntity, true, oldTable);
        final String newQualifiedTableName =
            dgiBridge.getTableQualifiedName(dgiBridge.getClusterName(), newTable);
        String oldColumnQFName =
            HiveMetaStoreBridge.getColumnQualifiedName(newQualifiedTableName, oldColName);
        String newColumnQFName =
            HiveMetaStoreBridge.getColumnQualifiedName(newQualifiedTableName, newColName);
        Referenceable newColEntity = new Referenceable(HiveDataTypes.HIVE_COLUMN.getName());
        newColEntity.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, newColumnQFName);

        // Partial update keyed on the OLD qualified name replaces it with the new column
        // entity, preserving traits attached to the existing column.
        event.addMessage(
            new HookNotification.EntityPartialUpdateRequest(
                event.getUser(),
                HiveDataTypes.HIVE_COLUMN.getName(),
                AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
                oldColumnQFName,
                newColEntity));
      }
    }
    // Refresh the table entities themselves after the column re-key.
    handleEventOutputs(dgiBridge, event, Type.TABLE);
  }