private void processHiveEntity(
        HiveMetaStoreBridge dgiBridge,
        HiveEventContext event,
        Entity entity,
        Set<String> dataSetsProcessed,
        SortedMap<Entity, Referenceable> dataSets,
        Set<Referenceable> entities)
        throws Exception {
    if (entity.getType() == Type.TABLE || entity.getType() == Type.PARTITION) {
        final String tblQFName =
                dgiBridge.getTableQualifiedName(dgiBridge.getClusterName(), entity.getTable());
        if (!dataSetsProcessed.contains(tblQFName)) {
            LinkedHashMap<Type, Referenceable> result =
                    createOrUpdateEntities(dgiBridge, event, entity, false);
            dataSets.put(entity, result.get(Type.TABLE));
            dataSetsProcessed.add(tblQFName);
            entities.addAll(result.values());
        }
    } else if (entity.getType() == Type.DFS_DIR) {
        final String pathUri = lower(new Path(entity.getLocation()).toString());
        LOG.info("Registering DFS Path {} ", pathUri);
        if (!dataSetsProcessed.contains(pathUri)) {
            Referenceable hdfsPath = dgiBridge.fillHDFSDataSet(pathUri);
            dataSets.put(entity, hdfsPath);
            dataSetsProcessed.add(pathUri);
            entities.add(hdfsPath);
        }
    }
}
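// A hypothetical caller sketch (an assumption, not part of the original listing;
// the method name collectOutputDataSets is made up): each non-dummy output of the
// Hive event is de-duplicated and registered through processHiveEntity() so the
// resulting Referenceables can be attached to the lineage process.
private SortedMap<Entity, Referenceable> collectOutputDataSets(
        HiveMetaStoreBridge dgiBridge, HiveEventContext event, Set<Referenceable> entities)
        throws Exception {
    SortedMap<Entity, Referenceable> sortedOutputs =
            new TreeMap<Entity, Referenceable>(entityComparator);
    Set<String> dataSetsProcessed = new HashSet<>();
    for (Entity output : event.getOutputs()) {
        if (!isDummyEntity(output)) {
            processHiveEntity(dgiBridge, event, output, dataSetsProcessed, sortedOutputs, entities);
        }
    }
    return sortedOutputs;
}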
private static void addDatasets(
        HiveOperation op, StringBuilder buffer, final Map<Entity, Referenceable> refs) {
    if (refs != null) {
        for (Map.Entry<Entity, Referenceable> ref : refs.entrySet()) {
            final Entity entity = ref.getKey();
            // HiveOperation.QUERY type encompasses INSERT, INSERT_OVERWRITE, UPDATE, DELETE,
            // PATH_WRITE operations
            if (addQueryType(op, entity)) {
                buffer.append(SEP);
                buffer.append(((WriteEntity) entity).getWriteType().name());
            }
            if (Type.DFS_DIR.equals(entity.getType()) || Type.LOCAL_DIR.equals(entity.getType())) {
                LOG.debug(
                        "Skipping dfs dir addition into process qualified name {} ",
                        ref.getValue().get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME));
            } else {
                buffer.append(SEP);
                String dataSetQlfdName =
                        (String) ref.getValue().get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME);
                // '/' breaks query parsing on ATLAS
                buffer.append(dataSetQlfdName.toLowerCase().replaceAll("/", ""));
            }
        }
    }
}
private Entity getEntityByType(Set<? extends Entity> entities, Type entityType) {
    for (Entity entity : entities) {
        if (entity.getType() == entityType) {
            return entity;
        }
    }
    return null;
}
private LinkedHashMap<Type, Referenceable> handleEventOutputs(
        HiveMetaStoreBridge dgiBridge, HiveEventContext event, Type entityType) throws Exception {
    for (Entity entity : event.getOutputs()) {
        if (entity.getType() == entityType) {
            return createOrUpdateEntities(dgiBridge, event, entity, true);
        }
    }
    return null;
}
// Build the hierarchy of authorizable objects for the given entity type.
private List<DBModelAuthorizable> getAuthzHierarchyFromEntity(Entity entity) {
    List<DBModelAuthorizable> objectHierarchy = new ArrayList<DBModelAuthorizable>();
    switch (entity.getType()) {
        case TABLE:
            objectHierarchy.add(new Database(entity.getTable().getDbName()));
            objectHierarchy.add(new Table(entity.getTable().getTableName()));
            break;
        case PARTITION:
        case DUMMYPARTITION:
            objectHierarchy.add(new Database(entity.getPartition().getTable().getDbName()));
            objectHierarchy.add(new Table(entity.getPartition().getTable().getTableName()));
            break;
        case DFS_DIR:
        case LOCAL_DIR:
            try {
                objectHierarchy.add(
                        parseURI(entity.toString(), entity.getType().equals(Entity.Type.LOCAL_DIR)));
            } catch (Exception e) {
                throw new AuthorizationException("Failed to get File URI", e);
            }
            break;
        case DATABASE:
        case FUNCTION:
            // TODO use database entities from compiler instead of capturing from AST
            break;
        default:
            throw new UnsupportedOperationException(
                    "Unsupported entity type " + entity.getType().name());
    }
    return objectHierarchy;
}
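// A minimal usage sketch (an assumption; the method name getInputHierarchies and the
// readEntities parameter are hypothetical): one authorizable hierarchy is built per
// non-dummy read entity so each can later be checked for the required privileges.
private List<List<DBModelAuthorizable>> getInputHierarchies(Set<ReadEntity> readEntities) {
    List<List<DBModelAuthorizable>> inputHierarchies = new ArrayList<>();
    for (ReadEntity readEntity : readEntities) {
        if (!isDummyEntity(readEntity)) {
            inputHierarchies.add(getAuthzHierarchyFromEntity(readEntity));
        }
    }
    return inputHierarchies;
}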
private void handleExternalTables(
        final HiveMetaStoreBridge dgiBridge,
        final HiveEventContext event,
        final LinkedHashMap<Type, Referenceable> tables)
        throws HiveException, MalformedURLException {
    List<Referenceable> entities = new ArrayList<>();
    final Entity hiveEntity = getEntityByType(event.getOutputs(), Type.TABLE);
    if (hiveEntity == null) {
        return;
    }
    Table hiveTable = hiveEntity.getTable();
    // Refresh to get the correct location
    hiveTable = dgiBridge.hiveClient.getTable(hiveTable.getDbName(), hiveTable.getTableName());

    if (hiveTable != null && TableType.EXTERNAL_TABLE.equals(hiveTable.getTableType())) {
        final String location = lower(hiveTable.getDataLocation().toString());
        LOG.info("Registering external table process {} ", event.getQueryStr());
        final ReadEntity dfsEntity = new ReadEntity();
        dfsEntity.setTyp(Type.DFS_DIR);
        dfsEntity.setName(location);

        SortedMap<Entity, Referenceable> inputs =
                new TreeMap<Entity, Referenceable>(entityComparator) {
                    {
                        put(dfsEntity, dgiBridge.fillHDFSDataSet(location));
                    }
                };

        SortedMap<Entity, Referenceable> outputs =
                new TreeMap<Entity, Referenceable>(entityComparator) {
                    {
                        put(hiveEntity, tables.get(Type.TABLE));
                    }
                };

        Referenceable processReferenceable =
                getProcessReferenceable(dgiBridge, event, inputs, outputs);
        String tableQualifiedName =
                dgiBridge.getTableQualifiedName(dgiBridge.getClusterName(), hiveTable);

        if (isCreateOp(event)) {
            processReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, tableQualifiedName);
        }

        entities.addAll(tables.values());
        entities.add(processReferenceable);
        event.addMessage(new HookNotification.EntityUpdateRequest(event.getUser(), entities));
    }
}
private static boolean addQueryType(HiveOperation op, Entity entity) {
    if (WriteEntity.class.isAssignableFrom(entity.getClass())) {
        if (((WriteEntity) entity).getWriteType() != null && op.equals(HiveOperation.QUERY)) {
            switch (((WriteEntity) entity).getWriteType()) {
                case INSERT:
                case INSERT_OVERWRITE:
                case UPDATE:
                case DELETE:
                case PATH_WRITE:
                    return true;
                default:
                    break;
            }
        }
    }
    return false;
}
// Check if the given entity is identified as dummy by Hive compilers.
private boolean isDummyEntity(Entity entity) {
    return entity.isDummy();
}
private LinkedHashMap<Type, Referenceable> createOrUpdateEntities(
        HiveMetaStoreBridge dgiBridge,
        HiveEventContext event,
        Entity entity,
        boolean skipTempTables,
        Table existTable)
        throws Exception {
    Database db = null;
    Table table = null;
    Partition partition = null;
    LinkedHashMap<Type, Referenceable> result = new LinkedHashMap<>();
    List<Referenceable> entities = new ArrayList<>();

    switch (entity.getType()) {
        case DATABASE:
            db = entity.getDatabase();
            break;
        case TABLE:
            table = entity.getTable();
            db = dgiBridge.hiveClient.getDatabase(table.getDbName());
            break;
        case PARTITION:
            partition = entity.getPartition();
            table = partition.getTable();
            db = dgiBridge.hiveClient.getDatabase(table.getDbName());
            break;
    }

    db = dgiBridge.hiveClient.getDatabase(db.getName());
    Referenceable dbEntity = dgiBridge.createDBInstance(db);

    entities.add(dbEntity);
    result.put(Type.DATABASE, dbEntity);

    Referenceable tableEntity = null;

    if (table != null) {
        if (existTable != null) {
            table = existTable;
        } else {
            table = dgiBridge.hiveClient.getTable(table.getDbName(), table.getTableName());
        }
        // If it's an external table, even though the temp table skip flag is on,
        // we create the table since we need the HDFS path for temp table lineage.
        if (skipTempTables
                && table.isTemporary()
                && !TableType.EXTERNAL_TABLE.equals(table.getTableType())) {
            LOG.debug(
                    "Skipping temporary table registration {} since it is not an external table {} ",
                    table.getTableName(),
                    table.getTableType().name());
        } else {
            tableEntity = dgiBridge.createTableInstance(dbEntity, table);
            entities.add(tableEntity);
            result.put(Type.TABLE, tableEntity);
        }
    }

    event.addMessage(new HookNotification.EntityUpdateRequest(event.getUser(), entities));
    return result;
}
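// A plausible four-argument overload (an assumption; the overload itself is not shown
// in this listing) matching the createOrUpdateEntities(..., false) and (..., true)
// calls in processHiveEntity() and handleEventOutputs(): it simply delegates to the
// variant above with no pre-fetched table.
private LinkedHashMap<Type, Referenceable> createOrUpdateEntities(
        HiveMetaStoreBridge dgiBridge, HiveEventContext event, Entity entity, boolean skipTempTables)
        throws Exception {
    return createOrUpdateEntities(dgiBridge, event, entity, skipTempTables, null);
}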
@Override
public int compare(Entity o1, Entity o2) {
    return o1.getName().toLowerCase().compareTo(o2.getName().toLowerCase());
}
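// A minimal wiring sketch (an assumption; the exact declaration is not shown here):
// a compare() method like the one above would typically back the `entityComparator`
// field passed to the TreeMaps in handleExternalTables(), ordering entities by their
// lower-cased names.
private static final Comparator<Entity> entityComparator =
        new Comparator<Entity>() {
            @Override
            public int compare(Entity o1, Entity o2) {
                return o1.getName().toLowerCase().compareTo(o2.getName().toLowerCase());
            }
        };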