예제 #1
0
  /**
   * Handles a table rename event.
   *
   * <p>The single input of the event is taken as the table before the rename. Each output of type
   * TABLE whose database name or table name differs from that input is treated as the renamed
   * table: its entities are created or updated, and the column, partition-column, storage
   * descriptor and table qualified names are passed to the {@code replace*QFName} helpers together
   * with the old and new qualified names.
   *
   * @param dgiBridge bridge used to build qualified table names for the configured cluster
   * @param event hook event carrying the inputs and outputs of the rename
   * @throws Exception if any of the delegated calls fails
   */
  private void renameTable(HiveMetaStoreBridge dgiBridge, HiveEventContext event) throws Exception {
    // There is no direct way to obtain the new name, so it is derived from the event outputs.
    assert event.getInputs() != null && event.getInputs().size() == 1;
    assert event.getOutputs() != null && event.getOutputs().size() > 0;

    // The only input describes the table as it was before the rename.
    final Table oldTable = event.getInputs().iterator().next().getTable();

    for (WriteEntity output : event.getOutputs()) {
      if (output.getType() != Entity.Type.TABLE) {
        continue;
      }

      final Table newTable = output.getTable();
      // Hive reports both the old and the new table among the outputs; skip the entry that still
      // carries the old database and table name.
      final boolean sameDb = newTable.getDbName().equals(oldTable.getDbName());
      if (sameDb && newTable.getTableName().equals(oldTable.getTableName())) {
        continue;
      }

      final String oldQualifiedName =
          dgiBridge.getTableQualifiedName(dgiBridge.getClusterName(), oldTable);
      final String newQualifiedName =
          dgiBridge.getTableQualifiedName(dgiBridge.getClusterName(), newTable);

      // The entities are always built from the new (output) entity: some flags and attributes are
      // not set on the input entity, and looking the old table up in Hive fails because it no
      // longer exists under that name.
      final LinkedHashMap<Type, Referenceable> created =
          createOrUpdateEntities(dgiBridge, event, output, true);
      final Referenceable tableEntity = created.get(Type.TABLE);

      // Regular columns: reset to the old qualified name and request replacement with the new
      // one so that existing traits are retained.
      final List<Referenceable> regularColumns =
          (List<Referenceable>) tableEntity.get(HiveDataModelGenerator.COLUMNS);
      replaceColumnQFName(event, regularColumns, oldQualifiedName, newQualifiedName);

      // Partition key columns: same treatment as the regular columns.
      final List<Referenceable> partitionColumns =
          (List<Referenceable>) tableEntity.get(HiveDataModelGenerator.PART_COLS);
      replaceColumnQFName(event, partitionColumns, oldQualifiedName, newQualifiedName);

      // Storage descriptor: reset to the old qualified name and request replacement with the new
      // one so that existing traits are retained.
      replaceSDQFName(event, tableEntity, oldQualifiedName, newQualifiedName);

      // Finally the table itself.
      replaceTableQFName(
          event, oldTable, newTable, tableEntity, oldQualifiedName, newQualifiedName);
    }
  }
예제 #2
0
  /**
   * Creates a table in a fresh database and a second table through {@code createTable()}, then
   * checks the registered column qualified name, the table attributes and the storage descriptor
   * flag, and finally that the default database is registered.
   */
  @Test
  public void testCreateTable() throws Exception {
    final String explicitTable = tableName();
    final String dbName = createDatabase();
    final String colName = "col" + random();
    final String createStatement =
        "create table " + dbName + "." + explicitTable + "(" + colName + " int, name string)";
    runCommand(createStatement);
    assertTableIsRegistered(dbName, explicitTable);

    // The column must be registered under its lower-cased, cluster-qualified name.
    final String columnId = assertColumnIsRegistered(colName);
    final Referenceable columnRef = dgiCLient.getEntity(columnId);
    final String expectedColumnQFName =
        String.format(
            "%s.%s.%s@%s",
            dbName.toLowerCase(),
            explicitTable.toLowerCase(),
            colName.toLowerCase(),
            CLUSTER_NAME);
    Assert.assertEquals(columnRef.get("qualifiedName"), expectedColumnQFName);

    // Second table, created through the shared helper and looked up in the default database.
    final String defaultDbTable = createTable();
    final String tableId = assertTableIsRegistered(DEFAULT_DB, defaultDbTable);
    final Referenceable tableRef = dgiCLient.getEntity(tableId);
    Assert.assertEquals(tableRef.get("tableType"), TableType.MANAGED_TABLE.name());
    Assert.assertEquals(tableRef.get(HiveDataModelGenerator.COMMENT), "table comment");

    final String expectedEntityName =
        HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, defaultDbTable);
    Assert.assertEquals(tableRef.get(HiveDataModelGenerator.NAME), expectedEntityName);
    final String expectedName = "default." + defaultDbTable.toLowerCase() + "@" + CLUSTER_NAME;
    Assert.assertEquals(tableRef.get("name"), expectedName);

    final Referenceable storageDescRef = (Referenceable) tableRef.get("sd");
    Assert.assertEquals(
        storageDescRef.get(HiveDataModelGenerator.STORAGE_IS_STORED_AS_SUB_DIRS), false);

    // The database holding the second table must have been registered as well.
    assertDatabaseIsRegistered(DEFAULT_DB);
  }
예제 #3
0
 /**
  * Registers the data set behind a single Hive entity, once per qualified name or path.
  *
  * <p>TABLE and PARTITION entities are keyed by the qualified name of their table; DFS_DIR
  * entities are keyed by their lower-cased path. Entities of any other type are ignored. A key
  * that is already present in {@code dataSetsProcessed} is not processed again.
  *
  * @param dgiBridge bridge used to build qualified names and HDFS data sets
  * @param event hook event passed through to entity creation
  * @param entity the Hive entity to process
  * @param dataSetsProcessed keys handled so far; the key of a newly handled entity is added
  * @param dataSets receives the mapping from {@code entity} to its data set referenceable
  * @param entities receives every referenceable produced for {@code entity}
  * @throws Exception if creating the entities or the HDFS data set fails
  */
 private void processHiveEntity(
     HiveMetaStoreBridge dgiBridge,
     HiveEventContext event,
     Entity entity,
     Set<String> dataSetsProcessed,
     SortedMap<Entity, Referenceable> dataSets,
     Set<Referenceable> entities)
     throws Exception {
   final Type entityType = entity.getType();

   if (entityType == Type.TABLE || entityType == Type.PARTITION) {
     final String tableKey =
         dgiBridge.getTableQualifiedName(dgiBridge.getClusterName(), entity.getTable());
     if (dataSetsProcessed.contains(tableKey)) {
       return;
     }
     // The key is recorded only after the entities were created successfully.
     final LinkedHashMap<Type, Referenceable> created =
         createOrUpdateEntities(dgiBridge, event, entity, false);
     dataSets.put(entity, created.get(Type.TABLE));
     dataSetsProcessed.add(tableKey);
     entities.addAll(created.values());
     return;
   }

   if (entityType != Type.DFS_DIR) {
     return;
   }

   final String pathKey = lower(new Path(entity.getLocation()).toString());
   // Logged for every DFS entity, including paths that were already handled.
   LOG.info("Registering DFS Path {} ", pathKey);
   if (dataSetsProcessed.contains(pathKey)) {
     return;
   }
   final Referenceable hdfsDataSet = dgiBridge.fillHDFSDataSet(pathKey);
   dataSets.put(entity, hdfsDataSet);
   dataSetsProcessed.add(pathKey);
   entities.add(hdfsDataSet);
 }
예제 #4
0
 /**
  * Adds an entity delete request for the table of the given output to the event.
  *
  * @param dgiBridge bridge supplying the cluster name for the qualified table name
  * @param event hook event that receives the delete request and supplies the user
  * @param output write entity whose table is to be deleted
  */
 private void deleteTable(
     HiveMetaStoreBridge dgiBridge, HiveEventContext event, WriteEntity output) {
   final String clusterName = dgiBridge.getClusterName();
   final String qualifiedName =
       HiveMetaStoreBridge.getTableQualifiedName(clusterName, output.getTable());
   LOG.info("Deleting table {} ", qualifiedName);

   // The table is addressed by type name plus its qualified-name attribute.
   final HookNotification.EntityDeleteRequest deleteRequest =
       new HookNotification.EntityDeleteRequest(
           event.getUser(),
           HiveDataTypes.HIVE_TABLE.getName(),
           AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
           qualifiedName);
   event.addMessage(deleteRequest);
 }
예제 #5
0
  /**
   * Runs a create-table-as-select from one table into another and checks that both table ids
   * appear among the vertices of the target's input graph and of the source's output graph.
   */
  @Test
  public void testLineage() throws Exception {
    final String sourceTable = createTable(false);

    final String targetDb = createDatabase();
    final String targetTable = tableName();

    runCommand(
        String.format(
            "create table %s.%s as select * from %s", targetDb, targetTable, sourceTable));
    final String sourceId = assertTableIsRegistered(DEFAULT_DB, sourceTable);
    final String targetId = assertTableIsRegistered(targetDb, targetTable);

    // Input graph of the target table.
    final String targetDataset =
        HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, targetDb, targetTable);
    final JSONObject inputGraph = dgiCLient.getInputGraph(targetDataset);
    final JSONObject inputVertices = inputGraph.getJSONObject("values").getJSONObject("vertices");
    Assert.assertTrue(inputVertices.has(sourceId));
    Assert.assertTrue(inputVertices.has(targetId));

    // Output graph of the source table.
    final String sourceDataset =
        HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, sourceTable);
    final JSONObject outputGraph = dgiCLient.getOutputGraph(sourceDataset);
    final JSONObject outputVertices =
        outputGraph.getJSONObject("values").getJSONObject("vertices");
    Assert.assertTrue(outputVertices.has(sourceId));
    Assert.assertTrue(outputVertices.has(targetId));
  }
예제 #6
0
  /**
   * Registers a process that links an external table to its data location.
   *
   * <p>The TABLE output of the event is re-read through the Hive client so that its location is
   * current. If that table is an external table, a process referenceable is built with the
   * location as input and the table as output, and an entity update request containing all
   * {@code tables} entries plus the process is added to the event. Nothing is added for any other
   * table type.
   *
   * @param dgiBridge bridge giving access to the Hive client and HDFS data set creation
   * @param event hook event whose outputs are inspected and which receives the update request
   * @param tables entities previously created for the table, keyed by entity type
   * @throws HiveException if the table cannot be re-read from Hive
   * @throws MalformedURLException propagated from the delegated calls
   */
  private void handleExternalTables(
      final HiveMetaStoreBridge dgiBridge,
      final HiveEventContext event,
      final LinkedHashMap<Type, Referenceable> tables)
      throws HiveException, MalformedURLException {
    final Entity hiveEntity = getEntityByType(event.getOutputs(), Type.TABLE);
    final Table eventTable = hiveEntity.getTable();
    // Refresh to get the correct location
    final Table hiveTable =
        dgiBridge.hiveClient.getTable(eventTable.getDbName(), eventTable.getTableName());

    // Check the table before touching it: the location is only needed, and only read, for
    // external tables.
    if (hiveTable == null || !TableType.EXTERNAL_TABLE.equals(hiveTable.getTableType())) {
      return;
    }

    LOG.info("Registering external table process {} ", event.getQueryStr());
    final String location = lower(hiveTable.getDataLocation().toString());
    final ReadEntity dfsEntity = new ReadEntity();
    dfsEntity.setTyp(Type.DFS_DIR);
    dfsEntity.setName(location);

    // Plain maps instead of anonymous subclasses, so no reference to the enclosing instance is
    // captured.
    final SortedMap<Entity, Referenceable> inputs =
        new TreeMap<Entity, Referenceable>(entityComparator);
    inputs.put(dfsEntity, dgiBridge.fillHDFSDataSet(location));

    final SortedMap<Entity, Referenceable> outputs =
        new TreeMap<Entity, Referenceable>(entityComparator);
    outputs.put(hiveEntity, tables.get(Type.TABLE));

    final Referenceable processReferenceable =
        getProcessReferenceable(dgiBridge, event, inputs, outputs);

    if (isCreateOp(event)) {
      // For create operations the process is keyed by the table's qualified name.
      final String tableQualifiedName =
          dgiBridge.getTableQualifiedName(dgiBridge.getClusterName(), hiveTable);
      processReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, tableQualifiedName);
    }

    final List<Referenceable> entities = new ArrayList<>();
    entities.addAll(tables.values());
    entities.add(processReferenceable);
    event.addMessage(new HookNotification.EntityUpdateRequest(event.getUser(), entities));
  }
예제 #7
0
  /**
   * Handles a column rename event.
   *
   * <p>The column lists of the input table and of the freshly re-read output table are compared
   * to find the changed column name. For every TABLE output the entities are created or updated
   * and a partial update request is added that targets the column under its old qualified name
   * and carries an entity with the new qualified name. The event outputs are then handed to
   * {@code handleEventOutputs}.
   *
   * @param dgiBridge bridge giving access to the Hive client and qualified-name construction
   * @param event hook event carrying the inputs and outputs of the rename
   * @throws Exception if any of the delegated calls fails
   */
  private void renameColumn(HiveMetaStoreBridge dgiBridge, HiveEventContext event)
      throws Exception {
    assert event.getInputs() != null && event.getInputs().size() == 1;
    assert event.getOutputs() != null && event.getOutputs().size() > 0;

    final Table oldTable = event.getInputs().iterator().next().getTable();
    final List<FieldSchema> columnsBefore = oldTable.getAllCols();

    // Re-read the output table from Hive before taking its column list.
    final Table reportedOutput = event.getOutputs().iterator().next().getTable();
    final Table refreshedOutput =
        dgiBridge.hiveClient.getTable(reportedOutput.getDbName(), reportedOutput.getTableName());
    final List<FieldSchema> columnsAfter = refreshedOutput.getAllCols();
    assert columnsBefore.size() == columnsAfter.size();

    final Pair<String, String> renamed = findChangedColNames(columnsBefore, columnsAfter);
    final String oldColName = renamed.getLeft();
    final String newColName = renamed.getRight();

    final String columnTypeName = HiveDataTypes.HIVE_COLUMN.getName();
    for (WriteEntity output : event.getOutputs()) {
      if (output.getType() != Type.TABLE) {
        continue;
      }

      final Table newTable = output.getTable();
      createOrUpdateEntities(dgiBridge, event, output, true, oldTable);

      // Both column names are qualified against the output table.
      final String tableQFName =
          dgiBridge.getTableQualifiedName(dgiBridge.getClusterName(), newTable);
      final String oldColumnQFName =
          HiveMetaStoreBridge.getColumnQualifiedName(tableQFName, oldColName);
      final String newColumnQFName =
          HiveMetaStoreBridge.getColumnQualifiedName(tableQFName, newColName);

      final Referenceable renamedColumn = new Referenceable(columnTypeName);
      renamedColumn.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, newColumnQFName);

      final HookNotification.EntityPartialUpdateRequest partialUpdate =
          new HookNotification.EntityPartialUpdateRequest(
              event.getUser(),
              HiveDataTypes.HIVE_COLUMN.getName(),
              AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
              oldColumnQFName,
              renamedColumn);
      event.addMessage(partialUpdate);
    }
    handleEventOutputs(dgiBridge, event, Type.TABLE);
  }