private void handleExternalTables( final HiveMetaStoreBridge dgiBridge, final HiveEventContext event, final LinkedHashMap<Type, Referenceable> tables) throws HiveException, MalformedURLException { List<Referenceable> entities = new ArrayList<>(); final Entity hiveEntity = getEntityByType(event.getOutputs(), Type.TABLE); Table hiveTable = hiveEntity.getTable(); // Refresh to get the correct location hiveTable = dgiBridge.hiveClient.getTable(hiveTable.getDbName(), hiveTable.getTableName()); final String location = lower(hiveTable.getDataLocation().toString()); if (hiveTable != null && TableType.EXTERNAL_TABLE.equals(hiveTable.getTableType())) { LOG.info("Registering external table process {} ", event.getQueryStr()); final ReadEntity dfsEntity = new ReadEntity(); dfsEntity.setTyp(Type.DFS_DIR); dfsEntity.setName(location); SortedMap<Entity, Referenceable> inputs = new TreeMap<Entity, Referenceable>(entityComparator) { { put(dfsEntity, dgiBridge.fillHDFSDataSet(location)); } }; SortedMap<Entity, Referenceable> outputs = new TreeMap<Entity, Referenceable>(entityComparator) { { put(hiveEntity, tables.get(Type.TABLE)); } }; Referenceable processReferenceable = getProcessReferenceable(dgiBridge, event, inputs, outputs); String tableQualifiedName = dgiBridge.getTableQualifiedName(dgiBridge.getClusterName(), hiveTable); if (isCreateOp(event)) { processReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, tableQualifiedName); } entities.addAll(tables.values()); entities.add(processReferenceable); event.addMessage(new HookNotification.EntityUpdateRequest(event.getUser(), entities)); } }
private LinkedHashMap<Type, Referenceable> createOrUpdateEntities( HiveMetaStoreBridge dgiBridge, HiveEventContext event, Entity entity, boolean skipTempTables, Table existTable) throws Exception { Database db = null; Table table = null; Partition partition = null; LinkedHashMap<Type, Referenceable> result = new LinkedHashMap<>(); List<Referenceable> entities = new ArrayList<>(); switch (entity.getType()) { case DATABASE: db = entity.getDatabase(); break; case TABLE: table = entity.getTable(); db = dgiBridge.hiveClient.getDatabase(table.getDbName()); break; case PARTITION: partition = entity.getPartition(); table = partition.getTable(); db = dgiBridge.hiveClient.getDatabase(table.getDbName()); break; } db = dgiBridge.hiveClient.getDatabase(db.getName()); Referenceable dbEntity = dgiBridge.createDBInstance(db); entities.add(dbEntity); result.put(Type.DATABASE, dbEntity); Referenceable tableEntity = null; if (table != null) { if (existTable != null) { table = existTable; } else { table = dgiBridge.hiveClient.getTable(table.getDbName(), table.getTableName()); } // If its an external table, even though the temp table skip flag is on, // we create the table since we need the HDFS path to temp table lineage. if (skipTempTables && table.isTemporary() && !TableType.EXTERNAL_TABLE.equals(table.getTableType())) { LOG.debug( "Skipping temporary table registration {} since it is not an external table {} ", table.getTableName(), table.getTableType().name()); } else { tableEntity = dgiBridge.createTableInstance(dbEntity, table); entities.add(tableEntity); result.put(Type.TABLE, tableEntity); } } event.addMessage(new HookNotification.EntityUpdateRequest(event.getUser(), entities)); return result; }
/**
 * Reports whether the given table's type is {@code TableType.EXTERNAL_TABLE}.
 *
 * @param table table whose type is inspected
 * @return true when the table type equals {@code EXTERNAL_TABLE}, false otherwise
 */
private boolean isExternal(Table table) {
  final TableType declaredType = table.getTableType();
  return TableType.EXTERNAL_TABLE.equals(declaredType);
}
@Override public final CatalogProtos.TableDescProto getTable(String databaseName, final String tableName) throws CatalogException { org.apache.hadoop.hive.ql.metadata.Table table = null; HiveCatalogStoreClientPool.HiveCatalogStoreClient client = null; Path path = null; String storeType = null; org.apache.tajo.catalog.Schema schema = null; KeyValueSet options = null; TableStats stats = null; PartitionMethodDesc partitions = null; ////////////////////////////////// // set tajo table schema. ////////////////////////////////// try { // get hive table schema try { client = clientPool.getClient(); table = HiveCatalogUtil.getTable(client.getHiveClient(), databaseName, tableName); path = table.getPath(); } catch (NoSuchObjectException nsoe) { throw new UndefinedTableException(tableName); } catch (Exception e) { throw new TajoInternalError(e); } // convert HiveCatalogStore field schema into tajo field schema. schema = new org.apache.tajo.catalog.Schema(); List<FieldSchema> fieldSchemaList = table.getCols(); boolean isPartitionKey = false; for (FieldSchema eachField : fieldSchemaList) { isPartitionKey = false; if (table.getPartitionKeys() != null) { for (FieldSchema partitionKey : table.getPartitionKeys()) { if (partitionKey.getName().equals(eachField.getName())) { isPartitionKey = true; } } } if (!isPartitionKey) { String fieldName = databaseName + CatalogConstants.IDENTIFIER_DELIMITER + tableName + CatalogConstants.IDENTIFIER_DELIMITER + eachField.getName(); TajoDataTypes.Type dataType = HiveCatalogUtil.getTajoFieldType(eachField.getType().toString()); schema.addColumn(fieldName, dataType); } } // validate field schema. 
HiveCatalogUtil.validateSchema(table); stats = new TableStats(); options = new KeyValueSet(); options.putAll(table.getParameters()); options.remove("EXTERNAL"); Properties properties = table.getMetadata(); if (properties != null) { // set field delimiter String fieldDelimiter = "", nullFormat = ""; if (properties.getProperty(serdeConstants.FIELD_DELIM) != null) { fieldDelimiter = properties.getProperty(serdeConstants.FIELD_DELIM); } else { // if hive table used default row format delimiter, Properties doesn't have it. // So, Tajo must set as follows: fieldDelimiter = "\u0001"; } // set null format if (properties.getProperty(serdeConstants.SERIALIZATION_NULL_FORMAT) != null) { nullFormat = properties.getProperty(serdeConstants.SERIALIZATION_NULL_FORMAT); } else { nullFormat = "\\N"; } options.remove(serdeConstants.SERIALIZATION_NULL_FORMAT); // set file output format String fileOutputformat = properties.getProperty(hive_metastoreConstants.FILE_OUTPUT_FORMAT); storeType = HiveCatalogUtil.getStoreType(fileOutputformat); if (storeType.equalsIgnoreCase("TEXT")) { options.set( StorageConstants.TEXT_DELIMITER, StringEscapeUtils.escapeJava(fieldDelimiter)); options.set(StorageConstants.TEXT_NULL, StringEscapeUtils.escapeJava(nullFormat)); } else if (storeType.equals("RCFILE")) { options.set(StorageConstants.RCFILE_NULL, StringEscapeUtils.escapeJava(nullFormat)); String serde = properties.getProperty(serdeConstants.SERIALIZATION_LIB); if (LazyBinaryColumnarSerDe.class.getName().equals(serde)) { options.set(StorageConstants.RCFILE_SERDE, StorageConstants.DEFAULT_BINARY_SERDE); } else if (ColumnarSerDe.class.getName().equals(serde)) { options.set(StorageConstants.RCFILE_SERDE, StorageConstants.DEFAULT_TEXT_SERDE); } } else if (storeType.equals("SEQUENCEFILE")) { options.set( StorageConstants.SEQUENCEFILE_DELIMITER, StringEscapeUtils.escapeJava(fieldDelimiter)); options.set(StorageConstants.SEQUENCEFILE_NULL, StringEscapeUtils.escapeJava(nullFormat)); String serde = 
properties.getProperty(serdeConstants.SERIALIZATION_LIB); if (LazyBinarySerDe.class.getName().equals(serde)) { options.set(StorageConstants.SEQUENCEFILE_SERDE, StorageConstants.DEFAULT_BINARY_SERDE); } else if (LazySimpleSerDe.class.getName().equals(serde)) { options.set(StorageConstants.SEQUENCEFILE_SERDE, StorageConstants.DEFAULT_TEXT_SERDE); } } // set data size long totalSize = 0; if (properties.getProperty("totalSize") != null) { totalSize = Long.parseLong(properties.getProperty("totalSize")); } else { try { FileSystem fs = path.getFileSystem(conf); if (fs.exists(path)) { totalSize = fs.getContentSummary(path).getLength(); } } catch (IOException ioe) { throw new TajoInternalError(ioe); } } stats.setNumBytes(totalSize); } // set partition keys List<FieldSchema> partitionKeys = table.getPartitionKeys(); if (null != partitionKeys) { org.apache.tajo.catalog.Schema expressionSchema = new org.apache.tajo.catalog.Schema(); StringBuilder sb = new StringBuilder(); if (partitionKeys.size() > 0) { for (int i = 0; i < partitionKeys.size(); i++) { FieldSchema fieldSchema = partitionKeys.get(i); TajoDataTypes.Type dataType = HiveCatalogUtil.getTajoFieldType(fieldSchema.getType().toString()); String fieldName = databaseName + CatalogConstants.IDENTIFIER_DELIMITER + tableName + CatalogConstants.IDENTIFIER_DELIMITER + fieldSchema.getName(); expressionSchema.addColumn(new Column(fieldName, dataType)); if (i > 0) { sb.append(","); } sb.append(fieldSchema.getName()); } partitions = new PartitionMethodDesc( databaseName, tableName, PartitionType.COLUMN, sb.toString(), expressionSchema); } } } finally { if (client != null) client.release(); } TableMeta meta = new TableMeta(storeType, options); TableDesc tableDesc = new TableDesc(databaseName + "." 
+ tableName, schema, meta, path.toUri()); if (table.getTableType().equals(TableType.EXTERNAL_TABLE)) { tableDesc.setExternal(true); } if (stats != null) { tableDesc.setStats(stats); } if (partitions != null) { tableDesc.setPartitionMethod(partitions); } return tableDesc.getProto(); }
/**
 * Reports whether the given table's type is {@code TableType.MANAGED_TABLE}.
 *
 * @param table table whose type is inspected
 * @return true when the table type equals {@code MANAGED_TABLE}, false otherwise
 */
private boolean isManaged(Table table) {
  final TableType declaredType = table.getTableType();
  return TableType.MANAGED_TABLE.equals(declaredType);
}