Example #1
0
 @Test
 public void testCreateTableMetadata() throws TableLoadingException {
   Table table = catalog_.getDb("functional").getTable("alltypes");
   // Tables are created via Impala so the metadata should have been populated properly.
   // alltypes is an external table.
   assertEquals(System.getProperty("user.name"), table.getMetaStoreTable().getOwner());
   assertEquals(TableType.EXTERNAL_TABLE.toString(), table.getMetaStoreTable().getTableType());
   // alltypesinsert is created using CREATE TABLE LIKE and is a MANAGED table
   table = catalog_.getDb("functional").getTable("alltypesinsert");
   assertEquals(System.getProperty("user.name"), table.getMetaStoreTable().getOwner());
   assertEquals(TableType.MANAGED_TABLE.toString(), table.getMetaStoreTable().getTableType());
 }
  @Before
  public void createTable() throws Exception {
    // Use JUnit's Assume to skip running this fixture against any storage format whose
    // SerDe is in the disabled SerDes list.
    Assume.assumeTrue(!DISABLED_SERDES.contains(serdeClass));

    String databaseName = (dbName == null) ? MetaStoreUtils.DEFAULT_DATABASE_NAME : dbName;
    try {
      client.dropTable(databaseName, tableName);
    } catch (Exception e) {
      // Ignored: dropTable may throw NoSuchObjectException if the table does not exist yet.
    }

    Table tbl = new Table();
    tbl.setDbName(databaseName);
    tbl.setTableName(tableName);
    if (isTableExternal()) {
      tbl.setTableType(TableType.EXTERNAL_TABLE.toString());
    } else {
      tbl.setTableType(TableType.MANAGED_TABLE.toString());
    }
    StorageDescriptor sd = new StorageDescriptor();
    sd.setCols(getTableColumns());

    tbl.setPartitionKeys(getPartitionKeys());
    tbl.setSd(sd);

    sd.setBucketCols(new ArrayList<String>(2));
    sd.setSerdeInfo(new SerDeInfo());
    sd.getSerdeInfo().setName(tbl.getTableName());
    sd.getSerdeInfo().setParameters(new HashMap<String, String>());
    sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1");
    if (isTableExternal()) {
      sd.getSerdeInfo().getParameters().put("EXTERNAL", "TRUE");
    }
    sd.getSerdeInfo().setSerializationLib(serdeClass);
    sd.setInputFormat(inputFormatClass);
    sd.setOutputFormat(outputFormatClass);

    Map<String, String> tableParams = new HashMap<String, String>();
    if (isTableExternal()) {
      tableParams.put("EXTERNAL", "TRUE");
    }
    if (isTableImmutable()) {
      tableParams.put(hive_metastoreConstants.IS_IMMUTABLE, "true");
    }
    StatsSetupConst.setBasicStatsState(tableParams, StatsSetupConst.TRUE);
    tbl.setParameters(tableParams);

    client.createTable(tbl);
  }
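
  // Hypothetical implementations of the getTableColumns()/getPartitionKeys() helpers the
  // fixture above relies on; they are not part of the original snippet, and the column
  // and partition names here are illustrative assumptions only.
  private List<FieldSchema> getTableColumns() {
    List<FieldSchema> cols = new ArrayList<FieldSchema>();
    cols.add(new FieldSchema("id", serdeConstants.INT_TYPE_NAME, ""));
    cols.add(new FieldSchema("msg", serdeConstants.STRING_TYPE_NAME, ""));
    return cols;
  }

  private List<FieldSchema> getPartitionKeys() {
    // A single string-typed partition column, mirroring the "dt" partition key used in the
    // HiveLocalMetaStore test later in this section.
    List<FieldSchema> partitionKeys = new ArrayList<FieldSchema>();
    partitionKeys.add(new FieldSchema("dt", serdeConstants.STRING_TYPE_NAME, ""));
    return partitionKeys;
  }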
  private void handleExternalTables(
      final HiveMetaStoreBridge dgiBridge,
      final HiveEventContext event,
      final LinkedHashMap<Type, Referenceable> tables)
      throws HiveException, MalformedURLException {
    List<Referenceable> entities = new ArrayList<>();
    final Entity hiveEntity = getEntityByType(event.getOutputs(), Type.TABLE);
    Table hiveTable = hiveEntity.getTable();
    // Refresh to get the correct location
    hiveTable = dgiBridge.hiveClient.getTable(hiveTable.getDbName(), hiveTable.getTableName());

    if (hiveTable != null && TableType.EXTERNAL_TABLE.equals(hiveTable.getTableType())) {
      // Resolve the table location only after confirming the table is non-null.
      final String location = lower(hiveTable.getDataLocation().toString());
      LOG.info("Registering external table process {} ", event.getQueryStr());
      final ReadEntity dfsEntity = new ReadEntity();
      dfsEntity.setTyp(Type.DFS_DIR);
      dfsEntity.setName(location);

      SortedMap<Entity, Referenceable> inputs =
          new TreeMap<Entity, Referenceable>(entityComparator) {
            {
              put(dfsEntity, dgiBridge.fillHDFSDataSet(location));
            }
          };

      SortedMap<Entity, Referenceable> outputs =
          new TreeMap<Entity, Referenceable>(entityComparator) {
            {
              put(hiveEntity, tables.get(Type.TABLE));
            }
          };

      Referenceable processReferenceable =
          getProcessReferenceable(dgiBridge, event, inputs, outputs);
      String tableQualifiedName =
          dgiBridge.getTableQualifiedName(dgiBridge.getClusterName(), hiveTable);

      if (isCreateOp(event)) {
        processReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, tableQualifiedName);
      }
      entities.addAll(tables.values());
      entities.add(processReferenceable);
      event.addMessage(new HookNotification.EntityUpdateRequest(event.getUser(), entities));
    }
  }
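
  // Sketch of the lower(...) helper referenced above; it is not shown in the original
  // snippet, so its exact behavior here (lower-casing and trimming the location string)
  // is an assumption.
  private static String lower(String str) {
    return (str == null) ? null : str.toLowerCase().trim();
  }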
  private LinkedHashMap<Type, Referenceable> createOrUpdateEntities(
      HiveMetaStoreBridge dgiBridge,
      HiveEventContext event,
      Entity entity,
      boolean skipTempTables,
      Table existTable)
      throws Exception {
    Database db = null;
    Table table = null;
    Partition partition = null;
    LinkedHashMap<Type, Referenceable> result = new LinkedHashMap<>();
    List<Referenceable> entities = new ArrayList<>();

    switch (entity.getType()) {
      case DATABASE:
        db = entity.getDatabase();
        break;

      case TABLE:
        table = entity.getTable();
        db = dgiBridge.hiveClient.getDatabase(table.getDbName());
        break;

      case PARTITION:
        partition = entity.getPartition();
        table = partition.getTable();
        db = dgiBridge.hiveClient.getDatabase(table.getDbName());
        break;
    }

    db = dgiBridge.hiveClient.getDatabase(db.getName());
    Referenceable dbEntity = dgiBridge.createDBInstance(db);

    entities.add(dbEntity);
    result.put(Type.DATABASE, dbEntity);

    Referenceable tableEntity = null;

    if (table != null) {
      if (existTable != null) {
        table = existTable;
      } else {
        table = dgiBridge.hiveClient.getTable(table.getDbName(), table.getTableName());
      }
      // Even when the temp-table skip flag is on, an external table is still registered,
      // since we need the HDFS path for temp table lineage.
      if (skipTempTables
          && table.isTemporary()
          && !TableType.EXTERNAL_TABLE.equals(table.getTableType())) {
        LOG.debug(
            "Skipping registration of temporary table {} since it is not an external table (type: {})",
            table.getTableName(),
            table.getTableType().name());

      } else {
        tableEntity = dgiBridge.createTableInstance(dbEntity, table);
        entities.add(tableEntity);
        result.put(Type.TABLE, tableEntity);
      }
    }

    event.addMessage(new HookNotification.EntityUpdateRequest(event.getUser(), entities));
    return result;
  }
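
  // A minimal sketch (not part of the original hook code) of how the two methods above
  // could be wired together for a CREATE TABLE event: the entity map returned by
  // createOrUpdateEntities is exactly what handleExternalTables expects as its "tables"
  // argument. The method name and the flag values passed here are assumptions.
  private void registerCreateTable(HiveMetaStoreBridge dgiBridge, HiveEventContext event)
      throws Exception {
    Entity tableEntity = getEntityByType(event.getOutputs(), Type.TABLE);
    if (tableEntity != null) {
      LinkedHashMap<Type, Referenceable> tables =
          createOrUpdateEntities(dgiBridge, event, tableEntity, false, null);
      handleExternalTables(dgiBridge, event, tables);
    }
  }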
Example #5
0
  @Override
  public final void createTable(final CatalogProtos.TableDescProto tableDescProto)
      throws CatalogException {
    HiveCatalogStoreClientPool.HiveCatalogStoreClient client = null;

    TableDesc tableDesc = new TableDesc(tableDescProto);
    String[] splitted = CatalogUtil.splitFQTableName(tableDesc.getName());
    String databaseName = splitted[0];
    String tableName = splitted[1];

    try {
      client = clientPool.getClient();

      org.apache.hadoop.hive.metastore.api.Table table =
          new org.apache.hadoop.hive.metastore.api.Table();
      table.setDbName(databaseName);
      table.setTableName(tableName);
      table.setParameters(
          new HashMap<String, String>(tableDesc.getMeta().getOptions().getAllKeyValus()));
      // TODO: set owner
      // table.setOwner();

      StorageDescriptor sd = new StorageDescriptor();
      sd.setSerdeInfo(new SerDeInfo());
      sd.getSerdeInfo().setParameters(new HashMap<String, String>());
      sd.getSerdeInfo().setName(table.getTableName());

      // If Tajo sets the table location here, the Thrift client throws an exception such as:
      //   Caused by: MetaException(message:java.lang.NullPointerException)
      // To modify the table path, you have to do it from the Hive CLI.
      if (tableDesc.isExternal()) {
        table.setTableType(TableType.EXTERNAL_TABLE.name());
        table.putToParameters("EXTERNAL", "TRUE");

        Path tablePath = new Path(tableDesc.getUri());
        FileSystem fs = tablePath.getFileSystem(conf);
        if (fs.isFile(tablePath)) {
          LOG.warn("A table path is a file, but HiveCatalogStore does not allow a file path.");
          sd.setLocation(tablePath.getParent().toString());
        } else {
          sd.setLocation(tablePath.toString());
        }
      }

      // set column information
      List<Column> columns = tableDesc.getSchema().getRootColumns();
      ArrayList<FieldSchema> cols = new ArrayList<FieldSchema>(columns.size());

      for (Column eachField : columns) {
        cols.add(
            new FieldSchema(
                eachField.getSimpleName(),
                HiveCatalogUtil.getHiveFieldType(eachField.getDataType()),
                ""));
      }
      sd.setCols(cols);

      // set partition keys
      if (tableDesc.hasPartition()
          && tableDesc.getPartitionMethod().getPartitionType().equals(PartitionType.COLUMN)) {
        List<FieldSchema> partitionKeys = new ArrayList<FieldSchema>();
        for (Column eachPartitionKey :
            tableDesc.getPartitionMethod().getExpressionSchema().getRootColumns()) {
          partitionKeys.add(
              new FieldSchema(
                  eachPartitionKey.getSimpleName(),
                  HiveCatalogUtil.getHiveFieldType(eachPartitionKey.getDataType()),
                  ""));
        }
        table.setPartitionKeys(partitionKeys);
      }

      if (tableDesc.getMeta().getStoreType().equalsIgnoreCase(BuiltinStorages.RCFILE)) {
        String serde = tableDesc.getMeta().getOption(StorageConstants.RCFILE_SERDE);
        sd.setInputFormat(org.apache.hadoop.hive.ql.io.RCFileInputFormat.class.getName());
        sd.setOutputFormat(org.apache.hadoop.hive.ql.io.RCFileOutputFormat.class.getName());
        if (StorageConstants.DEFAULT_TEXT_SERDE.equals(serde)) {
          sd.getSerdeInfo()
              .setSerializationLib(
                  org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe.class.getName());
        } else {
          sd.getSerdeInfo()
              .setSerializationLib(
                  org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe.class.getName());
        }

        if (tableDesc.getMeta().getOptions().containsKey(StorageConstants.RCFILE_NULL)) {
          table.putToParameters(
              serdeConstants.SERIALIZATION_NULL_FORMAT,
              StringEscapeUtils.unescapeJava(
                  tableDesc.getMeta().getOption(StorageConstants.RCFILE_NULL)));
        }
      } else if (tableDesc.getMeta().getStoreType().equals(BuiltinStorages.TEXT)) {
        sd.getSerdeInfo()
            .setSerializationLib(
                org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName());
        sd.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class.getName());
        sd.setOutputFormat(
            org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat.class.getName());

        String fieldDelimiter =
            tableDesc
                .getMeta()
                .getOption(
                    StorageConstants.TEXT_DELIMITER, StorageConstants.DEFAULT_FIELD_DELIMITER);

        // A user can specify a Unicode field delimiter such as \u0001 or \001.
        // The Java console escapes that value (e.g. to "\\u0001") and Hive un-escapes it
        // again, so we un-escape it here to end up with the intended field delimiter.
        sd.getSerdeInfo()
            .putToParameters(
                serdeConstants.SERIALIZATION_FORMAT,
                StringEscapeUtils.unescapeJava(fieldDelimiter));
        sd.getSerdeInfo()
            .putToParameters(
                serdeConstants.FIELD_DELIM, StringEscapeUtils.unescapeJava(fieldDelimiter));
        table.getParameters().remove(StorageConstants.TEXT_DELIMITER);

        if (tableDesc.getMeta().containsOption(StorageConstants.TEXT_NULL)) {
          table.putToParameters(
              serdeConstants.SERIALIZATION_NULL_FORMAT,
              StringEscapeUtils.unescapeJava(
                  tableDesc.getMeta().getOption(StorageConstants.TEXT_NULL)));
          table.getParameters().remove(StorageConstants.TEXT_NULL);
        }
      } else if (tableDesc
          .getMeta()
          .getStoreType()
          .equalsIgnoreCase(BuiltinStorages.SEQUENCE_FILE)) {
        String serde = tableDesc.getMeta().getOption(StorageConstants.SEQUENCEFILE_SERDE);
        sd.setInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat.class.getName());
        sd.setOutputFormat(
            org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat.class.getName());

        if (StorageConstants.DEFAULT_TEXT_SERDE.equals(serde)) {
          sd.getSerdeInfo()
              .setSerializationLib(
                  org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName());

          String fieldDelimiter =
              tableDesc
                  .getMeta()
                  .getOption(
                      StorageConstants.SEQUENCEFILE_DELIMITER,
                      StorageConstants.DEFAULT_FIELD_DELIMITER);

          // As above, a Unicode field delimiter such as \u0001 or \001 is escaped by the
          // Java console and un-escaped by Hive, so we un-escape it here as well.
          sd.getSerdeInfo()
              .putToParameters(
                  serdeConstants.SERIALIZATION_FORMAT,
                  StringEscapeUtils.unescapeJava(fieldDelimiter));
          sd.getSerdeInfo()
              .putToParameters(
                  serdeConstants.FIELD_DELIM, StringEscapeUtils.unescapeJava(fieldDelimiter));
          table.getParameters().remove(StorageConstants.SEQUENCEFILE_DELIMITER);
        } else {
          sd.getSerdeInfo()
              .setSerializationLib(
                  org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe.class.getName());
        }

        if (tableDesc.getMeta().containsOption(StorageConstants.SEQUENCEFILE_NULL)) {
          table.putToParameters(
              serdeConstants.SERIALIZATION_NULL_FORMAT,
              StringEscapeUtils.unescapeJava(
                  tableDesc.getMeta().getOption(StorageConstants.SEQUENCEFILE_NULL)));
          table.getParameters().remove(StorageConstants.SEQUENCEFILE_NULL);
        }
      } else {
        if (tableDesc.getMeta().getStoreType().equalsIgnoreCase(BuiltinStorages.PARQUET)) {
          sd.setInputFormat(parquet.hive.DeprecatedParquetInputFormat.class.getName());
          sd.setOutputFormat(parquet.hive.DeprecatedParquetOutputFormat.class.getName());
          sd.getSerdeInfo()
              .setSerializationLib(parquet.hive.serde.ParquetHiveSerDe.class.getName());
        } else {
          throw new UnsupportedException(
              tableDesc.getMeta().getStoreType() + " in HiveCatalogStore");
        }
      }

      sd.setSortCols(new ArrayList<Order>());

      table.setSd(sd);
      client.getHiveClient().createTable(table);
    } catch (Throwable t) {
      throw new TajoInternalError(t);
    } finally {
      if (client != null) client.release();
    }
  }
 private boolean isExternal(Table table) {
   return TableType.EXTERNAL_TABLE.equals(table.getTableType());
 }
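
 // Sketch only: other snippets in this section mark external tables via the table
 // parameter "EXTERNAL"="TRUE" rather than (or in addition to) the table type, so a more
 // defensive variant of the helper above could consult both. This version operates on the
 // metastore API Table, whose getTableType() returns a String, and is an illustration
 // rather than part of the original code.
 private boolean isExternalTable(org.apache.hadoop.hive.metastore.api.Table table) {
   if (TableType.EXTERNAL_TABLE.name().equalsIgnoreCase(table.getTableType())) {
     return true;
   }
   Map<String, String> params = table.getParameters();
   return params != null && "TRUE".equalsIgnoreCase(params.get("EXTERNAL"));
 }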
  @Test
  public void testHiveLocalMetaStore() {

    // Create a table and read it back
    try {
      HiveMetaStoreClient hiveClient = new HiveMetaStoreClient(hiveLocalMetaStore.getHiveConf());

      hiveClient.dropTable(
          propertyParser.getProperty(ConfigVars.HIVE_TEST_DATABASE_NAME_KEY),
          propertyParser.getProperty(ConfigVars.HIVE_TEST_TABLE_NAME_KEY),
          true,
          true);

      // Define the cols
      List<FieldSchema> cols = new ArrayList<FieldSchema>();
      cols.add(new FieldSchema("id", serdeConstants.INT_TYPE_NAME, ""));
      cols.add(new FieldSchema("msg", serdeConstants.STRING_TYPE_NAME, ""));

      // Values for the StorageDescriptor
      String location =
          new File(propertyParser.getProperty(ConfigVars.HIVE_TEST_TABLE_NAME_KEY))
              .getAbsolutePath();
      String inputFormat = "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat";
      String outputFormat = "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat";
      int numBuckets = 16;
      Map<String, String> orcProps = new HashMap<String, String>();
      orcProps.put("orc.compress", "NONE");
      SerDeInfo serDeInfo =
          new SerDeInfo(OrcSerde.class.getSimpleName(), OrcSerde.class.getName(), orcProps);
      List<String> bucketCols = new ArrayList<String>();
      bucketCols.add("id");

      // Build the StorageDescriptor
      StorageDescriptor sd = new StorageDescriptor();
      sd.setCols(cols);
      sd.setLocation(location);
      sd.setInputFormat(inputFormat);
      sd.setOutputFormat(outputFormat);
      sd.setNumBuckets(numBuckets);
      sd.setSerdeInfo(serDeInfo);
      sd.setBucketCols(bucketCols);
      sd.setSortCols(new ArrayList<Order>());
      sd.setParameters(new HashMap<String, String>());

      // Define the table
      Table tbl = new Table();
      tbl.setDbName(propertyParser.getProperty(ConfigVars.HIVE_TEST_DATABASE_NAME_KEY));
      tbl.setTableName(propertyParser.getProperty(ConfigVars.HIVE_TEST_TABLE_NAME_KEY));
      tbl.setSd(sd);
      tbl.setOwner(System.getProperty("user.name"));
      tbl.setParameters(new HashMap<String, String>());
      tbl.setViewOriginalText("");
      tbl.setViewExpandedText("");
      tbl.setTableType(TableType.EXTERNAL_TABLE.name());
      List<FieldSchema> partitions = new ArrayList<FieldSchema>();
      partitions.add(new FieldSchema("dt", serdeConstants.STRING_TYPE_NAME, ""));
      tbl.setPartitionKeys(partitions);

      // Create the table
      hiveClient.createTable(tbl);

      // Describe the table
      Table createdTable =
          hiveClient.getTable(
              propertyParser.getProperty(ConfigVars.HIVE_TEST_DATABASE_NAME_KEY),
              propertyParser.getProperty(ConfigVars.HIVE_TEST_TABLE_NAME_KEY));
      LOG.info("HIVE: Created Table: {}", createdTable.toString());
      assertThat(
          createdTable.toString(),
          containsString(propertyParser.getProperty(ConfigVars.HIVE_TEST_TABLE_NAME_KEY)));
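
      // Sketch (not in the original test): a more targeted check could be added here,
      // assuming org.junit.Assert.assertEquals is available in scope.
      assertEquals(1, createdTable.getPartitionKeys().size());
      assertEquals("dt", createdTable.getPartitionKeys().get(0).getName());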

    } catch (MetaException e) {
      e.printStackTrace();
    } catch (TException e) {
      e.printStackTrace();
    }
  }