Пример #1
0
  @Override
  public final void createTable(final CatalogProtos.TableDescProto tableDescProto)
      throws CatalogException {
    HiveCatalogStoreClientPool.HiveCatalogStoreClient client = null;

    TableDesc tableDesc = new TableDesc(tableDescProto);
    String[] splitted = CatalogUtil.splitFQTableName(tableDesc.getName());
    String databaseName = splitted[0];
    String tableName = splitted[1];

    try {
      client = clientPool.getClient();

      org.apache.hadoop.hive.metastore.api.Table table =
          new org.apache.hadoop.hive.metastore.api.Table();
      table.setDbName(databaseName);
      table.setTableName(tableName);
      table.setParameters(
          new HashMap<String, String>(tableDesc.getMeta().getOptions().getAllKeyValus()));
      // TODO: set owner
      // table.setOwner();

      StorageDescriptor sd = new StorageDescriptor();
      sd.setSerdeInfo(new SerDeInfo());
      sd.getSerdeInfo().setParameters(new HashMap<String, String>());
      sd.getSerdeInfo().setName(table.getTableName());

      // if tajo set location method, thrift client make exception as follows:
      // Caused by: MetaException(message:java.lang.NullPointerException)
      // If you want to modify table path, you have to modify on Hive cli.
      if (tableDesc.isExternal()) {
        table.setTableType(TableType.EXTERNAL_TABLE.name());
        table.putToParameters("EXTERNAL", "TRUE");

        Path tablePath = new Path(tableDesc.getUri());
        FileSystem fs = tablePath.getFileSystem(conf);
        if (fs.isFile(tablePath)) {
          LOG.warn("A table path is a file, but HiveCatalogStore does not allow a file path.");
          sd.setLocation(tablePath.getParent().toString());
        } else {
          sd.setLocation(tablePath.toString());
        }
      }

      // set column information
      List<Column> columns = tableDesc.getSchema().getRootColumns();
      ArrayList<FieldSchema> cols = new ArrayList<FieldSchema>(columns.size());

      for (Column eachField : columns) {
        cols.add(
            new FieldSchema(
                eachField.getSimpleName(),
                HiveCatalogUtil.getHiveFieldType(eachField.getDataType()),
                ""));
      }
      sd.setCols(cols);

      // set partition keys
      if (tableDesc.hasPartition()
          && tableDesc.getPartitionMethod().getPartitionType().equals(PartitionType.COLUMN)) {
        List<FieldSchema> partitionKeys = new ArrayList<FieldSchema>();
        for (Column eachPartitionKey :
            tableDesc.getPartitionMethod().getExpressionSchema().getRootColumns()) {
          partitionKeys.add(
              new FieldSchema(
                  eachPartitionKey.getSimpleName(),
                  HiveCatalogUtil.getHiveFieldType(eachPartitionKey.getDataType()),
                  ""));
        }
        table.setPartitionKeys(partitionKeys);
      }

      if (tableDesc.getMeta().getStoreType().equalsIgnoreCase(BuiltinStorages.RCFILE)) {
        String serde = tableDesc.getMeta().getOption(StorageConstants.RCFILE_SERDE);
        sd.setInputFormat(org.apache.hadoop.hive.ql.io.RCFileInputFormat.class.getName());
        sd.setOutputFormat(org.apache.hadoop.hive.ql.io.RCFileOutputFormat.class.getName());
        if (StorageConstants.DEFAULT_TEXT_SERDE.equals(serde)) {
          sd.getSerdeInfo()
              .setSerializationLib(
                  org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe.class.getName());
        } else {
          sd.getSerdeInfo()
              .setSerializationLib(
                  org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe.class.getName());
        }

        if (tableDesc.getMeta().getOptions().containsKey(StorageConstants.RCFILE_NULL)) {
          table.putToParameters(
              serdeConstants.SERIALIZATION_NULL_FORMAT,
              StringEscapeUtils.unescapeJava(
                  tableDesc.getMeta().getOption(StorageConstants.RCFILE_NULL)));
        }
      } else if (tableDesc.getMeta().getStoreType().equals(BuiltinStorages.TEXT)) {
        sd.getSerdeInfo()
            .setSerializationLib(
                org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName());
        sd.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class.getName());
        sd.setOutputFormat(
            org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat.class.getName());

        String fieldDelimiter =
            tableDesc
                .getMeta()
                .getOption(
                    StorageConstants.TEXT_DELIMITER, StorageConstants.DEFAULT_FIELD_DELIMITER);

        // User can use an unicode for filed delimiter such as \u0001, \001.
        // In this case, java console will convert this value into "\\u001".
        // And hive will un-espace this value again.
        // As a result, user can use right field delimiter.
        // So, we have to un-escape this value.
        sd.getSerdeInfo()
            .putToParameters(
                serdeConstants.SERIALIZATION_FORMAT,
                StringEscapeUtils.unescapeJava(fieldDelimiter));
        sd.getSerdeInfo()
            .putToParameters(
                serdeConstants.FIELD_DELIM, StringEscapeUtils.unescapeJava(fieldDelimiter));
        table.getParameters().remove(StorageConstants.TEXT_DELIMITER);

        if (tableDesc.getMeta().containsOption(StorageConstants.TEXT_NULL)) {
          table.putToParameters(
              serdeConstants.SERIALIZATION_NULL_FORMAT,
              StringEscapeUtils.unescapeJava(
                  tableDesc.getMeta().getOption(StorageConstants.TEXT_NULL)));
          table.getParameters().remove(StorageConstants.TEXT_NULL);
        }
      } else if (tableDesc
          .getMeta()
          .getStoreType()
          .equalsIgnoreCase(BuiltinStorages.SEQUENCE_FILE)) {
        String serde = tableDesc.getMeta().getOption(StorageConstants.SEQUENCEFILE_SERDE);
        sd.setInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat.class.getName());
        sd.setOutputFormat(
            org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat.class.getName());

        if (StorageConstants.DEFAULT_TEXT_SERDE.equals(serde)) {
          sd.getSerdeInfo()
              .setSerializationLib(
                  org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName());

          String fieldDelimiter =
              tableDesc
                  .getMeta()
                  .getOption(
                      StorageConstants.SEQUENCEFILE_DELIMITER,
                      StorageConstants.DEFAULT_FIELD_DELIMITER);

          // User can use an unicode for filed delimiter such as \u0001, \001.
          // In this case, java console will convert this value into "\\u001".
          // And hive will un-espace this value again.
          // As a result, user can use right field delimiter.
          // So, we have to un-escape this value.
          sd.getSerdeInfo()
              .putToParameters(
                  serdeConstants.SERIALIZATION_FORMAT,
                  StringEscapeUtils.unescapeJava(fieldDelimiter));
          sd.getSerdeInfo()
              .putToParameters(
                  serdeConstants.FIELD_DELIM, StringEscapeUtils.unescapeJava(fieldDelimiter));
          table.getParameters().remove(StorageConstants.SEQUENCEFILE_DELIMITER);
        } else {
          sd.getSerdeInfo()
              .setSerializationLib(
                  org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe.class.getName());
        }

        if (tableDesc.getMeta().containsOption(StorageConstants.SEQUENCEFILE_NULL)) {
          table.putToParameters(
              serdeConstants.SERIALIZATION_NULL_FORMAT,
              StringEscapeUtils.unescapeJava(
                  tableDesc.getMeta().getOption(StorageConstants.SEQUENCEFILE_NULL)));
          table.getParameters().remove(StorageConstants.SEQUENCEFILE_NULL);
        }
      } else {
        if (tableDesc.getMeta().getStoreType().equalsIgnoreCase(BuiltinStorages.PARQUET)) {
          sd.setInputFormat(parquet.hive.DeprecatedParquetInputFormat.class.getName());
          sd.setOutputFormat(parquet.hive.DeprecatedParquetOutputFormat.class.getName());
          sd.getSerdeInfo()
              .setSerializationLib(parquet.hive.serde.ParquetHiveSerDe.class.getName());
        } else {
          throw new UnsupportedException(
              tableDesc.getMeta().getStoreType() + " in HivecatalogStore");
        }
      }

      sd.setSortCols(new ArrayList<Order>());

      table.setSd(sd);
      client.getHiveClient().createTable(table);
    } catch (Throwable t) {
      throw new TajoInternalError(t);
    } finally {
      if (client != null) client.release();
    }
  }
Пример #2
0
  @Override
  public final CatalogProtos.TableDescProto getTable(String databaseName, final String tableName)
      throws CatalogException {
    org.apache.hadoop.hive.ql.metadata.Table table = null;
    HiveCatalogStoreClientPool.HiveCatalogStoreClient client = null;
    Path path = null;
    String storeType = null;
    org.apache.tajo.catalog.Schema schema = null;
    KeyValueSet options = null;
    TableStats stats = null;
    PartitionMethodDesc partitions = null;

    //////////////////////////////////
    // set tajo table schema.
    //////////////////////////////////
    try {
      // get hive table schema
      try {
        client = clientPool.getClient();
        table = HiveCatalogUtil.getTable(client.getHiveClient(), databaseName, tableName);
        path = table.getPath();
      } catch (NoSuchObjectException nsoe) {
        throw new UndefinedTableException(tableName);
      } catch (Exception e) {
        throw new TajoInternalError(e);
      }

      // convert HiveCatalogStore field schema into tajo field schema.
      schema = new org.apache.tajo.catalog.Schema();

      List<FieldSchema> fieldSchemaList = table.getCols();
      boolean isPartitionKey = false;
      for (FieldSchema eachField : fieldSchemaList) {
        isPartitionKey = false;

        if (table.getPartitionKeys() != null) {
          for (FieldSchema partitionKey : table.getPartitionKeys()) {
            if (partitionKey.getName().equals(eachField.getName())) {
              isPartitionKey = true;
            }
          }
        }

        if (!isPartitionKey) {
          String fieldName =
              databaseName
                  + CatalogConstants.IDENTIFIER_DELIMITER
                  + tableName
                  + CatalogConstants.IDENTIFIER_DELIMITER
                  + eachField.getName();
          TajoDataTypes.Type dataType =
              HiveCatalogUtil.getTajoFieldType(eachField.getType().toString());
          schema.addColumn(fieldName, dataType);
        }
      }

      // validate field schema.
      HiveCatalogUtil.validateSchema(table);

      stats = new TableStats();
      options = new KeyValueSet();
      options.putAll(table.getParameters());
      options.remove("EXTERNAL");

      Properties properties = table.getMetadata();
      if (properties != null) {
        // set field delimiter
        String fieldDelimiter = "", nullFormat = "";
        if (properties.getProperty(serdeConstants.FIELD_DELIM) != null) {
          fieldDelimiter = properties.getProperty(serdeConstants.FIELD_DELIM);
        } else {
          // if hive table used default row format delimiter, Properties doesn't have it.
          // So, Tajo must set as follows:
          fieldDelimiter = "\u0001";
        }

        // set null format
        if (properties.getProperty(serdeConstants.SERIALIZATION_NULL_FORMAT) != null) {
          nullFormat = properties.getProperty(serdeConstants.SERIALIZATION_NULL_FORMAT);
        } else {
          nullFormat = "\\N";
        }
        options.remove(serdeConstants.SERIALIZATION_NULL_FORMAT);

        // set file output format
        String fileOutputformat =
            properties.getProperty(hive_metastoreConstants.FILE_OUTPUT_FORMAT);
        storeType = HiveCatalogUtil.getStoreType(fileOutputformat);

        if (storeType.equalsIgnoreCase("TEXT")) {
          options.set(
              StorageConstants.TEXT_DELIMITER, StringEscapeUtils.escapeJava(fieldDelimiter));
          options.set(StorageConstants.TEXT_NULL, StringEscapeUtils.escapeJava(nullFormat));
        } else if (storeType.equals("RCFILE")) {
          options.set(StorageConstants.RCFILE_NULL, StringEscapeUtils.escapeJava(nullFormat));
          String serde = properties.getProperty(serdeConstants.SERIALIZATION_LIB);
          if (LazyBinaryColumnarSerDe.class.getName().equals(serde)) {
            options.set(StorageConstants.RCFILE_SERDE, StorageConstants.DEFAULT_BINARY_SERDE);
          } else if (ColumnarSerDe.class.getName().equals(serde)) {
            options.set(StorageConstants.RCFILE_SERDE, StorageConstants.DEFAULT_TEXT_SERDE);
          }
        } else if (storeType.equals("SEQUENCEFILE")) {
          options.set(
              StorageConstants.SEQUENCEFILE_DELIMITER,
              StringEscapeUtils.escapeJava(fieldDelimiter));
          options.set(StorageConstants.SEQUENCEFILE_NULL, StringEscapeUtils.escapeJava(nullFormat));
          String serde = properties.getProperty(serdeConstants.SERIALIZATION_LIB);
          if (LazyBinarySerDe.class.getName().equals(serde)) {
            options.set(StorageConstants.SEQUENCEFILE_SERDE, StorageConstants.DEFAULT_BINARY_SERDE);
          } else if (LazySimpleSerDe.class.getName().equals(serde)) {
            options.set(StorageConstants.SEQUENCEFILE_SERDE, StorageConstants.DEFAULT_TEXT_SERDE);
          }
        }

        // set data size
        long totalSize = 0;
        if (properties.getProperty("totalSize") != null) {
          totalSize = Long.parseLong(properties.getProperty("totalSize"));
        } else {
          try {
            FileSystem fs = path.getFileSystem(conf);
            if (fs.exists(path)) {
              totalSize = fs.getContentSummary(path).getLength();
            }
          } catch (IOException ioe) {
            throw new TajoInternalError(ioe);
          }
        }
        stats.setNumBytes(totalSize);
      }

      // set partition keys
      List<FieldSchema> partitionKeys = table.getPartitionKeys();

      if (null != partitionKeys) {
        org.apache.tajo.catalog.Schema expressionSchema = new org.apache.tajo.catalog.Schema();
        StringBuilder sb = new StringBuilder();
        if (partitionKeys.size() > 0) {
          for (int i = 0; i < partitionKeys.size(); i++) {
            FieldSchema fieldSchema = partitionKeys.get(i);
            TajoDataTypes.Type dataType =
                HiveCatalogUtil.getTajoFieldType(fieldSchema.getType().toString());
            String fieldName =
                databaseName
                    + CatalogConstants.IDENTIFIER_DELIMITER
                    + tableName
                    + CatalogConstants.IDENTIFIER_DELIMITER
                    + fieldSchema.getName();
            expressionSchema.addColumn(new Column(fieldName, dataType));
            if (i > 0) {
              sb.append(",");
            }
            sb.append(fieldSchema.getName());
          }
          partitions =
              new PartitionMethodDesc(
                  databaseName, tableName, PartitionType.COLUMN, sb.toString(), expressionSchema);
        }
      }
    } finally {
      if (client != null) client.release();
    }
    TableMeta meta = new TableMeta(storeType, options);
    TableDesc tableDesc = new TableDesc(databaseName + "." + tableName, schema, meta, path.toUri());
    if (table.getTableType().equals(TableType.EXTERNAL_TABLE)) {
      tableDesc.setExternal(true);
    }
    if (stats != null) {
      tableDesc.setStats(stats);
    }
    if (partitions != null) {
      tableDesc.setPartitionMethod(partitions);
    }
    return tableDesc.getProto();
  }