/**
   *
   *
   * <ul>
   *   <li>The constructor takes in a dataset {@link Config} which MUST have a comma separated list
   *       of destination formats at key, {@value #DESTINATION_CONVERSION_FORMATS_KEY}
   *   <li>Conversion configuration for a format can be set by using destination format as prefix.
   *   <li>E.g. If {@value #DESTINATION_CONVERSION_FORMATS_KEY}=flattenedOrc,nestedOrc.<br>
   *       The destination table name for flattened ORC is set at flattenedOrc.tableName<br>
   *       And the destination table name for nested ORC is set at nestedOrc.tableName
   * </ul>
   *
   * @param fs
   * @param clientPool
   * @param table
   * @param config
   */
  public ConvertibleHiveDataset(
      FileSystem fs, HiveMetastoreClientPool clientPool, Table table, Config config) {
    super(fs, clientPool, table, config);

    Preconditions.checkArgument(
        config.hasPath(DESTINATION_CONVERSION_FORMATS_KEY),
        String.format(
            "Atleast one destination format should be specified at %s.%s. If you do not intend to convert this dataset set %s.%s to true",
            super.properties.getProperty(HiveDatasetFinder.HIVE_DATASET_CONFIG_PREFIX_KEY, ""),
            DESTINATION_CONVERSION_FORMATS_KEY,
            super.properties.getProperty(HiveDatasetFinder.HIVE_DATASET_CONFIG_PREFIX_KEY, ""),
            HiveDatasetFinder.HIVE_DATASET_IS_BLACKLISTED_KEY));

    // value for DESTINATION_CONVERSION_FORMATS_KEY can be a TypeSafe list or a comma separated list
    // of string
    this.destFormats =
        Sets.newHashSet(ConfigUtils.getStringList(config, DESTINATION_CONVERSION_FORMATS_KEY));

    // For each format create ConversionConfig and store it in a Map<format,conversionConfig>
    this.destConversionConfigs = Maps.newHashMap();

    for (String format : this.destFormats) {
      if (config.hasPath(format)) {
        this.destConversionConfigs.put(
            format, new ConversionConfig(config.getConfig(format), table, format));
      }
    }
  }
    /**
     * Builds the conversion settings for a single destination format.
     *
     * <p>The destination table, database and data path keys are mandatory. Their values are passed
     * through {@code resolveTemplate} together with {@code table} before being stored. The
     * staging table name is always the resolved destination table name with a {@code _staging}
     * suffix and cannot be configured.
     *
     * @param config configuration section for this destination format
     * @param table source Hive table, used when resolving the templated values
     * @param destinationFormat name of the destination format; also used as the key prefix in
     *     error messages
     * @throws IllegalArgumentException if the destination table, database or data path key is
     *     missing from {@code config}
     */
    private ConversionConfig(Config config, Table table, String destinationFormat) {

      // The template overload of checkArgument formats the message only when a check fails, so no
      // String.format work is done when all required keys are present.
      Preconditions.checkArgument(
          config.hasPath(DESTINATION_TABLE_KEY),
          "Key %s.%s is not specified",
          destinationFormat,
          DESTINATION_TABLE_KEY);
      Preconditions.checkArgument(
          config.hasPath(DESTINATION_DB_KEY),
          "Key %s.%s is not specified",
          destinationFormat,
          DESTINATION_DB_KEY);
      Preconditions.checkArgument(
          config.hasPath(DESTINATION_DATA_PATH_KEY),
          "Key %s.%s is not specified",
          destinationFormat,
          DESTINATION_DATA_PATH_KEY);

      // Required
      this.destinationFormat = destinationFormat;
      this.destinationTableName = resolveTemplate(config.getString(DESTINATION_TABLE_KEY), table);
      this.destinationStagingTableName =
          String.format(
              "%s_%s", this.destinationTableName, "staging"); // Fixed and non-configurable
      this.destinationDbName = resolveTemplate(config.getString(DESTINATION_DB_KEY), table);
      this.destinationDataPath =
          resolveTemplate(config.getString(DESTINATION_DATA_PATH_KEY), table);

      // Optional
      // NOTE(review): the ConfigUtils getters below presumably fall back to an empty list or the
      // supplied default when the key is missing - confirm against ConfigUtils.
      this.clusterBy = ConfigUtils.getStringList(config, CLUSTER_BY_KEY);
      // A null default is wrapped by fromNullable, so a null result becomes Optional.absent().
      this.numBuckets = Optional.fromNullable(ConfigUtils.getInt(config, NUM_BUCKETS_KEY, null));
      this.hiveRuntimeProperties =
          ConfigUtils.configToProperties(
              ConfigUtils.getConfig(
                  config, HIVE_RUNTIME_PROPERTIES_KEY_PREFIX, ConfigFactory.empty()));
      this.evolutionEnabled = ConfigUtils.getBoolean(config, EVOLUTION_ENABLED, false);
      this.rowLimit = Optional.fromNullable(ConfigUtils.getInt(config, ROW_LIMIT_KEY, null));
      this.sourceDataPathIdentifier =
          ConfigUtils.getStringList(config, SOURCE_DATA_PATH_IDENTIFIER_KEY);
    }
示例#3
0
  /**
   * Obtains the {@link GobblinMetrics} instance for this application, tagged with the application
   * id and name, and starts metric reporting on it.
   *
   * @return the metrics instance on which reporting has been started
   */
  private GobblinMetrics buildGobblinMetrics() {
    // Tag every metric with the identity of this application.
    ImmutableList<Tag<?>> applicationTags =
        ImmutableList.<Tag<?>>of(
            new Tag<>(GobblinClusterMetricTagNames.APPLICATION_ID, this.applicationId),
            new Tag<>(GobblinClusterMetricTagNames.APPLICATION_NAME, this.applicationName));

    // Initialize Gobblin metrics keyed by the application id, with no parent context.
    GobblinMetrics metrics = GobblinMetrics.get(this.applicationId, null, applicationTags);

    // Start the reporters using the configuration converted to properties.
    metrics.startMetricReporting(ConfigUtils.configToProperties(config));
    return metrics;
  }