/** * * * <ul> * <li>The constructor takes in a dataset {@link Config} which MUST have a comma separated list * of destination formats at key, {@value #DESTINATION_CONVERSION_FORMATS_KEY} * <li>Conversion configuration for a format can be set by using destination format as prefix. * <li>E.g. If {@value #DESTINATION_CONVERSION_FORMATS_KEY}=flattenedOrc,nestedOrc.<br> * The destination table name for flattened ORC is set at flattenedOrc.tableName<br> * And the destination table name for nested ORC is set at nestedOrc.tableName * </ul> * * @param fs * @param clientPool * @param table * @param config */ public ConvertibleHiveDataset( FileSystem fs, HiveMetastoreClientPool clientPool, Table table, Config config) { super(fs, clientPool, table, config); Preconditions.checkArgument( config.hasPath(DESTINATION_CONVERSION_FORMATS_KEY), String.format( "Atleast one destination format should be specified at %s.%s. If you do not intend to convert this dataset set %s.%s to true", super.properties.getProperty(HiveDatasetFinder.HIVE_DATASET_CONFIG_PREFIX_KEY, ""), DESTINATION_CONVERSION_FORMATS_KEY, super.properties.getProperty(HiveDatasetFinder.HIVE_DATASET_CONFIG_PREFIX_KEY, ""), HiveDatasetFinder.HIVE_DATASET_IS_BLACKLISTED_KEY)); // value for DESTINATION_CONVERSION_FORMATS_KEY can be a TypeSafe list or a comma separated list // of string this.destFormats = Sets.newHashSet(ConfigUtils.getStringList(config, DESTINATION_CONVERSION_FORMATS_KEY)); // For each format create ConversionConfig and store it in a Map<format,conversionConfig> this.destConversionConfigs = Maps.newHashMap(); for (String format : this.destFormats) { if (config.hasPath(format)) { this.destConversionConfigs.put( format, new ConversionConfig(config.getConfig(format), table, format)); } } }
/**
 * Builds the conversion settings for a single destination format.
 *
 * <p>The keys {@code DESTINATION_TABLE_KEY}, {@code DESTINATION_DB_KEY} and
 * {@code DESTINATION_DATA_PATH_KEY} are required and are checked in that order. Their values are
 * passed through {@code resolveTemplate} together with {@code table} before being stored. The
 * staging table name is always the resolved destination table name followed by
 * {@code "_staging"} and cannot be configured.
 *
 * <p>All remaining settings are optional: {@code evolutionEnabled} falls back to {@code false},
 * and the Hive runtime properties are read from an empty config when the prefix is not present.
 *
 * @param config configuration to read the settings from; error messages report its keys as
 *     {@code <destinationFormat>.<key>}
 * @param table source table handed to {@code resolveTemplate} for the required values
 * @param destinationFormat name of the destination format this configuration belongs to
 * @throws IllegalArgumentException if any of the three required keys is missing from
 *     {@code config}
 */
private ConversionConfig(Config config, Table table, String destinationFormat) {
  // Validate required keys in a fixed order so the first missing one is always reported.
  // The template overload formats the message only when a check fails.
  String[] requiredKeys = {DESTINATION_TABLE_KEY, DESTINATION_DB_KEY, DESTINATION_DATA_PATH_KEY};
  for (String requiredKey : requiredKeys) {
    Preconditions.checkArgument(
        config.hasPath(requiredKey),
        "Key %s.%s is not specified",
        destinationFormat,
        requiredKey);
  }

  // Required
  this.destinationFormat = destinationFormat;
  this.destinationTableName = resolveTemplate(config.getString(DESTINATION_TABLE_KEY), table);
  // Fixed and non-configurable
  this.destinationStagingTableName =
      String.format("%s_%s", this.destinationTableName, "staging");
  this.destinationDbName = resolveTemplate(config.getString(DESTINATION_DB_KEY), table);
  this.destinationDataPath =
      resolveTemplate(config.getString(DESTINATION_DATA_PATH_KEY), table);

  // Optional
  this.clusterBy = ConfigUtils.getStringList(config, CLUSTER_BY_KEY);
  // A null default becomes an absent Optional.
  this.numBuckets = Optional.fromNullable(ConfigUtils.getInt(config, NUM_BUCKETS_KEY, null));
  this.hiveRuntimeProperties =
      ConfigUtils.configToProperties(
          ConfigUtils.getConfig(config, HIVE_RUNTIME_PROPERTIES_KEY_PREFIX, ConfigFactory.empty()));
  this.evolutionEnabled = ConfigUtils.getBoolean(config, EVOLUTION_ENABLED, false);
  this.rowLimit = Optional.fromNullable(ConfigUtils.getInt(config, ROW_LIMIT_KEY, null));
  this.sourceDataPathIdentifier =
      ConfigUtils.getStringList(config, SOURCE_DATA_PATH_IDENTIFIER_KEY);
}
private GobblinMetrics buildGobblinMetrics() { // Create tags list ImmutableList.Builder<Tag<?>> tags = new ImmutableList.Builder<>(); tags.add(new Tag<>(GobblinClusterMetricTagNames.APPLICATION_ID, this.applicationId)); tags.add(new Tag<>(GobblinClusterMetricTagNames.APPLICATION_NAME, this.applicationName)); // Intialize Gobblin metrics and start reporters GobblinMetrics gobblinMetrics = GobblinMetrics.get(this.applicationId, null, tags.build()); gobblinMetrics.startMetricReporting(ConfigUtils.configToProperties(config)); return gobblinMetrics; }