/**
 * Runs the dataset-name and schema compatibility checks, logging a warning
 * instead of failing when either check rejects its input.
 *
 * <p>The name is checked first; if that check throws, the schema check is
 * not run. An {@link IllegalArgumentException} or {@link IllegalStateException}
 * raised by the checks is caught here and its message is written to the log
 * at warn level, so neither exception type propagates to the caller.
 *
 * @param namespace a String namespace
 * @param datasetName a String dataset name
 * @param schema a {@link Schema}
 */
public static void checkAndWarn(String namespace, String datasetName, Schema schema) {
  try {
    checkDatasetName(namespace, datasetName);
    checkSchema(schema);
  } catch (IllegalArgumentException invalidArgument) {
    // Report only; the caller is never interrupted by this problem.
    LOG.warn(invalidArgument.getMessage());
  } catch (IllegalStateException invalidState) {
    // Report only; the caller is never interrupted by this problem.
    LOG.warn(invalidState.getMessage());
  }
}
/** * Precondition-style validation that the DatasetDescriptor is compatible. * * @param descriptor a {@link DatasetDescriptor} */ public static void checkDescriptor(DatasetDescriptor descriptor) { Preconditions.checkNotNull(descriptor, "Descriptor cannot be null"); Schema schema = descriptor.getSchema(); checkSchema(schema); if (descriptor.isPartitioned()) { // marked as [BUG] because this is checked in DatasetDescriptor Preconditions.checkArgument( schema.getType() == Schema.Type.RECORD, "[BUG] Partitioned datasets must have record schemas"); Set<String> names = Sets.newHashSet(); for (Schema.Field field : schema.getFields()) { names.add(field.name()); } List<String> incompatible = Lists.newArrayList(); List<String> duplicates = Lists.newArrayList(); for (FieldPartitioner fp : descriptor.getPartitionStrategy().getFieldPartitioners()) { String name = fp.getName(); if (!isCompatibleName(name)) { incompatible.add(name); } else if (names.contains(name)) { duplicates.add(name); } else { names.add(name); } } Preconditions.checkState( incompatible.isEmpty(), "Hive incompatible: partition names are not alphanumeric (plus '_'): %s", Joiner.on(", ").join(incompatible)); Preconditions.checkState( duplicates.isEmpty(), "Hive incompatible: partition names duplicate data fields: %s", Joiner.on(", ").join(duplicates)); } }