Example #1
0
 /**
  * Checks the name and schema for known compatibility issues and warns.
  *
  * <p>The dataset name check and the schema check run independently: a problem
  * reported by one does not prevent the other from running, so both kinds of
  * issue can be logged by a single call. Any {@link IllegalArgumentException}
  * or {@link IllegalStateException} raised by a check is logged at warn level
  * using the exception message and is not propagated to the caller.
  *
  * @param namespace a String namespace
  * @param datasetName a String dataset name
  * @param schema a {@link Schema}
  */
 public static void checkAndWarn(String namespace, String datasetName, Schema schema) {
   // Separate try blocks so that a failed name check does not hide schema
   // warnings; with a shared block the schema was never inspected once the
   // name check had thrown.
   try {
     checkDatasetName(namespace, datasetName);
   } catch (IllegalArgumentException e) {
     LOG.warn(e.getMessage());
   } catch (IllegalStateException e) {
     LOG.warn(e.getMessage());
   }

   try {
     checkSchema(schema);
   } catch (IllegalArgumentException e) {
     LOG.warn(e.getMessage());
   } catch (IllegalStateException e) {
     LOG.warn(e.getMessage());
   }
 }
Example #2
0
  /**
   * Validates, precondition-style, that a DatasetDescriptor is compatible.
   *
   * <p>The descriptor must pass the {@code checkNotNull} precondition and its
   * schema is always passed to {@code checkSchema}. For partitioned
   * descriptors the following additional preconditions are applied, in order:
   * <ol>
   *   <li>{@code checkArgument}: the schema type is {@code RECORD};</li>
   *   <li>{@code checkState}: every partitioner name satisfies
   *       {@code isCompatibleName};</li>
   *   <li>{@code checkState}: no compatible partitioner name repeats a schema
   *       field name or the name of an earlier partitioner.</li>
   * </ol>
   * Names that fail the compatibility test are not also tested for
   * duplication.
   *
   * @param descriptor a {@link DatasetDescriptor}
   */
  public static void checkDescriptor(DatasetDescriptor descriptor) {
    Preconditions.checkNotNull(descriptor, "Descriptor cannot be null");

    Schema schema = descriptor.getSchema();
    checkSchema(schema);

    // Nothing more to verify when there is no partition strategy.
    if (!descriptor.isPartitioned()) {
      return;
    }

    // marked as [BUG] because this is checked in DatasetDescriptor
    Preconditions.checkArgument(
        Schema.Type.RECORD == schema.getType(),
        "[BUG] Partitioned datasets must have record schemas");

    // Seed the set of taken names with every data field of the record.
    Set<String> takenNames = Sets.newHashSet();
    for (Schema.Field dataField : schema.getFields()) {
      takenNames.add(dataField.name());
    }

    List<String> badNames = Lists.newArrayList();
    List<String> repeatedNames = Lists.newArrayList();
    for (FieldPartitioner partitioner
        : descriptor.getPartitionStrategy().getFieldPartitioners()) {
      String partitionName = partitioner.getName();
      if (!isCompatibleName(partitionName)) {
        badNames.add(partitionName);
        continue;
      }
      // Set.add returns false when the name was already taken; otherwise it
      // records the name so later partitioners cannot reuse it.
      if (!takenNames.add(partitionName)) {
        repeatedNames.add(partitionName);
      }
    }

    Preconditions.checkState(
        badNames.isEmpty(),
        "Hive incompatible: partition names are not alphanumeric (plus '_'): %s",
        Joiner.on(", ").join(badNames));
    Preconditions.checkState(
        repeatedNames.isEmpty(),
        "Hive incompatible: partition names duplicate data fields: %s",
        Joiner.on(", ").join(repeatedNames));
  }