コード例 #1
0
ファイル: Compatibility.java プロジェクト: madhukard/kite
  /**
   * Verifies that {@code test} may take the place of {@code existing} for the same dataset.
   *
   * <p>The checks run in a fixed order: format, partitioned flag, partition strategy (only when
   * the existing dataset is partitioned), and finally schema readability. The first three are
   * delegated to {@code checkNotChanged}; the last is decided by
   * {@code SchemaValidationUtil.canRead}.
   *
   * @param existing the current {@code DatasetDescriptor} for a dataset
   * @param test a new {@code DatasetDescriptor} for the same dataset
   * @throws IncompatibleSchemaException if {@code SchemaValidationUtil.canRead} reports that the
   *     schema of {@code test} cannot read data written with the schema of {@code existing}
   */
  public static void checkCompatible(DatasetDescriptor existing, DatasetDescriptor test) {
    checkNotChanged("format", existing.getFormat(), test.getFormat());

    final boolean partitioned = existing.isPartitioned();
    checkNotChanged("partitioning", partitioned, test.isPartitioned());

    // A strategy is only compared when the existing dataset actually has one.
    if (partitioned) {
      checkNotChanged(
          "partition strategy", existing.getPartitionStrategy(), test.getPartitionStrategy());
    }

    // The candidate schema must be able to read records written with the current schema.
    final Schema writerSchema = existing.getSchema();
    final Schema readerSchema = test.getSchema();
    if (SchemaValidationUtil.canRead(writerSchema, readerSchema)) {
      return;
    }

    throw new IncompatibleSchemaException(
        "Schema cannot read data written using existing schema. Schema: "
            + readerSchema.toString(true)
            + "\nExisting schema: "
            + writerSchema.toString(true));
  }
コード例 #2
0
  /**
   * Replaces the stored descriptor of the dataset {@code name} with {@code descriptor} after
   * verifying that the change is compatible, and returns a dataset built from the updated
   * descriptor.
   *
   * <p>The following changes are rejected before anything is written:
   *
   * <ul>
   *   <li>a different format;
   *   <li>a different location, when the stored descriptor has a non-null location;
   *   <li>switching between partitioned and unpartitioned, or a different partition strategy;
   *   <li>a schema that {@code SchemaValidationUtil.canRead} says cannot read data written with
   *       the stored schema.
   * </ul>
   *
   * @param name the name of the dataset to update; must not be null
   * @param descriptor the replacement descriptor; must not be null
   * @param <E> the entity type of the returned dataset
   * @return a dataset built from the descriptor returned by the metadata provider
   * @throws IllegalArgumentException if {@code name} or {@code descriptor} is null
   * @throws DatasetRepositoryException if the format, location, or partitioning would change
   * @throws IncompatibleSchemaException if the new schema cannot read data written with the old
   *     schema
   */
  @Override
  public <E> Dataset<E> update(String name, DatasetDescriptor descriptor) {
    Preconditions.checkArgument(name != null, "Dataset name cannot be null");
    Preconditions.checkArgument(descriptor != null, "DatasetDescriptor cannot be null");

    // oldDescriptor is valid if load didn't throw NoSuchDatasetException
    final DatasetDescriptor oldDescriptor = metadataProvider.load(name);

    if (!oldDescriptor.getFormat().equals(descriptor.getFormat())) {
      throw new DatasetRepositoryException(
          "Cannot change dataset format from "
              + oldDescriptor.getFormat()
              + " to "
              + descriptor.getFormat());
    }

    // A stored descriptor without a location places no constraint on the new one.
    final URI oldLocation = oldDescriptor.getLocation();
    if (oldLocation != null && !oldLocation.equals(descriptor.getLocation())) {
      throw new DatasetRepositoryException("Cannot change the dataset's location");
    }

    if (oldDescriptor.isPartitioned() != descriptor.isPartitioned()) {
      throw new DatasetRepositoryException(
          "Cannot change an unpartitioned dataset to partitioned or vice versa.");
    }
    // Both flags agree at this point, so testing one of them is enough.
    if (oldDescriptor.isPartitioned()
        && !oldDescriptor.getPartitionStrategy().equals(descriptor.getPartitionStrategy())) {
      throw new DatasetRepositoryException(
          "Cannot change partition strategy from "
              + oldDescriptor.getPartitionStrategy()
              + " to "
              + descriptor.getPartitionStrategy());
    }

    // check can read records written with old schema using new schema
    final Schema oldSchema = oldDescriptor.getSchema();
    final Schema newSchema = descriptor.getSchema();
    if (!SchemaValidationUtil.canRead(oldSchema, newSchema)) {
      throw new IncompatibleSchemaException(
          "New schema cannot read data written using old schema. New schema: "
              + newSchema.toString(true)
              + "\nOld schema: "
              + oldSchema.toString(true));
    }

    final DatasetDescriptor updatedDescriptor =
        addRepositoryUri(metadataProvider.update(name, descriptor));

    // Hand the location to the logger as-is rather than calling toString() on it: a descriptor
    // location may be null (see the check above), and a debug statement must not turn a
    // successful update into a NullPointerException. Assuming an SLF4J-style logger, the
    // rendered message is unchanged for non-null locations.
    logger.debug(
        "Updated dataset:{} schema:{} datasetPath:{}",
        new Object[] {name, updatedDescriptor.getSchema(), updatedDescriptor.getLocation()});

    return new FileSystemDataset.Builder()
        .name(name)
        .configuration(conf)
        .descriptor(updatedDescriptor)
        .partitionKey(
            updatedDescriptor.isPartitioned()
                ? org.kitesdk.data.impl.Accessor.getDefault().newPartitionKey()
                : null)
        .partitionListener(getPartitionListener())
        .build();
  }