Example #1
0
  /**
   * Update a {@link Dataset} for the given dataset URI.
   *
   * <p>You can add columns, remove columns, or change the data type of columns in your dataset,
   * provided you don't attempt a change that is incompatible with written data. Avro defines rules
   * for compatible schema evolution. See <a
   * href="http://kitesdk.org/docs/current/Schema-Evolution.html">Schema Evolution</a>.
   *
   * <p>This method updates the dataset descriptor, so you can also add or change properties.
   *
   * <p>The recommended way to update a dataset descriptor is to build it based on an existing
   * descriptor. Use {@link DatasetDescriptor.Builder#Builder(DatasetDescriptor)} to build a
   * DatasetDescriptor based on an existing instance.
   *
   * <p>You cannot change a dataset format or partition strategy.
   *
   * <p>URIs must begin with {@code dataset:}. The remainder of the URI is implementation specific,
   * depending on the dataset scheme.
   *
   * @param uri a {@code Dataset} URI
   * @param descriptor the {@code DatasetDescriptor} to apply to the dataset
   * @param type a Java class that represents an entity in the dataset
   * @param <E> the type used for readers and writers created by this {@code Dataset}
   * @param <D> the type of {@code Dataset} expected
   * @return a {@code Dataset} for the given URI
   * @throws NullPointerException if {@code uri}, {@code descriptor}, or {@code type} is {@code
   *     null}
   * @throws IllegalArgumentException if {@code uri} is not a dataset URI
   * @throws DatasetNotFoundException if there is no dataset for the given URI
   * @throws UnsupportedOperationException if descriptor updates are not supported by the
   *     implementation
   * @throws ConcurrentSchemaModificationException if the {@code Dataset} schema is updated
   *     concurrently
   * @throws IncompatibleSchemaException if the schema is not compatible with previous schemas, or
   *     with existing datasets with shared storage (for example, in the same HBase table)
   */
  @SuppressWarnings("unchecked")
  public static <E, D extends Dataset<E>> D update(
      URI uri, DatasetDescriptor descriptor, Class<E> type) {
    // Fail fast with a descriptive message instead of an anonymous NPE from uri.getScheme().
    Preconditions.checkNotNull(uri, "The dataset URI can't be null");
    Preconditions.checkArgument(
        URIBuilder.DATASET_SCHEME.equals(uri.getScheme()), "Not a dataset or view URI: " + uri);
    // Reject a null descriptor up front, as documented, before any repository lookup happens.
    Preconditions.checkNotNull(descriptor, "The dataset descriptor can't be null");
    Preconditions.checkNotNull(
        type,
        "The entity type can't be null, use Object.class to have the type"
            + " determined by the schema.");

    // Strip the leading scheme: the scheme-specific part is itself a URI that the registration
    // lookup resolves to a repository plus the options parsed from that URI.
    Pair<DatasetRepository, Map<String, String>> pair =
        Registration.lookupDatasetUri(URI.create(uri.getRawSchemeSpecificPart()));
    DatasetRepository repo = pair.first();
    Map<String, String> uriOptions = pair.second();

    // Unchecked cast: the caller selects D, so a mismatch surfaces as a ClassCastException at
    // the call site rather than here.
    return (D)
        repo.update(
            uriOptions.get(URIBuilder.NAMESPACE_OPTION),
            uriOptions.get(URIBuilder.DATASET_NAME_OPTION),
            descriptor,
            type);
  }