Beispiel #1
0
  /**
   * Deletes every target that was requested for this command.
   *
   * <p>Each entry in {@code targets} is handled according to its form: a view URI is loaded and
   * emptied with {@code deleteAll()}, a dataset URI is removed through {@code Datasets.delete},
   * and anything else is treated as a dataset name and deleted from the repository under
   * {@code namespace}.
   *
   * @return {@code 0} once every target has been processed
   * @throws IllegalArgumentException if no targets were given, or if the URI of a loaded view
   *     fails the {@code viewMatches} check against the requested URI
   * @throws IOException declared by the signature; presumably raised by the underlying
   *     repository operations
   */
  @Override
  public int run() throws IOException {
    DatasetRepository repository = getDatasetRepository();

    boolean nothingRequested = (targets == null) || targets.isEmpty();
    if (nothingRequested) {
      throw new IllegalArgumentException("No views or datasets were specified.");
    }

    for (String target : targets) {
      if (isViewUri(target)) {
        View resolved = Datasets.load(target);
        // Refuse to delete anything if the loaded view is not the one that was asked for.
        boolean sameView = viewMatches(resolved.getUri(), target);
        Preconditions.checkArgument(
            sameView, "Resolved view does not match requested view: " + resolved.getUri());
        resolved.deleteAll();
      } else if (isDatasetUri(target)) {
        Datasets.delete(target);
      } else {
        // Neither kind of URI: treat the target as a dataset name in the repository.
        repository.delete(namespace, target);
      }
      console.debug("Deleted {}", target);
    }

    return 0;
  }
Beispiel #2
0
 /**
  * Captures the state needed from {@code view} as plain field values.
  *
  * <p>The partition strategy and schema are stored as compact strings, the entity type is copied
  * from the view, and the constraints are stored as a query map when the view is an
  * {@code AbstractRefinableView} (otherwise {@code null}).
  *
  * @param view the view whose dataset descriptor, type and constraints are recorded
  */
 private GetStorageKey(View<E> view) {
   DatasetDescriptor datasetDescriptor = view.getDataset().getDescriptor();
   // keep serializable string forms in place of the transient strategy and schema objects
   this.strategyString =
       datasetDescriptor.getPartitionStrategy().toString(false /* no white space */);
   this.schemaString = datasetDescriptor.getSchema().toString(false /* no white space */);
   this.type = view.getType();
   // constraints are only read from AbstractRefinableView instances; other views get null
   this.constraints = (view instanceof AbstractRefinableView)
       ? ((AbstractRefinableView) view).getConstraints().toQueryMap()
       : null;
 }
Beispiel #3
0
 /**
  * Restructures {@code collection} so that it can be stored efficiently in {@code view}.
  *
  * <p>For a partitioned dataset, the entities are keyed by their storage key and grouped, so
  * that all of the entities that will be stored in a given partition are processed by the same
  * writer. A positive {@code numWriters} is passed to the grouping step; otherwise the grouping
  * is done without an explicit count.
  *
  * <p>For a dataset that is not partitioned, the work is delegated to
  * {@code partition(collection, numWriters)}, which structures the entities to produce a number
  * of files equal to {@code numWriters}.
  *
  * @param collection a collection of entities
  * @param view a {@link View} of a dataset to partition the collection for
  * @param numWriters the number of writers that should be used
  * @param <E> the type of entities in the collection and underlying dataset
  * @return an equivalent collection of entities partitioned for the view
  * @see #partition(PCollection, View)
  * @since 0.16.0
  */
 public static <E> PCollection<E> partition(
     PCollection<E> collection, View<E> view, int numWriters) {
   DatasetDescriptor descriptor = view.getDataset().getDescriptor();
   if (!descriptor.isPartitioned()) {
     // no partition strategy to key on: only the writer count matters
     return partition(collection, numWriters);
   }

   GetStorageKey<E> keyFn = new GetStorageKey<E>(view);
   PTable<GenericData.Record, E> keyed = collection.by(keyFn, Avros.generics(keyFn.schema()));

   PGroupedTable<GenericData.Record, E> grouped;
   if (numWriters > 0) {
     grouped = keyed.groupByKey(numWriters);
   } else {
     grouped = keyed.groupByKey();
   }
   return grouped.ungroup().values();
 }
  /**
   * Writes ten {@code TestRecord}s to {@code view} and closes the writer.
   *
   * <p>The records have ids {@code 0} through {@code 9}; each record's data is
   * {@code "test/-"} followed by its id. The writer is closed even if a write fails.
   *
   * @param view the view to obtain the writer from
   */
  private static void writeTestRecords(View<TestRecord> view) {
    final DatasetWriter<TestRecord> out = view.newWriter();
    try {
      for (int id = 0; id < 10; id++) {
        TestRecord rec = new TestRecord();
        rec.id = id;
        rec.data = "test/-" + id;
        out.write(rec);
      }
    } finally {
      out.close();
    }
  }
Beispiel #5
0
 /**
  * Partitions {@code collection} to be stored efficiently in {@code view}.
  *
  * <p>This restructures the parallel collection so that all of the entities that will be stored in
  * a given partition will be processed by the same writer.
  *
  * <p>This is a convenience for {@code partition(collection, view, -1)}: the negative writer
  * count means no explicit number of writers is requested.
  *
  * @param collection a collection of entities
  * @param view a {@link View} of a dataset to partition the collection for
  * @param <E> the type of entities in the collection and underlying dataset
  * @return an equivalent collection of entities partitioned for the view
  * @see #partition(PCollection, View, int)
  * @since 0.16.0
  */
 public static <E> PCollection<E> partition(PCollection<E> collection, View<E> view) {
   // Forward the view itself rather than view.getDataset(): the storage-key function reads
   // constraints from the view it is given, so substituting the dataset would hide the
   // caller's view from it.
   return partition(collection, view, -1);
 }