@Override
public int run() throws IOException {
  DatasetRepository repo = getDatasetRepository();
  if (targets == null || targets.isEmpty()) {
    throw new IllegalArgumentException("No views or datasets were specified.");
  }
  for (String uriOrName : targets) {
    if (isViewUri(uriOrName)) {
      // view URI: delete only the records selected by the view
      View view = Datasets.load(uriOrName);
      Preconditions.checkArgument(viewMatches(view.getUri(), uriOrName),
          "Resolved view does not match requested view: " + view.getUri());
      view.deleteAll();
    } else if (isDatasetUri(uriOrName)) {
      // dataset URI: drop the entire dataset
      Datasets.delete(uriOrName);
    } else {
      // bare name: delete from the repository using the configured namespace
      repo.delete(namespace, uriOrName);
    }
    console.debug("Deleted {}", uriOrName);
  }
  return 0;
}
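// Usage sketch (not part of the command): the same two deletion paths can be
// exercised directly through the Kite Datasets API. The URIs below are
// hypothetical examples, not values used by this tool.
// Delete only the records selected by a view:
View<GenericRecord> view = Datasets.load(
    "view:hive:events?year=2014&month=10", GenericRecord.class);
view.deleteAll();
// Drop an entire dataset, including its metadata:
boolean deleted = Datasets.delete("dataset:hive:events");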
private GetStorageKey(View<E> view) {
  DatasetDescriptor descriptor = view.getDataset().getDescriptor();
  // get serializable versions of transient objects
  this.strategyString = descriptor.getPartitionStrategy()
      .toString(false /* no white space */);
  this.schemaString = descriptor.getSchema()
      .toString(false /* no white space */);
  this.type = view.getType();
  if (view instanceof AbstractRefinableView) {
    this.constraints = ((AbstractRefinableView) view).getConstraints().toQueryMap();
  } else {
    this.constraints = null;
  }
}
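// Why only strings are kept (a sketch, not the library's actual counterpart):
// this function is serialized to the cluster, and Avro Schema / PartitionStrategy
// objects are not Serializable, so they must be rebuilt on the task side.
// Reparsing the schema uses Avro's Schema.Parser; the strategy side is shown
// with a hypothetical parsePartitionStrategy(...) helper standing in for
// Kite's internal parser.
private transient Schema keySchema;          // org.apache.avro.Schema
private transient PartitionStrategy strategy;

public void initialize() {
  this.keySchema = new Schema.Parser().parse(schemaString);
  this.strategy = parsePartitionStrategy(strategyString); // hypothetical helper
}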
/**
 * Partitions {@code collection} to be stored efficiently in {@code View}.
 *
 * <p>This restructures the parallel collection so that all of the entities that will be stored in
 * a given partition will be processed by the same writer.
 *
 * <p>If the dataset is not partitioned, then this will structure all of the entities to produce a
 * number of files equal to {@code numWriters}.
 *
 * @param collection a collection of entities
 * @param view a {@link View} of a dataset to partition the collection for
 * @param numWriters the number of writers that should be used
 * @param <E> the type of entities in the collection and underlying dataset
 * @return an equivalent collection of entities partitioned for the view
 * @see #partition(PCollection, View)
 * @since 0.16.0
 */
public static <E> PCollection<E> partition(
    PCollection<E> collection, View<E> view, int numWriters) {
  DatasetDescriptor descriptor = view.getDataset().getDescriptor();
  if (descriptor.isPartitioned()) {
    // key each entity by its storage key so that all entities destined for
    // the same partition are grouped into the same writer
    GetStorageKey<E> getKey = new GetStorageKey<E>(view);
    PTable<GenericData.Record, E> table =
        collection.by(getKey, Avros.generics(getKey.schema()));
    PGroupedTable<GenericData.Record, E> grouped =
        numWriters > 0 ? table.groupByKey(numWriters) : table.groupByKey();
    return grouped.ungroup().values();
  } else {
    // no partition strategy: just spread the entities across numWriters files
    return partition(collection, numWriters);
  }
}
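// Usage sketch: partition() is typically called right before writing to a
// partitioned view in a Crunch pipeline. The URIs and the MyTool class below
// are hypothetical; asSource/asTarget are Kite's adapters between Views and
// Crunch sources/targets.
Pipeline pipeline = new MRPipeline(MyTool.class);
View<GenericRecord> raw = Datasets.load("dataset:hdfs:/data/events_raw", GenericRecord.class);
View<GenericRecord> events = Datasets.load("dataset:hdfs:/data/events", GenericRecord.class);

PCollection<GenericRecord> collection = pipeline.read(CrunchDatasets.asSource(raw));
pipeline.write(
    CrunchDatasets.partition(collection, events, 10), // group records into 10 writers
    CrunchDatasets.asTarget(events));
pipeline.done();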
private static void writeTestRecords(View<TestRecord> view) {
  DatasetWriter<TestRecord> writer = null;
  try {
    writer = view.newWriter();
    for (int i = 0; i < 10; i += 1) {
      TestRecord record = new TestRecord();
      record.id = i;
      record.data = "test/-" + i;
      writer.write(record);
    }
  } finally {
    if (writer != null) {
      writer.close();
    }
  }
}
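// A test might use the helper like this; the URI is hypothetical and assumed
// to point at a dataset created in the test's setup with TestRecord's schema.
Dataset<TestRecord> dataset = Datasets.load(
    "dataset:file:/tmp/test/repo/ns/test_records", TestRecord.class);
writeTestRecords(dataset);   // the helper accepts any View<TestRecord>, not just a Dataset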
/**
 * Partitions {@code collection} to be stored efficiently in {@code View}.
 *
 * <p>This restructures the parallel collection so that all of the entities that will be stored in
 * a given partition will be processed by the same writer.
 *
 * @param collection a collection of entities
 * @param view a {@link View} of a dataset to partition the collection for
 * @param <E> the type of entities in the collection and underlying dataset
 * @return an equivalent collection of entities partitioned for the view
 * @since 0.16.0
 */
public static <E> PCollection<E> partition(PCollection<E> collection, View<E> view) {
  // pass the view itself (not view.getDataset()) so its constraints are kept,
  // and -1 so the number of writers is left to the Crunch planner
  return partition(collection, view, -1);
}
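// Usage sketch, reusing pipeline, collection, and events from the example
// above: with the two-argument form, groupByKey() is called without a count,
// so the Crunch planner picks the number of writers from its own estimates.
pipeline.write(
    CrunchDatasets.partition(collection, events),
    CrunchDatasets.asTarget(events));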