示例#1
0
  /**
   * Reads a view of the "in" dataset through a Crunch pipeline, appends the result to a view of
   * the "out" dataset, and checks that exactly one record arrives in the output.
   *
   * @throws IOException declared for the test helpers used to write and count records
   */
  @Test
  public void testViewUris() throws IOException {
    // Both datasets are hash-partitioned on "username" into two buckets.
    PartitionStrategy hashByUsername =
        new PartitionStrategy.Builder().hash("username", 2).build();

    DatasetDescriptor sourceDescriptor =
        new DatasetDescriptor.Builder()
            .schema(USER_SCHEMA)
            .partitionStrategy(hashByUsername)
            .build();
    Dataset<Record> source = repo.create("ns", "in", sourceDescriptor);

    DatasetDescriptor sinkDescriptor =
        new DatasetDescriptor.Builder()
            .schema(USER_SCHEMA)
            .partitionStrategy(hashByUsername)
            .build();
    Dataset<Record> sink = repo.create("ns", "out", sinkDescriptor);

    writeTestUsers(source, 10);

    // Restrict the input to the single user named "test-0" and confirm the view sees one record.
    URI readUri =
        new URIBuilder(repo.getUri(), "ns", "in")
            .with("username", "test-0")
            .build();
    View<Record> restrictedInput =
        Datasets.<Record, Dataset<Record>>load(readUri, Record.class);
    Assert.assertEquals(1, datasetSize(restrictedInput));

    // Feed the view URI to Crunch as a source and append to a view URI on the output dataset.
    Pipeline crunch = new MRPipeline(TestCrunchDatasets.class);
    PCollection<GenericData.Record> records =
        crunch.read(CrunchDatasets.asSource(readUri, GenericData.Record.class));
    URI writeUri =
        new URIBuilder(repo.getUri(), "ns", "out")
            .with("email", "email-0")
            .build();
    crunch.write(records, CrunchDatasets.asTarget(writeUri), Target.WriteMode.APPEND);
    crunch.run();

    // Only the one record selected by the input view should have been written.
    Assert.assertEquals(1, datasetSize(sink));
  }
示例#2
0
  /**
   * Lists the URI of every {@link Dataset} stored in the repository that {@code uri} points to.
   *
   * <p>The argument must be a repository URI, that is, one whose scheme is {@code repo:}. The
   * remainder of the URI is implementation-specific; for instance {@code repo:hive} addresses the
   * Hive repository. One dataset URI is produced for each dataset in each namespace of the
   * repository.
   *
   * @param uri a {@code DatasetRepository} URI
   * @return the dataset URIs found in the {@code DatasetRepository}
   * @throws NullPointerException if {@code uri} is null
   * @throws IllegalArgumentException if {@code uri} does not use the {@code repo:} scheme
   */
  public static Collection<URI> list(URI uri) {
    // Dereferencing uri here is what produces the documented NullPointerException for null input.
    Preconditions.checkArgument(
        URIBuilder.REPO_SCHEME.equals(uri.getScheme()), "Not a repository URI: " + uri);

    // Strip the "repo:" scheme; what remains is the URI used to open the repository.
    String schemeSpecificPart = uri.getRawSchemeSpecificPart();
    URI unwrappedUri = URI.create(schemeSpecificPart);
    DatasetRepository repository = Registration.open(unwrappedUri);

    // Dataset URIs are built from the URI the opened repository reports for itself.
    URI baseUri = repository.getUri();
    List<URI> datasetUris = Lists.newArrayList();
    for (String ns : repository.namespaces()) {
      for (String name : repository.datasets(ns)) {
        URI datasetUri = new URIBuilder(baseUri, ns, name).build();
        datasetUris.add(datasetUri);
      }
    }
    return datasetUris;
  }