/**
 * Validates that the input GCS file pattern is supported and well formed, and that its bucket
 * exists.
 *
 * @param filepattern the GCS file pattern to validate
 * @return the value produced by {@code verifyPath} for {@code filepattern}
 */
 @Override
 public String validateInputFilePatternSupported(String filepattern) {
   GcsPath parsedPattern = getGcsPath(filepattern);

   // Only the object portion of the path is handed to the pattern-support check.
   boolean patternSupported =
       dataflowOptions.getGcsUtil().isGcsPatternSupported(parsedPattern.getObject());
   checkArgument(patternSupported);

   // Structural validation runs before the bucket lookup.
   String resourceName = verifyPath(filepattern);
   verifyPathIsAccessible(filepattern, "Could not find file %s");
   return resourceName;
 }
 /**
  * Verifies that {@code path} is an absolute GCS path whose object name contains no consecutive
  * slashes.
  *
  * @param path the GCS URI to check
  * @return the resource name of the parsed path, as given by {@code GcsPath.toResourceName()}
  */
 @Override
 public String verifyPath(String path) {
   GcsPath parsed = getGcsPath(path);

   boolean absolute = parsed.isAbsolute();
   checkArgument(absolute, "Must provide absolute paths for Dataflow");

   boolean hasConsecutiveSlashes = parsed.getObject().contains("//");
   checkArgument(
       !hasConsecutiveSlashes,
       "Dataflow Service does not allow objects with consecutive slashes");

   return parsed.toResourceName();
 }
 /**
  * Checks that the bucket of the given GCS path exists.
  *
  * @param path the GCS URI whose bucket is looked up
  * @param errorMessage message template handed to {@code checkArgument}, with {@code path} as its
  *     argument, when the bucket is reported missing
  * @throws RuntimeException wrapping the {@link IOException} raised when the existence check
  *     itself cannot be performed
  */
 private void verifyPathIsAccessible(String path, String errorMessage) {
   GcsPath target = getGcsPath(path);

   boolean bucketFound;
   try {
     bucketFound = dataflowOptions.getGcsUtil().bucketExists(target);
   } catch (IOException e) {
     String detail =
         String.format("Unable to verify that GCS bucket gs://%s exists.", target.getBucket());
     throw new RuntimeException(detail, e);
   }

   checkArgument(bucketFound, errorMessage, path);
 }
  /**
   * Runs the {@code VariantSimilarity} pipeline with the given arguments and checks its output.
   *
   * <p>Every output matching {@code outputPrefix + "*"} is read line by line, each line is parsed
   * with {@code GraphResult.fromString}, and the collected results are compared against the
   * expected sample count and {@code EXPECTED_RESULT}.
   *
   * @param ARGS command-line arguments passed to {@code VariantSimilarity.main}
   * @throws IOException if reading (or closing) an output file fails
   * @throws GeneralSecurityException declared by this helper; presumably propagated from the
   *     pipeline run or GCS access (confirm against callees)
   */
  private void testBase(String[] ARGS) throws IOException, GeneralSecurityException {
    // Run the pipeline.
    VariantSimilarity.main(ARGS);

    // Download the pipeline results.
    List<GraphResult> results = Lists.newArrayList();
    for (GcsPath path : helper.gcsUtil.expand(GcsPath.fromUri(outputPrefix + "*"))) {
      // try-with-resources closes each reader even if reading or parsing a line throws,
      // so no reader is left open per output file.
      try (BufferedReader reader = helper.openOutput(path.toString())) {
        for (String line = reader.readLine(); line != null; line = reader.readLine()) {
          results.add(GraphResult.fromString(line));
        }
      }
    }

    // Check the pipeline results.
    assertEquals(helper.PLATINUM_GENOMES_NUMBER_OF_SAMPLES, results.size());

    assertThat(results, CoreMatchers.allOf(CoreMatchers.hasItems(EXPECTED_RESULT)));
  }
 /**
  * Parses {@code path} into a {@link GcsPath}.
  *
  * @param path the string expected to be a {@code gs://} URI
  * @return the parsed path
  * @throws IllegalArgumentException if {@code GcsPath.fromUri} rejects the input; the message
  *     names the configured runner and the offending path, and the original exception is kept as
  *     the cause
  */
 private GcsPath getGcsPath(String path) {
   try {
     return GcsPath.fromUri(path);
   } catch (IllegalArgumentException cause) {
     String runnerName = dataflowOptions.getRunner().getSimpleName();
     String message =
         String.format("%s expected a valid 'gs://' path but was given '%s'", runnerName, path);
     throw new IllegalArgumentException(message, cause);
   }
 }
Beispiel #6
0
  /**
   * Lists documents contained beneath the {@code options.input} prefix/directory.
   *
   * <p>An input without a URI scheme is treated as a {@code file} URI. A {@code file} input is
   * listed as a local directory; a {@code gs} input is expanded through {@code GcsUtil} using the
   * input path with a trailing {@code *}. Any other scheme yields an empty set.
   *
   * @param options pipeline options providing the input location via {@code getInput()}
   * @return the URIs of the entries found; empty if nothing matched or the scheme is neither
   *     {@code file} nor {@code gs}
   * @throws URISyntaxException if the input, or a URI derived from it, is not a valid URI
   * @throws IOException if the local input directory cannot be listed; may also propagate from
   *     the GCS expansion
   */
  public static Set<URI> listInputDocuments(Options options)
      throws URISyntaxException, IOException {
    URI baseUri = new URI(options.getInput());

    // List all documents in the directory or GCS prefix.
    URI absoluteUri;
    if (baseUri.getScheme() != null) {
      absoluteUri = baseUri;
    } else {
      absoluteUri =
          new URI(
              "file",
              baseUri.getAuthority(),
              baseUri.getPath(),
              baseUri.getQuery(),
              baseUri.getFragment());
    }

    Set<URI> uris = new HashSet<>();
    String scheme = absoluteUri.getScheme();
    if ("file".equals(scheme)) {
      File directory = new File(absoluteUri);
      // File.list() returns null instead of throwing when the path is not a directory or an
      // I/O error occurs; report that explicitly rather than failing with a
      // NullPointerException in the loop below.
      String[] entries = directory.list();
      if (entries == null) {
        throw new IOException("Unable to list input directory: " + directory);
      }
      for (String entry : entries) {
        File path = new File(directory, entry);
        uris.add(path.toURI());
      }
    } else if ("gs".equals(scheme)) {
      GcsUtil gcsUtil = options.as(GcsOptions.class).getGcsUtil();
      // Append a wildcard so the expansion matches everything under the prefix.
      URI gcsUriGlob =
          new URI(
              absoluteUri.getScheme(),
              absoluteUri.getAuthority(),
              absoluteUri.getPath() + "*",
              absoluteUri.getQuery(),
              absoluteUri.getFragment());
      for (GcsPath entry : gcsUtil.expand(GcsPath.fromUri(gcsUriGlob))) {
        uris.add(entry.toUri());
      }
    }

    return uris;
  }
  /**
   * Builds {@link DataflowPipelineOptions} populated with fixed test values, a mocked
   * {@link GcsUtil}, and the Dataflow client returned by {@code buildMockDataflow}.
   *
   * @return the configured options
   * @throws IOException declared because the stubbed {@code bucketExists} call declares it
   */
  private static DataflowPipelineOptions buildPipelineOptions() throws IOException {
    // GCS stub: every bucket is reported as existing, while the pattern-support check
    // delegates to the real GcsUtil implementation.
    GcsUtil gcsUtilStub = mock(GcsUtil.class);
    when(gcsUtilStub.bucketExists(any(GcsPath.class))).thenReturn(true);
    when(gcsUtilStub.isGcsPatternSupported(anyString())).thenCallRealMethod();

    DataflowPipelineOptions pipelineOptions =
        PipelineOptionsFactory.as(DataflowPipelineOptions.class);
    pipelineOptions.setGcpCredential(new TestCredential());
    pipelineOptions.setJobName("some-job-name");
    pipelineOptions.setProject("some-project");

    String tempLocation = GcsPath.fromComponents("somebucket", "some/path").toString();
    pipelineOptions.setTempLocation(tempLocation);

    // No files are staged for these options.
    pipelineOptions.setFilesToStage(new LinkedList<String>());
    pipelineOptions.setDataflowClient(buildMockDataflow(new IsValidCreateRequest()));
    pipelineOptions.setGcsUtil(gcsUtilStub);
    return pipelineOptions;
  }
  /**
   * Creates a Mockito mock of {@link GcsUtil} with canned answers for {@code open} and
   * {@code expand}.
   *
   * @return the stubbed mock
   * @throws IOException declared because the stubbed {@code GcsUtil} calls declare it
   */
  private GcsUtil buildMockGcsUtil() throws IOException {
    GcsUtil gcsUtilStub = Mockito.mock(GcsUtil.class);

    // Opening any path is answered with a bogus, empty channel.
    Mockito.when(gcsUtilStub.open(Mockito.any(GcsPath.class)))
        .thenReturn(new EmptySeekableByteChannel());

    // Expanding any path is answered with a single bogus URL. Per the original author's note,
    // the real expansion code would generally return no results, which causes a crash that
    // these tests are not meant to exercise.
    GcsPath bogusPath = GcsPath.fromUri("gs://bucket/foo");
    Mockito.when(gcsUtilStub.expand(Mockito.any(GcsPath.class)))
        .thenReturn(Arrays.asList(bogusPath));

    return gcsUtilStub;
  }
 /**
  * Deletes every output matching {@code outputPrefix + "*"}; annotated with {@code @After} so it
  * runs as test cleanup.
  */
 @After
 public void tearDown() throws Exception {
   GcsPath outputGlob = GcsPath.fromUri(outputPrefix + "*");
   for (GcsPath leftover : helper.gcsUtil.expand(outputGlob)) {
     helper.deleteOutput(leftover.toString());
   }
 }