/**
 * Validates that the input GCS path is accessible and that the path is well formed.
 *
 * <p>The object portion of the pattern is first checked with {@code isGcsPatternSupported},
 * then the whole pattern is run through {@code verifyPath}, and finally
 * {@code verifyPathIsAccessible} is invoked with a "Could not find file" message.
 *
 * @param filepattern the GCS file pattern to validate
 * @return the value returned by {@code verifyPath} for {@code filepattern}
 */
@Override
public String validateInputFilePatternSupported(String filepattern) {
  GcsPath parsedPattern = getGcsPath(filepattern);
  boolean patternSupported =
      dataflowOptions.getGcsUtil().isGcsPatternSupported(parsedPattern.getObject());
  checkArgument(patternSupported);

  // Well-formedness is checked before the accessibility check is attempted.
  String resourceName = verifyPath(filepattern);
  verifyPathIsAccessible(filepattern, "Could not find file %s");
  return resourceName;
}
/**
 * Checks that {@code path} is a GCS path in a form this validator accepts.
 *
 * <p>The path must be absolute and its object name must not contain consecutive slashes;
 * either violation is rejected through {@code checkArgument}.
 *
 * @param path the GCS path to check
 * @return the resource name of the parsed path
 */
@Override
public String verifyPath(String path) {
  GcsPath parsed = getGcsPath(path);

  boolean absolute = parsed.isAbsolute();
  checkArgument(absolute, "Must provide absolute paths for Dataflow");

  boolean hasConsecutiveSlashes = parsed.getObject().contains("//");
  checkArgument(
      !hasConsecutiveSlashes,
      "Dataflow Service does not allow objects with consecutive slashes");

  return parsed.toResourceName();
}
/**
 * Verifies that the bucket of the given GCS path exists.
 *
 * @param path the GCS path whose bucket is looked up
 * @param errorMessage message template passed to {@code checkArgument}, with {@code path} as
 *     its single argument, when the bucket does not exist
 * @throws RuntimeException wrapping the {@link IOException} if the existence check itself fails
 */
private void verifyPathIsAccessible(String path, String errorMessage) {
  GcsPath parsed = getGcsPath(path);

  boolean bucketFound;
  try {
    bucketFound = dataflowOptions.getGcsUtil().bucketExists(parsed);
  } catch (IOException e) {
    // The lookup could not be completed, which is distinct from "bucket does not exist".
    String failure =
        String.format("Unable to verify that GCS bucket gs://%s exists.", parsed.getBucket());
    throw new RuntimeException(failure, e);
  }

  checkArgument(bucketFound, errorMessage, path);
}
private void testBase(String[] ARGS) throws IOException, GeneralSecurityException { // Run the pipeline. VariantSimilarity.main(ARGS); // Download the pipeline results. List<GraphResult> results = Lists.newArrayList(); for (GcsPath path : helper.gcsUtil.expand(GcsPath.fromUri(outputPrefix + "*"))) { BufferedReader reader = helper.openOutput(path.toString()); for (String line = reader.readLine(); line != null; line = reader.readLine()) { results.add(GraphResult.fromString(line)); } } // Check the pipeline results. assertEquals(helper.PLATINUM_GENOMES_NUMBER_OF_SAMPLES, results.size()); assertThat(results, CoreMatchers.allOf(CoreMatchers.hasItems(EXPECTED_RESULT))); }
/**
 * Parses {@code path} into a {@code GcsPath}.
 *
 * @param path the URI string to parse
 * @return the parsed path
 * @throws IllegalArgumentException if {@code GcsPath.fromUri} rejects the input; the rethrown
 *     exception names the configured runner and the offending path and keeps the original cause
 */
private GcsPath getGcsPath(String path) {
  try {
    return GcsPath.fromUri(path);
  } catch (IllegalArgumentException e) {
    String runnerName = dataflowOptions.getRunner().getSimpleName();
    String message =
        String.format("%s expected a valid 'gs://' path but was given '%s'", runnerName, path);
    throw new IllegalArgumentException(message, e);
  }
}
/** Lists documents contained beneath the {@code options.input} prefix/directory. */ public static Set<URI> listInputDocuments(Options options) throws URISyntaxException, IOException { URI baseUri = new URI(options.getInput()); // List all documents in the directory or GCS prefix. URI absoluteUri; if (baseUri.getScheme() != null) { absoluteUri = baseUri; } else { absoluteUri = new URI( "file", baseUri.getAuthority(), baseUri.getPath(), baseUri.getQuery(), baseUri.getFragment()); } Set<URI> uris = new HashSet<>(); if (absoluteUri.getScheme().equals("file")) { File directory = new File(absoluteUri); for (String entry : directory.list()) { File path = new File(directory, entry); uris.add(path.toURI()); } } else if (absoluteUri.getScheme().equals("gs")) { GcsUtil gcsUtil = options.as(GcsOptions.class).getGcsUtil(); URI gcsUriGlob = new URI( absoluteUri.getScheme(), absoluteUri.getAuthority(), absoluteUri.getPath() + "*", absoluteUri.getQuery(), absoluteUri.getFragment()); for (GcsPath entry : gcsUtil.expand(GcsPath.fromUri(gcsUriGlob))) { uris.add(entry.toUri()); } } return uris; }
/**
 * Builds {@code DataflowPipelineOptions} for tests, backed by a mocked {@code GcsUtil} and a
 * mocked Dataflow client.
 *
 * <p>The mocked {@code GcsUtil} reports every bucket as existing and delegates
 * {@code isGcsPatternSupported} to the real implementation.
 *
 * @return the populated options
 * @throws IOException declared because the stubbed {@code bucketExists} call declares it
 */
private static DataflowPipelineOptions buildPipelineOptions() throws IOException {
  DataflowPipelineOptions pipelineOptions =
      PipelineOptionsFactory.as(DataflowPipelineOptions.class);

  // Stub the GCS layer so that no real bucket lookups happen.
  GcsUtil gcsUtil = mock(GcsUtil.class);
  when(gcsUtil.bucketExists(any(GcsPath.class))).thenReturn(true);
  when(gcsUtil.isGcsPatternSupported(anyString())).thenCallRealMethod();

  String tempLocation = GcsPath.fromComponents("somebucket", "some/path").toString();

  pipelineOptions.setGcpCredential(new TestCredential());
  pipelineOptions.setJobName("some-job-name");
  pipelineOptions.setProject("some-project");
  pipelineOptions.setTempLocation(tempLocation);
  pipelineOptions.setFilesToStage(new LinkedList<String>());
  pipelineOptions.setDataflowClient(buildMockDataflow(new IsValidCreateRequest()));
  pipelineOptions.setGcsUtil(gcsUtil);
  return pipelineOptions;
}
private GcsUtil buildMockGcsUtil() throws IOException { GcsUtil mockGcsUtil = Mockito.mock(GcsUtil.class); // Any request to open gets a new bogus channel Mockito.when(mockGcsUtil.open(Mockito.any(GcsPath.class))) .thenReturn(new EmptySeekableByteChannel()); // Any request for expansion gets a single bogus URL // after we first run the expansion code (which will generally // return no results, which causes a crash we aren't testing) Mockito.when(mockGcsUtil.expand(Mockito.any(GcsPath.class))) .thenReturn(Arrays.asList(GcsPath.fromUri("gs://bucket/foo"))); return mockGcsUtil; }
/**
 * Deletes every output object whose path matches {@code outputPrefix} followed by a wildcard.
 *
 * @throws Exception if expanding the glob or deleting an output fails
 */
@After
public void tearDown() throws Exception {
  GcsPath outputGlob = GcsPath.fromUri(outputPrefix + "*");
  for (GcsPath leftover : helper.gcsUtil.expand(outputGlob)) {
    helper.deleteOutput(leftover.toString());
  }
}