@Override public PCollection<KV<URI, String>> apply(PInput input) { Pipeline pipeline = input.getPipeline(); // Create one TextIO.Read transform for each document // and add its output to a PCollectionList PCollectionList<KV<URI, String>> urisToLines = PCollectionList.empty(pipeline); // TextIO.Read supports: // - file: URIs and paths locally // - gs: URIs on the service for (final URI uri : uris) { String uriString; if (uri.getScheme().equals("file")) { uriString = new File(uri).getPath(); } else { uriString = uri.toString(); } PCollection<KV<URI, String>> oneUriToLines = pipeline .apply(TextIO.Read.from(uriString).named("TextIO.Read(" + uriString + ")")) .apply(WithKeys.<URI, String>of(uri).setName("WithKeys(" + uriString + ")")); urisToLines = urisToLines.and(oneUriToLines); } return urisToLines.apply(Flatten.<KV<URI, String>>pCollections()); }
@Override public PCollection<T> apply(PInput input) { if (filepattern == null) { throw new IllegalStateException( "need to set the filepattern of an AvroIO.Read transform"); } if (schema == null) { throw new IllegalStateException("need to set the schema of an AvroIO.Read transform"); } if (validate) { try { checkState( !IOChannelUtils.getFactory(filepattern).match(filepattern).isEmpty(), "Unable to find any files matching %s", filepattern); } catch (IOException e) { throw new IllegalStateException(String.format("Failed to validate %s", filepattern), e); } } @SuppressWarnings("unchecked") Bounded<T> read = type == GenericRecord.class ? (Bounded<T>) com.google.cloud.dataflow.sdk.io.Read.from( AvroSource.from(filepattern).withSchema(schema)) : com.google.cloud.dataflow.sdk.io.Read.from( AvroSource.from(filepattern).withSchema(type)); PCollection<T> pcol = input.getPipeline().apply("Read", read); // Honor the default output coder that would have been used by this PTransform. pcol.setCoder(getDefaultOutputCoder()); return pcol; }
/**
 * Produces the primitive, unbounded output collection for this Pub/Sub read.
 *
 * <p>Exactly one of {@code topic} and {@code subscription} must be set. The output uses the
 * global default windowing strategy and has {@code coder} set as its coder.
 *
 * @param input the pipeline input; only its pipeline is used
 * @return the unbounded output collection with its coder set
 * @throws IllegalStateException if neither, or both, of topic and subscription are set
 */
@Override
public PCollection<T> apply(PInput input) {
  final boolean hasTopic = topic != null;
  final boolean hasSubscription = subscription != null;

  if (!hasTopic && !hasSubscription) {
    throw new IllegalStateException(
        "need to set either the topic or the subscription for a PubsubIO.Read transform");
  }
  if (hasTopic && hasSubscription) {
    throw new IllegalStateException(
        "Can't set both the topic and the subscription for a PubsubIO.Read transform");
  }

  PCollection<T> output =
      PCollection.<T>createPrimitiveOutputInternal(
          input.getPipeline(), WindowingStrategy.globalDefault(), IsBounded.UNBOUNDED);
  return output.setCoder(coder);
}
/**
 * Produces the primitive, bounded output collection for this transform.
 *
 * <p>The output is created on the input's pipeline with the global default windowing
 * strategy.
 *
 * @param input the pipeline input; only its pipeline is used
 * @return a bounded collection of key/value pairs
 */
@Override
public PCollection<KV<K, V>> apply(PInput input) {
  PCollection<KV<K, V>> output =
      PCollection.<KV<K, V>>createPrimitiveOutputInternal(
          input.getPipeline(),
          WindowingStrategy.globalDefault(),
          PCollection.IsBounded.BOUNDED);
  return output;
}