Beispiel #1
0
      @Override
      public PCollection<T> apply(PInput input) {
        if (filepattern == null) {
          throw new IllegalStateException(
              "need to set the filepattern of an AvroIO.Read transform");
        }
        if (schema == null) {
          throw new IllegalStateException("need to set the schema of an AvroIO.Read transform");
        }
        if (validate) {
          try {
            checkState(
                !IOChannelUtils.getFactory(filepattern).match(filepattern).isEmpty(),
                "Unable to find any files matching %s",
                filepattern);
          } catch (IOException e) {
            throw new IllegalStateException(String.format("Failed to validate %s", filepattern), e);
          }
        }

        @SuppressWarnings("unchecked")
        Bounded<T> read =
            type == GenericRecord.class
                ? (Bounded<T>)
                    com.google.cloud.dataflow.sdk.io.Read.from(
                        AvroSource.from(filepattern).withSchema(schema))
                : com.google.cloud.dataflow.sdk.io.Read.from(
                    AvroSource.from(filepattern).withSchema(type));

        PCollection<T> pcol = input.getPipeline().apply("Read", read);
        // Honor the default output coder that would have been used by this PTransform.
        pcol.setCoder(getDefaultOutputCoder());
        return pcol;
      }
Beispiel #2
0
 @Override
 public SinkWriter<WindowedValue<KV<K, V>>> writer() throws IOException {
   return new IsmSinkWriter(IOChannelUtils.create(filename, MimeTypes.BINARY));
 }