public static <W extends BoundedWindow, AccumT, OutputT> TriggerTester<Integer, OutputT, W> combining(
    WindowFn<?, W> windowFn,
    Trigger<W> trigger,
    AccumulationMode mode,
    KeyedCombineFn<String, Integer, AccumT, OutputT> combineFn,
    Coder<OutputT> outputCoder,
    Duration allowedDataLateness) throws Exception {
  WindowingStrategy<?, W> strategy =
      WindowingStrategy.of(windowFn)
          .withTrigger(trigger)
          .withMode(mode)
          .withAllowedLateness(allowedDataLateness);

  CoderRegistry registry = new CoderRegistry();
  registry.registerStandardCoders();
  AppliedCombineFn<String, Integer, AccumT, OutputT> fn =
      AppliedCombineFn.<String, Integer, AccumT, OutputT>withInputCoder(
          combineFn, registry, KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()));

  return new TriggerTester<Integer, OutputT, W>(
      strategy,
      SystemReduceFn.<String, Integer, AccumT, OutputT, W>combining(StringUtf8Coder.of(), fn)
          .create(KEY),
      outputCoder);
}
<T> void runTestRead(T[] expected, Coder<T> coder) throws Exception {
  File tmpFile = tmpFolder.newFile("file.txt");
  String filename = tmpFile.getPath();
  try (PrintStream writer = new PrintStream(new FileOutputStream(tmpFile))) {
    for (T elem : expected) {
      byte[] encodedElem = CoderUtils.encodeToByteArray(coder, elem);
      String line = new String(encodedElem);
      writer.println(line);
    }
  }

  Pipeline p = TestPipeline.create();

  TextIO.Read.Bound<T> read;
  if (coder.equals(StringUtf8Coder.of())) {
    TextIO.Read.Bound<String> readStrings = TextIO.Read.from(filename);
    // T==String
    read = (TextIO.Read.Bound<T>) readStrings;
  } else {
    read = TextIO.Read.from(filename).withCoder(coder);
  }

  PCollection<T> output = p.apply(read);

  DataflowAssert.that(output).containsInAnyOrder(expected);
  p.run();
}
@Test @SuppressWarnings("unchecked") public void testTop() { Pipeline p = TestPipeline.create(); PCollection<String> input = p.apply(Create.of(Arrays.asList(COLLECTION)).withCoder(StringUtf8Coder.of())); PCollection<List<String>> top1 = input.apply(Top.of(1, new OrderByLength())); PCollection<List<String>> top2 = input.apply(Top.<String>largest(2)); PCollection<List<String>> top3 = input.apply(Top.<String>smallest(3)); PCollection<KV<String, Integer>> inputTable = createInputTable(p); PCollection<KV<String, List<Integer>>> largestPerKey = inputTable.apply(Top.<String, Integer>largestPerKey(2)); PCollection<KV<String, List<Integer>>> smallestPerKey = inputTable.apply(Top.<String, Integer>smallestPerKey(2)); DataflowAssert.thatSingletonIterable(top1).containsInAnyOrder(Arrays.asList("bb")); DataflowAssert.thatSingletonIterable(top2).containsInAnyOrder("z", "c"); DataflowAssert.thatSingletonIterable(top3).containsInAnyOrder("a", "bb", "c"); DataflowAssert.that(largestPerKey) .containsInAnyOrder(KV.of("a", Arrays.asList(3, 2)), KV.of("b", Arrays.asList(100, 10))); DataflowAssert.that(smallestPerKey) .containsInAnyOrder(KV.of("a", Arrays.asList(1, 2)), KV.of("b", Arrays.asList(1, 10))); p.run(); }
@Test @SuppressWarnings("unchecked") public void testTopEmpty() { Pipeline p = TestPipeline.create(); PCollection<String> input = p.apply(Create.of(Arrays.asList(EMPTY_COLLECTION)).withCoder(StringUtf8Coder.of())); PCollection<List<String>> top1 = input.apply(Top.of(1, new OrderByLength())); PCollection<List<String>> top2 = input.apply(Top.<String>largest(2)); PCollection<List<String>> top3 = input.apply(Top.<String>smallest(3)); PCollection<KV<String, Integer>> inputTable = createEmptyInputTable(p); PCollection<KV<String, List<Integer>>> largestPerKey = inputTable.apply(Top.<String, Integer>largestPerKey(2)); PCollection<KV<String, List<Integer>>> smallestPerKey = inputTable.apply(Top.<String, Integer>smallestPerKey(2)); DataflowAssert.thatSingletonIterable(top1).empty(); DataflowAssert.thatSingletonIterable(top2).empty(); DataflowAssert.thatSingletonIterable(top3).empty(); DataflowAssert.that(largestPerKey).empty(); DataflowAssert.that(smallestPerKey).empty(); p.run(); }
@Test
public void testCountConstraint() {
  Pipeline p = TestPipeline.create();
  PCollection<String> input =
      p.apply(Create.of(Arrays.asList(COLLECTION)).withCoder(StringUtf8Coder.of()));

  expectedEx.expect(IllegalArgumentException.class);
  expectedEx.expectMessage(Matchers.containsString(">= 0"));

  input.apply(Top.of(-1, new OrderByLength()));
}
// This is a purely compile-time test. If the code compiles, then it worked.
@Test
public void testPerKeySerializabilityRequirement() {
  Pipeline p = TestPipeline.create();
  p.apply(
      "CreateCollection", Create.of(Arrays.asList(COLLECTION)).withCoder(StringUtf8Coder.of()));

  PCollection<KV<String, Integer>> inputTable = createInputTable(p);
  inputTable.apply(Top.<String, Integer, IntegerComparator>perKey(1, new IntegerComparator()));
  inputTable.apply(
      "PerKey2", Top.<String, Integer, IntegerComparator2>perKey(1, new IntegerComparator2()));
}
@Override
public void outputWindowedValue(
    KV<String, OutputT> output,
    Instant timestamp,
    Collection<? extends BoundedWindow> windows,
    PaneInfo pane) {
  // Copy the output value (using coders) before capturing it.
  KV<String, OutputT> copy =
      SerializableUtils.<KV<String, OutputT>>ensureSerializableByCoder(
          KvCoder.of(StringUtf8Coder.of(), outputCoder), output, "outputForWindow");
  WindowedValue<KV<String, OutputT>> value = WindowedValue.of(copy, timestamp, windows, pane);
  logInteraction("Outputting: %s", value);
  outputs.add(value);
}
@Test
public void testUnsupportedFilePattern() throws IOException {
  File outFolder = tmpFolder.newFolder();
  // Windows doesn't like resolving paths with * in them.
  String filename = outFolder.toPath().resolve("output@5").toString();

  Pipeline p = TestPipeline.create();
  PCollection<String> input =
      p.apply(Create.of(Arrays.asList(LINES_ARRAY)).withCoder(StringUtf8Coder.of()));

  expectedException.expect(IllegalArgumentException.class);
  expectedException.expectMessage("Output name components are not allowed to contain");
  input.apply(TextIO.Write.to(filename));
}
<T> void runTestWrite(T[] elems, Coder<T> coder) throws Exception {
  File tmpFile = tmpFolder.newFile("file.txt");
  String filename = tmpFile.getPath();

  Pipeline p = TestPipeline.create();
  PCollection<T> input = p.apply(Create.of(Arrays.asList(elems)).withCoder(coder));

  TextIO.Write.Bound<T> write;
  if (coder.equals(StringUtf8Coder.of())) {
    TextIO.Write.Bound<String> writeStrings = TextIO.Write.to(filename).withoutSharding();
    // T==String
    write = (TextIO.Write.Bound<T>) writeStrings;
  } else {
    write = TextIO.Write.to(filename).withCoder(coder).withoutSharding();
  }
  input.apply(write);

  p.run();

  List<String> actual = new ArrayList<>();
  try (BufferedReader reader = new BufferedReader(new FileReader(tmpFile))) {
    for (;;) {
      String line = reader.readLine();
      if (line == null) {
        break;
      }
      actual.add(line);
    }
  }

  String[] expected = new String[elems.length];
  for (int i = 0; i < elems.length; i++) {
    T elem = elems[i];
    byte[] encodedElem = CoderUtils.encodeToByteArray(coder, elem);
    String line = new String(encodedElem);
    expected[i] = line;
  }

  assertThat(actual, containsInAnyOrder(expected));
}
@Test
public void testWriteSharded() throws IOException {
  File outFolder = tmpFolder.newFolder();
  String filename = outFolder.toPath().resolve("output").toString();

  Pipeline p = TestPipeline.create();
  PCollection<String> input =
      p.apply(Create.of(Arrays.asList(LINES_ARRAY)).withCoder(StringUtf8Coder.of()));

  input.apply(TextIO.Write.to(filename).withNumShards(2).withSuffix(".txt"));

  p.run();

  String[] files = outFolder.list();
  assertThat(
      Arrays.asList(files),
      containsInAnyOrder("output-00000-of-00002.txt", "output-00001-of-00002.txt"));
}
@Override
protected Coder<?> getDefaultOutputCoder() {
  return StringUtf8Coder.of();
}
@Override
public Coder<?> getDefaultOutputCoder() {
  return KvCoder.of(StringDelegateCoder.of(URI.class), StringUtf8Coder.of());
}
@Test
public void testWriteEmptyStrings() throws Exception {
  runTestWrite(NO_LINES_ARRAY, StringUtf8Coder.of());
}
@Test
public void testReadStrings() throws Exception {
  runTestRead(LINES_ARRAY, StringUtf8Coder.of());
}
public PCollection<KV<String, Integer>> createEmptyInputTable(Pipeline p) {
  return p.apply(
      "CreateEmptyInputTable",
      Create.of(Arrays.asList(EMPTY_TABLE))
          .withCoder(KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of())));
}
/**
 * Read and Write {@link PTransform}s for Pub/Sub streams. These transforms create and consume
 * unbounded {@link com.google.cloud.dataflow.sdk.values.PCollection}s.
 *
 * <p>{@code PubsubIO} is only usable with the {@link
 * com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner} and requires {@link
 * com.google.cloud.dataflow.sdk.options.StreamingOptions#setStreaming(boolean)} to be enabled.
 */
public class PubsubIO {
  public static final Coder<String> DEFAULT_PUBSUB_CODER = StringUtf8Coder.of();

  /**
   * Project IDs must contain 6-63 lowercase letters, digits, or dashes. IDs must start with a
   * letter and may not end with a dash. This regex isn't exact - it allows for patterns that
   * would be rejected by the service, but it is sufficient for basic validation of project IDs.
   */
  private static final Pattern PROJECT_ID_REGEXP =
      Pattern.compile("[a-z][-a-z0-9:.]{4,61}[a-z0-9]");

  private static final Pattern SUBSCRIPTION_REGEXP =
      Pattern.compile("/subscriptions/([^/]+)/(.+)");

  private static final Pattern TOPIC_REGEXP = Pattern.compile("/topics/([^/]+)/(.+)");

  private static final Pattern PUBSUB_NAME_REGEXP = Pattern.compile("[a-z][-._a-z0-9]+[a-z0-9]");

  private static final int PUBSUB_NAME_MAX_LENGTH = 255;

  private static final String SUBSCRIPTION_RANDOM_TEST_PREFIX = "_random/";
  private static final String SUBSCRIPTION_STARTING_SIGNAL = "_starting_signal/";
  private static final String TOPIC_DEV_NULL_TEST_NAME = "/topics/dev/null";

  /** Utility class to validate topic and subscription names. */
  public static class Validator {
    public static void validateTopicName(String topic) {
      if (topic.equals(TOPIC_DEV_NULL_TEST_NAME)) {
        return;
      }
      Matcher match = TOPIC_REGEXP.matcher(topic);
      if (!match.matches()) {
        throw new IllegalArgumentException(
            "Pubsub topic is not in /topics/project_id/topic_name format: " + topic);
      }
      validateProjectName(match.group(1));
      validatePubsubName(match.group(2));
    }

    public static void validateSubscriptionName(String subscription) {
      if (subscription.startsWith(SUBSCRIPTION_RANDOM_TEST_PREFIX)
          || subscription.startsWith(SUBSCRIPTION_STARTING_SIGNAL)) {
        return;
      }
      Matcher match = SUBSCRIPTION_REGEXP.matcher(subscription);
      if (!match.matches()) {
        throw new IllegalArgumentException(
            "Pubsub subscription is not in /subscriptions/project_id/subscription_name format: "
                + subscription);
      }
      validateProjectName(match.group(1));
      validatePubsubName(match.group(2));
    }

    private static void validateProjectName(String project) {
      Matcher match = PROJECT_ID_REGEXP.matcher(project);
      if (!match.matches()) {
        throw new IllegalArgumentException(
            "Illegal project name specified in Pubsub subscription: " + project);
      }
    }

    private static void validatePubsubName(String name) {
      if (name.length() > PUBSUB_NAME_MAX_LENGTH) {
        throw new IllegalArgumentException(
            "Pubsub object name is longer than 255 characters: " + name);
      }
      if (name.startsWith("goog")) {
        throw new IllegalArgumentException("Pubsub object name cannot start with goog: " + name);
      }
      Matcher match = PUBSUB_NAME_REGEXP.matcher(name);
      if (!match.matches()) {
        throw new IllegalArgumentException(
            "Illegal Pubsub object name specified: " + name
                + " Please see Javadoc for naming rules.");
      }
    }
  }

  /**
   * A {@link PTransform} that continuously reads from a Pubsub stream and returns a {@code
   * PCollection<String>} containing the items from the stream.
   */
  public static class Read {
    public static Bound<String> named(String name) {
      return new Bound<>(DEFAULT_PUBSUB_CODER).named(name);
    }

    /**
     * Creates and returns a PubsubIO.Read PTransform for reading from a Pubsub topic with the
     * specified publisher topic. Cloud Pubsub topic names should be of the form
     * {@code /topics/<project>/<topic>}, where {@code <project>} is the name of the publishing
     * project. The {@code <topic>} component must comply with the below requirements.
     *
     * <ul>
     *   <li>Can only contain lowercase letters, numbers, dashes ('-'), underscores ('_') and
     *       periods ('.').
     *   <li>Must be between 3 and 255 characters.
     *   <li>Must begin with a letter.
     *   <li>Must end with a letter or a number.
     *   <li>Cannot begin with the 'goog' prefix.
     * </ul>
     *
     * <p>Dataflow will start reading data published on this topic from the time the pipeline is
     * started. Any data published on the topic before the pipeline is started will not be read
     * by Dataflow.
     */
    public static Bound<String> topic(String topic) {
      return new Bound<>(DEFAULT_PUBSUB_CODER).topic(topic);
    }

    /**
     * Creates and returns a PubsubIO.Read PTransform for reading from a specific Pubsub
     * subscription. Mutually exclusive with PubsubIO.Read.topic(). Cloud Pubsub subscription
     * names should be of the form {@code /subscriptions/<project>/<subscription>}, where
     * {@code <project>} is the name of the project the subscription belongs to. The
     * {@code <subscription>} component must comply with the below requirements.
     *
     * <ul>
     *   <li>Can only contain lowercase letters, numbers, dashes ('-'), underscores ('_') and
     *       periods ('.').
     *   <li>Must be between 3 and 255 characters.
     *   <li>Must begin with a letter.
     *   <li>Must end with a letter or a number.
     *   <li>Cannot begin with the 'goog' prefix.
     * </ul>
     */
    public static Bound<String> subscription(String subscription) {
      return new Bound<>(DEFAULT_PUBSUB_CODER).subscription(subscription);
    }

    /**
     * Creates and returns a PubsubIO.Read PTransform where record timestamps are expected to be
     * provided using the PubSub labeling API. The {@code <timestampLabel>} parameter specifies
     * the label name. The label value sent to PubSub is a numerical value representing the
     * number of milliseconds since the Unix epoch. For example, if using the joda time classes,
     * org.joda.time.Instant.getMillis() returns the correct value for this label.
     *
     * <p>If {@code <timestampLabel>} is not provided, the system will generate record timestamps
     * the first time it sees each record. All windowing will be done relative to these
     * timestamps.
     *
     * <p>By default windows are emitted based on an estimate of when this source is likely done
     * producing data for a given timestamp (referred to as the Watermark; see {@link
     * com.google.cloud.dataflow.sdk.transforms.windowing.AfterWatermark} for more details). Any
     * late data will be handled by the trigger specified with the windowing strategy -- by
     * default it will be output immediately.
     *
     * <p>The {@code dropLateData} field allows you to control what to do with late data. This
     * relaxes the semantics of {@code GroupByKey}; see {@link
     * com.google.cloud.dataflow.sdk.transforms.GroupByKey} for additional information on late
     * data and windowing.
     *
     * <p>Note that the system can guarantee that no late data will ever be seen when it assigns
     * timestamps by arrival time (i.e. {@code timestampLabel} is not provided).
     */
    public static Bound<String> timestampLabel(String timestampLabel) {
      return new Bound<>(DEFAULT_PUBSUB_CODER).timestampLabel(timestampLabel);
    }

    /**
     * Creates and returns a PubsubIO.Read PTransform where unique record identifiers are
     * expected to be provided using the PubSub labeling API. The {@code <idLabel>} parameter
     * specifies the label name. The label value sent to PubSub can be any string value that
     * uniquely identifies this record.
     *
     * <p>If idLabel is not provided, Dataflow cannot guarantee that no duplicate data will be
     * delivered on the PubSub stream. In this case, deduplication of the stream will be strictly
     * best effort.
     */
    public static Bound<String> idLabel(String idLabel) {
      return new Bound<>(DEFAULT_PUBSUB_CODER).idLabel(idLabel);
    }

    /**
     * Creates and returns a PubsubIO.Read PTransform that uses the given {@code Coder<T>} to
     * decode each PubSub record into a value of type {@code T}.
     *
     * <p>By default, uses {@link StringUtf8Coder}, which just returns the text lines as Java
     * strings.
     *
     * @param <T> the type of the decoded elements, and the elements of the resulting PCollection.
     */
    public static <T> Bound<T> withCoder(Coder<T> coder) {
      return new Bound<>(coder);
    }

    /**
     * A {@link PTransform} that reads from a PubSub source and returns an unbounded PCollection
     * containing the items from the stream.
     */
    @SuppressWarnings("serial")
    public static class Bound<T> extends PTransform<PInput, PCollection<T>> {
      /** The Pubsub topic to read from. */
      String topic;

      /** The Pubsub subscription to read from. */
      String subscription;

      /** The Pubsub label to read timestamps from. */
      String timestampLabel;

      /** The Pubsub label to read ids from. */
      String idLabel;

      /** The coder used to decode each record. */
      @Nullable final Coder<T> coder;

      Bound(Coder<T> coder) {
        this.coder = coder;
      }

      Bound(
          String name,
          String subscription,
          String topic,
          String timestampLabel,
          Coder<T> coder,
          String idLabel) {
        super(name);
        if (subscription != null) {
          Validator.validateSubscriptionName(subscription);
        }
        if (topic != null) {
          Validator.validateTopicName(topic);
        }
        this.subscription = subscription;
        this.topic = topic;
        this.timestampLabel = timestampLabel;
        this.coder = coder;
        this.idLabel = idLabel;
      }

      /**
       * Returns a new PubsubIO.Read PTransform that's like this one but with the given step
       * name. Does not modify the object.
       */
      public Bound<T> named(String name) {
        return new Bound<>(name, subscription, topic, timestampLabel, coder, idLabel);
      }

      /**
       * Returns a new PubsubIO.Read PTransform that's like this one but reading from the given
       * subscription. Does not modify the object.
       */
      public Bound<T> subscription(String subscription) {
        return new Bound<>(name, subscription, topic, timestampLabel, coder, idLabel);
      }

      /**
       * Returns a new PubsubIO.Read PTransform that's like this one but reading from the given
       * topic. Does not modify the object.
       */
      public Bound<T> topic(String topic) {
        return new Bound<>(name, subscription, topic, timestampLabel, coder, idLabel);
      }

      /**
       * Returns a new PubsubIO.Read PTransform that's like this one but reading timestamps from
       * the given PubSub label. Does not modify the object.
       */
      public Bound<T> timestampLabel(String timestampLabel) {
        return new Bound<>(name, subscription, topic, timestampLabel, coder, idLabel);
      }

      /**
       * Returns a new PubsubIO.Read PTransform that's like this one but reading unique ids from
       * the given PubSub label. Does not modify the object.
       */
      public Bound<T> idLabel(String idLabel) {
        return new Bound<>(name, subscription, topic, timestampLabel, coder, idLabel);
      }

      /**
       * Returns a new PubsubIO.Read PTransform that's like this one but that uses the given
       * {@code Coder<X>} to decode each record into a value of type {@code X}. Does not modify
       * this object.
       *
       * @param <X> the type of the decoded elements, and the elements of the resulting
       *     PCollection.
       */
      public <X> Bound<X> withCoder(Coder<X> coder) {
        return new Bound<>(name, subscription, topic, timestampLabel, coder, idLabel);
      }

      @Override
      public PCollection<T> apply(PInput input) {
        if (topic == null && subscription == null) {
          throw new IllegalStateException(
              "need to set either the topic or the subscription for a PubsubIO.Read transform");
        }
        if (topic != null && subscription != null) {
          throw new IllegalStateException(
              "Can't set both the topic and the subscription for a PubsubIO.Read transform");
        }
        return PCollection.<T>createPrimitiveOutputInternal(
                input.getPipeline(), WindowingStrategy.globalDefault(), IsBounded.UNBOUNDED)
            .setCoder(coder);
      }

      @Override
      protected Coder<T> getDefaultOutputCoder() {
        return coder;
      }

      @Override
      protected String getKindString() {
        return "PubsubIO.Read";
      }

      public String getTopic() {
        return topic;
      }

      public String getSubscription() {
        return subscription;
      }

      public String getTimestampLabel() {
        return timestampLabel;
      }

      public String getIdLabel() {
        return idLabel;
      }

      static {
        // TODO: Figure out how to make this work under DirectPipelineRunner.
      }
    }
  }

  /////////////////////////////////////////////////////////////////////////////

  /**
   * A {@link PTransform} that continuously writes a {@code PCollection<String>} to a Pubsub
   * stream.
   */
  // TODO: Support non-String encodings.
  public static class Write {
    public static Bound<String> named(String name) {
      return new Bound<>(DEFAULT_PUBSUB_CODER).named(name);
    }

    /**
     * The topic to publish to. Cloud Pubsub topic names should be {@code
     * /topics/<project>/<topic>}, where {@code <project>} is the name of the publishing project.
     */
    public static Bound<String> topic(String topic) {
      return new Bound<>(DEFAULT_PUBSUB_CODER).topic(topic);
    }

    /**
     * If specified, Dataflow will add a Pubsub label to each output record specifying the
     * logical timestamp of the record. {@code <timestampLabel>} determines the label name. The
     * label value is a numerical value representing the number of milliseconds since the Unix
     * epoch. For example, if using the joda time classes, the org.joda.time.Instant(long)
     * constructor can be used to parse this value. If the output from this sink is being read
     * by another Dataflow source, then PubsubIO.Read.timestampLabel can be used to ensure that
     * the other source reads these timestamps from the appropriate label.
     */
    public static Bound<String> timestampLabel(String timestampLabel) {
      return new Bound<>(DEFAULT_PUBSUB_CODER).timestampLabel(timestampLabel);
    }

    /**
     * If specified, Dataflow will add a Pubsub label to each output record containing a unique
     * identifier for that record. {@code <idLabel>} determines the label name. The label value
     * is an opaque string value. This is useful if the output from this sink is being read by
     * another Dataflow source, in which case PubsubIO.Read.idLabel can be used to ensure that
     * the other source reads these ids from the appropriate label.
     */
    public static Bound<String> idLabel(String idLabel) {
      return new Bound<>(DEFAULT_PUBSUB_CODER).idLabel(idLabel);
    }

    /**
     * Returns a PubsubIO.Write PTransform that uses the given {@code Coder<T>} to encode each
     * of the elements of the input {@code PCollection<T>} into an output PubSub record.
     *
     * <p>By default, uses {@link StringUtf8Coder}, which writes input Java strings directly as
     * records.
     *
     * @param <T> the type of the elements of the input PCollection
     */
    public static <T> Bound<T> withCoder(Coder<T> coder) {
      return new Bound<>(coder);
    }

    /**
     * A {@link PTransform} that writes an unbounded {@code PCollection<String>} to a PubSub
     * stream.
     */
    @SuppressWarnings("serial")
    public static class Bound<T> extends PTransform<PCollection<T>, PDone> {
      /** The Pubsub topic to publish to. */
      String topic;

      String timestampLabel;
      String idLabel;
      final Coder<T> coder;

      Bound(Coder<T> coder) {
        this.coder = coder;
      }

      Bound(String name, String topic, String timestampLabel, String idLabel, Coder<T> coder) {
        super(name);
        if (topic != null) {
          Validator.validateTopicName(topic);
          this.topic = topic;
        }
        this.timestampLabel = timestampLabel;
        this.idLabel = idLabel;
        this.coder = coder;
      }

      /**
       * Returns a new PubsubIO.Write PTransform that's like this one but with the given step
       * name. Does not modify the object.
       */
      public Bound<T> named(String name) {
        return new Bound<>(name, topic, timestampLabel, idLabel, coder);
      }

      /**
       * Returns a new PubsubIO.Write PTransform that's like this one but writing to the given
       * topic. Does not modify the object.
       */
      public Bound<T> topic(String topic) {
        return new Bound<>(name, topic, timestampLabel, idLabel, coder);
      }

      /**
       * Returns a new PubsubIO.Write PTransform that's like this one but publishing timestamps
       * to the given PubSub label. Does not modify the object.
       */
      public Bound<T> timestampLabel(String timestampLabel) {
        return new Bound<>(name, topic, timestampLabel, idLabel, coder);
      }

      /**
       * Returns a new PubsubIO.Write PTransform that's like this one but publishing record ids
       * to the given PubSub label. Does not modify the object.
       */
      public Bound<T> idLabel(String idLabel) {
        return new Bound<>(name, topic, timestampLabel, idLabel, coder);
      }

      /**
       * Returns a new PubsubIO.Write PTransform that's like this one but that uses the given
       * {@code Coder<X>} to encode each of the elements of the input {@code PCollection<X>}
       * into an output record. Does not modify this object.
       *
       * @param <X> the type of the elements of the input PCollection
       */
      public <X> Bound<X> withCoder(Coder<X> coder) {
        return new Bound<>(name, topic, timestampLabel, idLabel, coder);
      }

      @Override
      public PDone apply(PCollection<T> input) {
        if (topic == null) {
          throw new IllegalStateException("need to set the topic of a PubsubIO.Write transform");
        }
        return PDone.in(input.getPipeline());
      }

      @Override
      protected Coder<Void> getDefaultOutputCoder() {
        return VoidCoder.of();
      }

      @Override
      protected String getKindString() {
        return "PubsubIO.Write";
      }

      public String getTopic() {
        return topic;
      }

      public String getTimestampLabel() {
        return timestampLabel;
      }

      public String getIdLabel() {
        return idLabel;
      }

      public Coder<T> getCoder() {
        return coder;
      }

      static {
        // TODO: Figure out how to make this work under DirectPipelineRunner.
      }
    }
  }
}
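The snippet below is a usage sketch, not part of the SDK source: it shows how the PubsubIO.Read and PubsubIO.Write transforms defined above are typically composed in a streaming pipeline. The project name ("my-project"), the topic names, and the "logical_timestamp" label are hypothetical placeholders, and the sketch assumes the remaining Dataflow options (project, staging location, runner) are configured elsewhere.

// Usage sketch only: project, topic, and label names are placeholders.
DataflowPipelineOptions options =
    PipelineOptionsFactory.create().as(DataflowPipelineOptions.class);
options.setStreaming(true); // PubsubIO requires a streaming pipeline.
Pipeline pipeline = Pipeline.create(options);

// Read UTF-8 strings from a topic, taking each record's event timestamp from a Pubsub label.
PCollection<String> messages =
    pipeline.apply(
        PubsubIO.Read.named("ReadFromPubsub")
            .topic("/topics/my-project/incoming-events")
            .timestampLabel("logical_timestamp"));

// ... apply further transforms to `messages` here ...

// Publish the resulting strings to another topic, propagating the same timestamp label so a
// downstream PubsubIO.Read.timestampLabel can recover the event timestamps.
messages.apply(
    PubsubIO.Write.named("WriteToPubsub")
        .topic("/topics/my-project/processed-events")
        .timestampLabel("logical_timestamp"));

pipeline.run();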