Java Pipeline Examples, com.google.cloud.dataflow.sdk.Pipeline Java Examples

Example #1

0

Show file

File: TopTest.java Project: cdma/DataflowJavaSDK

  @Test
  @SuppressWarnings("unchecked")
  public void testTopEmpty() {
    Pipeline p = TestPipeline.create();
    PCollection<String> input =
        p.apply(Create.of(Arrays.asList(EMPTY_COLLECTION)).withCoder(StringUtf8Coder.of()));

    PCollection<List<String>> top1 = input.apply(Top.of(1, new OrderByLength()));
    PCollection<List<String>> top2 = input.apply(Top.<String>largest(2));
    PCollection<List<String>> top3 = input.apply(Top.<String>smallest(3));

    PCollection<KV<String, Integer>> inputTable = createEmptyInputTable(p);
    PCollection<KV<String, List<Integer>>> largestPerKey =
        inputTable.apply(Top.<String, Integer>largestPerKey(2));
    PCollection<KV<String, List<Integer>>> smallestPerKey =
        inputTable.apply(Top.<String, Integer>smallestPerKey(2));

    DataflowAssert.thatSingletonIterable(top1).empty();
    DataflowAssert.thatSingletonIterable(top2).empty();
    DataflowAssert.thatSingletonIterable(top3).empty();
    DataflowAssert.that(largestPerKey).empty();
    DataflowAssert.that(smallestPerKey).empty();

    p.run();
  }

Example #2

0

Show file

File: TfIdf.java Project: ssesha/DataflowJavaSDK

    @Override
    public PCollection<KV<URI, String>> apply(PInput input) {
      Pipeline pipeline = input.getPipeline();

      // Create one TextIO.Read transform for each document
      // and add its output to a PCollectionList
      PCollectionList<KV<URI, String>> urisToLines = PCollectionList.empty(pipeline);

      // TextIO.Read supports:
      //  - file: URIs and paths locally
      //  - gs: URIs on the service
      for (final URI uri : uris) {
        String uriString;
        if (uri.getScheme().equals("file")) {
          uriString = new File(uri).getPath();
        } else {
          uriString = uri.toString();
        }

        PCollection<KV<URI, String>> oneUriToLines =
            pipeline
                .apply(TextIO.Read.from(uriString).named("TextIO.Read(" + uriString + ")"))
                .apply(WithKeys.<URI, String>of(uri).setName("WithKeys(" + uriString + ")"));

        urisToLines = urisToLines.and(oneUriToLines);
      }

      return urisToLines.apply(Flatten.<KV<URI, String>>pCollections());
    }

Example #3

0

Show file

File: GATKReadCoderUnitTest.java Project: nenewell/hellbender

  @Test(dataProvider = "reads")
  public void testGATKReadCoding(final List<GATKRead> reads) {
    // The simplest way to figure out if a class is coded correctly is to create a PCollection
    // of that type and see if it matches the List version.
    final Pipeline p = GATKTestPipeline.create();
    DataflowUtils.registerGATKCoders(p);

    // Need to explicitly set the coder to GATKReadCoder, otherwise Create fails to infer
    // a coder properly in the case where the List contains a mix of different GATKRead
    // implementations.
    final PCollection<GATKRead> dataflowReads =
        p.apply(Create.of(reads).withCoder(new GATKReadCoder()));
    DataflowAssert.that(dataflowReads).containsInAnyOrder(reads);

    final PCollection<GATKRead> dataflowReadsAfterTransform =
        dataflowReads
            .apply(
                ParDo.of(
                    new DoFn<GATKRead, GATKRead>() {
                      private static final long serialVersionUID = 1l;

                      @Override
                      public void processElement(ProcessContext c) throws Exception {
                        c.output(c.element());
                      }
                    }))
            .setCoder(new GATKReadCoder());
    DataflowAssert.that(dataflowReadsAfterTransform).containsInAnyOrder(reads);

    p.run();
  }

Example #4

0

Show file

File: TextIOTest.java Project: latindignity/DataflowJavaSDK

  @Test
  public void testCompressedRead() throws Exception {
    String[] lines = {"Irritable eagle", "Optimistic jay", "Fanciful hawk"};
    File tmpFile = tmpFolder.newFile("test");
    String filename = tmpFile.getPath();

    List<String> expected = new ArrayList<>();
    try (PrintStream writer =
        new PrintStream(new GZIPOutputStream(new FileOutputStream(tmpFile)))) {
      for (String line : lines) {
        writer.println(line);
        expected.add(line);
      }
    }

    Pipeline p = TestPipeline.create();

    TextIO.Read.Bound<String> read =
        TextIO.Read.from(filename).withCompressionType(CompressionType.GZIP);
    PCollection<String> output = p.apply(read);

    DataflowAssert.that(output).containsInAnyOrder(expected);
    p.run();

    tmpFile.delete();
  }

Example #5

0

Show file

File: DataflowPipelineTranslatorTest.java Project: Jason-CloudTP/DataflowJavaSDK

  @Test
  public void testSettingOfSdkPipelineOptions() throws IOException {
    DataflowPipelineOptions options = buildPipelineOptions();
    options.setRunner(DataflowPipelineRunner.class);

    Pipeline p = buildPipeline(options);
    p.traverseTopologically(new RecordingPipelineVisitor());
    Job job =
        DataflowPipelineTranslator.fromOptions(options)
            .translate(p, Collections.<DataflowPackage>emptyList())
            .getJob();

    // Note that the contents of this materialized map may be changed by the act of reading an
    // option, which will cause the default to get materialized whereas it would otherwise be
    // left absent. It is permissible to simply alter this test to reflect current behavior.
    assertEquals(
        ImmutableMap.of(
            "options",
            ImmutableMap.builder()
                .put("appName", "DataflowPipelineTranslatorTest")
                .put("project", "some-project")
                .put(
                    "pathValidatorClass",
                    "com.google.cloud.dataflow.sdk.util.DataflowPathValidator")
                .put("runner", "com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner")
                .put("jobName", "some-job-name")
                .put("tempLocation", "gs://somebucket/some/path")
                .put("stagingLocation", "gs://somebucket/some/path/staging")
                .put("stableUniqueNames", "WARNING")
                .put("streaming", false)
                .put("numberOfWorkerHarnessThreads", 0)
                .build()),
        job.getEnvironment().getSdkPipelineOptions());
  }

Example #6

0

Show file

File: TextIOTest.java Project: latindignity/DataflowJavaSDK

  /** This tests a few corner cases that should not crash. */
  @Test
  public void testGoodWildcards() throws Exception {
    TestDataflowPipelineOptions options = buildTestPipelineOptions();
    options.setGcsUtil(buildMockGcsUtil());

    Pipeline pipeline = Pipeline.create(options);

    applyRead(pipeline, "gs://bucket/foo");
    applyRead(pipeline, "gs://bucket/foo/");
    applyRead(pipeline, "gs://bucket/foo/*");
    applyRead(pipeline, "gs://bucket/foo/?");
    applyRead(pipeline, "gs://bucket/foo/[0-9]");
    applyRead(pipeline, "gs://bucket/foo/*baz*");
    applyRead(pipeline, "gs://bucket/foo/*baz?");
    applyRead(pipeline, "gs://bucket/foo/[0-9]baz?");
    applyRead(pipeline, "gs://bucket/foo/baz/*");
    applyRead(pipeline, "gs://bucket/foo/baz/*wonka*");
    applyRead(pipeline, "gs://bucket/foo/*baz/wonka*");
    applyRead(pipeline, "gs://bucket/foo*/baz");
    applyRead(pipeline, "gs://bucket/foo?/baz");
    applyRead(pipeline, "gs://bucket/foo[0-9]/baz");

    // Check that running doesn't fail.
    pipeline.run();
  }

Example #7

0

Show file

File: ApproximateUniqueTest.java Project: latindignity/DataflowJavaSDK

  /**
   * Applies {@code ApproximateUnique(sampleSize)} verifying that the estimation error falls within
   * the maximum allowed error of {@code 2/sqrt(sampleSize)}.
   */
  private void runApproximateUniquePipeline(int sampleSize) {
    Pipeline p = TestPipeline.create();
    PCollection<String> collection = readPCollection(p);

    final PCollectionView<Long> exact =
        collection
            .apply(RemoveDuplicates.<String>create())
            .apply(Combine.globally(new CountElements<String>()))
            .apply(View.<Long>asSingleton());

    PCollection<Long> approximate =
        collection.apply(ApproximateUnique.<String>globally(sampleSize));

    PCollection<KV<Long, Long>> approximateAndExact =
        approximate.apply(
            ParDo.of(
                    new DoFn<Long, KV<Long, Long>>() {
                      @Override
                      public void processElement(ProcessContext c) {
                        c.output(KV.of(c.element(), c.sideInput(exact)));
                      }
                    })
                .withSideInputs(exact));

    DataflowAssert.that(approximateAndExact).satisfies(new VerifyEstimatePerKeyFn(sampleSize));

    p.run();
  }

Example #8

0

Show file

File: AddContextDataToReadUnitTest.java Project: davidadamsphd/hellbender

  @Test(dataProvider = "bases")
  public void addContextDataTest(
      List<GATKRead> reads,
      List<Variant> variantList,
      List<KV<GATKRead, ReferenceBases>> kvReadRefBases,
      List<KV<GATKRead, ReadContextData>> kvReadContextData,
      List<SimpleInterval> intervals,
      List<KV<GATKRead, Iterable<Variant>>> kvReadiVariant) {
    Pipeline p = GATKTestPipeline.create();
    DataflowUtils.registerGATKCoders(p);

    PCollection<GATKRead> pReads =
        DataflowTestUtils.pCollectionCreateAndVerify(p, reads, new GATKReadCoder());
    PCollection<KV<GATKRead, ReferenceBases>> pReadRef =
        DataflowTestUtils.pCollectionCreateAndVerify(
            p,
            kvReadRefBases,
            KvCoder.of(new GATKReadCoder(), SerializableCoder.of(ReferenceBases.class)));

    PCollection<KV<GATKRead, Iterable<Variant>>> pReadVariants =
        p.apply(
            Create.of(kvReadiVariant)
                .withCoder(KvCoder.of(new GATKReadCoder(), IterableCoder.of(new VariantCoder()))));

    PCollection<KV<GATKRead, ReadContextData>> joinedResults =
        AddContextDataToRead.join(pReads, pReadRef, pReadVariants);
    PCollection<KV<GATKRead, ReadContextData>> pkvReadContextData =
        p.apply(
            Create.of(kvReadContextData)
                .withCoder(KvCoder.of(new GATKReadCoder(), new ReadContextDataCoder())));
    DataflowTestUtils.keyReadContextDataMatcher(joinedResults, pkvReadContextData);
    p.run();
  }

Example #9

0

Show file

File: TextIOTest.java Project: latindignity/DataflowJavaSDK

  <T> void runTestRead(T[] expected, Coder<T> coder) throws Exception {
    File tmpFile = tmpFolder.newFile("file.txt");
    String filename = tmpFile.getPath();

    try (PrintStream writer = new PrintStream(new FileOutputStream(tmpFile))) {
      for (T elem : expected) {
        byte[] encodedElem = CoderUtils.encodeToByteArray(coder, elem);
        String line = new String(encodedElem);
        writer.println(line);
      }
    }

    Pipeline p = TestPipeline.create();

    TextIO.Read.Bound<T> read;
    if (coder.equals(StringUtf8Coder.of())) {
      TextIO.Read.Bound<String> readStrings = TextIO.Read.from(filename);
      // T==String
      read = (TextIO.Read.Bound<T>) readStrings;
    } else {
      read = TextIO.Read.from(filename).withCoder(coder);
    }

    PCollection<T> output = p.apply(read);

    DataflowAssert.that(output).containsInAnyOrder(expected);
    p.run();
  }

Example #10

0

Show file

File: TopTest.java Project: cdma/DataflowJavaSDK

  @Test
  @SuppressWarnings("unchecked")
  public void testTop() {
    Pipeline p = TestPipeline.create();
    PCollection<String> input =
        p.apply(Create.of(Arrays.asList(COLLECTION)).withCoder(StringUtf8Coder.of()));

    PCollection<List<String>> top1 = input.apply(Top.of(1, new OrderByLength()));
    PCollection<List<String>> top2 = input.apply(Top.<String>largest(2));
    PCollection<List<String>> top3 = input.apply(Top.<String>smallest(3));

    PCollection<KV<String, Integer>> inputTable = createInputTable(p);
    PCollection<KV<String, List<Integer>>> largestPerKey =
        inputTable.apply(Top.<String, Integer>largestPerKey(2));
    PCollection<KV<String, List<Integer>>> smallestPerKey =
        inputTable.apply(Top.<String, Integer>smallestPerKey(2));

    DataflowAssert.thatSingletonIterable(top1).containsInAnyOrder(Arrays.asList("bb"));
    DataflowAssert.thatSingletonIterable(top2).containsInAnyOrder("z", "c");
    DataflowAssert.thatSingletonIterable(top3).containsInAnyOrder("a", "bb", "c");
    DataflowAssert.that(largestPerKey)
        .containsInAnyOrder(KV.of("a", Arrays.asList(3, 2)), KV.of("b", Arrays.asList(100, 10)));
    DataflowAssert.that(smallestPerKey)
        .containsInAnyOrder(KV.of("a", Arrays.asList(1, 2)), KV.of("b", Arrays.asList(1, 10)));

    p.run();
  }

Example #11

0

Show file

File: CloudBigtableIOTest.java Project: vphani/cloud-bigtable-client

 @SuppressWarnings("unchecked")
 @Before
 public void setup() {
   MockitoAnnotations.initMocks(this);
   when(underTest.getOptions()).thenReturn(cbtOptions);
   when(underTest.getCoderRegistry()).thenReturn(registry);
   when(cbtOptions.as(any(Class.class))).thenReturn(cbtOptions);
   CloudBigtableIO.initializeForWrite(underTest);
 }

Example #12

0

Show file

File: TextIOTest.java Project: latindignity/DataflowJavaSDK

  /** Recursive wildcards are not supported. This tests "**". */
  @Test
  public void testBadWildcardRecursive() throws Exception {
    Pipeline pipeline = TestPipeline.create();

    pipeline.apply(TextIO.Read.from("gs://bucket/foo**/baz"));

    // Check that running does fail.
    expectedException.expect(IllegalArgumentException.class);
    expectedException.expectMessage("wildcard");
    pipeline.run();
  }

Example #13

0

Show file

File: DataflowPipelineTranslatorTest.java Project: Jason-CloudTP/DataflowJavaSDK

  @Test
  public void testPartiallyBoundFailure() throws IOException {
    Pipeline p = DataflowPipeline.create(buildPipelineOptions());

    PCollection<Integer> input = p.begin().apply(Create.of(1, 2, 3));

    thrown.expect(IllegalStateException.class);
    input.apply(new PartiallyBoundOutputCreator());

    Assert.fail("Failure expected from use of partially bound output");
  }

Example #14

0

Show file

File: TopTest.java Project: cdma/DataflowJavaSDK

  @Test
  public void testCountConstraint() {
    Pipeline p = TestPipeline.create();
    PCollection<String> input =
        p.apply(Create.of(Arrays.asList(COLLECTION)).withCoder(StringUtf8Coder.of()));

    expectedEx.expect(IllegalArgumentException.class);
    expectedEx.expectMessage(Matchers.containsString(">= 0"));

    input.apply(Top.of(-1, new OrderByLength()));
  }

Example #15

0

Show file

File: TfIdf.java Project: ssesha/DataflowJavaSDK

  public static void main(String[] args) throws Exception {
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    Pipeline pipeline = Pipeline.create(options);
    pipeline.getCoderRegistry().registerCoder(URI.class, StringDelegateCoder.of(URI.class));

    pipeline
        .apply(new ReadDocuments(listInputDocuments(options)))
        .apply(new ComputeTfIdf())
        .apply(new WriteTfIdf(options.getOutput()));

    pipeline.run();
  }

Example #16

0

Show file

File: TopTest.java Project: cdma/DataflowJavaSDK

  // This is a purely compile-time test.  If the code compiles, then it worked.
  @Test
  public void testPerKeySerializabilityRequirement() {
    Pipeline p = TestPipeline.create();
    p.apply(
        "CreateCollection", Create.of(Arrays.asList(COLLECTION)).withCoder(StringUtf8Coder.of()));

    PCollection<KV<String, Integer>> inputTable = createInputTable(p);
    inputTable.apply(Top.<String, Integer, IntegerComparator>perKey(1, new IntegerComparator()));

    inputTable.apply(
        "PerKey2", Top.<String, Integer, IntegerComparator2>perKey(1, new IntegerComparator2()));
  }

Example #17

0

Show file

File: ApproximateUniqueTest.java Project: latindignity/DataflowJavaSDK

  @Test
  @Category(RunnableOnService.class)
  public void testApproximateUniqueWithSmallInput() {
    Pipeline p = TestPipeline.create();

    PCollection<Integer> input = p.apply(Create.of(Arrays.asList(1, 2, 3, 3)));

    PCollection<Long> estimate = input.apply(ApproximateUnique.<Integer>globally(1000));

    DataflowAssert.thatSingleton(estimate).isEqualTo(3L);

    p.run();
  }

Example #18

0

Show file

File: DataflowPipelineTranslatorTest.java Project: Jason-CloudTP/DataflowJavaSDK

  /** Recursive wildcards are not supported. This tests "**". */
  @Test
  public void testBadWildcardRecursive() throws Exception {
    DataflowPipelineOptions options = buildPipelineOptions();
    Pipeline pipeline = DataflowPipeline.create(options);
    DataflowPipelineTranslator t = DataflowPipelineTranslator.fromOptions(options);

    pipeline.apply(TextIO.Read.from("gs://bucket/foo**/baz"));

    // Check that translation does fail.
    thrown.expect(IllegalArgumentException.class);
    thrown.expectMessage("Unsupported wildcard usage");
    t.translate(pipeline, Collections.<DataflowPackage>emptyList());
  }

Example #19

0

Show file

File: DataflowPipelineTranslatorTest.java Project: Jason-CloudTP/DataflowJavaSDK

  @Test
  public void testScalingAlgorithmMissing() throws IOException {
    DataflowPipelineOptions options = buildPipelineOptions();

    Pipeline p = buildPipeline(options);
    p.traverseTopologically(new RecordingPipelineVisitor());
    Job job =
        DataflowPipelineTranslator.fromOptions(options)
            .translate(p, Collections.<DataflowPackage>emptyList())
            .getJob();

    assertEquals(1, job.getEnvironment().getWorkerPools().size());
    assertNull(job.getEnvironment().getWorkerPools().get(0).getAutoscalingSettings());
  }

Example #20

0

Show file

File: PubsubFileInjector.java Project: ssesha/DataflowJavaSDK

  /** Sets up and starts streaming pipeline. */
  public static void main(String[] args) {
    PubsubFileInjectorOptions options =
        PipelineOptionsFactory.fromArgs(args).withValidation().as(PubsubFileInjectorOptions.class);

    Pipeline pipeline = Pipeline.create(options);

    pipeline
        .apply(TextIO.Read.from(options.getInput()))
        .apply(
            IntraBundleParallelization.of(PubsubFileInjector.publish(options.getOutputTopic()))
                .withMaxParallelism(20));

    pipeline.run();
  }

Example #21

0

Show file

File: JoinExamplesITCase.java Project: liaobs/flink-dataflow

  @Override
  protected void testProgram() throws Exception {

    Pipeline p = FlinkTestPipeline.create();

    PCollection<TableRow> input1 = p.apply(Create.of(EVENT_ARRAY));
    PCollection<TableRow> input2 = p.apply(Create.of(CC_ARRAY));

    PCollection<String> output = JoinExamples.joinEvents(input1, input2);

    output.apply(TextIO.Write.to(resultPath));

    p.run();
  }

Example #22

0

Show file

File: TextIOTest.java Project: latindignity/DataflowJavaSDK

  @Test
  public void testUnsupportedFilePattern() throws IOException {
    File outFolder = tmpFolder.newFolder();
    // Windows doesn't like resolving paths with * in them.
    String filename = outFolder.toPath().resolve("output@5").toString();

    Pipeline p = TestPipeline.create();

    PCollection<String> input =
        p.apply(Create.of(Arrays.asList(LINES_ARRAY)).withCoder(StringUtf8Coder.of()));

    expectedException.expect(IllegalArgumentException.class);
    expectedException.expectMessage("Output name components are not allowed to contain");
    input.apply(TextIO.Write.to(filename));
  }

Example #23

0

Show file

File: DataflowExampleUtils.java Project: kennknowles/DataflowJavaSDK-examples

 /**
  * Runs the batch injector for the streaming pipeline.
  *
  * <p>The injector pipeline will read from the given text file, and inject data into the Google
  * Cloud Pub/Sub topic.
  */
 public void runInjectorPipeline(String inputFile, String topic) {
   DataflowPipelineOptions copiedOptions = options.cloneAs(DataflowPipelineOptions.class);
   copiedOptions.setStreaming(false);
   copiedOptions.setNumWorkers(
       options.as(ExamplePubsubTopicOptions.class).getInjectorNumWorkers());
   copiedOptions.setJobName(options.getJobName() + "-injector");
   Pipeline injectorPipeline = Pipeline.create(copiedOptions);
   injectorPipeline
       .apply(TextIO.Read.from(inputFile))
       .apply(
           IntraBundleParallelization.of(PubsubFileInjector.publish(topic))
               .withMaxParallelism(20));
   DataflowPipelineJob injectorJob = (DataflowPipelineJob) injectorPipeline.run();
   jobsToCancel.add(injectorJob);
 }

Example #24

0

Show file

File: KafkaWindowedWordCountExample.java Project: Ravi-Macha/flink-dataflow

  public static void main(String[] args) {
    PipelineOptionsFactory.register(KafkaStreamingWordCountOptions.class);
    KafkaStreamingWordCountOptions options =
        PipelineOptionsFactory.fromArgs(args).as(KafkaStreamingWordCountOptions.class);
    options.setJobName("KafkaExample - WindowSize: " + options.getWindowSize() + " seconds");
    options.setStreaming(true);
    options.setCheckpointingInterval(1000L);
    options.setNumberOfExecutionRetries(5);
    options.setExecutionRetryDelay(3000L);
    options.setRunner(FlinkPipelineRunner.class);

    System.out.println(
        options.getKafkaTopic()
            + " "
            + options.getZookeeper()
            + " "
            + options.getBroker()
            + " "
            + options.getGroup());
    Pipeline pipeline = Pipeline.create(options);

    Properties p = new Properties();
    p.setProperty("zookeeper.connect", options.getZookeeper());
    p.setProperty("bootstrap.servers", options.getBroker());
    p.setProperty("group.id", options.getGroup());

    // this is the Flink consumer that reads the input to
    // the program from a kafka topic.
    FlinkKafkaConsumer08<String> kafkaConsumer =
        new FlinkKafkaConsumer08<>(options.getKafkaTopic(), new SimpleStringSchema(), p);

    PCollection<String> words =
        pipeline
            .apply(Read.from(new UnboundedFlinkSource<>(kafkaConsumer)).named("StreamingWordCount"))
            .apply(ParDo.of(new ExtractWordsFn()))
            .apply(
                Window.<String>into(
                        FixedWindows.of(Duration.standardSeconds(options.getWindowSize())))
                    .triggering(AfterWatermark.pastEndOfWindow())
                    .withAllowedLateness(Duration.ZERO)
                    .discardingFiredPanes());

    PCollection<KV<String, Long>> wordCounts = words.apply(Count.<String>perElement());

    wordCounts.apply(ParDo.of(new FormatAsStringFn())).apply(TextIO.Write.to("./outputKafka.txt"));

    pipeline.run();
  }

Example #25

0

Show file

File: TopTest.java Project: cdma/DataflowJavaSDK

  @Test
  public void testTopEmptyWithIncompatibleWindows() {
    Pipeline p = TestPipeline.create();
    Bound<String> windowingFn = Window.<String>into(FixedWindows.of(Duration.standardDays(10L)));
    PCollection<String> input =
        p.apply(Create.timestamped(Collections.<String>emptyList(), Collections.<Long>emptyList()))
            .apply(windowingFn);

    expectedEx.expect(IllegalStateException.class);
    expectedEx.expectMessage("Top");
    expectedEx.expectMessage("GlobalWindows");
    expectedEx.expectMessage("withoutDefaults");
    expectedEx.expectMessage("asSingletonView");

    input.apply(Top.of(1, new OrderByLength()));
  }

Example #26

0

Show file

File: DataflowPipelineTranslatorTest.java Project: Jason-CloudTP/DataflowJavaSDK

  @Test
  public void testMultiGraphPipelineSerialization() throws IOException {
    Pipeline p = DataflowPipeline.create(buildPipelineOptions());

    PCollection<Integer> input = p.begin().apply(Create.of(1, 2, 3));

    input.apply(new UnrelatedOutputCreator());
    input.apply(new UnboundOutputCreator());

    DataflowPipelineTranslator t =
        DataflowPipelineTranslator.fromOptions(
            PipelineOptionsFactory.as(DataflowPipelineOptions.class));

    // Check that translation doesn't fail.
    t.translate(p, Collections.<DataflowPackage>emptyList());
  }

Example #27

0

Show file

File: DataflowPipelineTranslatorTest.java Project: Jason-CloudTP/DataflowJavaSDK

  @Test
  public void testZoneConfig() throws IOException {
    final String testZone = "test-zone-1";

    DataflowPipelineOptions options = buildPipelineOptions();
    options.setZone(testZone);

    Pipeline p = buildPipeline(options);
    p.traverseTopologically(new RecordingPipelineVisitor());
    Job job =
        DataflowPipelineTranslator.fromOptions(options)
            .translate(p, Collections.<DataflowPackage>emptyList())
            .getJob();

    assertEquals(1, job.getEnvironment().getWorkerPools().size());
    assertEquals(testZone, job.getEnvironment().getWorkerPools().get(0).getZone());
  }

Example #28

0

Show file

File: DataflowPipelineTranslatorTest.java Project: Jason-CloudTP/DataflowJavaSDK

  @Test
  public void testDiskSizeGbConfig() throws IOException {
    final Integer diskSizeGb = 1234;

    DataflowPipelineOptions options = buildPipelineOptions();
    options.setDiskSizeGb(diskSizeGb);

    Pipeline p = buildPipeline(options);
    p.traverseTopologically(new RecordingPipelineVisitor());
    Job job =
        DataflowPipelineTranslator.fromOptions(options)
            .translate(p, Collections.<DataflowPackage>emptyList())
            .getJob();

    assertEquals(1, job.getEnvironment().getWorkerPools().size());
    assertEquals(diskSizeGb, job.getEnvironment().getWorkerPools().get(0).getDiskSizeGb());
  }

Example #29

0

Show file

File: ApproximateUniqueTest.java Project: latindignity/DataflowJavaSDK

  private void runApproximateUniqueWithDuplicates(
      int elementCount, int uniqueCount, int sampleSize) {

    assert elementCount >= uniqueCount;
    List<Double> elements = Lists.newArrayList();
    for (int i = 0; i < elementCount; i++) {
      elements.add(1.0 / (i % uniqueCount + 1));
    }
    Collections.shuffle(elements);

    Pipeline p = TestPipeline.create();
    PCollection<Double> input = p.apply(Create.of(elements));
    PCollection<Long> estimate = input.apply(ApproximateUnique.<Double>globally(sampleSize));

    DataflowAssert.thatSingleton(estimate).satisfies(new VerifyEstimateFn(uniqueCount, sampleSize));

    p.run();
  }

Example #30

0

Show file

File: TextIOTest.java Project: latindignity/DataflowJavaSDK

  @Test
  public void testReadNamed() {
    Pipeline p = TestPipeline.create();

    {
      PCollection<String> output1 = p.apply(TextIO.Read.from("/tmp/file.txt"));
      assertEquals("TextIO.Read.out", output1.getName());
    }

    {
      PCollection<String> output2 = p.apply(TextIO.Read.named("MyRead").from("/tmp/file.txt"));
      assertEquals("MyRead.out", output2.getName());
    }

    {
      PCollection<String> output3 = p.apply(TextIO.Read.from("/tmp/file.txt").named("HerRead"));
      assertEquals("HerRead.out", output3.getName());
    }
  }