@Test
  public void testReadEmptyCollectionSideInput() throws Exception {
    SideInputInfo sideInputInfo = createCollectionSideInputInfo(createSideInputSource());

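    // No expected values are passed to assertThatContains, so the side input should be empty.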
    assertThatContains(
        SideInputUtils.readSideInput(
            PipelineOptionsFactory.create(), sideInputInfo, new BatchModeExecutionContext()));
  }
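
  // Tests that a singleton side input yields its single value.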
  @Test
  public void testReadSingletonSideInput() throws Exception {
    SideInputInfo sideInputInfo = createSingletonSideInputInfo(createSideInputSource(42));

    assertEquals(
        42,
        SideInputUtils.readSideInput(
            PipelineOptionsFactory.create(), sideInputInfo, new BatchModeExecutionContext()));
  }
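
  // Tests that the factory deserializes a user DoFn into a NormalParDoFn that retains
  // the DoFn's state, windowing strategy, and execution context.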
  @Test
  public void testCreateNormalParDoFn() throws Exception {
    String stringState = "some state";
    long longState = 42L;

    TestDoFn fn = new TestDoFn(stringState, longState);

    String serializedFn =
        StringUtils.byteArrayToJsonString(
            SerializableUtils.serializeToByteArray(
                new DoFnInfo(fn, WindowingStrategy.globalDefault())));

    CloudObject cloudUserFn = CloudObject.forClassName("DoFn");
    addString(cloudUserFn, "serialized_fn", serializedFn);

    String tag = "output";
    MultiOutputInfo multiOutputInfo = new MultiOutputInfo();
    multiOutputInfo.setTag(tag);
    List<MultiOutputInfo> multiOutputInfos = Arrays.asList(multiOutputInfo);

    PipelineOptions options = PipelineOptionsFactory.create();
    DataflowExecutionContext context = BatchModeExecutionContext.fromOptions(options);
    CounterSet counters = new CounterSet();
    StateSampler stateSampler = new StateSampler("test", counters.getAddCounterMutator());
    ParDoFn parDoFn =
        factory.create(
            options,
            cloudUserFn,
            "name",
            "transformName",
            null, // no side inputs
            multiOutputInfos,
            1, // single main output
            context,
            counters.getAddCounterMutator(),
            stateSampler);

    // Test that the factory created the correct class
    assertThat(parDoFn, instanceOf(NormalParDoFn.class));

    // Test that the DoFnInfo reflects the one passed in
    NormalParDoFn normalParDoFn = (NormalParDoFn) parDoFn;
    DoFnInfo doFnInfo = normalParDoFn.getDoFnInfo();
    DoFn actualDoFn = doFnInfo.getDoFn();
    assertThat(actualDoFn, instanceOf(TestDoFn.class));
    assertThat(doFnInfo.getWindowingStrategy().getWindowFn(), instanceOf(GlobalWindows.class));
    assertThat(
        doFnInfo.getWindowingStrategy().getTrigger().getSpec(), instanceOf(DefaultTrigger.class));

    // Test that the deserialized user DoFn is as expected
    TestDoFn actualTestDoFn = (TestDoFn) actualDoFn;
    assertEquals(stringState, actualTestDoFn.stringState);
    assertEquals(longState, actualTestDoFn.longState);
    assertEquals(context, normalParDoFn.getExecutionContext());
  }
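
  // Tests reading and splitting an Avro file made up of many small random-sized blocks.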
  @Test
  public void testSplitsWithSmallBlocks() throws Exception {
    PipelineOptions options = PipelineOptionsFactory.create();
    // Test reading from an object file with many small random-sized blocks.
    List<Bird> expected = createRandomRecords(DEFAULT_RECORD_COUNT);
    String filename =
        generateTestFile(
            "tmp.avro",
            expected,
            SyncBehavior.SYNC_RANDOM,
            100 /* max records/block */,
            AvroCoder.of(Bird.class),
            DataFileConstants.NULL_CODEC);
    File file = new File(filename);

    // Small minimum bundle size
    AvroSource<Bird> source =
        AvroSource.from(filename).withSchema(Bird.class).withMinBundleSize(100L);

    // Assert that the source produces the expected records
    assertEquals(expected, SourceTestUtils.readFromSource(source, options));

    List<? extends BoundedSource<Bird>> splits;
    int nonEmptySplits;

    // Split with the minimum bundle size
    splits = source.splitIntoBundles(100L, options);
    assertTrue(splits.size() > 2);
    SourceTestUtils.assertSourcesEqualReferenceSource(source, splits, options);
    nonEmptySplits = 0;
    for (BoundedSource<Bird> subSource : splits) {
      if (SourceTestUtils.readFromSource(subSource, options).size() > 0) {
        nonEmptySplits += 1;
      }
    }
    assertTrue(nonEmptySplits > 2);

    // Split with larger bundle size
    splits = source.splitIntoBundles(file.length() / 4, options);
    assertTrue(splits.size() > 2);
    SourceTestUtils.assertSourcesEqualReferenceSource(source, splits, options);
    nonEmptySplits = 0;
    for (BoundedSource<Bird> subSource : splits) {
      if (SourceTestUtils.readFromSource(subSource, options).size() > 0) {
        nonEmptySplits += 1;
      }
    }
    assertTrue(nonEmptySplits > 2);

    // Split with the file length
    splits = source.splitIntoBundles(file.length(), options);
    assertEquals(1, splits.size());
    SourceTestUtils.assertSourcesEqualReferenceSource(source, splits, options);
  }
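
  // Tests that the factory rejects a spec naming an unknown kind of DoFn.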
  @Test
  public void testCreateUnknownParDoFn() throws Exception {
    CloudObject cloudUserFn = CloudObject.forClassName("UnknownKindOfDoFn");
    try {
      CounterSet counters = new CounterSet();
      StateSampler stateSampler = new StateSampler("test", counters.getAddCounterMutator());
      factory.create(
          PipelineOptionsFactory.create(),
          cloudUserFn,
          "name",
          "transformName",
          null, // no side inputs
          null, // no side outputs
          1, // single main output
          BatchModeExecutionContext.fromOptions(PipelineOptionsFactory.create()),
          counters.getAddCounterMutator(),
          stateSampler);
      fail("should have thrown an exception");
    } catch (Exception exn) {
      assertThat(exn.toString(), Matchers.containsString("No known ParDoFnFactory"));
    }
  }
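
  // Creates a CombineValuesFn ParDoFn for the given phase and KeyedCombineFn.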
  @SuppressWarnings("rawtypes")
  private static ParDoFn createCombineValuesFn(String phase, Combine.KeyedCombineFn combineFn)
      throws Exception {
    // This partially mirrors what
    // com.google.cloud.dataflow.sdk.transforms.Combine.translateHelper
    // does for the KeyedCombineFn. The phase is normally generated by the back-end.
    CloudObject spec = CloudObject.forClassName("CombineValuesFn");
    addString(
        spec, PropertyNames.SERIALIZED_FN, byteArrayToJsonString(serializeToByteArray(combineFn)));
    addString(spec, PropertyNames.PHASE, phase);

    return parDoFnFactory.create(
        PipelineOptionsFactory.create(),
        spec,
        "name",
        "transformName",
        null, // no side inputs
        null, // no side outputs
        1, // single main output
        DataflowExecutionContext.withoutSideInputs(),
        (new CounterSet()).getAddCounterMutator(),
        null);
  }