@Test
public void testReadEmptyCollectionSideInput() throws Exception {
  SideInputInfo sideInputInfo = createCollectionSideInputInfo(createSideInputSource());
  // An empty collection side input should read back with no elements.
  assertThatContains(
      SideInputUtils.readSideInput(
          PipelineOptionsFactory.create(), sideInputInfo, new BatchModeExecutionContext()));
}
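// A minimal sketch of the assertThatContains helper the test above relies on,
// in case it is not visible in this excerpt. It assumes the helper takes the
// raw value returned by readSideInput, treats it as an Iterable for a
// collection side input, and checks that it contains exactly the expected
// values (here, none). Assumes Hamcrest's containsInAnyOrder and JUnit's
// assertThat are statically imported; the real helper may differ.
@SuppressWarnings("unchecked")
private static void assertThatContains(Object sideInputValue, Object... expected) {
  List<Object> actual = new ArrayList<>();
  for (Object element : (Iterable<Object>) sideInputValue) {
    actual.add(element);
  }
  // With no expected values, this matches only an empty iterable.
  assertThat(actual, containsInAnyOrder(expected));
}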
@Test
public void testReadSingletonSideInput() throws Exception {
  SideInputInfo sideInputInfo = createSingletonSideInputInfo(createSideInputSource(42));
  // A singleton side input should read back as its single value.
  assertEquals(
      42,
      SideInputUtils.readSideInput(
          PipelineOptionsFactory.create(), sideInputInfo, new BatchModeExecutionContext()));
}
@Test
public void testCreateNormalParDoFn() throws Exception {
  String stringState = "some state";
  long longState = 42L;

  TestDoFn fn = new TestDoFn(stringState, longState);
  String serializedFn =
      StringUtils.byteArrayToJsonString(
          SerializableUtils.serializeToByteArray(
              new DoFnInfo(fn, WindowingStrategy.globalDefault())));

  CloudObject cloudUserFn = CloudObject.forClassName("DoFn");
  addString(cloudUserFn, "serialized_fn", serializedFn);

  String tag = "output";
  MultiOutputInfo multiOutputInfo = new MultiOutputInfo();
  multiOutputInfo.setTag(tag);
  List<MultiOutputInfo> multiOutputInfos = Arrays.asList(multiOutputInfo);

  PipelineOptions options = PipelineOptionsFactory.create();
  DataflowExecutionContext context = BatchModeExecutionContext.fromOptions(options);
  CounterSet counters = new CounterSet();
  StateSampler stateSampler = new StateSampler("test", counters.getAddCounterMutator());
  ParDoFn parDoFn =
      factory.create(
          options,
          cloudUserFn,
          "name",
          "transformName",
          null,
          multiOutputInfos,
          1,
          context,
          counters.getAddCounterMutator(),
          stateSampler);

  // Test that the factory created the correct class
  assertThat(parDoFn, instanceOf(NormalParDoFn.class));

  // Test that the DoFnInfo reflects the one passed in
  NormalParDoFn normalParDoFn = (NormalParDoFn) parDoFn;
  DoFnInfo doFnInfo = normalParDoFn.getDoFnInfo();
  DoFn actualDoFn = doFnInfo.getDoFn();
  assertThat(actualDoFn, instanceOf(TestDoFn.class));
  assertThat(doFnInfo.getWindowingStrategy().getWindowFn(), instanceOf(GlobalWindows.class));
  assertThat(
      doFnInfo.getWindowingStrategy().getTrigger().getSpec(), instanceOf(DefaultTrigger.class));

  // Test that the deserialized user DoFn is as expected
  TestDoFn actualTestDoFn = (TestDoFn) actualDoFn;
  assertEquals(stringState, actualTestDoFn.stringState);
  assertEquals(longState, actualTestDoFn.longState);
  assertEquals(context, normalParDoFn.getExecutionContext());
}
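// A minimal sketch of the TestDoFn the test above assumes, shown for reference
// in case its definition is not visible in this excerpt. Only the constructor
// and the stringState/longState fields are implied by the test; the element
// types and the processElement body are assumptions made for illustration.
static class TestDoFn extends DoFn<Integer, String> {
  final String stringState;
  final long longState;

  TestDoFn(String stringState, long longState) {
    this.stringState = stringState;
    this.longState = longState;
  }

  @Override
  public void processElement(ProcessContext c) {
    // The factory test only round-trips the DoFn; it never runs it.
    throw new UnsupportedOperationException("not expected to be invoked in this test");
  }
}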
@Test
public void testSplitsWithSmallBlocks() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  // Test reading from an object file with many small random-sized blocks.
  List<Bird> expected = createRandomRecords(DEFAULT_RECORD_COUNT);
  String filename =
      generateTestFile(
          "tmp.avro",
          expected,
          SyncBehavior.SYNC_RANDOM,
          100 /* max records/block */,
          AvroCoder.of(Bird.class),
          DataFileConstants.NULL_CODEC);
  File file = new File(filename);

  // Small minimum bundle size
  AvroSource<Bird> source =
      AvroSource.from(filename).withSchema(Bird.class).withMinBundleSize(100L);

  // Assert that the source produces the expected records
  assertEquals(expected, SourceTestUtils.readFromSource(source, options));

  List<? extends BoundedSource<Bird>> splits;
  int nonEmptySplits;

  // Split with the minimum bundle size
  splits = source.splitIntoBundles(100L, options);
  assertTrue(splits.size() > 2);
  SourceTestUtils.assertSourcesEqualReferenceSource(source, splits, options);
  nonEmptySplits = 0;
  for (BoundedSource<Bird> subSource : splits) {
    if (SourceTestUtils.readFromSource(subSource, options).size() > 0) {
      nonEmptySplits += 1;
    }
  }
  assertTrue(nonEmptySplits > 2);

  // Split with larger bundle size
  splits = source.splitIntoBundles(file.length() / 4, options);
  assertTrue(splits.size() > 2);
  SourceTestUtils.assertSourcesEqualReferenceSource(source, splits, options);
  nonEmptySplits = 0;
  for (BoundedSource<Bird> subSource : splits) {
    if (SourceTestUtils.readFromSource(subSource, options).size() > 0) {
      nonEmptySplits += 1;
    }
  }
  assertTrue(nonEmptySplits > 2);

  // Split with the file length
  splits = source.splitIntoBundles(file.length(), options);
  assertTrue(splits.size() == 1);
  SourceTestUtils.assertSourcesEqualReferenceSource(source, splits, options);
}
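// The two counting loops in the test above are identical. A small helper like
// this sketch could replace them; countNonEmptySplits is a hypothetical name
// and is not part of SourceTestUtils or the original test class.
private static int countNonEmptySplits(
    List<? extends BoundedSource<Bird>> splits, PipelineOptions options) throws Exception {
  int nonEmptySplits = 0;
  for (BoundedSource<Bird> subSource : splits) {
    // A split is non-empty if reading it back yields at least one record.
    if (SourceTestUtils.readFromSource(subSource, options).size() > 0) {
      nonEmptySplits += 1;
    }
  }
  return nonEmptySplits;
}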
@Test
public void testCreateUnknownParDoFn() throws Exception {
  // A CloudObject naming an unrecognized DoFn kind should be rejected.
  CloudObject cloudUserFn = CloudObject.forClassName("UnknownKindOfDoFn");
  try {
    CounterSet counters = new CounterSet();
    StateSampler stateSampler = new StateSampler("test", counters.getAddCounterMutator());
    factory.create(
        PipelineOptionsFactory.create(),
        cloudUserFn,
        "name",
        "transformName",
        null,
        null,
        1,
        BatchModeExecutionContext.fromOptions(PipelineOptionsFactory.create()),
        counters.getAddCounterMutator(),
        stateSampler);
    fail("should have thrown an exception");
  } catch (Exception exn) {
    assertThat(exn.toString(), Matchers.containsString("No known ParDoFnFactory"));
  }
}
@SuppressWarnings("rawtypes") private static ParDoFn createCombineValuesFn(String phase, Combine.KeyedCombineFn combineFn) throws Exception { // This partially mirrors the work that // com.google.cloud.dataflow.sdk.transforms.Combine.translateHelper // does, at least for the KeyedCombineFn. The phase is generated // by the back-end. CloudObject spec = CloudObject.forClassName("CombineValuesFn"); addString( spec, PropertyNames.SERIALIZED_FN, byteArrayToJsonString(serializeToByteArray(combineFn))); addString(spec, PropertyNames.PHASE, phase); return parDoFnFactory.create( PipelineOptionsFactory.create(), spec, "name", "transformName", null, // no side inputs null, // no side outputs 1, // single main output DataflowExecutionContext.withoutSideInputs(), (new CounterSet()).getAddCounterMutator(), null); }