/**
 * Translates a {@code Window.Bound} transform by flat-mapping the input data set through a
 * {@code FlinkAssignWindows} function built from the window function of the transform's output.
 *
 * @param transform the windowing transform being translated
 * @param context the translation context used to look up inputs/outputs and register the result
 */
@Override
public void translateNode(Window.Bound<T> transform, FlinkBatchTranslationContext context) {
  TypeInformation<WindowedValue<T>> outputTypeInfo =
      context.getTypeInfo(context.getOutput(transform));
  DataSet<WindowedValue<T>> source = context.getInputDataSet(context.getInput(transform));

  // The windowing strategy is taken from the output of the transform.
  @SuppressWarnings("unchecked")
  final WindowingStrategy<T, ? extends BoundedWindow> outputStrategy =
      (WindowingStrategy<T, ? extends BoundedWindow>)
          context.getOutput(transform).getWindowingStrategy();
  WindowFn<T, ? extends BoundedWindow> assigner = outputStrategy.getWindowFn();
  FlinkAssignWindows<T, ? extends BoundedWindow> assignFunction =
      new FlinkAssignWindows<>(assigner);

  DataSet<WindowedValue<T>> windowed =
      source
          .flatMap(assignFunction)
          .name(context.getOutput(transform).getName())
          .returns(outputTypeInfo);

  context.setOutputDataSet(context.getOutput(transform), windowed);
}
/**
 * Returns the windowing strategy to use for output.
 *
 * <p>When the input strategy's window function is an {@code InvalidWindows}, the returned
 * strategy uses the original window function obtained from it; any other strategy is returned
 * unchanged.
 *
 * @param inputStrategy the strategy of the input
 * @return the strategy to use for the output
 */
private <W extends BoundedWindow> WindowingStrategy<?, W> getOutputWindowing(
    WindowingStrategy<?, W> inputStrategy) {
  if (!(inputStrategy.getWindowFn() instanceof InvalidWindows)) {
    return inputStrategy;
  }
  @SuppressWarnings("unchecked")
  InvalidWindows<W> wrapper = (InvalidWindows<W>) inputStrategy.getWindowFn();
  return inputStrategy.withWindowFn(wrapper.getOriginalWindowFn());
}
/**
 * Processes one keyed work item: builds a {@code ReduceFnRunner} for the item's key, feeds it
 * the item's elements, delivers each of the item's timers, and finally persists the runner.
 *
 * @param c the process context supplying the work item, windowing internals and options
 */
@Override
public void processElement(ProcessContext c) throws Exception {
  KeyedWorkItem<K, InputT> workItem = c.element();
  K workKey = workItem.key();

  TimerInternals timers = c.windowingInternals().timerInternals();
  StateInternals<K> state = stateInternalsFactory.stateInternalsForKey(workKey);

  ExecutableTriggerStateMachine triggerMachine =
      ExecutableTriggerStateMachine.create(
          TriggerStateMachines.stateMachineForTrigger(windowingStrategy.getTrigger()));

  ReduceFnRunner<K, InputT, OutputT, W> runner =
      new ReduceFnRunner<>(
          workKey,
          windowingStrategy,
          triggerMachine,
          state,
          timers,
          WindowingInternalsAdapters.outputWindowedValue(c.windowingInternals()),
          WindowingInternalsAdapters.sideInputReader(c.windowingInternals()),
          droppedDueToClosedWindow,
          reduceFn,
          c.getPipelineOptions());

  // Elements first, then timers, then persist.
  runner.processElements(workItem.elementsIterable());
  for (TimerData firedTimer : workItem.timersIterable()) {
    runner.onTimer(firedTimer);
  }
  runner.persist();
}
/**
 * Checks that the given {@link GroupAlsoByWindowsDoFn} implementation combines elements per
 * session window using the supplied {@link CombineFn}, and that each combined output carries
 * the maximum timestamp of its session window.
 *
 * <p>Three inputs are used: two whose proto-windows overlap (expected to end up in the window
 * {@code [0, 15)}) and one in {@code [15, 25)}.
 */
public static void combinesElementsPerSessionWithEndOfWindowTimestamp(
    GroupAlsoByWindowsDoFnFactory<String, Long, Long> gabwFactory,
    CombineFn<Long, ?, Long> combineFn)
    throws Exception {

  WindowingStrategy<?, IntervalWindow> sessionStrategy =
      WindowingStrategy.of(Sessions.withGapDuration(Duration.millis(10)))
          .withOutputTimeFn(OutputTimeFns.outputAtEndOfWindow());

  BoundedWindow laterSession = window(15, 25);

  List<WindowedValue<KV<String, Long>>> outputs =
      runGABW(
          gabwFactory,
          sessionStrategy,
          "k",
          WindowedValue.of(
              1L, new Instant(0), Arrays.asList(window(0, 10)), PaneInfo.NO_FIRING),
          WindowedValue.of(
              2L, new Instant(5), Arrays.asList(window(5, 15)), PaneInfo.NO_FIRING),
          WindowedValue.of(
              4L, new Instant(15), Arrays.asList(laterSession), PaneInfo.NO_FIRING));

  assertThat(outputs, hasSize(2));

  // First session: values 1 and 2 combined, stamped with the window's max timestamp.
  BoundedWindow earlierSession = window(0, 15);
  TimestampedValue<KV<String, Long>> earlier = getOnlyElementInWindow(outputs, earlierSession);
  assertThat(
      earlier.getValue().getValue(), equalTo(combineFn.apply(ImmutableList.of(1L, 2L))));
  assertThat(earlier.getTimestamp(), equalTo(earlierSession.maxTimestamp()));

  // Second session: value 4 alone, stamped with the window's max timestamp.
  TimestampedValue<KV<String, Long>> later = getOnlyElementInWindow(outputs, laterSession);
  assertThat(later.getValue().getValue(), equalTo(combineFn.apply(ImmutableList.of(4L))));
  assertThat(later.getTimestamp(), equalTo(laterSession.maxTimestamp()));
}
/**
 * Tests that the given GABW implementation correctly groups elements into merged sessions with
 * output timestamps at the latest input timestamp of each session, as configured by
 * {@link OutputTimeFns#outputAtLatestInputTimestamp()}.
 *
 * <p>The inputs "v1" (timestamp 0) and "v2" (timestamp 5) are expected in the merged window
 * {@code [0, 15)} with output timestamp 5; "v3" (timestamp 15) is expected alone in
 * {@code [15, 25)} with output timestamp 15.
 */
public static void groupsElementsInMergedSessionsWithLatestTimestamp(
    GroupAlsoByWindowsDoFnFactory<String, String, Iterable<String>> gabwFactory)
    throws Exception {

  WindowingStrategy<?, IntervalWindow> windowingStrategy =
      WindowingStrategy.of(Sessions.withGapDuration(Duration.millis(10)))
          .withOutputTimeFn(OutputTimeFns.outputAtLatestInputTimestamp());

  BoundedWindow unmergedWindow = window(15, 25);
  List<WindowedValue<KV<String, Iterable<String>>>> result =
      runGABW(
          gabwFactory,
          windowingStrategy,
          "k",
          WindowedValue.of(
              "v1", new Instant(0), Arrays.asList(window(0, 10)), PaneInfo.NO_FIRING),
          WindowedValue.of(
              "v2", new Instant(5), Arrays.asList(window(5, 15)), PaneInfo.NO_FIRING),
          WindowedValue.of(
              "v3", new Instant(15), Arrays.asList(unmergedWindow), PaneInfo.NO_FIRING));

  assertThat(result, hasSize(2));

  BoundedWindow mergedWindow = window(0, 15);
  TimestampedValue<KV<String, Iterable<String>>> item0 =
      getOnlyElementInWindow(result, mergedWindow);
  assertThat(item0.getValue().getValue(), containsInAnyOrder("v1", "v2"));
  // Latest input timestamp in the merged session, not the end of the window.
  assertThat(item0.getTimestamp(), equalTo(new Instant(5)));

  TimestampedValue<KV<String, Iterable<String>>> item1 =
      getOnlyElementInWindow(result, unmergedWindow);
  assertThat(item1.getValue().getValue(), contains("v3"));
  assertThat(item1.getTimestamp(), equalTo(new Instant(15)));
}
/**
 * Checks that, for a short sequence of elements sharing one key, the given GABW implementation
 * groups them by fixed windows and stamps each output according to
 * {@link OutputTimeFns#outputAtEndOfWindow()}.
 */
public static void groupsElementsIntoFixedWindowsWithEndOfWindowTimestamp(
    GroupAlsoByWindowsDoFnFactory<String, String, Iterable<String>> gabwFactory)
    throws Exception {

  WindowingStrategy<?, IntervalWindow> fixedStrategy =
      WindowingStrategy.of(FixedWindows.of(Duration.millis(10)))
          .withOutputTimeFn(OutputTimeFns.outputAtEndOfWindow());

  BoundedWindow firstWindow = window(0, 10);
  BoundedWindow secondWindow = window(10, 20);

  List<WindowedValue<KV<String, Iterable<String>>>> outputs =
      runGABW(
          gabwFactory,
          fixedStrategy,
          "key",
          WindowedValue.of(
              "v1", new Instant(1), Arrays.asList(firstWindow), PaneInfo.NO_FIRING),
          WindowedValue.of(
              "v2", new Instant(2), Arrays.asList(firstWindow), PaneInfo.NO_FIRING),
          WindowedValue.of(
              "v3", new Instant(13), Arrays.asList(secondWindow), PaneInfo.NO_FIRING));

  assertThat(outputs, hasSize(2));

  // Window [0, 10): both early values, stamped with the window's max timestamp.
  TimestampedValue<KV<String, Iterable<String>>> inFirst =
      getOnlyElementInWindow(outputs, firstWindow);
  assertThat(inFirst.getValue().getValue(), containsInAnyOrder("v1", "v2"));
  assertThat(inFirst.getTimestamp(), equalTo(firstWindow.maxTimestamp()));

  // Window [10, 20): the single late value, stamped with the window's max timestamp.
  TimestampedValue<KV<String, Iterable<String>>> inSecond =
      getOnlyElementInWindow(outputs, secondWindow);
  assertThat(inSecond.getValue().getValue(), contains("v3"));
  assertThat(inSecond.getTimestamp(), equalTo(secondWindow.maxTimestamp()));
}
@Override public void processElement(WindowedValue<KeyedWorkItem<K, V>> element) throws Exception { KeyedWorkItem<K, V> workItem = element.getValue(); K key = workItem.key(); UncommittedBundle<KV<K, Iterable<V>>> bundle = evaluationContext.createBundle(application.getOutput()); outputBundles.add(bundle); CopyOnAccessInMemoryStateInternals<K> stateInternals = (CopyOnAccessInMemoryStateInternals<K>) stepContext.stateInternals(); DirectTimerInternals timerInternals = stepContext.timerInternals(); ReduceFnRunner<K, V, Iterable<V>, BoundedWindow> reduceFnRunner = new ReduceFnRunner<>( key, windowingStrategy, ExecutableTriggerStateMachine.create( TriggerStateMachines.stateMachineForTrigger(windowingStrategy.getTrigger())), stateInternals, timerInternals, new DirectWindowingInternals<>(bundle), droppedDueToClosedWindow, reduceFn, evaluationContext.getPipelineOptions()); // Drop any elements within expired windows reduceFnRunner.processElements( dropExpiredWindows(key, workItem.elementsIterable(), timerInternals)); for (TimerData timer : workItem.timersIterable()) { reduceFnRunner.onTimer(timer); } reduceFnRunner.persist(); }
@Override public void validate(PCollection<T> input) { WindowingStrategy<?, ?> outputStrategy = getOutputStrategyInternal(input.getWindowingStrategy()); // Make sure that the windowing strategy is complete & valid. if (outputStrategy.isTriggerSpecified() && !(outputStrategy.getTrigger() instanceof DefaultTrigger)) { if (!(outputStrategy.getWindowFn() instanceof GlobalWindows) && !outputStrategy.isAllowedLatenessSpecified()) { throw new IllegalArgumentException( "Except when using GlobalWindows," + " calling .triggering() to specify a trigger requires that the allowed lateness be" + " specified using .withAllowedLateness() to set the upper bound on how late data" + " can arrive before being dropped. See Javadoc for more details."); } if (!outputStrategy.isModeSpecified()) { throw new IllegalArgumentException( "Calling .triggering() to specify a trigger requires that the accumulation mode be" + " specified using .discardingFiredPanes() or .accumulatingFiredPanes()." + " See Javadoc for more details."); } } }
/** * Tests that for a simple sequence of elements on the same key, the given GABW implementation * correctly groups and combines them according to sliding windows. * * <p>In the input here, each element occurs in multiple windows. */ public static void combinesElementsInSlidingWindows( GroupAlsoByWindowsDoFnFactory<String, Long, Long> gabwFactory, CombineFn<Long, ?, Long> combineFn) throws Exception { WindowingStrategy<?, IntervalWindow> windowingStrategy = WindowingStrategy.of(SlidingWindows.of(Duration.millis(20)).every(Duration.millis(10))) .withOutputTimeFn(OutputTimeFns.outputAtEarliestInputTimestamp()); List<WindowedValue<KV<String, Long>>> result = runGABW( gabwFactory, windowingStrategy, "k", WindowedValue.of( 1L, new Instant(5), Arrays.asList(window(-10, 10), window(0, 20)), PaneInfo.NO_FIRING), WindowedValue.of( 2L, new Instant(15), Arrays.asList(window(0, 20), window(10, 30)), PaneInfo.NO_FIRING), WindowedValue.of( 4L, new Instant(18), Arrays.asList(window(0, 20), window(10, 30)), PaneInfo.NO_FIRING)); assertThat(result, hasSize(3)); TimestampedValue<KV<String, Long>> item0 = getOnlyElementInWindow(result, window(-10, 10)); assertThat(item0.getValue().getKey(), equalTo("k")); assertThat(item0.getValue().getValue(), equalTo(combineFn.apply(ImmutableList.of(1L)))); assertThat(item0.getTimestamp(), equalTo(new Instant(5L))); TimestampedValue<KV<String, Long>> item1 = getOnlyElementInWindow(result, window(0, 20)); assertThat(item1.getValue().getKey(), equalTo("k")); assertThat(item1.getValue().getValue(), equalTo(combineFn.apply(ImmutableList.of(1L, 2L, 4L)))); // Timestamp adjusted by WindowFn to exceed the end of the prior sliding window assertThat(item1.getTimestamp(), equalTo(new Instant(10L))); TimestampedValue<KV<String, Long>> item2 = getOnlyElementInWindow(result, window(10, 30)); assertThat(item2.getValue().getKey(), equalTo("k")); assertThat(item2.getValue().getValue(), equalTo(combineFn.apply(ImmutableList.of(2L, 4L)))); // Timestamp adjusted by 
WindowFn to exceed the end of the prior sliding window assertThat(item2.getTimestamp(), equalTo(new Instant(20L))); }
/** * Tests that for a simple sequence of elements on the same key, the given GABW implementation * correctly groups them into sliding windows. * * <p>In the input here, each element occurs in multiple windows. */ public static void groupsElementsIntoSlidingWindowsWithMinTimestamp( GroupAlsoByWindowsDoFnFactory<String, String, Iterable<String>> gabwFactory) throws Exception { WindowingStrategy<?, IntervalWindow> windowingStrategy = WindowingStrategy.of(SlidingWindows.of(Duration.millis(20)).every(Duration.millis(10))) .withOutputTimeFn(OutputTimeFns.outputAtEarliestInputTimestamp()); List<WindowedValue<KV<String, Iterable<String>>>> result = runGABW( gabwFactory, windowingStrategy, "key", WindowedValue.of( "v1", new Instant(5), Arrays.asList(window(-10, 10), window(0, 20)), PaneInfo.NO_FIRING), WindowedValue.of( "v2", new Instant(15), Arrays.asList(window(0, 20), window(10, 30)), PaneInfo.NO_FIRING)); assertThat(result, hasSize(3)); TimestampedValue<KV<String, Iterable<String>>> item0 = getOnlyElementInWindow(result, window(-10, 10)); assertThat(item0.getValue().getValue(), contains("v1")); assertThat(item0.getTimestamp(), equalTo(new Instant(5))); TimestampedValue<KV<String, Iterable<String>>> item1 = getOnlyElementInWindow(result, window(0, 20)); assertThat(item1.getValue().getValue(), containsInAnyOrder("v1", "v2")); // Timestamp adjusted by WindowFn to exceed the end of the prior sliding window assertThat(item1.getTimestamp(), equalTo(new Instant(10))); TimestampedValue<KV<String, Iterable<String>>> item2 = getOnlyElementInWindow(result, window(10, 30)); assertThat(item2.getValue().getValue(), contains("v2")); // Timestamp adjusted by WindowFn to exceed the end of the prior sliding window assertThat(item2.getTimestamp(), equalTo(new Instant(20))); }
/** * Tests that for empty input and the given {@link WindowingStrategy}, the provided GABW * implementation produces no output. * * <p>The input type is deliberately left as a wildcard, since it is not relevant. */ public static <K, InputT, OutputT> void emptyInputEmptyOutput( GroupAlsoByWindowsDoFnFactory<K, InputT, OutputT> gabwFactory) throws Exception { WindowingStrategy<?, IntervalWindow> windowingStrategy = WindowingStrategy.of(FixedWindows.of(Duration.millis(10))); // This key should never actually be used, though it is eagerly passed to the // StateInternalsFactory so must be non-null @SuppressWarnings("unchecked") K fakeKey = (K) "this key should never be used"; List<WindowedValue<KV<K, OutputT>>> result = runGABW( gabwFactory, windowingStrategy, fakeKey, Collections.<WindowedValue<InputT>>emptyList()); assertThat(result, hasSize(0)); }
// Rawtype cast of OutputTimeFn cannot be eliminated with intermediate variable, as it is // casting between wildcards public WindowingStrategy<?, ?> getOutputStrategyInternal( WindowingStrategy<?, ?> inputStrategy) { WindowingStrategy<?, ?> result = inputStrategy; if (windowFn != null) { result = result.withWindowFn(windowFn); } if (trigger != null) { result = result.withTrigger(trigger); } if (mode != null) { result = result.withMode(mode); } if (allowedLateness != null) { result = result.withAllowedLateness(allowedLateness); } if (closingBehavior != null) { result = result.withClosingBehavior(closingBehavior); } if (outputTimeFn != null) { result = result.withOutputTimeFn(outputTimeFn); } return result; }
@Override public void translateNode( ParDo.BoundMulti<InputT, OutputT> transform, FlinkBatchTranslationContext context) { DoFn<InputT, OutputT> doFn = transform.getFn(); rejectStateAndTimers(doFn); DataSet<WindowedValue<InputT>> inputDataSet = context.getInputDataSet(context.getInput(transform)); List<TaggedPValue> outputs = context.getOutputs(transform); Map<TupleTag<?>, Integer> outputMap = Maps.newHashMap(); // put the main output at index 0, FlinkMultiOutputDoFnFunction expects this outputMap.put(transform.getMainOutputTag(), 0); int count = 1; for (TaggedPValue taggedValue : outputs) { if (!outputMap.containsKey(taggedValue.getTag())) { outputMap.put(taggedValue.getTag(), count++); } } // assume that the windowing strategy is the same for all outputs WindowingStrategy<?, ?> windowingStrategy = null; // collect all output Coders and create a UnionCoder for our tagged outputs List<Coder<?>> outputCoders = Lists.newArrayList(); for (TaggedPValue taggedValue : outputs) { checkState( taggedValue.getValue() instanceof PCollection, "Within ParDo, got a non-PCollection output %s of type %s", taggedValue.getValue(), taggedValue.getValue().getClass().getSimpleName()); PCollection<?> coll = (PCollection<?>) taggedValue.getValue(); outputCoders.add(coll.getCoder()); windowingStrategy = coll.getWindowingStrategy(); } if (windowingStrategy == null) { throw new IllegalStateException("No outputs defined."); } UnionCoder unionCoder = UnionCoder.of(outputCoders); TypeInformation<WindowedValue<RawUnionValue>> typeInformation = new CoderTypeInformation<>( WindowedValue.getFullCoder( unionCoder, windowingStrategy.getWindowFn().windowCoder())); List<PCollectionView<?>> sideInputs = transform.getSideInputs(); // construct a map from side input to WindowingStrategy so that // the OldDoFn runner can map main-input windows to side input windows Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputStrategies = new HashMap<>(); for (PCollectionView<?> sideInput : sideInputs) { 
sideInputStrategies.put(sideInput, sideInput.getWindowingStrategyInternal()); } @SuppressWarnings("unchecked") FlinkMultiOutputDoFnFunction<InputT, OutputT> doFnWrapper = new FlinkMultiOutputDoFnFunction( doFn, windowingStrategy, sideInputStrategies, context.getPipelineOptions(), outputMap); MapPartitionOperator<WindowedValue<InputT>, WindowedValue<RawUnionValue>> taggedDataSet = new MapPartitionOperator<>( inputDataSet, typeInformation, doFnWrapper, transform.getName()); transformSideInputs(sideInputs, taggedDataSet, context); for (TaggedPValue output : outputs) { pruneOutput( taggedDataSet, context, outputMap.get(output.getTag()), (PCollection) output.getValue()); } }
@Override @SuppressWarnings("unchecked") public void translateNode( Combine.PerKey<K, InputT, OutputT> transform, FlinkBatchTranslationContext context) { DataSet<WindowedValue<KV<K, InputT>>> inputDataSet = context.getInputDataSet(context.getInput(transform)); CombineFnBase.PerKeyCombineFn<K, InputT, AccumT, OutputT> combineFn = (CombineFnBase.PerKeyCombineFn<K, InputT, AccumT, OutputT>) transform.getFn(); KvCoder<K, InputT> inputCoder = (KvCoder<K, InputT>) context.getInput(transform).getCoder(); Coder<AccumT> accumulatorCoder; try { accumulatorCoder = combineFn.getAccumulatorCoder( context.getInput(transform).getPipeline().getCoderRegistry(), inputCoder.getKeyCoder(), inputCoder.getValueCoder()); } catch (CannotProvideCoderException e) { throw new RuntimeException(e); } WindowingStrategy<?, ?> windowingStrategy = context.getInput(transform).getWindowingStrategy(); TypeInformation<WindowedValue<KV<K, AccumT>>> partialReduceTypeInfo = context.getTypeInfo( KvCoder.of(inputCoder.getKeyCoder(), accumulatorCoder), windowingStrategy); Grouping<WindowedValue<KV<K, InputT>>> inputGrouping = inputDataSet.groupBy(new KvKeySelector<InputT, K>(inputCoder.getKeyCoder())); // construct a map from side input to WindowingStrategy so that // the OldDoFn runner can map main-input windows to side input windows Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputStrategies = new HashMap<>(); for (PCollectionView<?> sideInput : transform.getSideInputs()) { sideInputStrategies.put(sideInput, sideInput.getWindowingStrategyInternal()); } if (windowingStrategy.getWindowFn().isNonMerging()) { WindowingStrategy<?, BoundedWindow> boundedStrategy = (WindowingStrategy<?, BoundedWindow>) windowingStrategy; FlinkPartialReduceFunction<K, InputT, AccumT, ?> partialReduceFunction = new FlinkPartialReduceFunction<>( combineFn, boundedStrategy, sideInputStrategies, context.getPipelineOptions()); FlinkReduceFunction<K, AccumT, OutputT, ?> reduceFunction = new FlinkReduceFunction<>( combineFn, 
boundedStrategy, sideInputStrategies, context.getPipelineOptions()); // Partially GroupReduce the values into the intermediate format AccumT (combine) GroupCombineOperator<WindowedValue<KV<K, InputT>>, WindowedValue<KV<K, AccumT>>> groupCombine = new GroupCombineOperator<>( inputGrouping, partialReduceTypeInfo, partialReduceFunction, "GroupCombine: " + transform.getName()); transformSideInputs(transform.getSideInputs(), groupCombine, context); TypeInformation<WindowedValue<KV<K, OutputT>>> reduceTypeInfo = context.getTypeInfo(context.getOutput(transform)); Grouping<WindowedValue<KV<K, AccumT>>> intermediateGrouping = groupCombine.groupBy(new KvKeySelector<AccumT, K>(inputCoder.getKeyCoder())); // Fully reduce the values and create output format OutputT GroupReduceOperator<WindowedValue<KV<K, AccumT>>, WindowedValue<KV<K, OutputT>>> outputDataSet = new GroupReduceOperator<>( intermediateGrouping, reduceTypeInfo, reduceFunction, transform.getName()); transformSideInputs(transform.getSideInputs(), outputDataSet, context); context.setOutputDataSet(context.getOutput(transform), outputDataSet); } else { if (!windowingStrategy.getWindowFn().windowCoder().equals(IntervalWindow.getCoder())) { throw new UnsupportedOperationException( "Merging WindowFn with windows other than IntervalWindow are not supported."); } // for merging windows we can't to a pre-shuffle combine step since // elements would not be in their correct windows for side-input access WindowingStrategy<?, IntervalWindow> intervalStrategy = (WindowingStrategy<?, IntervalWindow>) windowingStrategy; FlinkMergingNonShuffleReduceFunction<K, InputT, AccumT, OutputT, ?> reduceFunction = new FlinkMergingNonShuffleReduceFunction<>( combineFn, intervalStrategy, sideInputStrategies, context.getPipelineOptions()); TypeInformation<WindowedValue<KV<K, OutputT>>> reduceTypeInfo = context.getTypeInfo(context.getOutput(transform)); Grouping<WindowedValue<KV<K, InputT>>> grouping = inputDataSet.groupBy(new 
KvKeySelector<InputT, K>(inputCoder.getKeyCoder())); // Fully reduce the values and create output format OutputT GroupReduceOperator<WindowedValue<KV<K, InputT>>, WindowedValue<KV<K, OutputT>>> outputDataSet = new GroupReduceOperator<>( grouping, reduceTypeInfo, reduceFunction, transform.getName()); transformSideInputs(transform.getSideInputs(), outputDataSet, context); context.setOutputDataSet(context.getOutput(transform), outputDataSet); } }
@Override public void translateNode( GroupByKey<K, InputT> transform, FlinkBatchTranslationContext context) { // for now, this is copied from the Combine.PerKey translater. Once we have the new runner API // we can replace GroupByKey by a Combine.PerKey with the Concatenate CombineFn DataSet<WindowedValue<KV<K, InputT>>> inputDataSet = context.getInputDataSet(context.getInput(transform)); Combine.KeyedCombineFn<K, InputT, List<InputT>, List<InputT>> combineFn = new Concatenate<InputT>().asKeyedFn(); KvCoder<K, InputT> inputCoder = (KvCoder<K, InputT>) context.getInput(transform).getCoder(); Coder<List<InputT>> accumulatorCoder; try { accumulatorCoder = combineFn.getAccumulatorCoder( context.getInput(transform).getPipeline().getCoderRegistry(), inputCoder.getKeyCoder(), inputCoder.getValueCoder()); } catch (CannotProvideCoderException e) { throw new RuntimeException(e); } WindowingStrategy<?, ?> windowingStrategy = context.getInput(transform).getWindowingStrategy(); TypeInformation<WindowedValue<KV<K, List<InputT>>>> partialReduceTypeInfo = new CoderTypeInformation<>( WindowedValue.getFullCoder( KvCoder.of(inputCoder.getKeyCoder(), accumulatorCoder), windowingStrategy.getWindowFn().windowCoder())); Grouping<WindowedValue<KV<K, InputT>>> inputGrouping = inputDataSet.groupBy(new KvKeySelector<InputT, K>(inputCoder.getKeyCoder())); FlinkPartialReduceFunction<K, InputT, List<InputT>, ?> partialReduceFunction; FlinkReduceFunction<K, List<InputT>, List<InputT>, ?> reduceFunction; if (windowingStrategy.getWindowFn().isNonMerging()) { @SuppressWarnings("unchecked") WindowingStrategy<?, BoundedWindow> boundedStrategy = (WindowingStrategy<?, BoundedWindow>) windowingStrategy; partialReduceFunction = new FlinkPartialReduceFunction<>( combineFn, boundedStrategy, Collections.<PCollectionView<?>, WindowingStrategy<?, ?>>emptyMap(), context.getPipelineOptions()); reduceFunction = new FlinkReduceFunction<>( combineFn, boundedStrategy, Collections.<PCollectionView<?>, 
WindowingStrategy<?, ?>>emptyMap(), context.getPipelineOptions()); } else { if (!windowingStrategy.getWindowFn().windowCoder().equals(IntervalWindow.getCoder())) { throw new UnsupportedOperationException( "Merging WindowFn with windows other than IntervalWindow are not supported."); } @SuppressWarnings("unchecked") WindowingStrategy<?, IntervalWindow> intervalStrategy = (WindowingStrategy<?, IntervalWindow>) windowingStrategy; partialReduceFunction = new FlinkMergingPartialReduceFunction<>( combineFn, intervalStrategy, Collections.<PCollectionView<?>, WindowingStrategy<?, ?>>emptyMap(), context.getPipelineOptions()); reduceFunction = new FlinkMergingReduceFunction<>( combineFn, intervalStrategy, Collections.<PCollectionView<?>, WindowingStrategy<?, ?>>emptyMap(), context.getPipelineOptions()); } // Partially GroupReduce the values into the intermediate format AccumT (combine) GroupCombineOperator<WindowedValue<KV<K, InputT>>, WindowedValue<KV<K, List<InputT>>>> groupCombine = new GroupCombineOperator<>( inputGrouping, partialReduceTypeInfo, partialReduceFunction, "GroupCombine: " + transform.getName()); Grouping<WindowedValue<KV<K, List<InputT>>>> intermediateGrouping = groupCombine.groupBy(new KvKeySelector<List<InputT>, K>(inputCoder.getKeyCoder())); // Fully reduce the values and create output format VO GroupReduceOperator<WindowedValue<KV<K, List<InputT>>>, WindowedValue<KV<K, List<InputT>>>> outputDataSet = new GroupReduceOperator<>( intermediateGrouping, partialReduceTypeInfo, reduceFunction, transform.getName()); context.setOutputDataSet(context.getOutput(transform), outputDataSet); }