/**
 * Translates a {@code Combine.PerKey} into a Flink batch pipeline: a pre-shuffle
 * {@code GroupCombine} producing {@code AccumT} accumulators followed by a final
 * {@code GroupReduce} producing {@code OutputT} — except for merging WindowFns,
 * where the pre-shuffle combine is skipped (see the else-branch below).
 *
 * <p>Side inputs are wired into both phases so the runner can map main-input
 * windows to side-input windows.
 */
@Override @SuppressWarnings("unchecked") public void translateNode(
    Combine.PerKey<K, InputT, OutputT> transform, FlinkBatchTranslationContext context) {
  DataSet<WindowedValue<KV<K, InputT>>> inputDataSet =
      context.getInputDataSet(context.getInput(transform));

  // The user-supplied combine logic; the cast is unchecked but guaranteed by the
  // Combine.PerKey transform's construction.
  CombineFnBase.PerKeyCombineFn<K, InputT, AccumT, OutputT> combineFn =
      (CombineFnBase.PerKeyCombineFn<K, InputT, AccumT, OutputT>) transform.getFn();

  // Combine.PerKey inputs are always KV, so the input coder is a KvCoder.
  KvCoder<K, InputT> inputCoder = (KvCoder<K, InputT>) context.getInput(transform).getCoder();

  // Derive the accumulator coder from the registry; failure here means the
  // pipeline cannot be translated at all, so surface it as a RuntimeException.
  Coder<AccumT> accumulatorCoder;
  try {
    accumulatorCoder =
        combineFn.getAccumulatorCoder(
            context.getInput(transform).getPipeline().getCoderRegistry(),
            inputCoder.getKeyCoder(),
            inputCoder.getValueCoder());
  } catch (CannotProvideCoderException e) {
    throw new RuntimeException(e);
  }

  WindowingStrategy<?, ?> windowingStrategy =
      context.getInput(transform).getWindowingStrategy();

  // Type info for the intermediate (post pre-shuffle combine) elements:
  // windowed KV<K, AccumT>.
  TypeInformation<WindowedValue<KV<K, AccumT>>> partialReduceTypeInfo =
      context.getTypeInfo(
          KvCoder.of(inputCoder.getKeyCoder(), accumulatorCoder), windowingStrategy);

  Grouping<WindowedValue<KV<K, InputT>>> inputGrouping =
      inputDataSet.groupBy(new KvKeySelector<InputT, K>(inputCoder.getKeyCoder()));

  // construct a map from side input to WindowingStrategy so that
  // the OldDoFn runner can map main-input windows to side input windows
  Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputStrategies = new HashMap<>();
  for (PCollectionView<?> sideInput : transform.getSideInputs()) {
    sideInputStrategies.put(sideInput, sideInput.getWindowingStrategyInternal());
  }

  if (windowingStrategy.getWindowFn().isNonMerging()) {
    // Non-merging windows: safe to combine before the shuffle.
    WindowingStrategy<?, BoundedWindow> boundedStrategy =
        (WindowingStrategy<?, BoundedWindow>) windowingStrategy;

    FlinkPartialReduceFunction<K, InputT, AccumT, ?> partialReduceFunction =
        new FlinkPartialReduceFunction<>(
            combineFn, boundedStrategy, sideInputStrategies, context.getPipelineOptions());

    FlinkReduceFunction<K, AccumT, OutputT, ?> reduceFunction =
        new FlinkReduceFunction<>(
            combineFn, boundedStrategy, sideInputStrategies, context.getPipelineOptions());

    // Partially GroupReduce the values into the intermediate format AccumT (combine)
    GroupCombineOperator<WindowedValue<KV<K, InputT>>, WindowedValue<KV<K, AccumT>>>
        groupCombine =
            new GroupCombineOperator<>(
                inputGrouping,
                partialReduceTypeInfo,
                partialReduceFunction,
                "GroupCombine: " + transform.getName());

    // Side inputs must be attached to the pre-shuffle phase as well.
    transformSideInputs(transform.getSideInputs(), groupCombine, context);

    TypeInformation<WindowedValue<KV<K, OutputT>>> reduceTypeInfo =
        context.getTypeInfo(context.getOutput(transform));

    // Re-group the partially combined accumulators by the same key.
    Grouping<WindowedValue<KV<K, AccumT>>> intermediateGrouping =
        groupCombine.groupBy(new KvKeySelector<AccumT, K>(inputCoder.getKeyCoder()));

    // Fully reduce the values and create output format OutputT
    GroupReduceOperator<WindowedValue<KV<K, AccumT>>, WindowedValue<KV<K, OutputT>>>
        outputDataSet =
            new GroupReduceOperator<>(
                intermediateGrouping, reduceTypeInfo, reduceFunction, transform.getName());

    transformSideInputs(transform.getSideInputs(), outputDataSet, context);

    context.setOutputDataSet(context.getOutput(transform), outputDataSet);

  } else {
    // Merging-window support is limited to IntervalWindow coders here.
    if (!windowingStrategy.getWindowFn().windowCoder().equals(IntervalWindow.getCoder())) {
      throw new UnsupportedOperationException(
          "Merging WindowFn with windows other than IntervalWindow are not supported.");
    }

    // for merging windows we can't do a pre-shuffle combine step since
    // elements would not be in their correct windows for side-input access
    WindowingStrategy<?, IntervalWindow> intervalStrategy =
        (WindowingStrategy<?, IntervalWindow>) windowingStrategy;

    FlinkMergingNonShuffleReduceFunction<K, InputT, AccumT, OutputT, ?> reduceFunction =
        new FlinkMergingNonShuffleReduceFunction<>(
            combineFn, intervalStrategy, sideInputStrategies, context.getPipelineOptions());

    TypeInformation<WindowedValue<KV<K, OutputT>>> reduceTypeInfo =
        context.getTypeInfo(context.getOutput(transform));

    Grouping<WindowedValue<KV<K, InputT>>> grouping =
        inputDataSet.groupBy(new KvKeySelector<InputT, K>(inputCoder.getKeyCoder()));

    // Fully reduce the values and create output format OutputT
    GroupReduceOperator<WindowedValue<KV<K, InputT>>, WindowedValue<KV<K, OutputT>>>
        outputDataSet =
            new GroupReduceOperator<>(
                grouping, reduceTypeInfo, reduceFunction, transform.getName());

    transformSideInputs(transform.getSideInputs(), outputDataSet, context);

    context.setOutputDataSet(context.getOutput(transform), outputDataSet);
  }
}
@Override public void translateNode( GroupByKey<K, InputT> transform, FlinkBatchTranslationContext context) { // for now, this is copied from the Combine.PerKey translater. Once we have the new runner API // we can replace GroupByKey by a Combine.PerKey with the Concatenate CombineFn DataSet<WindowedValue<KV<K, InputT>>> inputDataSet = context.getInputDataSet(context.getInput(transform)); Combine.KeyedCombineFn<K, InputT, List<InputT>, List<InputT>> combineFn = new Concatenate<InputT>().asKeyedFn(); KvCoder<K, InputT> inputCoder = (KvCoder<K, InputT>) context.getInput(transform).getCoder(); Coder<List<InputT>> accumulatorCoder; try { accumulatorCoder = combineFn.getAccumulatorCoder( context.getInput(transform).getPipeline().getCoderRegistry(), inputCoder.getKeyCoder(), inputCoder.getValueCoder()); } catch (CannotProvideCoderException e) { throw new RuntimeException(e); } WindowingStrategy<?, ?> windowingStrategy = context.getInput(transform).getWindowingStrategy(); TypeInformation<WindowedValue<KV<K, List<InputT>>>> partialReduceTypeInfo = new CoderTypeInformation<>( WindowedValue.getFullCoder( KvCoder.of(inputCoder.getKeyCoder(), accumulatorCoder), windowingStrategy.getWindowFn().windowCoder())); Grouping<WindowedValue<KV<K, InputT>>> inputGrouping = inputDataSet.groupBy(new KvKeySelector<InputT, K>(inputCoder.getKeyCoder())); FlinkPartialReduceFunction<K, InputT, List<InputT>, ?> partialReduceFunction; FlinkReduceFunction<K, List<InputT>, List<InputT>, ?> reduceFunction; if (windowingStrategy.getWindowFn().isNonMerging()) { @SuppressWarnings("unchecked") WindowingStrategy<?, BoundedWindow> boundedStrategy = (WindowingStrategy<?, BoundedWindow>) windowingStrategy; partialReduceFunction = new FlinkPartialReduceFunction<>( combineFn, boundedStrategy, Collections.<PCollectionView<?>, WindowingStrategy<?, ?>>emptyMap(), context.getPipelineOptions()); reduceFunction = new FlinkReduceFunction<>( combineFn, boundedStrategy, Collections.<PCollectionView<?>, 
WindowingStrategy<?, ?>>emptyMap(), context.getPipelineOptions()); } else { if (!windowingStrategy.getWindowFn().windowCoder().equals(IntervalWindow.getCoder())) { throw new UnsupportedOperationException( "Merging WindowFn with windows other than IntervalWindow are not supported."); } @SuppressWarnings("unchecked") WindowingStrategy<?, IntervalWindow> intervalStrategy = (WindowingStrategy<?, IntervalWindow>) windowingStrategy; partialReduceFunction = new FlinkMergingPartialReduceFunction<>( combineFn, intervalStrategy, Collections.<PCollectionView<?>, WindowingStrategy<?, ?>>emptyMap(), context.getPipelineOptions()); reduceFunction = new FlinkMergingReduceFunction<>( combineFn, intervalStrategy, Collections.<PCollectionView<?>, WindowingStrategy<?, ?>>emptyMap(), context.getPipelineOptions()); } // Partially GroupReduce the values into the intermediate format AccumT (combine) GroupCombineOperator<WindowedValue<KV<K, InputT>>, WindowedValue<KV<K, List<InputT>>>> groupCombine = new GroupCombineOperator<>( inputGrouping, partialReduceTypeInfo, partialReduceFunction, "GroupCombine: " + transform.getName()); Grouping<WindowedValue<KV<K, List<InputT>>>> intermediateGrouping = groupCombine.groupBy(new KvKeySelector<List<InputT>, K>(inputCoder.getKeyCoder())); // Fully reduce the values and create output format VO GroupReduceOperator<WindowedValue<KV<K, List<InputT>>>, WindowedValue<KV<K, List<InputT>>>> outputDataSet = new GroupReduceOperator<>( intermediateGrouping, partialReduceTypeInfo, reduceFunction, transform.getName()); context.setOutputDataSet(context.getOutput(transform), outputDataSet); }