@Override
    public void translateNode(Window.Bound<T> transform, FlinkBatchTranslationContext context) {
      // Type information describing the windowed elements of this transform's output.
      TypeInformation<WindowedValue<T>> outputTypeInfo =
          context.getTypeInfo(context.getOutput(transform));

      DataSet<WindowedValue<T>> source = context.getInputDataSet(context.getInput(transform));

      // The window function to apply comes from the windowing strategy of the output.
      @SuppressWarnings("unchecked")
      final WindowingStrategy<T, ? extends BoundedWindow> outputStrategy =
          (WindowingStrategy<T, ? extends BoundedWindow>)
              context.getOutput(transform).getWindowingStrategy();
      WindowFn<T, ? extends BoundedWindow> assigner = outputStrategy.getWindowFn();
      FlinkAssignWindows<T, ? extends BoundedWindow> assignWindows =
          new FlinkAssignWindows<>(assigner);

      // Run the assignment as a flat-map named after the output, with explicit result type info.
      DataSet<WindowedValue<T>> windowed =
          source
              .flatMap(assignWindows)
              .name(context.getOutput(transform).getName())
              .returns(outputTypeInfo);

      context.setOutputDataSet(context.getOutput(transform), windowed);
    }
Beispiel #2
0
 /**
  * Returns the windowing strategy to use for the output.
  *
  * <p>If the input strategy's window function is an {@link InvalidWindows}, the result is the
  * input strategy with its window function replaced by the one the {@link InvalidWindows} reports
  * as its original. Otherwise the input strategy is returned unchanged.
  */
 private <W extends BoundedWindow> WindowingStrategy<?, W> getOutputWindowing(
     WindowingStrategy<?, W> inputStrategy) {
   if (!(inputStrategy.getWindowFn() instanceof InvalidWindows)) {
     // Nothing to unwrap.
     return inputStrategy;
   }
   @SuppressWarnings("unchecked")
   InvalidWindows<W> wrapper = (InvalidWindows<W>) inputStrategy.getWindowFn();
   return inputStrategy.withWindowFn(wrapper.getOriginalWindowFn());
 }
  /**
   * Feeds the elements and timers of one keyed work item through a newly constructed
   * {@link ReduceFnRunner} and persists the runner's state afterwards.
   */
  @Override
  public void processElement(ProcessContext c) throws Exception {
    KeyedWorkItem<K, InputT> workItem = c.element();
    K key = c.element().key();

    TimerInternals timers = c.windowingInternals().timerInternals();
    StateInternals<K> state = stateInternalsFactory.stateInternalsForKey(key);

    // The trigger of the windowing strategy is converted into an executable state machine.
    ExecutableTriggerStateMachine triggerMachine =
        ExecutableTriggerStateMachine.create(
            TriggerStateMachines.stateMachineForTrigger(windowingStrategy.getTrigger()));

    ReduceFnRunner<K, InputT, OutputT, W> runner =
        new ReduceFnRunner<>(
            key,
            windowingStrategy,
            triggerMachine,
            state,
            timers,
            WindowingInternalsAdapters.outputWindowedValue(c.windowingInternals()),
            WindowingInternalsAdapters.sideInputReader(c.windowingInternals()),
            droppedDueToClosedWindow,
            reduceFn,
            c.getPipelineOptions());

    // Elements first, then every timer of the work item, then persist.
    runner.processElements(workItem.elementsIterable());
    for (TimerData firedTimer : workItem.timersIterable()) {
      runner.onTimer(firedTimer);
    }
    runner.persist();
  }
Beispiel #4
0
  /**
   * Tests that the {@link GroupAlsoByWindowsDoFn} produced by the given factory combines elements
   * per session window using the provided {@link CombineFn}, and that each result is timestamped
   * with the maximum timestamp of its window.
   */
  public static void combinesElementsPerSessionWithEndOfWindowTimestamp(
      GroupAlsoByWindowsDoFnFactory<String, Long, Long> gabwFactory,
      CombineFn<Long, ?, Long> combineFn)
      throws Exception {

    WindowingStrategy<?, IntervalWindow> sessions =
        WindowingStrategy.of(Sessions.withGapDuration(Duration.millis(10)))
            .withOutputTimeFn(OutputTimeFns.outputAtEndOfWindow());

    // window(0, 10) and window(5, 15) are expected to end up in window(0, 15);
    // window(15, 25) is expected to stay on its own.
    BoundedWindow mergedWindow = window(0, 15);
    BoundedWindow standaloneWindow = window(15, 25);

    List<WindowedValue<KV<String, Long>>> outputs =
        runGABW(
            gabwFactory,
            sessions,
            "k",
            WindowedValue.of(1L, new Instant(0), Arrays.asList(window(0, 10)), PaneInfo.NO_FIRING),
            WindowedValue.of(2L, new Instant(5), Arrays.asList(window(5, 15)), PaneInfo.NO_FIRING),
            WindowedValue.of(
                4L, new Instant(15), Arrays.asList(standaloneWindow), PaneInfo.NO_FIRING));

    assertThat(outputs, hasSize(2));

    TimestampedValue<KV<String, Long>> mergedOutput =
        getOnlyElementInWindow(outputs, mergedWindow);
    Long expectedMerged = combineFn.apply(ImmutableList.of(1L, 2L));
    assertThat(mergedOutput.getValue().getValue(), equalTo(expectedMerged));
    assertThat(mergedOutput.getTimestamp(), equalTo(mergedWindow.maxTimestamp()));

    TimestampedValue<KV<String, Long>> standaloneOutput =
        getOnlyElementInWindow(outputs, standaloneWindow);
    Long expectedStandalone = combineFn.apply(ImmutableList.of(4L));
    assertThat(standaloneOutput.getValue().getValue(), equalTo(expectedStandalone));
    assertThat(standaloneOutput.getTimestamp(), equalTo(standaloneWindow.maxTimestamp()));
  }
Beispiel #5
0
  /**
   * Tests that the given GABW implementation correctly groups elements into merged sessions and
   * that, under {@link OutputTimeFns#outputAtLatestInputTimestamp()}, each output carries the
   * latest timestamp among the inputs of its window.
   */
  public static void groupsElementsInMergedSessionsWithLatestTimestamp(
      GroupAlsoByWindowsDoFnFactory<String, String, Iterable<String>> gabwFactory)
      throws Exception {

    WindowingStrategy<?, IntervalWindow> sessions =
        WindowingStrategy.of(Sessions.withGapDuration(Duration.millis(10)))
            .withOutputTimeFn(OutputTimeFns.outputAtLatestInputTimestamp());

    // "v1" and "v2" are expected in window(0, 15); "v3" is expected to remain in window(15, 25).
    BoundedWindow expectedMerged = window(0, 15);
    BoundedWindow expectedUnmerged = window(15, 25);

    List<WindowedValue<KV<String, Iterable<String>>>> outputs =
        runGABW(
            gabwFactory,
            sessions,
            "k",
            WindowedValue.of(
                "v1", new Instant(0), Arrays.asList(window(0, 10)), PaneInfo.NO_FIRING),
            WindowedValue.of(
                "v2", new Instant(5), Arrays.asList(window(5, 15)), PaneInfo.NO_FIRING),
            WindowedValue.of(
                "v3", new Instant(15), Arrays.asList(expectedUnmerged), PaneInfo.NO_FIRING));

    assertThat(outputs, hasSize(2));

    TimestampedValue<KV<String, Iterable<String>>> mergedOutput =
        getOnlyElementInWindow(outputs, expectedMerged);
    assertThat(mergedOutput.getValue().getValue(), containsInAnyOrder("v1", "v2"));
    // Latest input timestamp among "v1" (0) and "v2" (5).
    assertThat(mergedOutput.getTimestamp(), equalTo(new Instant(5)));

    TimestampedValue<KV<String, Iterable<String>>> unmergedOutput =
        getOnlyElementInWindow(outputs, expectedUnmerged);
    assertThat(unmergedOutput.getValue().getValue(), contains("v3"));
    assertThat(unmergedOutput.getTimestamp(), equalTo(new Instant(15)));
  }
Beispiel #6
0
  /**
   * Tests that, for a short sequence of elements sharing one key, the given GABW implementation
   * groups them by fixed window and stamps each output according to
   * {@link OutputTimeFns#outputAtEndOfWindow()}.
   */
  public static void groupsElementsIntoFixedWindowsWithEndOfWindowTimestamp(
      GroupAlsoByWindowsDoFnFactory<String, String, Iterable<String>> gabwFactory)
      throws Exception {

    WindowingStrategy<?, IntervalWindow> fixedWindows =
        WindowingStrategy.of(FixedWindows.of(Duration.millis(10)))
            .withOutputTimeFn(OutputTimeFns.outputAtEndOfWindow());

    List<WindowedValue<KV<String, Iterable<String>>>> outputs =
        runGABW(
            gabwFactory,
            fixedWindows,
            "key",
            WindowedValue.of(
                "v1", new Instant(1), Arrays.asList(window(0, 10)), PaneInfo.NO_FIRING),
            WindowedValue.of(
                "v2", new Instant(2), Arrays.asList(window(0, 10)), PaneInfo.NO_FIRING),
            WindowedValue.of(
                "v3", new Instant(13), Arrays.asList(window(10, 20)), PaneInfo.NO_FIRING));

    // One output per window that received input.
    assertThat(outputs, hasSize(2));

    BoundedWindow firstWindow = window(0, 10);
    TimestampedValue<KV<String, Iterable<String>>> firstOutput =
        getOnlyElementInWindow(outputs, firstWindow);
    assertThat(firstOutput.getValue().getValue(), containsInAnyOrder("v1", "v2"));
    assertThat(firstOutput.getTimestamp(), equalTo(firstWindow.maxTimestamp()));

    BoundedWindow secondWindow = window(10, 20);
    TimestampedValue<KV<String, Iterable<String>>> secondOutput =
        getOnlyElementInWindow(outputs, secondWindow);
    assertThat(secondOutput.getValue().getValue(), contains("v3"));
    assertThat(secondOutput.getTimestamp(), equalTo(secondWindow.maxTimestamp()));
  }
Beispiel #7
0
    /**
     * Runs the elements and timers of one keyed work item through a new {@link ReduceFnRunner}
     * that writes into a freshly created output bundle, then persists the runner's state.
     */
    @Override
    public void processElement(WindowedValue<KeyedWorkItem<K, V>> element) throws Exception {
      KeyedWorkItem<K, V> keyedWork = element.getValue();
      K key = keyedWork.key();

      // Every call creates its own output bundle and records it in outputBundles.
      UncommittedBundle<KV<K, Iterable<V>>> outputBundle =
          evaluationContext.createBundle(application.getOutput());
      outputBundles.add(outputBundle);

      CopyOnAccessInMemoryStateInternals<K> state =
          (CopyOnAccessInMemoryStateInternals<K>) stepContext.stateInternals();
      DirectTimerInternals timers = stepContext.timerInternals();

      // The trigger of the windowing strategy is converted into an executable state machine.
      ExecutableTriggerStateMachine triggerMachine =
          ExecutableTriggerStateMachine.create(
              TriggerStateMachines.stateMachineForTrigger(windowingStrategy.getTrigger()));

      ReduceFnRunner<K, V, Iterable<V>, BoundedWindow> runner =
          new ReduceFnRunner<>(
              key,
              windowingStrategy,
              triggerMachine,
              state,
              timers,
              new DirectWindowingInternals<>(outputBundle),
              droppedDueToClosedWindow,
              reduceFn,
              evaluationContext.getPipelineOptions());

      // Elements are filtered through dropExpiredWindows before reaching the runner.
      runner.processElements(dropExpiredWindows(key, keyedWork.elementsIterable(), timers));
      for (TimerData firedTimer : keyedWork.timersIterable()) {
        runner.onTimer(firedTimer);
      }
      runner.persist();
    }
Beispiel #8
0
    /**
     * Checks that the windowing strategy this transform would produce for {@code input} is
     * complete. The checks only apply when a trigger other than {@link DefaultTrigger} has been
     * specified.
     *
     * @throws IllegalArgumentException if such a trigger is specified without allowed lateness
     *     (unless the window function is {@link GlobalWindows}), or without an accumulation mode
     */
    @Override
    public void validate(PCollection<T> input) {
      WindowingStrategy<?, ?> effective = getOutputStrategyInternal(input.getWindowingStrategy());

      boolean hasCustomTrigger =
          effective.isTriggerSpecified() && !(effective.getTrigger() instanceof DefaultTrigger);
      if (!hasCustomTrigger) {
        // No explicit non-default trigger: nothing further to validate.
        return;
      }

      boolean latenessRequirementMet =
          effective.getWindowFn() instanceof GlobalWindows
              || effective.isAllowedLatenessSpecified();
      if (!latenessRequirementMet) {
        throw new IllegalArgumentException(
            "Except when using GlobalWindows,"
                + " calling .triggering() to specify a trigger requires that the allowed lateness be"
                + " specified using .withAllowedLateness() to set the upper bound on how late data"
                + " can arrive before being dropped. See Javadoc for more details.");
      }

      if (!effective.isModeSpecified()) {
        throw new IllegalArgumentException(
            "Calling .triggering() to specify a trigger requires that the accumulation mode be"
                + " specified using .discardingFiredPanes() or .accumulatingFiredPanes()."
                + " See Javadoc for more details.");
      }
    }
Beispiel #9
0
  /**
   * Tests that, for a short sequence of elements sharing one key, the given GABW implementation
   * groups and combines them per sliding window.
   *
   * <p>Every input element here belongs to more than one window.
   */
  public static void combinesElementsInSlidingWindows(
      GroupAlsoByWindowsDoFnFactory<String, Long, Long> gabwFactory,
      CombineFn<Long, ?, Long> combineFn)
      throws Exception {

    WindowingStrategy<?, IntervalWindow> slidingWindows =
        WindowingStrategy.of(SlidingWindows.of(Duration.millis(20)).every(Duration.millis(10)))
            .withOutputTimeFn(OutputTimeFns.outputAtEarliestInputTimestamp());

    List<WindowedValue<KV<String, Long>>> outputs =
        runGABW(
            gabwFactory,
            slidingWindows,
            "k",
            WindowedValue.of(
                1L,
                new Instant(5),
                Arrays.asList(window(-10, 10), window(0, 20)),
                PaneInfo.NO_FIRING),
            WindowedValue.of(
                2L,
                new Instant(15),
                Arrays.asList(window(0, 20), window(10, 30)),
                PaneInfo.NO_FIRING),
            WindowedValue.of(
                4L,
                new Instant(18),
                Arrays.asList(window(0, 20), window(10, 30)),
                PaneInfo.NO_FIRING));

    // Three distinct windows received input.
    assertThat(outputs, hasSize(3));

    TimestampedValue<KV<String, Long>> earliestOutput =
        getOnlyElementInWindow(outputs, window(-10, 10));
    KV<String, Long> earliestKv = earliestOutput.getValue();
    assertThat(earliestKv.getKey(), equalTo("k"));
    assertThat(earliestKv.getValue(), equalTo(combineFn.apply(ImmutableList.of(1L))));
    assertThat(earliestOutput.getTimestamp(), equalTo(new Instant(5L)));

    TimestampedValue<KV<String, Long>> middleOutput =
        getOnlyElementInWindow(outputs, window(0, 20));
    KV<String, Long> middleKv = middleOutput.getValue();
    assertThat(middleKv.getKey(), equalTo("k"));
    assertThat(middleKv.getValue(), equalTo(combineFn.apply(ImmutableList.of(1L, 2L, 4L))));
    // The WindowFn adjusts the timestamp so that it exceeds the end of the prior sliding window.
    assertThat(middleOutput.getTimestamp(), equalTo(new Instant(10L)));

    TimestampedValue<KV<String, Long>> latestOutput =
        getOnlyElementInWindow(outputs, window(10, 30));
    KV<String, Long> latestKv = latestOutput.getValue();
    assertThat(latestKv.getKey(), equalTo("k"));
    assertThat(latestKv.getValue(), equalTo(combineFn.apply(ImmutableList.of(2L, 4L))));
    // The WindowFn adjusts the timestamp so that it exceeds the end of the prior sliding window.
    assertThat(latestOutput.getTimestamp(), equalTo(new Instant(20L)));
  }
Beispiel #10
0
  /**
   * Tests that, for a short sequence of elements sharing one key, the given GABW implementation
   * groups them into sliding windows, with output timestamps derived via
   * {@link OutputTimeFns#outputAtEarliestInputTimestamp()}.
   *
   * <p>Every input element here belongs to more than one window.
   */
  public static void groupsElementsIntoSlidingWindowsWithMinTimestamp(
      GroupAlsoByWindowsDoFnFactory<String, String, Iterable<String>> gabwFactory)
      throws Exception {

    WindowingStrategy<?, IntervalWindow> slidingWindows =
        WindowingStrategy.of(SlidingWindows.of(Duration.millis(20)).every(Duration.millis(10)))
            .withOutputTimeFn(OutputTimeFns.outputAtEarliestInputTimestamp());

    List<WindowedValue<KV<String, Iterable<String>>>> outputs =
        runGABW(
            gabwFactory,
            slidingWindows,
            "key",
            WindowedValue.of(
                "v1",
                new Instant(5),
                Arrays.asList(window(-10, 10), window(0, 20)),
                PaneInfo.NO_FIRING),
            WindowedValue.of(
                "v2",
                new Instant(15),
                Arrays.asList(window(0, 20), window(10, 30)),
                PaneInfo.NO_FIRING));

    // Three distinct windows received input.
    assertThat(outputs, hasSize(3));

    TimestampedValue<KV<String, Iterable<String>>> earliestOutput =
        getOnlyElementInWindow(outputs, window(-10, 10));
    assertThat(earliestOutput.getValue().getValue(), contains("v1"));
    assertThat(earliestOutput.getTimestamp(), equalTo(new Instant(5)));

    TimestampedValue<KV<String, Iterable<String>>> middleOutput =
        getOnlyElementInWindow(outputs, window(0, 20));
    assertThat(middleOutput.getValue().getValue(), containsInAnyOrder("v1", "v2"));
    // The WindowFn adjusts the timestamp so that it exceeds the end of the prior sliding window.
    assertThat(middleOutput.getTimestamp(), equalTo(new Instant(10)));

    TimestampedValue<KV<String, Iterable<String>>> latestOutput =
        getOnlyElementInWindow(outputs, window(10, 30));
    assertThat(latestOutput.getValue().getValue(), contains("v2"));
    // The WindowFn adjusts the timestamp so that it exceeds the end of the prior sliding window.
    assertThat(latestOutput.getTimestamp(), equalTo(new Instant(20)));
  }
Beispiel #11
0
  /**
   * Tests that the provided GABW implementation produces no output when given empty input under
   * a fixed-windows {@link WindowingStrategy}.
   *
   * <p>The key, input and output types are left generic, since no element is ever processed.
   */
  public static <K, InputT, OutputT> void emptyInputEmptyOutput(
      GroupAlsoByWindowsDoFnFactory<K, InputT, OutputT> gabwFactory) throws Exception {

    WindowingStrategy<?, IntervalWindow> fixedWindows =
        WindowingStrategy.of(FixedWindows.of(Duration.millis(10)));

    // The key is handed eagerly to the StateInternalsFactory and therefore must be non-null,
    // even though it should never actually be used.
    @SuppressWarnings("unchecked")
    K placeholderKey = (K) "this key should never be used";

    List<WindowedValue<InputT>> noInput = Collections.<WindowedValue<InputT>>emptyList();
    List<WindowedValue<KV<K, OutputT>>> outputs =
        runGABW(gabwFactory, fixedWindows, placeholderKey, noInput);

    assertThat(outputs, hasSize(0));
  }
Beispiel #12
0
 /**
  * Derives the output windowing strategy from {@code inputStrategy}.
  *
  * <p>Starting from the input strategy, each of {@code windowFn}, {@code trigger}, {@code mode},
  * {@code allowedLateness}, {@code closingBehavior} and {@code outputTimeFn} that is non-null is
  * applied, in that order, through the matching {@code with...} method. Values that are null leave
  * the corresponding setting of the input strategy untouched.
  *
  * <p>NOTE(review): an earlier comment on this method referred to a rawtype cast of the
  * OutputTimeFn; no such cast is present in the code below - confirm whether one is still needed.
  */
 public WindowingStrategy<?, ?> getOutputStrategyInternal(
     WindowingStrategy<?, ?> inputStrategy) {
   WindowingStrategy<?, ?> strategy = inputStrategy;

   if (windowFn != null) {
     strategy = strategy.withWindowFn(windowFn);
   }

   if (trigger != null) {
     strategy = strategy.withTrigger(trigger);
   }

   if (mode != null) {
     strategy = strategy.withMode(mode);
   }

   if (allowedLateness != null) {
     strategy = strategy.withAllowedLateness(allowedLateness);
   }

   if (closingBehavior != null) {
     strategy = strategy.withClosingBehavior(closingBehavior);
   }

   if (outputTimeFn != null) {
     strategy = strategy.withOutputTimeFn(outputTimeFn);
   }

   return strategy;
 }
    /**
     * Translates a multi-output {@code ParDo} into a single Flink {@link MapPartitionOperator}
     * that emits {@link RawUnionValue}s, then calls {@code pruneOutput} once per declared output
     * with that output's index.
     *
     * @throws IllegalStateException if the transform has no outputs (also raised by
     *     {@code checkState} when an output is not a {@link PCollection})
     */
    @Override
    public void translateNode(
        ParDo.BoundMulti<InputT, OutputT> transform, FlinkBatchTranslationContext context) {
      DoFn<InputT, OutputT> doFn = transform.getFn();
      // presumably fails for DoFns that use state or timers; verify against the helper
      rejectStateAndTimers(doFn);
      DataSet<WindowedValue<InputT>> inputDataSet =
          context.getInputDataSet(context.getInput(transform));

      List<TaggedPValue> outputs = context.getOutputs(transform);

      // Assign an integer index to every output tag.
      Map<TupleTag<?>, Integer> outputMap = Maps.newHashMap();
      // put the main output at index 0, FlinkMultiOutputDoFnFunction expects this
      outputMap.put(transform.getMainOutputTag(), 0);
      int count = 1;
      for (TaggedPValue taggedValue : outputs) {
        // Remaining tags get increasing indices in iteration order; known tags are skipped.
        if (!outputMap.containsKey(taggedValue.getTag())) {
          outputMap.put(taggedValue.getTag(), count++);
        }
      }

      // assume that the windowing strategy is the same for all outputs
      // (the loop below keeps the strategy of the last output it visits)
      WindowingStrategy<?, ?> windowingStrategy = null;

      // collect all output Coders and create a UnionCoder for our tagged outputs
      // NOTE(review): coders are added in `outputs` iteration order, whereas outputMap pins the
      // main tag to index 0; the two orders only agree if the main output is iterated first -
      // confirm against how the union tag is used downstream.
      List<Coder<?>> outputCoders = Lists.newArrayList();
      for (TaggedPValue taggedValue : outputs) {
        checkState(
            taggedValue.getValue() instanceof PCollection,
            "Within ParDo, got a non-PCollection output %s of type %s",
            taggedValue.getValue(),
            taggedValue.getValue().getClass().getSimpleName());
        PCollection<?> coll = (PCollection<?>) taggedValue.getValue();
        outputCoders.add(coll.getCoder());
        windowingStrategy = coll.getWindowingStrategy();
      }

      // Still null only when the loop above never ran, i.e. there were no outputs.
      if (windowingStrategy == null) {
        throw new IllegalStateException("No outputs defined.");
      }

      UnionCoder unionCoder = UnionCoder.of(outputCoders);

      // Type information for the tagged union elements, windowed with the strategy's window coder.
      TypeInformation<WindowedValue<RawUnionValue>> typeInformation =
          new CoderTypeInformation<>(
              WindowedValue.getFullCoder(
                  unionCoder, windowingStrategy.getWindowFn().windowCoder()));

      List<PCollectionView<?>> sideInputs = transform.getSideInputs();

      // construct a map from side input to WindowingStrategy so that
      // the OldDoFn runner can map main-input windows to side input windows
      Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputStrategies = new HashMap<>();
      for (PCollectionView<?> sideInput : sideInputs) {
        sideInputStrategies.put(sideInput, sideInput.getWindowingStrategyInternal());
      }

      // The wrapper is constructed through its raw type, hence the unchecked suppression.
      @SuppressWarnings("unchecked")
      FlinkMultiOutputDoFnFunction<InputT, OutputT> doFnWrapper =
          new FlinkMultiOutputDoFnFunction(
              doFn,
              windowingStrategy,
              sideInputStrategies,
              context.getPipelineOptions(),
              outputMap);

      MapPartitionOperator<WindowedValue<InputT>, WindowedValue<RawUnionValue>> taggedDataSet =
          new MapPartitionOperator<>(
              inputDataSet, typeInformation, doFnWrapper, transform.getName());

      transformSideInputs(sideInputs, taggedDataSet, context);

      // Hand each declared output, together with its index, to pruneOutput.
      for (TaggedPValue output : outputs) {
        pruneOutput(
            taggedDataSet,
            context,
            outputMap.get(output.getTag()),
            (PCollection) output.getValue());
      }
    }
    /**
     * Translates {@code Combine.PerKey} into Flink DataSet operators.
     *
     * <p>For a non-merging window function the translation is a {@link GroupCombineOperator}
     * (partial combine into {@code AccumT}) followed by a re-grouping on the key and a
     * {@link GroupReduceOperator} producing {@code OutputT}. For a merging window function only a
     * single {@link GroupReduceOperator} is used, and only {@link IntervalWindow}s are supported.
     *
     * @throws RuntimeException wrapping a {@link CannotProvideCoderException} if the accumulator
     *     coder cannot be obtained
     * @throws UnsupportedOperationException if the window function is merging and its window
     *     coder is not the {@link IntervalWindow} coder
     */
    @Override
    @SuppressWarnings("unchecked")
    public void translateNode(
        Combine.PerKey<K, InputT, OutputT> transform, FlinkBatchTranslationContext context) {
      DataSet<WindowedValue<KV<K, InputT>>> inputDataSet =
          context.getInputDataSet(context.getInput(transform));

      CombineFnBase.PerKeyCombineFn<K, InputT, AccumT, OutputT> combineFn =
          (CombineFnBase.PerKeyCombineFn<K, InputT, AccumT, OutputT>) transform.getFn();

      KvCoder<K, InputT> inputCoder = (KvCoder<K, InputT>) context.getInput(transform).getCoder();

      Coder<AccumT> accumulatorCoder;

      // Ask the combine function for its accumulator coder, based on the input key/value coders.
      try {
        accumulatorCoder =
            combineFn.getAccumulatorCoder(
                context.getInput(transform).getPipeline().getCoderRegistry(),
                inputCoder.getKeyCoder(),
                inputCoder.getValueCoder());
      } catch (CannotProvideCoderException e) {
        throw new RuntimeException(e);
      }

      WindowingStrategy<?, ?> windowingStrategy =
          context.getInput(transform).getWindowingStrategy();

      // Type information for the intermediate (key, accumulator) elements.
      // Only the non-merging branch below uses it.
      TypeInformation<WindowedValue<KV<K, AccumT>>> partialReduceTypeInfo =
          context.getTypeInfo(
              KvCoder.of(inputCoder.getKeyCoder(), accumulatorCoder), windowingStrategy);

      // Only the non-merging branch below uses this grouping; the merging branch builds its own.
      Grouping<WindowedValue<KV<K, InputT>>> inputGrouping =
          inputDataSet.groupBy(new KvKeySelector<InputT, K>(inputCoder.getKeyCoder()));

      // construct a map from side input to WindowingStrategy so that
      // the OldDoFn runner can map main-input windows to side input windows
      Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputStrategies = new HashMap<>();
      for (PCollectionView<?> sideInput : transform.getSideInputs()) {
        sideInputStrategies.put(sideInput, sideInput.getWindowingStrategyInternal());
      }

      if (windowingStrategy.getWindowFn().isNonMerging()) {
        WindowingStrategy<?, BoundedWindow> boundedStrategy =
            (WindowingStrategy<?, BoundedWindow>) windowingStrategy;

        FlinkPartialReduceFunction<K, InputT, AccumT, ?> partialReduceFunction =
            new FlinkPartialReduceFunction<>(
                combineFn, boundedStrategy, sideInputStrategies, context.getPipelineOptions());

        FlinkReduceFunction<K, AccumT, OutputT, ?> reduceFunction =
            new FlinkReduceFunction<>(
                combineFn, boundedStrategy, sideInputStrategies, context.getPipelineOptions());

        // Partially GroupReduce the values into the intermediate format AccumT (combine)
        GroupCombineOperator<WindowedValue<KV<K, InputT>>, WindowedValue<KV<K, AccumT>>>
            groupCombine =
                new GroupCombineOperator<>(
                    inputGrouping,
                    partialReduceTypeInfo,
                    partialReduceFunction,
                    "GroupCombine: " + transform.getName());

        transformSideInputs(transform.getSideInputs(), groupCombine, context);

        TypeInformation<WindowedValue<KV<K, OutputT>>> reduceTypeInfo =
            context.getTypeInfo(context.getOutput(transform));

        // Group the partial results by key again before the final reduce.
        Grouping<WindowedValue<KV<K, AccumT>>> intermediateGrouping =
            groupCombine.groupBy(new KvKeySelector<AccumT, K>(inputCoder.getKeyCoder()));

        // Fully reduce the values and create output format OutputT
        GroupReduceOperator<WindowedValue<KV<K, AccumT>>, WindowedValue<KV<K, OutputT>>>
            outputDataSet =
                new GroupReduceOperator<>(
                    intermediateGrouping, reduceTypeInfo, reduceFunction, transform.getName());

        transformSideInputs(transform.getSideInputs(), outputDataSet, context);

        context.setOutputDataSet(context.getOutput(transform), outputDataSet);

      } else {
        // Merging window functions are only handled when their windows are IntervalWindows.
        if (!windowingStrategy.getWindowFn().windowCoder().equals(IntervalWindow.getCoder())) {
          throw new UnsupportedOperationException(
              "Merging WindowFn with windows other than IntervalWindow are not supported.");
        }

        // for merging windows we can't do a pre-shuffle combine step since
        // elements would not be in their correct windows for side-input access

        WindowingStrategy<?, IntervalWindow> intervalStrategy =
            (WindowingStrategy<?, IntervalWindow>) windowingStrategy;

        FlinkMergingNonShuffleReduceFunction<K, InputT, AccumT, OutputT, ?> reduceFunction =
            new FlinkMergingNonShuffleReduceFunction<>(
                combineFn, intervalStrategy, sideInputStrategies, context.getPipelineOptions());

        TypeInformation<WindowedValue<KV<K, OutputT>>> reduceTypeInfo =
            context.getTypeInfo(context.getOutput(transform));

        // Built the same way as inputGrouping above.
        Grouping<WindowedValue<KV<K, InputT>>> grouping =
            inputDataSet.groupBy(new KvKeySelector<InputT, K>(inputCoder.getKeyCoder()));

        // Fully reduce the values and create output format OutputT
        GroupReduceOperator<WindowedValue<KV<K, InputT>>, WindowedValue<KV<K, OutputT>>>
            outputDataSet =
                new GroupReduceOperator<>(
                    grouping, reduceTypeInfo, reduceFunction, transform.getName());

        transformSideInputs(transform.getSideInputs(), outputDataSet, context);

        context.setOutputDataSet(context.getOutput(transform), outputDataSet);
      }
    }
    /**
     * Translates {@code GroupByKey} into Flink DataSet operators by treating it as a per-key
     * combine with the {@code Concatenate} combine function, whose accumulator and output are both
     * {@code List<InputT>}.
     *
     * <p>The pipeline is a {@link GroupCombineOperator} (partial reduce), a re-grouping on the
     * key, and a {@link GroupReduceOperator}. The reduce functions used depend on whether the
     * window function is merging; no side inputs are passed to them.
     *
     * @throws RuntimeException wrapping a {@link CannotProvideCoderException} if the accumulator
     *     coder cannot be obtained
     * @throws UnsupportedOperationException if the window function is merging and its window
     *     coder is not the {@link IntervalWindow} coder
     */
    @Override
    public void translateNode(
        GroupByKey<K, InputT> transform, FlinkBatchTranslationContext context) {

      // for now, this is copied from the Combine.PerKey translator. Once we have the new runner API
      // we can replace GroupByKey by a Combine.PerKey with the Concatenate CombineFn

      DataSet<WindowedValue<KV<K, InputT>>> inputDataSet =
          context.getInputDataSet(context.getInput(transform));

      Combine.KeyedCombineFn<K, InputT, List<InputT>, List<InputT>> combineFn =
          new Concatenate<InputT>().asKeyedFn();

      KvCoder<K, InputT> inputCoder = (KvCoder<K, InputT>) context.getInput(transform).getCoder();

      Coder<List<InputT>> accumulatorCoder;

      // Ask the combine function for its accumulator coder, based on the input key/value coders.
      try {
        accumulatorCoder =
            combineFn.getAccumulatorCoder(
                context.getInput(transform).getPipeline().getCoderRegistry(),
                inputCoder.getKeyCoder(),
                inputCoder.getValueCoder());
      } catch (CannotProvideCoderException e) {
        throw new RuntimeException(e);
      }

      WindowingStrategy<?, ?> windowingStrategy =
          context.getInput(transform).getWindowingStrategy();

      // Type information for (key, list) elements. It is used for both the intermediate and the
      // final data set below, since accumulator and output are both List<InputT>.
      TypeInformation<WindowedValue<KV<K, List<InputT>>>> partialReduceTypeInfo =
          new CoderTypeInformation<>(
              WindowedValue.getFullCoder(
                  KvCoder.of(inputCoder.getKeyCoder(), accumulatorCoder),
                  windowingStrategy.getWindowFn().windowCoder()));

      Grouping<WindowedValue<KV<K, InputT>>> inputGrouping =
          inputDataSet.groupBy(new KvKeySelector<InputT, K>(inputCoder.getKeyCoder()));

      // Assigned in exactly one of the two branches below.
      FlinkPartialReduceFunction<K, InputT, List<InputT>, ?> partialReduceFunction;
      FlinkReduceFunction<K, List<InputT>, List<InputT>, ?> reduceFunction;

      if (windowingStrategy.getWindowFn().isNonMerging()) {
        @SuppressWarnings("unchecked")
        WindowingStrategy<?, BoundedWindow> boundedStrategy =
            (WindowingStrategy<?, BoundedWindow>) windowingStrategy;

        // An empty side-input map is passed to both functions.
        partialReduceFunction =
            new FlinkPartialReduceFunction<>(
                combineFn,
                boundedStrategy,
                Collections.<PCollectionView<?>, WindowingStrategy<?, ?>>emptyMap(),
                context.getPipelineOptions());

        reduceFunction =
            new FlinkReduceFunction<>(
                combineFn,
                boundedStrategy,
                Collections.<PCollectionView<?>, WindowingStrategy<?, ?>>emptyMap(),
                context.getPipelineOptions());

      } else {
        // Merging window functions are only handled when their windows are IntervalWindows.
        if (!windowingStrategy.getWindowFn().windowCoder().equals(IntervalWindow.getCoder())) {
          throw new UnsupportedOperationException(
              "Merging WindowFn with windows other than IntervalWindow are not supported.");
        }

        @SuppressWarnings("unchecked")
        WindowingStrategy<?, IntervalWindow> intervalStrategy =
            (WindowingStrategy<?, IntervalWindow>) windowingStrategy;

        // Merging variants of the partial and final reduce functions, again without side inputs.
        partialReduceFunction =
            new FlinkMergingPartialReduceFunction<>(
                combineFn,
                intervalStrategy,
                Collections.<PCollectionView<?>, WindowingStrategy<?, ?>>emptyMap(),
                context.getPipelineOptions());

        reduceFunction =
            new FlinkMergingReduceFunction<>(
                combineFn,
                intervalStrategy,
                Collections.<PCollectionView<?>, WindowingStrategy<?, ?>>emptyMap(),
                context.getPipelineOptions());
      }

      // Partially GroupReduce the values into the intermediate format AccumT (combine)
      GroupCombineOperator<WindowedValue<KV<K, InputT>>, WindowedValue<KV<K, List<InputT>>>>
          groupCombine =
              new GroupCombineOperator<>(
                  inputGrouping,
                  partialReduceTypeInfo,
                  partialReduceFunction,
                  "GroupCombine: " + transform.getName());

      // Group the partial results by key again before the final reduce.
      Grouping<WindowedValue<KV<K, List<InputT>>>> intermediateGrouping =
          groupCombine.groupBy(new KvKeySelector<List<InputT>, K>(inputCoder.getKeyCoder()));

      // Fully reduce the values and create the output format (here List<InputT> per key)
      GroupReduceOperator<WindowedValue<KV<K, List<InputT>>>, WindowedValue<KV<K, List<InputT>>>>
          outputDataSet =
              new GroupReduceOperator<>(
                  intermediateGrouping, partialReduceTypeInfo, reduceFunction, transform.getName());

      context.setOutputDataSet(context.getOutput(transform), outputDataSet);
    }