Пример #1
0
  /**
   * Reduces the contents of each window with the given reduce function. The function is applied
   * on every evaluation of the window, and the reduced value is emitted into a regular
   * non-windowed stream.
   *
   * <p>This window will try and pre-aggregate data as much as the window policies permit. For
   * example, tumbling time windows can perfectly pre-aggregate the data, so that only a single
   * element needs to be stored. Sliding time windows pre-aggregate on the granularity of the
   * slide interval, so a few elements are stored (one per slide interval). Custom windows may
   * not be able to pre-aggregate, or may need to store extra values in an aggregation tree.
   *
   * <p>If the fast time operator can be used it is returned directly; otherwise a trigger-based
   * window operator is created and its parallelism is fixed to 1.
   *
   * @param function The reduce function; it must not be a {@code RichFunction}.
   * @return The data stream that is the result of applying the reduce function to the window.
   * @throws UnsupportedOperationException if {@code function} is a {@code RichFunction}.
   */
  public SingleOutputStreamOperator<T, ?> reduce(ReduceFunction<T> function) {
    if (function instanceof RichFunction) {
      throw new UnsupportedOperationException(
          "ReduceFunction of reduce can not be a RichFunction. "
              + "Please use apply(ReduceFunction, WindowFunction) instead.");
    }

    // run the closure cleaner over the user function
    function = input.getExecutionEnvironment().clean(function);

    // NOTE: the call-location lookup is intentionally kept directly inside this method
    final String reduceLabel = "Reduce at " + Utils.getCallLocationName();

    // prefer the specialized operator whenever it is applicable
    SingleOutputStreamOperator<T, ?> fastPath =
        createFastTimeOperatorIfValid(function, input.getType(), reduceLabel);
    if (fastPath != null) {
      return fastPath;
    }

    final boolean stampWithProcessingTime =
        TimeCharacteristic.ProcessingTime
            == input.getExecutionEnvironment().getStreamTimeCharacteristic();

    final OneInputStreamOperator<T, T> windowOperator;
    if (evictor == null) {
      // no evictor: the buffer itself can reduce elements as they arrive
      windowOperator =
          new NonKeyedWindowOperator<>(
                  windowAssigner,
                  windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()),
                  new PreAggregatingHeapWindowBuffer.Factory<>(function),
                  new ReduceIterableAllWindowFunction<W, T>(function),
                  trigger)
              .enableSetProcessingTime(stampWithProcessingTime);
    } else {
      // with an evictor the individual elements are buffered and reduced on evaluation
      windowOperator =
          new EvictingNonKeyedWindowOperator<>(
                  windowAssigner,
                  windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()),
                  new HeapWindowBuffer.Factory<T>(),
                  new ReduceIterableAllWindowFunction<W, T>(function),
                  trigger,
                  evictor)
              .enableSetProcessingTime(stampWithProcessingTime);
    }

    final String operatorName =
        "NonParallelTriggerWindow(" + windowAssigner + ", " + trigger + ", " + reduceLabel + ")";

    return input.transform(operatorName, input.getType(), windowOperator).setParallelism(1);
  }
Пример #2
0
  /**
   * Stores the element in every window it is assigned to for its key and evaluates the trigger
   * of each of those windows.
   *
   * <p>When {@code setProcessingTime} is enabled, the element's timestamp is first replaced with
   * the current system time. The per-key window map and the per-window context are created
   * lazily the first time they are needed.
   *
   * @param element The incoming stream record.
   * @throws Exception Propagated from window assignment, key extraction, buffering or trigger
   *     handling.
   */
  @Override
  @SuppressWarnings("unchecked")
  public final void processElement(StreamRecord<IN> element) throws Exception {
    if (setProcessingTime) {
      // overwrite the record's timestamp with the current wall-clock time
      element.replace(element.getValue(), System.currentTimeMillis());
    }

    final Collection<W> assignedWindows =
        windowAssigner.assignWindows(element.getValue(), element.getTimestamp());

    final K currentKey = keySelector.getKey(element.getValue());

    // look up (or lazily create) the window contexts that belong to this key
    Map<W, Context> contextsOfKey = windows.get(currentKey);
    if (contextsOfKey == null) {
      contextsOfKey = new HashMap<>();
      windows.put(currentKey, contextsOfKey);
    }

    for (W assigned : assignedWindows) {
      Context windowContext = contextsOfKey.get(assigned);
      if (windowContext == null) {
        WindowBuffer<IN> freshBuffer = windowBufferFactory.create();
        windowContext = new Context(currentKey, assigned, freshBuffer);
        contextsOfKey.put(assigned, windowContext);
      }

      // buffer first, then let the trigger decide what happens with the window
      windowContext.windowBuffer.storeElement(element);
      processTriggerResult(windowContext.onElement(element), currentKey, assigned);
    }
  }
Пример #3
0
  /**
   * Applies the given window function to each window. The window function is called for each
   * evaluation of the window, and its output is interpreted as a regular non-windowed stream.
   *
   * <p>Arriving data is pre-aggregated using the given pre-aggregation reducer. The resulting
   * operator has its parallelism fixed to 1.
   *
   * @param preAggregator The reduce function that is used for pre-aggregation; it must not be a
   *     {@code RichFunction}.
   * @param function The window function.
   * @param resultType Type information for the result type of the window function
   * @param <R> The result type of the window function.
   * @return The data stream that is the result of applying the window function to the window.
   * @throws UnsupportedOperationException if {@code preAggregator} is a {@code RichFunction}.
   */
  public <R> SingleOutputStreamOperator<R, ?> apply(
      ReduceFunction<T> preAggregator,
      AllWindowFunction<T, R, W> function,
      TypeInformation<R> resultType) {
    if (preAggregator instanceof RichFunction) {
      throw new UnsupportedOperationException("Pre-aggregator of apply can not be a RichFunction.");
    }

    // run the closure cleaner over both user functions
    function = input.getExecutionEnvironment().clean(function);
    preAggregator = input.getExecutionEnvironment().clean(preAggregator);

    // NOTE: the call-location lookup is intentionally kept directly inside this method
    final String applyLabel = "WindowApply at " + Utils.getCallLocationName();

    final String operatorName =
        "TriggerWindow(" + windowAssigner + ", " + trigger + ", " + applyLabel + ")";

    final boolean stampWithProcessingTime =
        TimeCharacteristic.ProcessingTime
            == input.getExecutionEnvironment().getStreamTimeCharacteristic();

    final OneInputStreamOperator<T, R> windowOperator;
    if (evictor == null) {
      // no evictor: the buffer reduces elements with the pre-aggregator as they arrive
      windowOperator =
          new NonKeyedWindowOperator<>(
                  windowAssigner,
                  windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()),
                  new PreAggregatingHeapWindowBuffer.Factory<>(preAggregator),
                  new ReduceApplyAllWindowFunction<>(preAggregator, function),
                  trigger)
              .enableSetProcessingTime(stampWithProcessingTime);
    } else {
      // with an evictor the individual elements are buffered
      windowOperator =
          new EvictingNonKeyedWindowOperator<>(
                  windowAssigner,
                  windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()),
                  new HeapWindowBuffer.Factory<T>(),
                  new ReduceApplyAllWindowFunction<>(preAggregator, function),
                  trigger,
                  evictor)
              .enableSetProcessingTime(stampWithProcessingTime);
    }

    return input.transform(operatorName, resultType, windowOperator).setParallelism(1);
  }
Пример #4
0
  /**
   * Applies the given window function to each window. The window function is called for each
   * evaluation of the window, and its output is interpreted as a regular non-windowed stream.
   *
   * <p>Note that this function requires that all data in the windows is buffered until the window
   * is evaluated, as the function provides no means of pre-aggregation.
   *
   * <p>If the fast time operator can be used it is returned directly; otherwise a trigger-based
   * window operator is created and its parallelism is fixed to 1.
   *
   * @param function The window function.
   * @param resultType Type information for the result type of the window function.
   * @param <R> The result type of the window function.
   * @return The data stream that is the result of applying the window function to the window.
   */
  public <R> SingleOutputStreamOperator<R, ?> apply(
      AllWindowFunction<Iterable<T>, R, W> function, TypeInformation<R> resultType) {
    // run the closure cleaner over the user function
    function = input.getExecutionEnvironment().clean(function);

    // NOTE: the call-location lookup is intentionally kept directly inside this method
    final String applyLabel = "WindowApply at " + Utils.getCallLocationName();

    // prefer the specialized operator whenever it is applicable
    SingleOutputStreamOperator<R, ?> fastPath =
        createFastTimeOperatorIfValid(function, resultType, applyLabel);
    if (fastPath != null) {
      return fastPath;
    }

    final boolean stampWithProcessingTime =
        TimeCharacteristic.ProcessingTime
            == input.getExecutionEnvironment().getStreamTimeCharacteristic();

    final NonKeyedWindowOperator<T, R, W> windowOperator;
    if (evictor == null) {
      windowOperator =
          new NonKeyedWindowOperator<>(
                  windowAssigner,
                  windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()),
                  new HeapWindowBuffer.Factory<T>(),
                  function,
                  trigger)
              .enableSetProcessingTime(stampWithProcessingTime);
    } else {
      windowOperator =
          new EvictingNonKeyedWindowOperator<>(
                  windowAssigner,
                  windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()),
                  new HeapWindowBuffer.Factory<T>(),
                  function,
                  trigger,
                  evictor)
              .enableSetProcessingTime(stampWithProcessingTime);
    }

    final String operatorName =
        "TriggerWindow(" + windowAssigner + ", " + trigger + ", " + applyLabel + ")";

    return input.transform(operatorName, resultType, windowOperator).setParallelism(1);
  }
Пример #5
0
 /**
  * Removes the cleanup timer that was set for the given window. Depending on whether the window
  * assigner works in event time, either the event-time or the processing-time timer at the
  * window's cleanup time is deleted.
  *
  * @param window the window whose cleanup timer is removed
  */
 protected void deleteCleanupTimer(W window) {
   final long timerTimestamp = cleanupTime(window);
   if (!windowAssigner.isEventTime()) {
     context.deleteProcessingTimeTimer(timerTimestamp);
     return;
   }
   context.deleteEventTimeTimer(timerTimestamp);
 }
Пример #6
0
 /**
  * Registers a timer at the cleanup time of the given window. Depending on whether the window
  * assigner works in event time, either an event-time or a processing-time timer is registered.
  *
  * @param window the window whose contents are to be cleaned up
  */
 protected void registerCleanupTimer(W window) {
   final long timerTimestamp = cleanupTime(window);
   if (!windowAssigner.isEventTime()) {
     context.registerProcessingTimeTimer(timerTimestamp);
     return;
   }
   context.registerEventTimeTimer(timerTimestamp);
 }
Пример #7
0
  /**
   * Handles a processing-time callback: drains every queued processing-time timer whose
   * timestamp is not after {@code time}, evaluates the trigger for the timer's key and window,
   * and cleans the window up if the trigger purges or, for non-event-time assigners, if the
   * timer marks the window's cleanup time.
   *
   * @param time The processing time for which this callback was invoked.
   * @throws Exception Propagated from state access, trigger evaluation or window cleanup.
   */
  @Override
  public void trigger(long time) throws Exception {
    // Remove information about the triggering task
    processingTimeTimerFutures.remove(time);
    processingTimeTimerTimestamps.remove(time, processingTimeTimerTimestamps.count(time));

    // The head of the queue is re-read after every iteration because it changes as timers are
    // removed below.
    for (Timer<K, W> due = processingTimeTimersQueue.peek();
        due != null && due.timestamp <= time;
        due = processingTimeTimersQueue.peek()) {

      processingTimeTimers.remove(due);
      processingTimeTimersQueue.remove();

      context.key = due.key;
      context.window = due.window;
      setKeyContext(due.key);

      final AppendingState<IN, ACC> windowState;
      final MergingWindowSet<W> mergingWindows;

      if (!(windowAssigner instanceof MergingWindowAssigner)) {
        mergingWindows = null;
        windowState =
            getPartitionedState(context.window, windowSerializer, windowStateDescriptor);
      } else {
        // for merging assigners the state is kept under the state window of the timer's window
        mergingWindows = getMergingWindowSet();
        W stateWindow = mergingWindows.getStateWindow(context.window);
        windowState = getPartitionedState(stateWindow, windowSerializer, windowStateDescriptor);
      }

      TriggerResult triggerResult = context.onProcessingTime(due.timestamp);
      fireOrContinue(triggerResult, context.window, windowState);

      final boolean discardWindow =
          triggerResult.isPurge()
              || (!windowAssigner.isEventTime() && isCleanupTime(due.window, due.timestamp));
      if (discardWindow) {
        cleanup(due.window, windowState, mergingWindows);
      }
    }
  }
Пример #8
0
  /**
   * Handles an incoming watermark: drains every queued event-time timer whose timestamp is not
   * after the watermark, evaluates the trigger for the timer's key and window, and cleans the
   * window up if the trigger purges or, for event-time assigners, if the timer marks the
   * window's cleanup time. Afterwards the watermark is forwarded and recorded as the current
   * watermark.
   *
   * @param mark The watermark that arrived.
   * @throws Exception Propagated from state access, trigger evaluation or window cleanup.
   */
  @Override
  public void processWatermark(Watermark mark) throws Exception {
    // The head of the queue is re-read after every iteration because it changes as timers are
    // removed below.
    for (Timer<K, W> due = watermarkTimersQueue.peek();
        due != null && due.timestamp <= mark.getTimestamp();
        due = watermarkTimersQueue.peek()) {

      watermarkTimers.remove(due);
      watermarkTimersQueue.remove();

      context.key = due.key;
      context.window = due.window;
      setKeyContext(due.key);

      final AppendingState<IN, ACC> windowState;
      final MergingWindowSet<W> mergingWindows;

      if (!(windowAssigner instanceof MergingWindowAssigner)) {
        mergingWindows = null;
        windowState =
            getPartitionedState(context.window, windowSerializer, windowStateDescriptor);
      } else {
        // for merging assigners the state is kept under the state window of the timer's window
        mergingWindows = getMergingWindowSet();
        W stateWindow = mergingWindows.getStateWindow(context.window);
        windowState = getPartitionedState(stateWindow, windowSerializer, windowStateDescriptor);
      }

      TriggerResult triggerResult = context.onEventTime(due.timestamp);
      fireOrContinue(triggerResult, context.window, windowState);

      final boolean discardWindow =
          triggerResult.isPurge()
              || (windowAssigner.isEventTime() && isCleanupTime(due.window, due.timestamp));
      if (discardWindow) {
        cleanup(due.window, windowState, mergingWindows);
      }
    }

    // forward the watermark first, then remember it as the current one
    output.emitWatermark(mark);

    this.currentWatermark = mark.getTimestamp();
  }
Пример #9
0
 /**
  * Creates a windowed stream over the given input using the given window assigner. The trigger
  * is initialized to the assigner's default trigger, obtained for the execution environment of
  * the input stream.
  *
  * @param input The data stream the windows are built on.
  * @param windowAssigner The assigner that determines the windows of each element.
  */
 public AllWindowedStream(DataStream<T> input, WindowAssigner<? super T, W> windowAssigner) {
   this.input = input;
   this.windowAssigner = windowAssigner;
   this.trigger = windowAssigner.getDefaultTrigger(input.getExecutionEnvironment());
 }
Пример #10
0
 /**
  * Decides whether the given window is late. A window is late only when the window assigner
  * works in event time and the window's cleanup time is not after the current watermark.
  *
  * @param window The window to check.
  * @return {@code true} if the window is late, {@code false} otherwise (always {@code false} for
  *     assigners that do not work in event time).
  */
 protected boolean isLate(W window) {
   if (!windowAssigner.isEventTime()) {
     return false;
   }
   return cleanupTime(window) <= currentWatermark;
 }
Пример #11
0
  /**
   * Assigns the element to its windows, adds it to the state of each window and evaluates the
   * trigger for that window.
   *
   * <p>For a {@code MergingWindowAssigner}, each assigned window is first added to the merging
   * window set, which may merge it with existing windows. On a merge, the trigger's
   * {@code onMerge} result is recorded, the trigger state and cleanup timers of the merged
   * windows are cleared, and their partitioned state is merged into the resulting state window.
   * The recorded merge result is later combined with the {@code onElement} result.
   *
   * <p>In both branches, an element whose window is late according to {@code isLate} is logged
   * and dropped. After the trigger is evaluated, a purging result leads to cleanup of the window;
   * otherwise a cleanup timer is registered for it.
   *
   * @param element The incoming stream record.
   * @throws Exception Propagated from window assignment, state access, merging or trigger
   *     handling.
   */
  @Override
  @SuppressWarnings("unchecked")
  public void processElement(StreamRecord<IN> element) throws Exception {

    Collection<W> elementWindows =
        windowAssigner.assignWindows(element.getValue(), element.getTimestamp());

    // the key of the current element is taken from the state backend
    final K key = (K) getStateBackend().getCurrentKey();

    if (windowAssigner instanceof MergingWindowAssigner) {
      MergingWindowSet<W> mergingWindows = getMergingWindowSet();

      for (W window : elementWindows) {
        // If there is a merge, it can only result in a window that contains our new
        // element because we always eagerly merge
        // (a Tuple1 is used as a mutable holder so the anonymous class below can write to it)
        final Tuple1<TriggerResult> mergeTriggerResult = new Tuple1<>(TriggerResult.CONTINUE);

        // adding the new window might result in a merge, in that case the actualWindow
        // is the merged window and we work with that. If we don't merge then
        // actualWindow == window
        W actualWindow =
            mergingWindows.addWindow(
                window,
                new MergingWindowSet.MergeFunction<W>() {
                  @Override
                  public void merge(
                      W mergeResult,
                      Collection<W> mergedWindows,
                      W stateWindowResult,
                      Collection<W> mergedStateWindows)
                      throws Exception {
                    context.key = key;
                    context.window = mergeResult;

                    // store for later use
                    mergeTriggerResult.f0 = context.onMerge(mergedWindows);

                    // clear trigger state and cleanup timer of every window that was merged away
                    for (W m : mergedWindows) {
                      context.window = m;
                      context.clear();
                      deleteCleanupTimer(m);
                    }

                    // merge the merged state windows into the newly resulting state window
                    getStateBackend()
                        .mergePartitionedStates(
                            stateWindowResult,
                            mergedStateWindows,
                            windowSerializer,
                            (StateDescriptor<? extends MergingState<?, ?>, ?>)
                                windowStateDescriptor);
                  }
                });

        // drop if the window is already late
        // NOTE(review): at this point the window has already been added to the merging window
        // set above — confirm that leaving it there for a dropped element is intended
        if (isLate(actualWindow)) {
          LOG.info(
              "Dropped element " + element + " for window " + actualWindow + " due to lateness.");
          continue;
        }

        // the element is stored under the state window that backs the (possibly merged) window
        W stateWindow = mergingWindows.getStateWindow(actualWindow);
        AppendingState<IN, ACC> windowState =
            getPartitionedState(stateWindow, windowSerializer, windowStateDescriptor);
        windowState.add(element.getValue());

        context.key = key;
        context.window = actualWindow;

        // we might have already fired because of a merge but still call onElement
        // on the (possibly merged) window
        TriggerResult triggerResult = context.onElement(element);
        TriggerResult combinedTriggerResult =
            TriggerResult.merge(triggerResult, mergeTriggerResult.f0);
        fireOrContinue(combinedTriggerResult, actualWindow, windowState);

        // a purge discards the window right away; otherwise schedule its eventual cleanup
        if (combinedTriggerResult.isPurge()) {
          cleanup(actualWindow, windowState, mergingWindows);
        } else {
          registerCleanupTimer(actualWindow);
        }
      }
    } else {
      for (W window : elementWindows) {

        // drop if the window is already late
        if (isLate(window)) {
          LOG.info("Dropped element " + element + " for window " + window + " due to lateness.");
          continue;
        }

        AppendingState<IN, ACC> windowState =
            getPartitionedState(window, windowSerializer, windowStateDescriptor);
        windowState.add(element.getValue());

        context.key = key;
        context.window = window;

        TriggerResult triggerResult = context.onElement(element);
        fireOrContinue(triggerResult, window, windowState);

        // a purge discards the window right away; otherwise schedule its eventual cleanup
        if (triggerResult.isPurge()) {
          cleanup(window, windowState, null);
        } else {
          registerCleanupTimer(window);
        }
      }
    }
  }