/** * Applies a reduce function to the window. The window function is called for each evaluation of * the window for each key individually. The output of the reduce function is interpreted as a * regular non-windowed stream. * * <p>This window will try and pre-aggregate data as much as the window policies permit. For * example, tumbling time windows can perfectly pre-aggregate the data, meaning that only one * element per key is stored. Sliding time windows will pre-aggregate on the granularity of the * slide interval, so a few elements are stored per key (one per slide interval). Custom windows * may not be able to pre-aggregate, or may need to store extra values in an aggregation tree. * * @param function The reduce function. * @return The data stream that is the result of applying the reduce function to the window. */ public SingleOutputStreamOperator<T, ?> reduce(ReduceFunction<T> function) { if (function instanceof RichFunction) { throw new UnsupportedOperationException( "ReduceFunction of reduce can not be a RichFunction. 
" + "Please use apply(ReduceFunction, WindowFunction) instead."); } // clean the closure function = input.getExecutionEnvironment().clean(function); String callLocation = Utils.getCallLocationName(); String udfName = "Reduce at " + callLocation; SingleOutputStreamOperator<T, ?> result = createFastTimeOperatorIfValid(function, input.getType(), udfName); if (result != null) { return result; } String opName = "NonParallelTriggerWindow(" + windowAssigner + ", " + trigger + ", " + udfName + ")"; OneInputStreamOperator<T, T> operator; boolean setProcessingTime = input.getExecutionEnvironment().getStreamTimeCharacteristic() == TimeCharacteristic.ProcessingTime; if (evictor != null) { operator = new EvictingNonKeyedWindowOperator<>( windowAssigner, windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()), new HeapWindowBuffer.Factory<T>(), new ReduceIterableAllWindowFunction<W, T>(function), trigger, evictor) .enableSetProcessingTime(setProcessingTime); } else { operator = new NonKeyedWindowOperator<>( windowAssigner, windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()), new PreAggregatingHeapWindowBuffer.Factory<>(function), new ReduceIterableAllWindowFunction<W, T>(function), trigger) .enableSetProcessingTime(setProcessingTime); } return input.transform(opName, input.getType(), operator).setParallelism(1); }
/**
 * Adds the incoming record to every window it is assigned to and evaluates the trigger for each
 * of those windows.
 *
 * <p>When {@code setProcessingTime} is enabled the record's timestamp is first overwritten with
 * the current wall-clock time. Per-key window contexts (and their window buffers) are created
 * lazily the first time a key/window combination is seen.
 *
 * @param element the record to process
 * @throws Exception if window assignment, key extraction, buffering or trigger handling fails
 */
@Override
@SuppressWarnings("unchecked")
public final void processElement(StreamRecord<IN> element) throws Exception {
    if (setProcessingTime) {
        element.replace(element.getValue(), System.currentTimeMillis());
    }

    final Collection<W> assignedWindows =
            windowAssigner.assignWindows(element.getValue(), element.getTimestamp());
    final K key = keySelector.getKey(element.getValue());

    // Look up (or lazily create) the window -> context map for this key.
    Map<W, Context> contextsForKey = windows.get(key);
    if (contextsForKey == null) {
        contextsForKey = new HashMap<>();
        windows.put(key, contextsForKey);
    }

    for (W window : assignedWindows) {
        Context windowContext = contextsForKey.get(window);
        if (windowContext == null) {
            // First element for this key/window: give it a fresh buffer.
            windowContext = new Context(key, window, windowBufferFactory.create());
            contextsForKey.put(window, windowContext);
        }

        windowContext.windowBuffer.storeElement(element);
        processTriggerResult(windowContext.onElement(element), key, window);
    }
}
/** * Applies the given window function to each window. The window function is called for each * evaluation of the window for each key individually. The output of the window function is * interpreted as a regular non-windowed stream. * * <p>Arriving data is pre-aggregated using the given pre-aggregation reducer. * * @param preAggregator The reduce function that is used for pre-aggregation * @param function The window function. * @param resultType Type information for the result type of the window function * @return The data stream that is the result of applying the window function to the window. */ public <R> SingleOutputStreamOperator<R, ?> apply( ReduceFunction<T> preAggregator, AllWindowFunction<T, R, W> function, TypeInformation<R> resultType) { if (preAggregator instanceof RichFunction) { throw new UnsupportedOperationException("Pre-aggregator of apply can not be a RichFunction."); } // clean the closures function = input.getExecutionEnvironment().clean(function); preAggregator = input.getExecutionEnvironment().clean(preAggregator); String callLocation = Utils.getCallLocationName(); String udfName = "WindowApply at " + callLocation; String opName = "TriggerWindow(" + windowAssigner + ", " + trigger + ", " + udfName + ")"; OneInputStreamOperator<T, R> operator; boolean setProcessingTime = input.getExecutionEnvironment().getStreamTimeCharacteristic() == TimeCharacteristic.ProcessingTime; if (evictor != null) { operator = new EvictingNonKeyedWindowOperator<>( windowAssigner, windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()), new HeapWindowBuffer.Factory<T>(), new ReduceApplyAllWindowFunction<>(preAggregator, function), trigger, evictor) .enableSetProcessingTime(setProcessingTime); } else { operator = new NonKeyedWindowOperator<>( windowAssigner, windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()), new PreAggregatingHeapWindowBuffer.Factory<>(preAggregator), new ReduceApplyAllWindowFunction<>(preAggregator, function), 
trigger) .enableSetProcessingTime(setProcessingTime); } return input.transform(opName, resultType, operator).setParallelism(1); }
/** * Applies the given window function to each window. The window function is called for each * evaluation of the window for each key individually. The output of the window function is * interpreted as a regular non-windowed stream. * * <p>Not that this function requires that all data in the windows is buffered until the window is * evaluated, as the function provides no means of pre-aggregation. * * @param function The window function. * @return The data stream that is the result of applying the window function to the window. */ public <R> SingleOutputStreamOperator<R, ?> apply( AllWindowFunction<Iterable<T>, R, W> function, TypeInformation<R> resultType) { // clean the closure function = input.getExecutionEnvironment().clean(function); String callLocation = Utils.getCallLocationName(); String udfName = "WindowApply at " + callLocation; SingleOutputStreamOperator<R, ?> result = createFastTimeOperatorIfValid(function, resultType, udfName); if (result != null) { return result; } String opName = "TriggerWindow(" + windowAssigner + ", " + trigger + ", " + udfName + ")"; NonKeyedWindowOperator<T, R, W> operator; boolean setProcessingTime = input.getExecutionEnvironment().getStreamTimeCharacteristic() == TimeCharacteristic.ProcessingTime; if (evictor != null) { operator = new EvictingNonKeyedWindowOperator<>( windowAssigner, windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()), new HeapWindowBuffer.Factory<T>(), function, trigger, evictor) .enableSetProcessingTime(setProcessingTime); } else { operator = new NonKeyedWindowOperator<>( windowAssigner, windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()), new HeapWindowBuffer.Factory<T>(), function, trigger) .enableSetProcessingTime(setProcessingTime); } return input.transform(opName, resultType, operator).setParallelism(1); }
/**
 * Deletes the cleanup timer of the provided window.
 *
 * <p>The timer timestamp is obtained from {@code cleanupTime(window)}. An event-time timer is
 * deleted when the window assigner works in event time, a processing-time timer otherwise.
 *
 * @param window the window whose cleanup timer is removed
 */
protected void deleteCleanupTimer(W window) {
    final long timerTimestamp = cleanupTime(window);
    if (!windowAssigner.isEventTime()) {
        context.deleteProcessingTimeTimer(timerTimestamp);
        return;
    }
    context.deleteEventTimeTimer(timerTimestamp);
}
/**
 * Registers the cleanup timer of the provided window.
 *
 * <p>The timer timestamp is obtained from {@code cleanupTime(window)}. An event-time timer is
 * registered when the window assigner works in event time, a processing-time timer otherwise.
 *
 * @param window the window for which a cleanup timer is registered
 */
protected void registerCleanupTimer(W window) {
    final long timerTimestamp = cleanupTime(window);
    if (!windowAssigner.isEventTime()) {
        context.registerProcessingTimeTimer(timerTimestamp);
        return;
    }
    context.registerEventTimeTimer(timerTimestamp);
}
@Override public void trigger(long time) throws Exception { boolean fire; // Remove information about the triggering task processingTimeTimerFutures.remove(time); processingTimeTimerTimestamps.remove(time, processingTimeTimerTimestamps.count(time)); do { Timer<K, W> timer = processingTimeTimersQueue.peek(); if (timer != null && timer.timestamp <= time) { fire = true; processingTimeTimers.remove(timer); processingTimeTimersQueue.remove(); context.key = timer.key; context.window = timer.window; setKeyContext(timer.key); AppendingState<IN, ACC> windowState; MergingWindowSet<W> mergingWindows = null; if (windowAssigner instanceof MergingWindowAssigner) { mergingWindows = getMergingWindowSet(); W stateWindow = mergingWindows.getStateWindow(context.window); windowState = getPartitionedState(stateWindow, windowSerializer, windowStateDescriptor); } else { windowState = getPartitionedState(context.window, windowSerializer, windowStateDescriptor); } TriggerResult triggerResult = context.onProcessingTime(timer.timestamp); fireOrContinue(triggerResult, context.window, windowState); if (triggerResult.isPurge() || (!windowAssigner.isEventTime() && isCleanupTime(timer.window, timer.timestamp))) { cleanup(timer.window, windowState, mergingWindows); } } else { fire = false; } } while (fire); }
/**
 * Handles an incoming watermark: processes every queued event-time timer whose timestamp is not
 * later than the watermark, then forwards the watermark and records it as the current one.
 *
 * <p>For each such timer the key and window are installed on the shared trigger context, the
 * window state is resolved (through the merging window set when the assigner merges windows),
 * the trigger's event-time hook is invoked and its result is passed to {@code fireOrContinue}.
 * The window is cleaned up when the trigger asks for a purge, or when the assigner is event-time
 * based and the timer is the window's cleanup timer.
 *
 * @param mark the watermark to process
 * @throws Exception if state access, the trigger or the window evaluation fails
 */
@Override
public void processWatermark(Watermark mark) throws Exception {
    // Keep draining the head of the queue until it is empty or the head is past the watermark.
    for (Timer<K, W> timer = watermarkTimersQueue.peek();
            timer != null && timer.timestamp <= mark.getTimestamp();
            timer = watermarkTimersQueue.peek()) {

        watermarkTimers.remove(timer);
        watermarkTimersQueue.remove();

        context.key = timer.key;
        context.window = timer.window;
        setKeyContext(timer.key);

        // For merging assigners the state lives under the state window, not the window itself.
        final boolean merging = windowAssigner instanceof MergingWindowAssigner;
        final MergingWindowSet<W> mergingWindows = merging ? getMergingWindowSet() : null;
        final W stateNamespace =
                merging ? mergingWindows.getStateWindow(context.window) : context.window;
        final AppendingState<IN, ACC> windowState =
                getPartitionedState(stateNamespace, windowSerializer, windowStateDescriptor);

        final TriggerResult triggerResult = context.onEventTime(timer.timestamp);
        fireOrContinue(triggerResult, context.window, windowState);

        final boolean discardWindow =
                triggerResult.isPurge()
                        || (windowAssigner.isEventTime()
                                && isCleanupTime(timer.window, timer.timestamp));
        if (discardWindow) {
            cleanup(timer.window, windowState, mergingWindows);
        }
    }

    // The watermark is forwarded first and only then recorded as the current watermark.
    output.emitWatermark(mark);
    this.currentWatermark = mark.getTimestamp();
}
/**
 * Creates a windowed view over the given stream.
 *
 * <p>The trigger is initialised to the assigner's default trigger for the input stream's
 * execution environment.
 *
 * @param input the stream whose elements are grouped into windows
 * @param windowAssigner the assigner that decides which windows an element belongs to
 */
public AllWindowedStream(DataStream<T> input, WindowAssigner<? super T, W> windowAssigner) {
    this.windowAssigner = windowAssigner;
    this.input = input;

    // Start out with the assigner's default trigger.
    this.trigger = windowAssigner.getDefaultTrigger(input.getExecutionEnvironment());
}
/**
 * Decides whether the given window is late with respect to the current watermark.
 *
 * <p>A window can only be late when the window assigner works in event time; in that case it is
 * late once its cleanup time (as computed by {@code cleanupTime(window)}) is not after the
 * current watermark.
 *
 * @param window the window to check
 * @return {@code true} if the assigner is event-time based and the window's cleanup time is less
 *     than or equal to the current watermark, {@code false} otherwise
 */
protected boolean isLate(W window) {
    if (!windowAssigner.isEventTime()) {
        return false;
    }
    return cleanupTime(window) <= currentWatermark;
}
@Override @SuppressWarnings("unchecked") public void processElement(StreamRecord<IN> element) throws Exception { Collection<W> elementWindows = windowAssigner.assignWindows(element.getValue(), element.getTimestamp()); final K key = (K) getStateBackend().getCurrentKey(); if (windowAssigner instanceof MergingWindowAssigner) { MergingWindowSet<W> mergingWindows = getMergingWindowSet(); for (W window : elementWindows) { // If there is a merge, it can only result in a window that contains our new // element because we always eagerly merge final Tuple1<TriggerResult> mergeTriggerResult = new Tuple1<>(TriggerResult.CONTINUE); // adding the new window might result in a merge, in that case the actualWindow // is the merged window and we work with that. If we don't merge then // actualWindow == window W actualWindow = mergingWindows.addWindow( window, new MergingWindowSet.MergeFunction<W>() { @Override public void merge( W mergeResult, Collection<W> mergedWindows, W stateWindowResult, Collection<W> mergedStateWindows) throws Exception { context.key = key; context.window = mergeResult; // store for later use mergeTriggerResult.f0 = context.onMerge(mergedWindows); for (W m : mergedWindows) { context.window = m; context.clear(); deleteCleanupTimer(m); } // merge the merged state windows into the newly resulting state window getStateBackend() .mergePartitionedStates( stateWindowResult, mergedStateWindows, windowSerializer, (StateDescriptor<? 
extends MergingState<?, ?>, ?>) windowStateDescriptor); } }); // drop if the window is already late if (isLate(actualWindow)) { LOG.info( "Dropped element " + element + " for window " + actualWindow + " due to lateness."); continue; } W stateWindow = mergingWindows.getStateWindow(actualWindow); AppendingState<IN, ACC> windowState = getPartitionedState(stateWindow, windowSerializer, windowStateDescriptor); windowState.add(element.getValue()); context.key = key; context.window = actualWindow; // we might have already fired because of a merge but still call onElement // on the (possibly merged) window TriggerResult triggerResult = context.onElement(element); TriggerResult combinedTriggerResult = TriggerResult.merge(triggerResult, mergeTriggerResult.f0); fireOrContinue(combinedTriggerResult, actualWindow, windowState); if (combinedTriggerResult.isPurge()) { cleanup(actualWindow, windowState, mergingWindows); } else { registerCleanupTimer(actualWindow); } } } else { for (W window : elementWindows) { // drop if the window is already late if (isLate(window)) { LOG.info("Dropped element " + element + " for window " + window + " due to lateness."); continue; } AppendingState<IN, ACC> windowState = getPartitionedState(window, windowSerializer, windowStateDescriptor); windowState.add(element.getValue()); context.key = key; context.window = window; TriggerResult triggerResult = context.onElement(element); fireOrContinue(triggerResult, window, windowState); if (triggerResult.isPurge()) { cleanup(window, windowState, null); } else { registerCleanupTimer(window); } } } }