@Test public void testTypeInfo() { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStream<Long> src1 = env.generateSequence(0, 0); assertEquals(TypeExtractor.getForClass(Long.class), src1.getType()); DataStream<Tuple2<Integer, String>> map = src1.map( new MapFunction<Long, Tuple2<Integer, String>>() { @Override public Tuple2<Integer, String> map(Long value) throws Exception { return null; } }); assertEquals(TypeExtractor.getForObject(new Tuple2<>(0, "")), map.getType()); DataStream<String> window = map.windowAll(GlobalWindows.create()) .trigger(PurgingTrigger.of(CountTrigger.of(5))) .apply( new AllWindowFunction<Tuple2<Integer, String>, String, GlobalWindow>() { @Override public void apply( GlobalWindow window, Iterable<Tuple2<Integer, String>> values, Collector<String> out) throws Exception {} }); assertEquals(TypeExtractor.getForClass(String.class), window.getType()); DataStream<CustomPOJO> flatten = window .windowAll(GlobalWindows.create()) .trigger(PurgingTrigger.of(CountTrigger.of(5))) .fold( new CustomPOJO(), new FoldFunction<String, CustomPOJO>() { private static final long serialVersionUID = 1L; @Override public CustomPOJO fold(CustomPOJO accumulator, String value) throws Exception { return null; } }); assertEquals(TypeExtractor.getForClass(CustomPOJO.class), flatten.getType()); }
@Test public void testAllReduceDriverImmutableEmpty() { try { TestTaskContext<ReduceFunction<Tuple2<String, Integer>>, Tuple2<String, Integer>> context = new TestTaskContext<ReduceFunction<Tuple2<String, Integer>>, Tuple2<String, Integer>>(); List<Tuple2<String, Integer>> data = DriverTestData.createReduceImmutableData(); TypeInformation<Tuple2<String, Integer>> typeInfo = TypeExtractor.getForObject(data.get(0)); MutableObjectIterator<Tuple2<String, Integer>> input = EmptyMutableObjectIterator.get(); context.setDriverStrategy(DriverStrategy.ALL_REDUCE); context.setInput1(input, typeInfo.createSerializer()); context.setCollector(new DiscardingOutputCollector<Tuple2<String, Integer>>()); AllReduceDriver<Tuple2<String, Integer>> driver = new AllReduceDriver<Tuple2<String, Integer>>(); driver.setup(context); driver.prepare(); driver.run(); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); Assert.fail(e.getMessage()); } }
@Override public <E> void validateCustomPartitioner( Partitioner<E> partitioner, TypeInformation<E> typeInfo) { if (keyFields.size() != 1) { throw new InvalidProgramException( "Custom partitioners can only be used with keys that have one key field."); } if (typeInfo == null) { try { typeInfo = TypeExtractor.getPartitionerTypes(partitioner); } catch (Throwable t) { // best effort check, so we ignore exceptions } } if (typeInfo != null && !(typeInfo instanceof GenericTypeInfo)) { TypeInformation<?> keyType = keyFields.get(0).getType(); if (!keyType.equals(typeInfo)) { throw new InvalidProgramException( "The partitioner is incompatible with the key type. " + "Partitioner type: " + typeInfo + " , key type: " + keyType); } } }
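// A minimal usage sketch of what this validation guards (the data set `data` and the
// key position are hypothetical, not taken from the surrounding code): partitionCustom()
// accepts exactly one key field, and the Partitioner's type parameter must match that
// field's type, here String for field 0.
Partitioner<String> partitioner = new Partitioner<String>() {
    private static final long serialVersionUID = 1L;

    @Override
    public int partition(String key, int numPartitions) {
        return Math.abs(key.hashCode() % numPartitions);
    }
};
DataSet<Tuple2<String, Integer>> partitioned = data.partitionCustom(partitioner, 0);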
public static void main(final String[] args) throws Exception { if (!parseParameters(args)) { return; } // set up the execution environment final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // get input data final DataStream<String> text = getTextDataStream(env); final DataStream<Tuple2<String, Integer>> counts = text // split up the lines in pairs (2-tuples) containing: (word,1) // this is done by a bolt that is wrapped accordingly .transform( "BoltTokenizer", TypeExtractor.getForObject(new Tuple2<String, Integer>("", 0)), new BoltWrapper<String, Tuple2<String, Integer>>(new BoltTokenizer())) // group by the tuple field "0" and sum up tuple field "1" .keyBy(0) .sum(1); // emit result if (fileOutput) { counts.writeAsText(outputPath); } else { counts.print(); } // execute program env.execute("Streaming WordCount with bolt tokenizer"); }
/** * Applies a FlatMap transformation on a {@link DataStream}. The transformation calls a {@link * FlatMapFunction} for each element of the DataStream. Each FlatMapFunction call can return any * number of elements including none. The user can also extend {@link RichFlatMapFunction} to gain * access to other features provided by the {@link * org.apache.flink.api.common.functions.RichFunction} interface. * * @param flatMapper The FlatMapFunction that is called for each element of the DataStream * @param <R> output type * @return The transformed {@link DataStream}. */ public <R> SingleOutputStreamOperator<R> flatMap(FlatMapFunction<T, R> flatMapper) { TypeInformation<R> outType = TypeExtractor.getFlatMapReturnTypes( clean(flatMapper), getType(), Utils.getCallLocationName(), true); return transform("Flat Map", outType, new StreamFlatMap<>(clean(flatMapper))); }
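// A short usage sketch for flatMap (the environment `env` and the input values are
// illustrative): one input element may produce zero or more output elements, and the
// output type is inferred by the TypeExtractor call above.
DataStream<String> lines = env.fromElements("to be or", "not to be");
DataStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
    private static final long serialVersionUID = 1L;

    @Override
    public void flatMap(String value, Collector<String> out) throws Exception {
        for (String token : value.split(" ")) {
            out.collect(token);
        }
    }
});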
/** * Applies a window function to the window. The window function is called for each evaluation of * the window. The output of the window function is interpreted as a regular non-windowed stream. * * <p>Note that this function requires that all data in the window is buffered until the window is * evaluated, as the function provides no means of pre-aggregation. * * @param function The window function. * @return The data stream that is the result of applying the window function to the window. */ public <R> SingleOutputStreamOperator<R, ?> apply(AllWindowFunction<Iterable<T>, R, W> function) { @SuppressWarnings({"unchecked", "rawtypes"}) TypeInformation<Iterable<T>> iterTypeInfo = new GenericTypeInfo<>((Class) Iterable.class); TypeInformation<R> resultType = TypeExtractor.getUnaryOperatorReturnType( function, AllWindowFunction.class, true, true, iterTypeInfo, null, false); return apply(function, resultType); }
/** * Applies the given window function to each window. The window function is called for each * evaluation of the window. The output of the window function is interpreted as a regular * non-windowed stream. * * <p>Arriving data is pre-aggregated using the given pre-aggregation reducer. * * @param preAggregator The reduce function that is used for pre-aggregation. * @param function The window function. * @return The data stream that is the result of applying the window function to the window. */ public <R> SingleOutputStreamOperator<R, ?> apply( ReduceFunction<T> preAggregator, AllWindowFunction<T, R, W> function) { TypeInformation<T> inType = input.getType(); TypeInformation<R> resultType = TypeExtractor.getUnaryOperatorReturnType( function, AllWindowFunction.class, true, true, inType, null, false); return apply(preAggregator, function, resultType); }
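// A usage sketch matching the signatures above (the window, trigger, and input stream
// `input` are illustrative assumptions): because of the pre-aggregation, the
// AllWindowFunction receives the already-reduced value rather than all buffered elements.
DataStream<Long> sums = input
        .windowAll(GlobalWindows.create())
        .trigger(PurgingTrigger.of(CountTrigger.of(100)))
        .apply(
                new ReduceFunction<Long>() {
                    private static final long serialVersionUID = 1L;

                    @Override
                    public Long reduce(Long a, Long b) {
                        return a + b;
                    }
                },
                new AllWindowFunction<Long, Long, GlobalWindow>() {
                    private static final long serialVersionUID = 1L;

                    @Override
                    public void apply(GlobalWindow window, Long preAggregated, Collector<Long> out) {
                        out.collect(preAggregated);
                    }
                });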
@Test public void testTypeInfo() { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStream<Long> src1 = env.generateSequence(0, 0); assertEquals(TypeExtractor.getForClass(Long.class), src1.getType()); DataStream<Tuple2<Integer, String>> map = src1.map( new MapFunction<Long, Tuple2<Integer, String>>() { @Override public Tuple2<Integer, String> map(Long value) throws Exception { return null; } }); assertEquals(TypeExtractor.getForObject(new Tuple2<Integer, String>(0, "")), map.getType()); WindowedDataStream<String> window = map.window(Count.of(5)) .mapWindow( new WindowMapFunction<Tuple2<Integer, String>, String>() { @Override public void mapWindow( Iterable<Tuple2<Integer, String>> values, Collector<String> out) throws Exception {} }); assertEquals(TypeExtractor.getForClass(String.class), window.getType()); DataStream<CustomPOJO> flatten = window .foldWindow( new CustomPOJO(), new FoldFunction<String, CustomPOJO>() { @Override public CustomPOJO fold(CustomPOJO accumulator, String value) throws Exception { return null; } }) .flatten(); assertEquals(TypeExtractor.getForClass(CustomPOJO.class), flatten.getType()); }
/** * Applies the given fold function to each window. The fold function is called for each * evaluation of the window. The output of the fold function is interpreted as a regular * non-windowed stream. * * @param initialValue The initial value for the fold. * @param function The fold function. * @return The data stream that is the result of applying the fold function to the window. */ public <R> SingleOutputStreamOperator<R, ?> fold(R initialValue, FoldFunction<T, R> function) { // clean the closure function = input.getExecutionEnvironment().clean(function); TypeInformation<R> resultType = TypeExtractor.getFoldReturnTypes( function, input.getType(), Utils.getCallLocationName(), true); return apply(new FoldAllWindowFunction<W, T, R>(initialValue, function), resultType); }
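// A usage sketch mirroring the testTypeInfo() snippet at the top of this section
// (window and trigger choices are illustrative; `input` is assumed to be a
// DataStream<String>): fold() seeds each window with the initial value and folds
// every element into the accumulator.
DataStream<String> folded = input
        .windowAll(GlobalWindows.create())
        .trigger(PurgingTrigger.of(CountTrigger.of(5)))
        .fold("", new FoldFunction<String, String>() {
            private static final long serialVersionUID = 1L;

            @Override
            public String fold(String accumulator, String value) {
                return accumulator + value;
            }
        });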
private TypeSerializer<Object> createSerializer(Object key, int pos) { if (key == null) { throw new NullKeyFieldException(pos); } try { TypeInformation<Object> info = TypeExtractor.getForObject(key); return info.createSerializer(executionConfig); } catch (Throwable t) { throw new RuntimeException( "Could not create key serializer for type " + key.getClass().getName(), t); } }
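// The extraction path used above, spelled out with illustrative values: TypeInformation
// is derived from a concrete object, then turned into a serializer with the job's
// ExecutionConfig.
TypeInformation<Tuple2<String, Integer>> info =
        TypeExtractor.getForObject(new Tuple2<String, Integer>("a", 1));
TypeSerializer<Tuple2<String, Integer>> serializer =
        info.createSerializer(new ExecutionConfig());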
/** Specifies a {@link KeySelector} for elements from the second input. */ public EqualTo equalTo(KeySelector<T2, KEY> keySelector) { TypeInformation<KEY> otherKey = TypeExtractor.getKeySelectorTypes(keySelector, input2.getType()); if (!otherKey.equals(this.keyType)) { throw new IllegalArgumentException( "The keys for the two inputs are not equal: " + "first key = " + this.keyType + " , second key = " + otherKey); } return new EqualTo(input2.clean(keySelector)); }
/** * Completes the co-group operation with the user function that is executed for windowed groups. */ public <T> DataStream<T> apply(CoGroupFunction<T1, T2, T> function) { TypeInformation<T> resultType = TypeExtractor.getBinaryOperatorReturnType( function, CoGroupFunction.class, true, true, input1.getType(), input2.getType(), "CoGroup", false); return apply(function, resultType); }
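// A usage sketch of the full chain this method completes (stream types, key selectors,
// and the count-triggered global window are illustrative assumptions, mirroring the
// window style used elsewhere in this section):
DataStream<String> coGrouped = first.coGroup(second)
        .where(new KeySelector<Tuple2<String, Integer>, String>() {
            private static final long serialVersionUID = 1L;

            @Override
            public String getKey(Tuple2<String, Integer> value) {
                return value.f0;
            }
        })
        .equalTo(new KeySelector<Tuple2<String, Long>, String>() {
            private static final long serialVersionUID = 1L;

            @Override
            public String getKey(Tuple2<String, Long> value) {
                return value.f0;
            }
        })
        .window(GlobalWindows.create())
        .trigger(PurgingTrigger.of(CountTrigger.of(10)))
        .apply(new CoGroupFunction<Tuple2<String, Integer>, Tuple2<String, Long>, String>() {
            private static final long serialVersionUID = 1L;

            @Override
            public void coGroup(
                    Iterable<Tuple2<String, Integer>> firstElements,
                    Iterable<Tuple2<String, Long>> secondElements,
                    Collector<String> out) {
                // emit one record per key occurrence in the first input
                for (Tuple2<String, Integer> t : firstElements) {
                    out.collect(t.f0);
                }
            }
        });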
/** * Applies a CoMap transformation on a {@link ConnectedStreams} and maps the output to a common * type. The transformation calls a {@link CoMapFunction#map1} for each element of the first input * and {@link CoMapFunction#map2} for each element of the second input. Each CoMapFunction call * returns exactly one element. * * @param coMapper The CoMapFunction used to jointly transform the two input DataStreams * @return The transformed {@link DataStream} */ public <R> SingleOutputStreamOperator<R> map(CoMapFunction<IN1, IN2, R> coMapper) { TypeInformation<R> outTypeInfo = TypeExtractor.getBinaryOperatorReturnType( coMapper, CoMapFunction.class, false, true, getType1(), getType2(), Utils.getCallLocationName(), true); return transform("Co-Map", outTypeInfo, new CoStreamMap<>(inputStream1.clean(coMapper))); }
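// A usage sketch for the co-map (the environment `env` and input streams are
// illustrative): both inputs of the ConnectedStreams are mapped to the common output
// type String, one element in, exactly one element out.
DataStream<Integer> ints = env.fromElements(1, 2, 3);
DataStream<Long> longs = env.fromElements(10L, 20L);
DataStream<String> asText = ints.connect(longs)
        .map(new CoMapFunction<Integer, Long, String>() {
            private static final long serialVersionUID = 1L;

            @Override
            public String map1(Integer value) {
                return "int: " + value;
            }

            @Override
            public String map2(Long value) {
                return "long: " + value;
            }
        });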
private static DataStream<Tuple1<String>> getTextDataStream( final StreamExecutionEnvironment env) { if (fileOutput) { // read the text file from given input path TupleTypeInfo<Tuple1<String>> sourceType = (TupleTypeInfo<Tuple1<String>>) TypeExtractor.getForObject(new Tuple1<String>("")); return env.createInput( new CsvInputFormat<Tuple1<String>>( new Path(textPath), CsvInputFormat.DEFAULT_LINE_DELIMITER, /* the line delimiter is reused as the field delimiter on purpose: each record has a single field, so the whole line becomes that field */ CsvInputFormat.DEFAULT_LINE_DELIMITER, sourceType), sourceType); } return env.fromElements(WordCountDataTuple.TUPLES); }
public class PojoSubclassComparatorTest extends ComparatorTestBase<PojoContainingTuple> { TypeInformation<PojoContainingTuple> type = TypeExtractor.getForClass(PojoContainingTuple.class); PojoContainingTuple[] data = new PojoContainingTuple[] { new Subclass(1, 1L, 1L, 17L), new Subclass(2, 2L, 2L, 42L), new Subclass(8519, 85190L, 85190L, 117L), new Subclass(8520, 85191L, 85191L, 93L), }; @Override protected TypeComparator<PojoContainingTuple> createComparator(boolean ascending) { Assert.assertTrue(type instanceof CompositeType); CompositeType<PojoContainingTuple> cType = (CompositeType<PojoContainingTuple>) type; ExpressionKeys<PojoContainingTuple> keys = new ExpressionKeys<PojoContainingTuple>(new String[] {"theTuple.*"}, cType); boolean[] orders = new boolean[keys.getNumberOfKeyFields()]; Arrays.fill(orders, ascending); return cType.createComparator( keys.computeLogicalKeyPositions(), orders, 0, new ExecutionConfig()); } @Override protected TypeSerializer<PojoContainingTuple> createSerializer() { return type.createSerializer(new ExecutionConfig()); } @Override protected PojoContainingTuple[] getSortedTestData() { return data; } public static class Subclass extends PojoContainingTuple { public long additionalField; public Subclass() {} public Subclass(int i, long l1, long l2, long additionalField) { super(i, l1, l2); this.additionalField = additionalField; } } }
@Override public <S extends Serializable> ValueState<S> getKeyValueState( String name, Class<S> stateType, S defaultState) { requireNonNull(stateType, "The state type class must not be null"); TypeInformation<S> typeInfo; try { typeInfo = TypeExtractor.getForClass(stateType); } catch (Exception e) { throw new RuntimeException( "Cannot analyze type '" + stateType.getName() + "' from the class alone, due to generic type parameters. " + "Please specify the TypeInformation directly.", e); } return getKeyValueState(name, typeInfo, defaultState); }
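// A hedged sketch of a typical call site (the rich function, state name, and default
// value are hypothetical; the API mirrors the method above, assuming this Flink
// snapshot's key/value state interface on keyed streams): per-key state is obtained in
// open() and read/updated per element.
public static class CountingMap extends RichMapFunction<String, Long> {
    private static final long serialVersionUID = 1L;

    private ValueState<Long> counter;

    @Override
    public void open(Configuration parameters) {
        counter = getRuntimeContext().getKeyValueState("counter", Long.class, 0L);
    }

    @Override
    public Long map(String value) throws Exception {
        long next = counter.value() + 1;
        counter.update(next);
        return next;
    }
}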
/** * Finalizes a CoGroup transformation by applying a {@link * org.apache.flink.api.common.functions.RichCoGroupFunction} to groups of elements with * identical keys.<br> * Each CoGroupFunction call returns an arbitrary number of elements. * * @param function The CoGroupFunction that is called for all groups of elements with * identical keys. * @return A CoGroupOperator that represents the co-grouped result DataSet. * @see org.apache.flink.api.common.functions.RichCoGroupFunction * @see DataSet */ public <R> CoGroupOperator<I1, I2, R> with(CoGroupFunction<I1, I2, R> function) { if (function == null) { throw new NullPointerException("CoGroup function must not be null."); } TypeInformation<R> returnType = TypeExtractor.getCoGroupReturnTypes( function, input1.getType(), input2.getType(), Utils.getCallLocationName(), true); return new CoGroupOperator<>( input1, input2, keys1, keys2, input1.clean(function), returnType, groupSortKeyOrderFirst, groupSortKeyOrderSecond, customPartitioner, Utils.getCallLocationName()); }
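// A usage sketch of the completed transformation (the data, key positions, and `env`,
// an ExecutionEnvironment, are illustrative): emits one record per element of the first
// input, tagged with the number of matching elements from the second input.
DataSet<Tuple2<String, Integer>> left = env.fromElements(new Tuple2<String, Integer>("a", 1));
DataSet<Tuple2<String, Long>> right = env.fromElements(new Tuple2<String, Long>("a", 1L));
DataSet<String> counts = left.coGroup(right)
        .where(0)
        .equalTo(0)
        .with(new CoGroupFunction<Tuple2<String, Integer>, Tuple2<String, Long>, String>() {
            private static final long serialVersionUID = 1L;

            @Override
            public void coGroup(
                    Iterable<Tuple2<String, Integer>> first,
                    Iterable<Tuple2<String, Long>> second,
                    Collector<String> out) {
                int matches = 0;
                for (Tuple2<String, Long> ignored : second) {
                    matches++;
                }
                for (Tuple2<String, Integer> t : first) {
                    out.collect(t.f0 + ": " + matches);
                }
            }
        });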
@Test public void testImmutableEmpty() { try { TestTaskContext<ReduceFunction<Tuple2<String, Integer>>, Tuple2<String, Integer>> context = new TestTaskContext<ReduceFunction<Tuple2<String, Integer>>, Tuple2<String, Integer>>( 1024 * 1024); context.getTaskConfig().setRelativeMemoryDriver(0.5); List<Tuple2<String, Integer>> data = DriverTestData.createReduceImmutableData(); Collections.shuffle(data); TupleTypeInfo<Tuple2<String, Integer>> typeInfo = (TupleTypeInfo<Tuple2<String, Integer>>) TypeExtractor.getForObject(data.get(0)); MutableObjectIterator<Tuple2<String, Integer>> input = EmptyMutableObjectIterator.get(); context.setDriverStrategy(DriverStrategy.SORTED_PARTIAL_REDUCE); TypeComparator<Tuple2<String, Integer>> comparator = typeInfo.createComparator(new int[] {0}, new boolean[] {true}, 0, new ExecutionConfig()); GatheringCollector<Tuple2<String, Integer>> result = new GatheringCollector<Tuple2<String, Integer>>( typeInfo.createSerializer(new ExecutionConfig())); context.setInput1(input, typeInfo.createSerializer(new ExecutionConfig())); context.setComparator1(comparator); context.setCollector(result); ReduceCombineDriver<Tuple2<String, Integer>> driver = new ReduceCombineDriver<Tuple2<String, Integer>>(); driver.setup(context); driver.prepare(); driver.run(); Assert.assertEquals(0, result.getList().size()); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); Assert.fail(e.getMessage()); } }
public class JumpingTimePreReducerTest { TypeSerializer<Integer> serializer = TypeExtractor.getForObject(1).createSerializer(null); ReduceFunction<Integer> reducer = new SumReducer(); @Test public void testEmitWindow() throws Exception { TestCollector<StreamWindow<Integer>> collector = new TestCollector<StreamWindow<Integer>>(); List<StreamWindow<Integer>> collected = collector.getCollected(); WindowBuffer<Integer> wb = new JumpingTimePreReducer<Integer>( reducer, serializer, 3, 2, new TimestampWrapper<Integer>( new Timestamp<Integer>() { private static final long serialVersionUID = 1L; @Override public long getTimestamp(Integer value) { return value; } }, 1)); wb.store(1); wb.store(2); wb.store(3); wb.evict(1); wb.emitWindow(collector); assertEquals(1, collected.size()); assertEquals(StreamWindow.fromElements(5), collected.get(0)); wb.store(4); wb.store(5); // Nothing should happen here wb.evict(2); wb.store(6); wb.emitWindow(collector); wb.evict(2); wb.emitWindow(collector); wb.store(12); wb.emitWindow(collector); assertEquals(3, collected.size()); assertEquals(StreamWindow.fromElements(11), collected.get(1)); assertEquals(StreamWindow.fromElements(12), collected.get(2)); } private static class SumReducer implements ReduceFunction<Integer> { private static final long serialVersionUID = 1L; @Override public Integer reduce(Integer value1, Integer value2) throws Exception { return value1 + value2; } } }
@SuppressWarnings({"unchecked", "rawtypes"}) private SingleOutputStreamOperator<?, ?> createOutput( String boltId, IRichBolt bolt, Map<GlobalStreamId, DataStream<Tuple>> inputStreams) { assert (boltId != null); assert (bolt != null); assert (inputStreams != null); Iterator<Entry<GlobalStreamId, DataStream<Tuple>>> iterator = inputStreams.entrySet().iterator(); Entry<GlobalStreamId, DataStream<Tuple>> input1 = iterator.next(); GlobalStreamId streamId1 = input1.getKey(); String inputStreamId1 = streamId1.get_streamId(); String inputComponentId1 = streamId1.get_componentId(); Fields inputSchema1 = this.outputStreams.get(inputComponentId1).get(inputStreamId1); DataStream<Tuple> singleInputStream = input1.getValue(); DataStream<StormTuple<Tuple>> mergedInputStream = null; while (iterator.hasNext()) { Entry<GlobalStreamId, DataStream<Tuple>> input2 = iterator.next(); GlobalStreamId streamId2 = input2.getKey(); DataStream<Tuple> inputStream2 = input2.getValue(); if (mergedInputStream == null) { mergedInputStream = singleInputStream .connect(inputStream2) .flatMap( new TwoFlinkStreamsMerger( streamId1, inputSchema1, streamId2, this.outputStreams .get(streamId2.get_componentId()) .get(streamId2.get_streamId()))) .returns(StormTuple.class); } else { mergedInputStream = mergedInputStream .connect(inputStream2) .flatMap( new StormFlinkStreamMerger( streamId2, this.outputStreams .get(streamId2.get_componentId()) .get(streamId2.get_streamId()))) .returns(StormTuple.class); } } final HashMap<String, Fields> boltOutputs = this.outputStreams.get(boltId); final FlinkOutputFieldsDeclarer declarer = this.declarers.get(boltId); final SingleOutputStreamOperator<?, ?> outputStream; if (boltOutputs.size() < 2) { // single output stream or sink String outputStreamId; if (boltOutputs.size() == 1) { outputStreamId = (String) boltOutputs.keySet().toArray()[0]; } else { outputStreamId = null; } final TypeInformation<Tuple> outType = declarer.getOutputType(outputStreamId); final SingleOutputStreamOperator<Tuple, ?> outStream; // only one input if (inputStreams.entrySet().size() == 1) { BoltWrapper<Tuple, Tuple> boltWrapper = new BoltWrapper<>(bolt, boltId, inputStreamId1, inputComponentId1, inputSchema1, null); boltWrapper.setStormTopology(stormTopology); outStream = singleInputStream.transform(boltId, outType, boltWrapper); } else { MergedInputsBoltWrapper<Tuple, Tuple> boltWrapper = new MergedInputsBoltWrapper<Tuple, Tuple>(bolt, boltId, null); boltWrapper.setStormTopology(stormTopology); outStream = mergedInputStream.transform(boltId, outType, boltWrapper); } if (outType != null) { // only for non-sink nodes final HashMap<String, DataStream<Tuple>> op = new HashMap<>(); op.put(outputStreamId, outStream); availableInputs.put(boltId, op); } outputStream = outStream; } else { final TypeInformation<SplitStreamType<Tuple>> outType = (TypeInformation) TypeExtractor.getForClass(SplitStreamType.class); final SingleOutputStreamOperator<SplitStreamType<Tuple>, ?> multiStream; // only one input if (inputStreams.entrySet().size() == 1) { final BoltWrapper<Tuple, SplitStreamType<Tuple>> boltWrapperMultipleOutputs = new BoltWrapper<>(bolt, boltId, inputStreamId1, inputComponentId1, inputSchema1, null); boltWrapperMultipleOutputs.setStormTopology(stormTopology); multiStream = singleInputStream.transform(boltId, outType, boltWrapperMultipleOutputs); } else { final MergedInputsBoltWrapper<Tuple, SplitStreamType<Tuple>> boltWrapperMultipleOutputs = new MergedInputsBoltWrapper<Tuple, SplitStreamType<Tuple>>(bolt, boltId, null); 
boltWrapperMultipleOutputs.setStormTopology(stormTopology); multiStream = mergedInputStream.transform(boltId, outType, boltWrapperMultipleOutputs); } final SplitStream<SplitStreamType<Tuple>> splitStream = multiStream.split(new StormStreamSelector<Tuple>()); final HashMap<String, DataStream<Tuple>> op = new HashMap<>(); for (String outputStreamId : boltOutputs.keySet()) { SingleOutputStreamOperator<Tuple, ?> outStream = splitStream.select(outputStreamId).map(new SplitStreamMapper<Tuple>()); outStream.getTransformation().setOutputType(declarer.getOutputType(outputStreamId)); op.put(outputStreamId, outStream); } availableInputs.put(boltId, op); outputStream = multiStream; } return outputStream; }
@Test public void testAllReduceDriverImmutable() { try { { TestTaskContext<ReduceFunction<Tuple2<String, Integer>>, Tuple2<String, Integer>> context = new TestTaskContext<ReduceFunction<Tuple2<String, Integer>>, Tuple2<String, Integer>>(); List<Tuple2<String, Integer>> data = DriverTestData.createReduceImmutableData(); TypeInformation<Tuple2<String, Integer>> typeInfo = TypeExtractor.getForObject(data.get(0)); MutableObjectIterator<Tuple2<String, Integer>> input = new RegularToMutableObjectIterator<Tuple2<String, Integer>>( data.iterator(), typeInfo.createSerializer()); GatheringCollector<Tuple2<String, Integer>> result = new GatheringCollector<Tuple2<String, Integer>>(typeInfo.createSerializer()); context.setDriverStrategy(DriverStrategy.ALL_REDUCE); context.setInput1(input, typeInfo.createSerializer()); context.setCollector(result); context.setUdf(new ConcatSumFirstReducer()); AllReduceDriver<Tuple2<String, Integer>> driver = new AllReduceDriver<Tuple2<String, Integer>>(); driver.setup(context); driver.prepare(); driver.run(); Tuple2<String, Integer> res = result.getList().get(0); char[] foundString = res.f0.toCharArray(); Arrays.sort(foundString); char[] expectedString = "abcddeeeffff".toCharArray(); Arrays.sort(expectedString); Assert.assertArrayEquals(expectedString, foundString); Assert.assertEquals(78, res.f1.intValue()); } { TestTaskContext<ReduceFunction<Tuple2<String, Integer>>, Tuple2<String, Integer>> context = new TestTaskContext<ReduceFunction<Tuple2<String, Integer>>, Tuple2<String, Integer>>(); List<Tuple2<String, Integer>> data = DriverTestData.createReduceImmutableData(); TypeInformation<Tuple2<String, Integer>> typeInfo = TypeExtractor.getForObject(data.get(0)); MutableObjectIterator<Tuple2<String, Integer>> input = new RegularToMutableObjectIterator<Tuple2<String, Integer>>( data.iterator(), typeInfo.createSerializer()); GatheringCollector<Tuple2<String, Integer>> result = new GatheringCollector<Tuple2<String, Integer>>(typeInfo.createSerializer()); context.setDriverStrategy(DriverStrategy.ALL_REDUCE); context.setInput1(input, typeInfo.createSerializer()); context.setCollector(result); context.setUdf(new ConcatSumSecondReducer()); AllReduceDriver<Tuple2<String, Integer>> driver = new AllReduceDriver<Tuple2<String, Integer>>(); driver.setup(context); driver.prepare(); driver.run(); Tuple2<String, Integer> res = result.getList().get(0); char[] foundString = res.f0.toCharArray(); Arrays.sort(foundString); char[] expectedString = "abcddeeeffff".toCharArray(); Arrays.sort(expectedString); Assert.assertArrayEquals(expectedString, foundString); Assert.assertEquals(78, res.f1.intValue()); } } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); Assert.fail(e.getMessage()); } }
public class SlidingTimePreReducerTest { TypeSerializer<Integer> serializer = TypeExtractor.getForObject(1).createSerializer(null); TypeInformation<Tuple2<Integer, Integer>> tupleType = TypeInfoParser.parse("Tuple2<Integer,Integer>"); ReduceFunction<Integer> reducer = new SumReducer(); ReduceFunction<Tuple2<Integer, Integer>> tupleReducer = new TupleSumReducer(); @Test @SuppressWarnings("unchecked") public void testPreReduce1() throws Exception { // This ensures that the buffer is properly cleared after a burst of elements by // replaying the same sequence of elements with a later timestamp and expecting the same // result. TestOutput<StreamWindow<Tuple2<Integer, Integer>>> collector = new TestOutput<StreamWindow<Tuple2<Integer, Integer>>>(); SlidingTimePreReducer<Tuple2<Integer, Integer>> preReducer = new SlidingTimePreReducer<Tuple2<Integer, Integer>>( tupleReducer, tupleType.createSerializer(new ExecutionConfig()), 3, 2, new TimestampWrapper<Tuple2<Integer, Integer>>( new Timestamp<Tuple2<Integer, Integer>>() { private static final long serialVersionUID = 1L; @Override public long getTimestamp(Tuple2<Integer, Integer> value) { return value.f0; } }, 1)); int timeOffset = 0; preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 1, 1)); preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 2, 2)); preReducer.emitWindow(collector); preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 3, 3)); preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 4, 4)); preReducer.evict(1); preReducer.emitWindow(collector); preReducer.evict(2); preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 5, 5)); preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 6, 6)); preReducer.emitWindow(collector); preReducer.evict(2); preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 7, 7)); preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 8, 8)); preReducer.emitWindow(collector); preReducer.evict(2); preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 9, 9)); preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 10, 10)); preReducer.emitWindow(collector); preReducer.evict(2); preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 11, 11)); preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 12, 12)); preReducer.emitWindow(collector); preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 13, 13)); // ensure that everything is cleared out preReducer.evict(100); timeOffset = 25; // a little while later... 
// Repeat the same sequence, this should produce the same result preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 1, 1)); preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 2, 2)); preReducer.emitWindow(collector); preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 3, 3)); preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 4, 4)); preReducer.evict(1); preReducer.emitWindow(collector); preReducer.evict(2); preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 5, 5)); preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 6, 6)); preReducer.emitWindow(collector); preReducer.evict(2); preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 7, 7)); preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 8, 8)); preReducer.emitWindow(collector); preReducer.evict(2); preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 9, 9)); preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 10, 10)); preReducer.emitWindow(collector); preReducer.evict(2); preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 11, 11)); preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 12, 12)); preReducer.emitWindow(collector); preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 13, 13)); List<StreamWindow<Tuple2<Integer, Integer>>> expected = new ArrayList<StreamWindow<Tuple2<Integer, Integer>>>(); timeOffset = 0; // rewind ... expected.add(StreamWindow.fromElements(new Tuple2<Integer, Integer>(timeOffset + 1, 3))); expected.add(StreamWindow.fromElements(new Tuple2<Integer, Integer>(timeOffset + 2, 9))); expected.add(StreamWindow.fromElements(new Tuple2<Integer, Integer>(timeOffset + 4, 15))); expected.add(StreamWindow.fromElements(new Tuple2<Integer, Integer>(timeOffset + 6, 21))); expected.add(StreamWindow.fromElements(new Tuple2<Integer, Integer>(timeOffset + 8, 27))); expected.add(StreamWindow.fromElements(new Tuple2<Integer, Integer>(timeOffset + 10, 33))); timeOffset = 25; // and back to the future ... 
expected.add(StreamWindow.fromElements(new Tuple2<Integer, Integer>(timeOffset + 1, 3))); expected.add(StreamWindow.fromElements(new Tuple2<Integer, Integer>(timeOffset + 2, 9))); expected.add(StreamWindow.fromElements(new Tuple2<Integer, Integer>(timeOffset + 4, 15))); expected.add(StreamWindow.fromElements(new Tuple2<Integer, Integer>(timeOffset + 6, 21))); expected.add(StreamWindow.fromElements(new Tuple2<Integer, Integer>(timeOffset + 8, 27))); expected.add(StreamWindow.fromElements(new Tuple2<Integer, Integer>(timeOffset + 10, 33))); assertEquals(expected, collector.getCollected()); } @Test public void testPreReduce2() throws Exception { TestOutput<StreamWindow<Integer>> collector = new TestOutput<StreamWindow<Integer>>(); SlidingTimePreReducer<Integer> preReducer = new SlidingTimePreReducer<Integer>( reducer, serializer, 5, 2, new TimestampWrapper<Integer>( new Timestamp<Integer>() { private static final long serialVersionUID = 1L; @Override public long getTimestamp(Integer value) { return value; } }, 1)); preReducer.store(1); preReducer.store(2); preReducer.emitWindow(collector); preReducer.store(3); preReducer.store(4); preReducer.emitWindow(collector); preReducer.store(5); preReducer.store(6); preReducer.evict(1); preReducer.emitWindow(collector); preReducer.evict(2); preReducer.store(7); preReducer.store(8); preReducer.emitWindow(collector); preReducer.evict(2); preReducer.store(9); preReducer.store(10); preReducer.emitWindow(collector); preReducer.evict(2); preReducer.store(11); preReducer.store(12); preReducer.emitWindow(collector); preReducer.store(13); List<StreamWindow<Integer>> expected = new ArrayList<StreamWindow<Integer>>(); expected.add(StreamWindow.fromElements(3)); expected.add(StreamWindow.fromElements(10)); expected.add(StreamWindow.fromElements(20)); expected.add(StreamWindow.fromElements(30)); expected.add(StreamWindow.fromElements(40)); expected.add(StreamWindow.fromElements(50)); assertEquals(expected, collector.getCollected()); } @Test public void testPreReduce3() throws Exception { TestOutput<StreamWindow<Integer>> collector = new TestOutput<StreamWindow<Integer>>(); SlidingTimePreReducer<Integer> preReducer = new SlidingTimePreReducer<Integer>( reducer, serializer, 6, 3, new TimestampWrapper<Integer>( new Timestamp<Integer>() { private static final long serialVersionUID = 1L; @Override public long getTimestamp(Integer value) { return value; } }, 1)); preReducer.store(1); preReducer.store(2); preReducer.store(3); preReducer.emitWindow(collector); preReducer.store(4); preReducer.store(5); preReducer.store(6); preReducer.emitWindow(collector); preReducer.evict(3); preReducer.store(7); preReducer.store(8); preReducer.store(9); preReducer.emitWindow(collector); preReducer.evict(3); preReducer.store(10); preReducer.store(11); preReducer.store(12); preReducer.emitWindow(collector); preReducer.evict(3); preReducer.store(13); List<StreamWindow<Integer>> expected = new ArrayList<StreamWindow<Integer>>(); expected.add(StreamWindow.fromElements(6)); expected.add(StreamWindow.fromElements(21)); expected.add(StreamWindow.fromElements(39)); expected.add(StreamWindow.fromElements(57)); assertEquals(expected, collector.getCollected()); } @Test public void testPreReduce4() throws Exception { TestOutput<StreamWindow<Integer>> collector = new TestOutput<StreamWindow<Integer>>(); SlidingTimePreReducer<Integer> preReducer = new SlidingTimePreReducer<Integer>( reducer, serializer, 3, 2, new TimestampWrapper<Integer>( new Timestamp<Integer>() { private static final long 
serialVersionUID = 1L; @Override public long getTimestamp(Integer value) { return value; } }, 1)); preReducer.store(1); preReducer.store(2); preReducer.emitWindow(collector); preReducer.store(3); preReducer.store(4); preReducer.evict(1); preReducer.emitWindow(collector); preReducer.evict(2); preReducer.store(5); preReducer.store(6); preReducer.emitWindow(collector); preReducer.evict(2); preReducer.store(7); preReducer.store(8); preReducer.emitWindow(collector); preReducer.evict(2); preReducer.emitWindow(collector); preReducer.emitWindow(collector); preReducer.evict(2); preReducer.store(14); preReducer.emitWindow(collector); preReducer.emitWindow(collector); preReducer.evict(1); preReducer.emitWindow(collector); preReducer.emitWindow(collector); preReducer.store(21); preReducer.emitWindow(collector); preReducer.evict(1); preReducer.emitWindow(collector); preReducer.store(9); List<StreamWindow<Integer>> expected = new ArrayList<StreamWindow<Integer>>(); expected.add(StreamWindow.fromElements(3)); expected.add(StreamWindow.fromElements(9)); expected.add(StreamWindow.fromElements(15)); expected.add(StreamWindow.fromElements(21)); expected.add(StreamWindow.fromElements(8)); expected.add(StreamWindow.fromElements(8)); expected.add(StreamWindow.fromElements(14)); expected.add(StreamWindow.fromElements(14)); expected.add(StreamWindow.fromElements(21)); assertEquals(expected, collector.getCollected()); } private static class SumReducer implements ReduceFunction<Integer> { private static final long serialVersionUID = 1L; @Override public Integer reduce(Integer value1, Integer value2) throws Exception { return value1 + value2; } } private static class TupleSumReducer implements ReduceFunction<Tuple2<Integer, Integer>> { private static final long serialVersionUID = 1L; @Override public Tuple2<Integer, Integer> reduce( Tuple2<Integer, Integer> value1, Tuple2<Integer, Integer> value2) throws Exception { return new Tuple2<Integer, Integer>(value1.f0, value1.f1 + value2.f1); } } }
/** * Continues a CoGroup transformation and defines a {@link KeySelector} function for the * second co-grouped {@link DataSet}.<br> * The KeySelector function is called for each element of the second DataSet and extracts a * single key value on which the DataSet is grouped. <br> * * @param keyExtractor The KeySelector function which extracts the key values from the second * DataSet on which it is grouped. * @return An incomplete CoGroup transformation. Call {@link * org.apache.flink.api.java.operators.CoGroupOperator.CoGroupOperatorSets.CoGroupOperatorSetsPredicate.CoGroupOperatorWithoutFunction#with(org.apache.flink.api.common.functions.CoGroupFunction)} * to finalize the CoGroup transformation. */ public <K> CoGroupOperatorWithoutFunction equalTo(KeySelector<I2, K> keyExtractor) { TypeInformation<K> keyType = TypeExtractor.getKeySelectorTypes(keyExtractor, input2.getType()); return createCoGroupOperator( new SelectorFunctionKeys<>(keyExtractor, input2.getType(), keyType)); }
/** Specifies a {@link KeySelector} for elements from the first input. */ public <KEY> Where<KEY> where(KeySelector<T1, KEY> keySelector) { TypeInformation<KEY> keyType = TypeExtractor.getKeySelectorTypes(keySelector, input1.getType()); return new Where<>(input1.clean(keySelector), keyType); }
/** * Continues a CoGroup transformation and defines a {@link KeySelector} function for the first * co-grouped {@link DataSet}.<br> * The KeySelector function is called for each element of the first DataSet and extracts a * single key value on which the DataSet is grouped. <br> * * @param keyExtractor The KeySelector function which extracts the key values from the DataSet * on which it is grouped. * @return An incomplete CoGroup transformation. Call {@link * org.apache.flink.api.java.operators.CoGroupOperator.CoGroupOperatorSets.CoGroupOperatorSetsPredicate#equalTo(int...)} * to continue the CoGroup. * @see KeySelector * @see DataSet */ public <K> CoGroupOperatorSetsPredicate where(KeySelector<I1, K> keyExtractor) { TypeInformation<K> keyType = TypeExtractor.getKeySelectorTypes(keyExtractor, input1.getType()); return new CoGroupOperatorSetsPredicate( new SelectorFunctionKeys<>(keyExtractor, input1.getType(), keyType)); }
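// A usage sketch with KeySelectors on both sides (the data sets and tuple types are
// illustrative): the selector's return type becomes the key type, and the equalTo()
// selector on the second DataSet must produce the same type. The chain then continues
// with with(...) as in the snippet further above.
left.coGroup(right)
        .where(new KeySelector<Tuple2<String, Integer>, String>() {
            private static final long serialVersionUID = 1L;

            @Override
            public String getKey(Tuple2<String, Integer> value) {
                return value.f0;
            }
        })
        .equalTo(new KeySelector<Tuple2<String, Long>, String>() {
            private static final long serialVersionUID = 1L;

            @Override
            public String getKey(Tuple2<String, Long> value) {
                return value.f0;
            }
        });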
@Override public TypeInformation<String> getProducedType() { return TypeExtractor.getForClass(String.class); }
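// One plausible host for this method (a hedged sketch: the class name and the
// DeserializationSchema interface are assumptions, modeled on Flink's own string
// schema): reporting the produced type lets downstream operators pick a serializer
// without reflecting over the schema class itself.
public class StringDeserializationSchema implements DeserializationSchema<String> {
    private static final long serialVersionUID = 1L;

    @Override
    public String deserialize(byte[] message) {
        return new String(message, StandardCharsets.UTF_8);
    }

    @Override
    public boolean isEndOfStream(String nextElement) {
        return false; // the stream is unbounded
    }

    @Override
    public TypeInformation<String> getProducedType() {
        return TypeExtractor.getForClass(String.class);
    }
}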
/** Creates a Flink program that uses the specified spouts and bolts. */ private void translateTopology() { unprocessdInputsPerBolt.clear(); outputStreams.clear(); declarers.clear(); availableInputs.clear(); // Storm defaults to parallelism 1 env.setParallelism(1); /* Translation of topology */ for (final Entry<String, IRichSpout> spout : spouts.entrySet()) { final String spoutId = spout.getKey(); final IRichSpout userSpout = spout.getValue(); final FlinkOutputFieldsDeclarer declarer = new FlinkOutputFieldsDeclarer(); userSpout.declareOutputFields(declarer); final HashMap<String, Fields> sourceStreams = declarer.outputStreams; this.outputStreams.put(spoutId, sourceStreams); declarers.put(spoutId, declarer); final HashMap<String, DataStream<Tuple>> outputStreams = new HashMap<String, DataStream<Tuple>>(); final DataStreamSource<?> source; if (sourceStreams.size() == 1) { final SpoutWrapper<Tuple> spoutWrapperSingleOutput = new SpoutWrapper<Tuple>(userSpout, spoutId, null, null); spoutWrapperSingleOutput.setStormTopology(stormTopology); final String outputStreamId = (String) sourceStreams.keySet().toArray()[0]; DataStreamSource<Tuple> src = env.addSource( spoutWrapperSingleOutput, spoutId, declarer.getOutputType(outputStreamId)); outputStreams.put(outputStreamId, src); source = src; } else { final SpoutWrapper<SplitStreamType<Tuple>> spoutWrapperMultipleOutputs = new SpoutWrapper<SplitStreamType<Tuple>>(userSpout, spoutId, null, null); spoutWrapperMultipleOutputs.setStormTopology(stormTopology); @SuppressWarnings({"unchecked", "rawtypes"}) DataStreamSource<SplitStreamType<Tuple>> multiSource = env.addSource( spoutWrapperMultipleOutputs, spoutId, (TypeInformation) TypeExtractor.getForClass(SplitStreamType.class)); SplitStream<SplitStreamType<Tuple>> splitSource = multiSource.split(new StormStreamSelector<Tuple>()); for (String streamId : sourceStreams.keySet()) { SingleOutputStreamOperator<Tuple, ?> outStream = splitSource.select(streamId).map(new SplitStreamMapper<Tuple>()); outStream.getTransformation().setOutputType(declarer.getOutputType(streamId)); outputStreams.put(streamId, outStream); } source = multiSource; } availableInputs.put(spoutId, outputStreams); final ComponentCommon common = stormTopology.get_spouts().get(spoutId).get_common(); if (common.is_set_parallelism_hint()) { int dop = common.get_parallelism_hint(); source.setParallelism(dop); } else { common.set_parallelism_hint(1); } } /** * 1. Connect all spout streams with bolts streams 2. Then proceed with the bolts stream already * connected * * <p>Because we do not know the order in which an iterator steps over a set, we might process a * consumer before its producer ->thus, we might need to repeat multiple times */ boolean makeProgress = true; while (bolts.size() > 0) { if (!makeProgress) { StringBuilder strBld = new StringBuilder(); strBld.append("Unable to build Topology. 
Could not connect the following bolts:"); for (String boltId : bolts.keySet()) { strBld.append("\n "); strBld.append(boltId); strBld.append(": missing input streams ["); for (Entry<GlobalStreamId, Grouping> streams : unprocessdInputsPerBolt.get(boltId)) { strBld.append("'"); strBld.append(streams.getKey().get_streamId()); strBld.append("' from '"); strBld.append(streams.getKey().get_componentId()); strBld.append("'; "); } strBld.append("]"); } throw new RuntimeException(strBld.toString()); } makeProgress = false; final Iterator<Entry<String, IRichBolt>> boltsIterator = bolts.entrySet().iterator(); while (boltsIterator.hasNext()) { final Entry<String, IRichBolt> bolt = boltsIterator.next(); final String boltId = bolt.getKey(); final IRichBolt userBolt = copyObject(bolt.getValue()); final ComponentCommon common = stormTopology.get_bolts().get(boltId).get_common(); Set<Entry<GlobalStreamId, Grouping>> unprocessedBoltInputs = unprocessdInputsPerBolt.get(boltId); if (unprocessedBoltInputs == null) { unprocessedBoltInputs = new HashSet<>(); unprocessedBoltInputs.addAll(common.get_inputs().entrySet()); unprocessdInputsPerBolt.put(boltId, unprocessedBoltInputs); } // check if all inputs are available final int numberOfInputs = unprocessedBoltInputs.size(); int inputsAvailable = 0; for (Entry<GlobalStreamId, Grouping> entry : unprocessedBoltInputs) { final String producerId = entry.getKey().get_componentId(); final String streamId = entry.getKey().get_streamId(); final HashMap<String, DataStream<Tuple>> streams = availableInputs.get(producerId); if (streams != null && streams.get(streamId) != null) { inputsAvailable++; } } if (inputsAvailable != numberOfInputs) { // traverse other bolts first until inputs are available continue; } else { makeProgress = true; boltsIterator.remove(); } final Map<GlobalStreamId, DataStream<Tuple>> inputStreams = new HashMap<>(numberOfInputs); for (Entry<GlobalStreamId, Grouping> input : unprocessedBoltInputs) { final GlobalStreamId streamId = input.getKey(); final Grouping grouping = input.getValue(); final String producerId = streamId.get_componentId(); final Map<String, DataStream<Tuple>> producer = availableInputs.get(producerId); inputStreams.put(streamId, processInput(boltId, userBolt, streamId, grouping, producer)); } final SingleOutputStreamOperator<?, ?> outputStream = createOutput(boltId, userBolt, inputStreams); if (common.is_set_parallelism_hint()) { int dop = common.get_parallelism_hint(); outputStream.setParallelism(dop); } else { common.set_parallelism_hint(1); } } } }
@Test public void testReduceDriverMutable() { try { { TestTaskContext< ReduceFunction<Tuple2<StringValue, IntValue>>, Tuple2<StringValue, IntValue>> context = new TestTaskContext< ReduceFunction<Tuple2<StringValue, IntValue>>, Tuple2<StringValue, IntValue>>( 1024 * 1024); context.getTaskConfig().setRelativeMemoryDriver(0.5); List<Tuple2<StringValue, IntValue>> data = DriverTestData.createReduceMutableData(); TupleTypeInfo<Tuple2<StringValue, IntValue>> typeInfo = (TupleTypeInfo<Tuple2<StringValue, IntValue>>) TypeExtractor.getForObject(data.get(0)); MutableObjectIterator<Tuple2<StringValue, IntValue>> input = new RegularToMutableObjectIterator<Tuple2<StringValue, IntValue>>( data.iterator(), typeInfo.createSerializer(new ExecutionConfig())); TypeComparator<Tuple2<StringValue, IntValue>> comparator = typeInfo.createComparator( new int[] {0}, new boolean[] {true}, 0, new ExecutionConfig()); GatheringCollector<Tuple2<StringValue, IntValue>> result = new GatheringCollector<Tuple2<StringValue, IntValue>>( typeInfo.createSerializer(new ExecutionConfig())); context.setDriverStrategy(DriverStrategy.SORTED_PARTIAL_REDUCE); context.setInput1(input, typeInfo.createSerializer(new ExecutionConfig())); context.setComparator1(comparator); context.setCollector(result); context.setUdf(new ConcatSumFirstMutableReducer()); ReduceCombineDriver<Tuple2<StringValue, IntValue>> driver = new ReduceCombineDriver<Tuple2<StringValue, IntValue>>(); driver.setup(context); driver.prepare(); driver.run(); Object[] res = result.getList().toArray(); Object[] expected = DriverTestData.createReduceMutableDataGroupedResult().toArray(); DriverTestData.compareTupleArrays(expected, res); } { TestTaskContext< ReduceFunction<Tuple2<StringValue, IntValue>>, Tuple2<StringValue, IntValue>> context = new TestTaskContext< ReduceFunction<Tuple2<StringValue, IntValue>>, Tuple2<StringValue, IntValue>>( 1024 * 1024); context.getTaskConfig().setRelativeMemoryDriver(0.5); List<Tuple2<StringValue, IntValue>> data = DriverTestData.createReduceMutableData(); TupleTypeInfo<Tuple2<StringValue, IntValue>> typeInfo = (TupleTypeInfo<Tuple2<StringValue, IntValue>>) TypeExtractor.getForObject(data.get(0)); MutableObjectIterator<Tuple2<StringValue, IntValue>> input = new RegularToMutableObjectIterator<Tuple2<StringValue, IntValue>>( data.iterator(), typeInfo.createSerializer(new ExecutionConfig())); TypeComparator<Tuple2<StringValue, IntValue>> comparator = typeInfo.createComparator( new int[] {0}, new boolean[] {true}, 0, new ExecutionConfig()); GatheringCollector<Tuple2<StringValue, IntValue>> result = new GatheringCollector<Tuple2<StringValue, IntValue>>( typeInfo.createSerializer(new ExecutionConfig())); context.setDriverStrategy(DriverStrategy.SORTED_PARTIAL_REDUCE); context.setInput1(input, typeInfo.createSerializer(new ExecutionConfig())); context.setComparator1(comparator); context.setCollector(result); context.setUdf(new ConcatSumSecondMutableReducer()); ReduceCombineDriver<Tuple2<StringValue, IntValue>> driver = new ReduceCombineDriver<Tuple2<StringValue, IntValue>>(); driver.setup(context); driver.prepare(); driver.run(); Object[] res = result.getList().toArray(); Object[] expected = DriverTestData.createReduceMutableDataGroupedResult().toArray(); DriverTestData.compareTupleArrays(expected, res); } } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); Assert.fail(e.getMessage()); } }
public class TumblingGroupedPreReducerTest { TypeInformation<Tuple2<Integer, Integer>> type = TypeExtractor.getForObject(new Tuple2<Integer, Integer>(1, 1)); TypeSerializer<Tuple2<Integer, Integer>> serializer = type.createSerializer(null); KeySelector<Tuple2<Integer, Integer>, ?> key = KeySelectorUtil.getSelectorForKeys( new Keys.ExpressionKeys<Tuple2<Integer, Integer>>(new int[] {0}, type), type, null); Reducer reducer = new Reducer(); @SuppressWarnings("unchecked") @Test public void testEmitWindow() throws Exception { List<Tuple2<Integer, Integer>> inputs = new ArrayList<Tuple2<Integer, Integer>>(); inputs.add(new Tuple2<Integer, Integer>(1, 1)); inputs.add(new Tuple2<Integer, Integer>(0, 0)); inputs.add(new Tuple2<Integer, Integer>(1, -1)); inputs.add(new Tuple2<Integer, Integer>(1, -2)); TestCollector<StreamWindow<Tuple2<Integer, Integer>>> collector = new TestCollector<StreamWindow<Tuple2<Integer, Integer>>>(); List<StreamWindow<Tuple2<Integer, Integer>>> collected = collector.getCollected(); WindowBuffer<Tuple2<Integer, Integer>> wb = new TumblingGroupedPreReducer<Tuple2<Integer, Integer>>(reducer, key, serializer); wb.store(serializer.copy(inputs.get(0))); wb.store(serializer.copy(inputs.get(1))); wb.emitWindow(collector); wb.evict(2); assertEquals(1, collected.size()); assertSetEquals( StreamWindow.fromElements( new Tuple2<Integer, Integer>(1, 1), new Tuple2<Integer, Integer>(0, 0)), collected.get(0)); wb.store(serializer.copy(inputs.get(0))); wb.store(serializer.copy(inputs.get(1))); wb.store(serializer.copy(inputs.get(2))); wb.store(serializer.copy(inputs.get(3))); wb.emitWindow(collector); wb.evict(4); assertEquals(2, collected.size()); assertSetEquals( StreamWindow.fromElements( new Tuple2<Integer, Integer>(3, -2), new Tuple2<Integer, Integer>(0, 0)), collected.get(1)); // Test whether function is mutating inputs or not assertEquals(2, reducer.allInputs.size()); assertEquals(reducer.allInputs.get(0), inputs.get(2)); assertEquals(reducer.allInputs.get(1), inputs.get(3)); } @SuppressWarnings("unchecked") @Test public void testEmitWindow2() throws Exception { List<Tuple2<Integer, Integer>> inputs = new ArrayList<Tuple2<Integer, Integer>>(); inputs.add(new Tuple2<Integer, Integer>(1, 1)); inputs.add(new Tuple2<Integer, Integer>(0, 0)); inputs.add(new Tuple2<Integer, Integer>(1, -1)); inputs.add(new Tuple2<Integer, Integer>(1, -2)); TestCollector<StreamWindow<Tuple2<Integer, Integer>>> collector = new TestCollector<StreamWindow<Tuple2<Integer, Integer>>>(); List<StreamWindow<Tuple2<Integer, Integer>>> collected = collector.getCollected(); WindowBuffer<Tuple2<Integer, Integer>> wb = new TumblingGroupedPreReducer<Tuple2<Integer, Integer>>(reducer, key, serializer) .sequentialID(); wb.store(serializer.copy(inputs.get(0))); wb.store(serializer.copy(inputs.get(1))); wb.emitWindow(collector); wb.evict(2); assertSetEquals(StreamWindow.fromElements(inputs.get(0), inputs.get(1)), collected.get(0)); wb.store(serializer.copy(inputs.get(0))); wb.store(serializer.copy(inputs.get(1))); wb.store(serializer.copy(inputs.get(2))); wb.emitWindow(collector); wb.evict(3); assertSetEquals( StreamWindow.fromElements(new Tuple2<Integer, Integer>(2, 0), inputs.get(1)), collected.get(1)); } private static <T> void assertSetEquals(Collection<T> first, Collection<T> second) { assertEquals(new HashSet<T>(first), new HashSet<T>(second)); } @SuppressWarnings("serial") private class Reducer implements ReduceFunction<Tuple2<Integer, Integer>> { public List<Tuple2<Integer, Integer>> allInputs = new 
ArrayList<Tuple2<Integer, Integer>>(); @Override public Tuple2<Integer, Integer> reduce( Tuple2<Integer, Integer> value1, Tuple2<Integer, Integer> value2) throws Exception { allInputs.add(value2); value1.f0 = value1.f0 + value2.f0; value1.f1 = value1.f1 + value2.f1; return value1; } } }