@Test
public void testTumblingTimeWindow() {
    final int NUM_ELEMENTS_PER_KEY = 3000;
    final int WINDOW_SIZE = 100;
    final int NUM_KEYS = 100;
    FailingSource.reset();

    try {
        StreamExecutionEnvironment env =
                StreamExecutionEnvironment.createRemoteEnvironment(
                        "localhost", cluster.getLeaderRPCPort());
        env.setParallelism(PARALLELISM);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.enableCheckpointing(100);
        env.setNumberOfExecutionRetries(3);
        env.getConfig().disableSysoutLogging();

        env.addSource(new FailingSource(NUM_KEYS, NUM_ELEMENTS_PER_KEY, NUM_ELEMENTS_PER_KEY / 3))
                .rebalance()
                .keyBy(0)
                .timeWindow(Time.of(WINDOW_SIZE, MILLISECONDS))
                .apply(
                        new RichWindowFunction<
                                Tuple2<Long, IntType>,
                                Tuple4<Long, Long, Long, IntType>,
                                Tuple,
                                TimeWindow>() {

                            private boolean open = false;

                            @Override
                            public void open(Configuration parameters) {
                                assertEquals(PARALLELISM, getRuntimeContext().getNumberOfParallelSubtasks());
                                open = true;
                            }

                            @Override
                            public void apply(
                                    Tuple tuple,
                                    TimeWindow window,
                                    Iterable<Tuple2<Long, IntType>> values,
                                    Collector<Tuple4<Long, Long, Long, IntType>> out) {

                                // validate that the function has been opened properly
                                assertTrue(open);

                                int sum = 0;
                                long key = -1;

                                for (Tuple2<Long, IntType> value : values) {
                                    sum += value.f1.value;
                                    key = value.f0;
                                }
                                out.collect(
                                        new Tuple4<>(key, window.getStart(), window.getEnd(), new IntType(sum)));
                            }
                        })
                .addSink(new ValidatingSink(NUM_KEYS, NUM_ELEMENTS_PER_KEY / WINDOW_SIZE))
                .setParallelism(1);

        tryExecute(env, "Tumbling Window Test");
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
@Test
public void testSimpleKeyedPatternEventTime() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(2);

    // (Event, timestamp)
    DataStream<Event> input =
            env.fromElements(
                            Tuple2.of(new Event(1, "start", 1.0), 5L),
                            Tuple2.of(new Event(1, "middle", 2.0), 1L),
                            Tuple2.of(new Event(2, "middle", 2.0), 4L),
                            Tuple2.of(new Event(2, "start", 2.0), 3L),
                            Tuple2.of(new Event(1, "end", 3.0), 3L),
                            Tuple2.of(new Event(3, "start", 4.1), 5L),
                            Tuple2.of(new Event(1, "end", 4.0), 10L),
                            Tuple2.of(new Event(2, "end", 2.0), 8L),
                            Tuple2.of(new Event(1, "middle", 5.0), 7L),
                            Tuple2.of(new Event(3, "middle", 6.0), 9L),
                            Tuple2.of(new Event(3, "end", 7.0), 7L),
                            // last element for high final watermark
                            Tuple2.of(new Event(3, "end", 7.0), 100L))
                    .assignTimestampsAndWatermarks(
                            new AssignerWithPunctuatedWatermarks<Tuple2<Event, Long>>() {

                                @Override
                                public long extractTimestamp(Tuple2<Event, Long> element, long currentTimestamp) {
                                    return element.f1;
                                }

                                @Override
                                public Watermark checkAndGetNextWatermark(
                                        Tuple2<Event, Long> lastElement, long extractedTimestamp) {
                                    return new Watermark(lastElement.f1 - 5);
                                }
                            })
                    .map(
                            new MapFunction<Tuple2<Event, Long>, Event>() {

                                @Override
                                public Event map(Tuple2<Event, Long> value) throws Exception {
                                    return value.f0;
                                }
                            })
                    .keyBy(
                            new KeySelector<Event, Integer>() {

                                @Override
                                public Integer getKey(Event value) throws Exception {
                                    return value.getId();
                                }
                            });

    Pattern<Event, ?> pattern =
            Pattern.<Event>begin("start")
                    .where(
                            new FilterFunction<Event>() {

                                @Override
                                public boolean filter(Event value) throws Exception {
                                    return value.getName().equals("start");
                                }
                            })
                    .followedBy("middle")
                    .where(
                            new FilterFunction<Event>() {

                                @Override
                                public boolean filter(Event value) throws Exception {
                                    return value.getName().equals("middle");
                                }
                            })
                    .followedBy("end")
                    .where(
                            new FilterFunction<Event>() {

                                @Override
                                public boolean filter(Event value) throws Exception {
                                    return value.getName().equals("end");
                                }
                            });

    DataStream<String> result =
            CEP.pattern(input, pattern)
                    .select(
                            new PatternSelectFunction<Event, String>() {

                                @Override
                                public String select(Map<String, Event> pattern) {
                                    StringBuilder builder = new StringBuilder();

                                    builder.append(pattern.get("start").getId())
                                            .append(",")
                                            .append(pattern.get("middle").getId())
                                            .append(",")
                                            .append(pattern.get("end").getId());

                                    return builder.toString();
                                }
                            });

    result.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);

    // the expected sequences of matching event ids
    expected = "1,1,1\n2,2,2";

    env.execute();
}
@Test
public void testTumblingTimeWindowWithKVState() {
    final int NUM_ELEMENTS_PER_KEY = 3000;
    final int WINDOW_SIZE = 100;
    final int NUM_KEYS = 100;
    FailingSource.reset();

    try {
        StreamExecutionEnvironment env =
                StreamExecutionEnvironment.createRemoteEnvironment(
                        "localhost", cluster.getLeaderRPCPort());
        env.setParallelism(PARALLELISM);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.enableCheckpointing(100);
        env.setNumberOfExecutionRetries(3);
        env.getConfig().disableSysoutLogging();

        env.addSource(new FailingSource(NUM_KEYS, NUM_ELEMENTS_PER_KEY, NUM_ELEMENTS_PER_KEY / 3))
                .rebalance()
                .keyBy(0)
                .timeWindow(Time.of(WINDOW_SIZE, MILLISECONDS))
                .apply(
                        new RichWindowFunction<
                                Tuple2<Long, IntType>,
                                Tuple4<Long, Long, Long, IntType>,
                                Tuple,
                                TimeWindow>() {

                            private boolean open = false;

                            private OperatorState<Integer> count;

                            @Override
                            public void open(Configuration parameters) {
                                assertEquals(PARALLELISM, getRuntimeContext().getNumberOfParallelSubtasks());
                                open = true;
                                count = getRuntimeContext().getKeyValueState("count", Integer.class, 0);
                            }

                            @Override
                            public void apply(
                                    Tuple tuple,
                                    TimeWindow window,
                                    Iterable<Tuple2<Long, IntType>> values,
                                    Collector<Tuple4<Long, Long, Long, IntType>> out) throws Exception {

                                // the window count state starts with the key, so that we get
                                // different count results for each key
                                if (count.value() == 0) {
                                    count.update(tuple.<Long>getField(0).intValue());
                                }

                                // validate that the function has been opened properly
                                assertTrue(open);

                                count.update(count.value() + 1);
                                out.collect(
                                        new Tuple4<>(
                                                tuple.<Long>getField(0),
                                                window.getStart(),
                                                window.getEnd(),
                                                new IntType(count.value())));
                            }
                        })
                .addSink(new CountValidatingSink(NUM_KEYS, NUM_ELEMENTS_PER_KEY / WINDOW_SIZE))
                .setParallelism(1);

        tryExecute(env, "Tumbling Window Test");
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}