/**
 * Attaches a co-map operator followed by a {@link NoOpSink} to the given connected stream and
 * returns the id of that co-map operator.
 *
 * @param dataStream the connected stream to extend
 * @return the id reported by the newly attached co-map operator
 */
private static Integer createDownStreamId(ConnectedDataStream dataStream) {
    // The mapper ignores its input and yields null for both sides of the connection.
    CoMapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Object> nullMapper =
            new CoMapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Object>() {
                @Override
                public Object map1(Tuple2<Long, Long> value) {
                    return null;
                }

                @Override
                public Object map2(Tuple2<Long, Long> value) {
                    return null;
                }
            };

    SingleOutputStreamOperator downStream = dataStream.map(nullMapper);
    downStream.addSink(new NoOpSink());
    return downStream.getId();
}
/**
 * Reports whether both inputs of the given connected stream are {@link GroupedDataStream}
 * instances.
 *
 * @param dataStream the connected stream to inspect
 * @return {@code true} only if the first and the second input are both grouped
 */
private static boolean isGrouped(ConnectedDataStream dataStream) {
    // The second input is only inspected when the first one is already grouped.
    if (!(dataStream.getFirst() instanceof GroupedDataStream)) {
        return false;
    }
    return dataStream.getSecond() instanceof GroupedDataStream;
}
/**
 * Verifies that map, flatMap, filter (applied to a union), split/select and co-map each
 * register the user function or output selector they were given, and that the corresponding
 * edges can be looked up in the generated stream graph.
 */
@Test
public void operatorTest() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStreamSource<Long> src = env.generateSequence(0, 0);

    // map
    MapFunction<Long, Integer> mapFunction = new MapFunction<Long, Integer>() {
        @Override
        public Integer map(Long value) throws Exception {
            return null;
        }
    };
    DataStream<Integer> map = src.map(mapFunction);
    map.addSink(new NoOpSink<Integer>());
    assertEquals(mapFunction, getFunctionForDataStream(map));

    // flatMap
    FlatMapFunction<Long, Integer> flatMapFunction = new FlatMapFunction<Long, Integer>() {
        @Override
        public void flatMap(Long value, Collector<Integer> out) throws Exception {}
    };
    DataStream<Integer> flatMap = src.flatMap(flatMapFunction);
    flatMap.addSink(new NoOpSink<Integer>());
    assertEquals(flatMapFunction, getFunctionForDataStream(flatMap));

    // filter on top of a union of the two streams above
    FilterFunction<Integer> filterFunction = new FilterFunction<Integer>() {
        @Override
        public boolean filter(Integer value) throws Exception {
            return false;
        }
    };
    DataStream<Integer> unionFilter = map.union(flatMap).filter(filterFunction);
    unionFilter.addSink(new NoOpSink<Integer>());
    assertEquals(filterFunction, getFunctionForDataStream(unionFilter));

    // Both union inputs must be connected to the filter. A missing edge is expected to
    // surface as a RuntimeException from getStreamEdge; it is deliberately not caught so that
    // the test fails with the full stack trace instead of only the exception message.
    env.getStreamGraph().getStreamEdge(map.getId(), unionFilter.getId());
    env.getStreamGraph().getStreamEdge(flatMap.getId(), unionFilter.getId());

    // split / select
    OutputSelector<Integer> outputSelector = new OutputSelector<Integer>() {
        @Override
        public Iterable<String> select(Integer value) {
            return null;
        }
    };
    SplitDataStream<Integer> split = unionFilter.split(outputSelector);
    split.select("dummy").addSink(new NoOpSink<Integer>());
    List<OutputSelector<?>> outputSelectors =
            env.getStreamGraph().getStreamNode(unionFilter.getId()).getOutputSelectors();
    assertEquals(1, outputSelectors.size());
    assertEquals(outputSelector, outputSelectors.get(0));

    DataStream<Integer> select = split.select("a");
    DataStreamSink<Integer> sink = select.print();
    StreamEdge splitEdge =
            env.getStreamGraph()
                    .getStreamEdge(unionFilter.getId(), sink.getTransformation().getId());
    assertEquals("a", splitEdge.getSelectedNames().get(0));

    // co-map on the connected map/flatMap streams
    ConnectedDataStream<Integer, Integer> connect = map.connect(flatMap);
    CoMapFunction<Integer, Integer, String> coMapper =
            new CoMapFunction<Integer, Integer, String>() {
                @Override
                public String map1(Integer value) {
                    return null;
                }

                @Override
                public String map2(Integer value) {
                    return null;
                }
            };
    DataStream<String> coMap = connect.map(coMapper);
    coMap.addSink(new NoOpSink<String>());
    assertEquals(coMapper, getFunctionForDataStream(coMap));

    // Same reasoning as above: let a failed edge lookup propagate with its stack trace.
    env.getStreamGraph().getStreamEdge(map.getId(), coMap.getId());
    env.getStreamGraph().getStreamEdge(flatMap.getId(), coMap.getId());
}
/**
 * Tests that {@link DataStream#groupBy} and {@link DataStream#partitionByHash} result in
 * different and correct topologies. Does the same for the {@link ConnectedDataStream}.
 */
@Test
@SuppressWarnings("unchecked")
public void testPartitioning() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream src1 = env.fromElements(new Tuple2<Long, Long>(0L, 0L));
    DataStream src2 = env.fromElements(new Tuple2<Long, Long>(0L, 0L));
    ConnectedDataStream connected = src1.connect(src2);

    // Testing DataStream grouping
    DataStream[] groupedStreams = {
        src1.groupBy(0),
        src1.groupBy(1, 0),
        src1.groupBy("f0"),
        src1.groupBy(new FirstSelector())
    };
    int[] groupedIds = new int[groupedStreams.length];
    for (int i = 0; i < groupedStreams.length; i++) {
        groupedIds[i] = createDownStreamId(groupedStreams[i]);
    }
    for (int downId : groupedIds) {
        assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), downId)));
    }
    for (DataStream stream : groupedStreams) {
        assertTrue(isGrouped(stream));
    }

    // Testing DataStream partitioning
    DataStream[] hashedStreams = {
        src1.partitionByHash(0),
        src1.partitionByHash(1, 0),
        src1.partitionByHash("f0"),
        src1.partitionByHash(new FirstSelector())
    };
    int[] hashedIds = new int[hashedStreams.length];
    for (int i = 0; i < hashedStreams.length; i++) {
        hashedIds[i] = createDownStreamId(hashedStreams[i]);
    }
    for (int downId : hashedIds) {
        assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), downId)));
    }
    // Hash partitioning alone must not produce a grouped stream.
    for (DataStream stream : hashedStreams) {
        assertFalse(isGrouped(stream));
    }

    // Testing DataStream custom partitioning
    Partitioner<Long> longPartitioner = new Partitioner<Long>() {
        @Override
        public int partition(Long key, int numPartitions) {
            return 100;
        }
    };
    DataStream[] customStreams = {
        src1.partitionCustom(longPartitioner, 0),
        src1.partitionCustom(longPartitioner, "f0"),
        src1.partitionCustom(longPartitioner, new FirstSelector())
    };
    int[] customIds = new int[customStreams.length];
    for (int i = 0; i < customStreams.length; i++) {
        customIds[i] = createDownStreamId(customStreams[i]);
    }
    for (int downId : customIds) {
        assertTrue(
                isCustomPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), downId)));
    }
    for (DataStream stream : customStreams) {
        assertFalse(isGrouped(stream));
    }

    // Testing ConnectedDataStream grouping
    // Each connected stream gets its downstream operator right after it is created.
    ConnectedDataStream[] groupedPairs = new ConnectedDataStream[5];
    Integer[] groupedPairIds = new Integer[5];

    groupedPairs[0] = connected.groupBy(0, 0);
    groupedPairIds[0] = createDownStreamId(groupedPairs[0]);

    groupedPairs[1] = connected.groupBy(new int[] {0}, new int[] {0});
    groupedPairIds[1] = createDownStreamId(groupedPairs[1]);

    groupedPairs[2] = connected.groupBy("f0", "f0");
    groupedPairIds[2] = createDownStreamId(groupedPairs[2]);

    groupedPairs[3] = connected.groupBy(new String[] {"f0"}, new String[] {"f0"});
    groupedPairIds[3] = createDownStreamId(groupedPairs[3]);

    groupedPairs[4] = connected.groupBy(new FirstSelector(), new FirstSelector());
    groupedPairIds[4] = createDownStreamId(groupedPairs[4]);

    // Check the edge from each of the two sources into every downstream operator.
    for (Integer downId : groupedPairIds) {
        assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), downId)));
        assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src2.getId(), downId)));
    }
    for (ConnectedDataStream pair : groupedPairs) {
        assertTrue(isGrouped(pair));
    }

    // Testing ConnectedDataStream partitioning
    ConnectedDataStream[] hashedPairs = new ConnectedDataStream[5];
    Integer[] hashedPairIds = new Integer[5];

    hashedPairs[0] = connected.partitionByHash(0, 0);
    hashedPairIds[0] = createDownStreamId(hashedPairs[0]);

    hashedPairs[1] = connected.partitionByHash(new int[] {0}, new int[] {0});
    hashedPairIds[1] = createDownStreamId(hashedPairs[1]);

    hashedPairs[2] = connected.partitionByHash("f0", "f0");
    hashedPairIds[2] = createDownStreamId(hashedPairs[2]);

    hashedPairs[3] = connected.partitionByHash(new String[] {"f0"}, new String[] {"f0"});
    hashedPairIds[3] = createDownStreamId(hashedPairs[3]);

    hashedPairs[4] = connected.partitionByHash(new FirstSelector(), new FirstSelector());
    hashedPairIds[4] = createDownStreamId(hashedPairs[4]);

    for (Integer downId : hashedPairIds) {
        assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), downId)));
        assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src2.getId(), downId)));
    }
    for (ConnectedDataStream pair : hashedPairs) {
        assertFalse(isGrouped(pair));
    }
}