/**
 * Tests that {@link DataStream#groupBy} and {@link DataStream#partitionByHash} result in
 * different and correct topologies. Does the same for the {@link ConnectedDataStream}, and
 * also covers {@link DataStream#partitionCustom}.
 *
 * <p>Every section follows the same recipe: derive the streams under test, attach a downstream
 * operator to each one via {@code createDownStreamId}, then check the edge(s) leading from the
 * source(s) to that operator and whether the derived stream reports itself as grouped.
 */
@Test
@SuppressWarnings("unchecked")
public void testPartitioning() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream src1 = env.fromElements(new Tuple2<Long, Long>(0L, 0L));
    DataStream src2 = env.fromElements(new Tuple2<Long, Long>(0L, 0L));
    ConnectedDataStream connected = src1.connect(src2);

    // Ids of the two sources; every checked edge starts at one of them.
    final int firstSourceId = src1.getId();
    final int secondSourceId = src2.getId();

    // Testing DataStream grouping
    DataStream[] groupedStreams = {
        src1.groupBy(0),
        src1.groupBy(1, 0),
        src1.groupBy("f0"),
        src1.groupBy(new FirstSelector())
    };
    int[] groupedIds = new int[groupedStreams.length];
    for (int i = 0; i < groupedStreams.length; i++) {
        groupedIds[i] = createDownStreamId(groupedStreams[i]);
    }

    for (int targetId : groupedIds) {
        assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(firstSourceId, targetId)));
    }
    for (DataStream stream : groupedStreams) {
        assertTrue(isGrouped(stream));
    }

    // Testing DataStream partitioning
    DataStream[] hashPartitionedStreams = {
        src1.partitionByHash(0),
        src1.partitionByHash(1, 0),
        src1.partitionByHash("f0"),
        src1.partitionByHash(new FirstSelector())
    };
    int[] hashPartitionedIds = new int[hashPartitionedStreams.length];
    for (int i = 0; i < hashPartitionedStreams.length; i++) {
        hashPartitionedIds[i] = createDownStreamId(hashPartitionedStreams[i]);
    }

    for (int targetId : hashPartitionedIds) {
        assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(firstSourceId, targetId)));
    }
    // Hash partitioning must not turn the stream into a grouped one.
    for (DataStream stream : hashPartitionedStreams) {
        assertFalse(isGrouped(stream));
    }

    // Testing DataStream custom partitioning
    Partitioner<Long> constantPartitioner = new Partitioner<Long>() {
        @Override
        public int partition(Long key, int numPartitions) {
            return 100;
        }
    };

    DataStream[] customPartitionedStreams = {
        src1.partitionCustom(constantPartitioner, 0),
        src1.partitionCustom(constantPartitioner, "f0"),
        src1.partitionCustom(constantPartitioner, new FirstSelector())
    };
    int[] customPartitionedIds = new int[customPartitionedStreams.length];
    for (int i = 0; i < customPartitionedStreams.length; i++) {
        customPartitionedIds[i] = createDownStreamId(customPartitionedStreams[i]);
    }

    for (int targetId : customPartitionedIds) {
        assertTrue(
                isCustomPartitioned(
                        env.getStreamGraph().getStreamEdge(firstSourceId, targetId)));
    }
    for (DataStream stream : customPartitionedStreams) {
        assertFalse(isGrouped(stream));
    }

    // Testing ConnectedDataStream grouping
    // Each connected stream gets its downstream operator right after it is created.
    ConnectedDataStream[] coGroupedStreams = new ConnectedDataStream[5];
    Integer[] coGroupedIds = new Integer[5];

    coGroupedStreams[0] = connected.groupBy(0, 0);
    coGroupedIds[0] = createDownStreamId(coGroupedStreams[0]);

    coGroupedStreams[1] = connected.groupBy(new int[] {0}, new int[] {0});
    coGroupedIds[1] = createDownStreamId(coGroupedStreams[1]);

    coGroupedStreams[2] = connected.groupBy("f0", "f0");
    coGroupedIds[2] = createDownStreamId(coGroupedStreams[2]);

    coGroupedStreams[3] = connected.groupBy(new String[] {"f0"}, new String[] {"f0"});
    coGroupedIds[3] = createDownStreamId(coGroupedStreams[3]);

    coGroupedStreams[4] = connected.groupBy(new FirstSelector(), new FirstSelector());
    coGroupedIds[4] = createDownStreamId(coGroupedStreams[4]);

    // Both inputs of the connected stream must reach the downstream operator partitioned.
    for (Integer targetId : coGroupedIds) {
        assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(firstSourceId, targetId)));
        assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(secondSourceId, targetId)));
    }
    for (ConnectedDataStream stream : coGroupedStreams) {
        assertTrue(isGrouped(stream));
    }

    // Testing ConnectedDataStream partitioning
    ConnectedDataStream[] coPartitionedStreams = new ConnectedDataStream[5];
    Integer[] coPartitionedIds = new Integer[5];

    coPartitionedStreams[0] = connected.partitionByHash(0, 0);
    coPartitionedIds[0] = createDownStreamId(coPartitionedStreams[0]);

    coPartitionedStreams[1] = connected.partitionByHash(new int[] {0}, new int[] {0});
    coPartitionedIds[1] = createDownStreamId(coPartitionedStreams[1]);

    coPartitionedStreams[2] = connected.partitionByHash("f0", "f0");
    coPartitionedIds[2] = createDownStreamId(coPartitionedStreams[2]);

    coPartitionedStreams[3] =
            connected.partitionByHash(new String[] {"f0"}, new String[] {"f0"});
    coPartitionedIds[3] = createDownStreamId(coPartitionedStreams[3]);

    coPartitionedStreams[4] =
            connected.partitionByHash(new FirstSelector(), new FirstSelector());
    coPartitionedIds[4] = createDownStreamId(coPartitionedStreams[4]);

    for (Integer targetId : coPartitionedIds) {
        assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(firstSourceId, targetId)));
        assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(secondSourceId, targetId)));
    }
    for (ConnectedDataStream stream : coPartitionedStreams) {
        assertFalse(isGrouped(stream));
    }
}