Exemplo n.º 1
0
  /**
   * Checks the topology produced by grouping, hash partitioning and custom partitioning.
   *
   * <p>For a {@link DataStream}, every {@link DataStream#groupBy} and {@link
   * DataStream#partitionByHash} variant is expected to produce a partitioned edge from the source
   * to the downstream operator, while only the {@code groupBy} results are expected to report as
   * grouped. Streams built with {@code partitionCustom} are expected to produce a
   * custom-partitioned edge and to report as not grouped. The grouping and hash partitioning
   * checks are then repeated for the {@link ConnectedDataStream}, inspecting the edge from each
   * of the two sources.
   */
  @Test
  @SuppressWarnings("unchecked")
  public void testPartitioning() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream first = env.fromElements(new Tuple2<Long, Long>(0L, 0L));
    DataStream second = env.fromElements(new Tuple2<Long, Long>(0L, 0L));
    ConnectedDataStream both = first.connect(second);

    // --- DataStream: groupBy by position, positions, field expression and key selector ---
    DataStream[] groupedStreams = {
      first.groupBy(0),
      first.groupBy(1, 0),
      first.groupBy("f0"),
      first.groupBy(new FirstSelector())
    };

    int[] groupedTargets = new int[groupedStreams.length];
    for (int i = 0; i < groupedStreams.length; i++) {
      groupedTargets[i] = createDownStreamId(groupedStreams[i]);
    }

    for (int target : groupedTargets) {
      assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(first.getId(), target)));
    }
    for (DataStream stream : groupedStreams) {
      assertTrue(isGrouped(stream));
    }

    // --- DataStream: partitionByHash with the same four key flavours ---
    DataStream[] hashedStreams = {
      first.partitionByHash(0),
      first.partitionByHash(1, 0),
      first.partitionByHash("f0"),
      first.partitionByHash(new FirstSelector())
    };

    int[] hashedTargets = new int[hashedStreams.length];
    for (int i = 0; i < hashedStreams.length; i++) {
      hashedTargets[i] = createDownStreamId(hashedStreams[i]);
    }

    for (int target : hashedTargets) {
      assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(first.getId(), target)));
    }
    for (DataStream stream : hashedStreams) {
      assertFalse(isGrouped(stream));
    }

    // --- DataStream: partitionCustom with a partitioner that always answers 100 ---
    Partitioner<Long> fixedPartitioner =
        new Partitioner<Long>() {
          @Override
          public int partition(Long key, int numPartitions) {
            return 100;
          }
        };

    DataStream[] customStreams = {
      first.partitionCustom(fixedPartitioner, 0),
      first.partitionCustom(fixedPartitioner, "f0"),
      first.partitionCustom(fixedPartitioner, new FirstSelector())
    };

    int[] customTargets = new int[customStreams.length];
    for (int i = 0; i < customStreams.length; i++) {
      customTargets[i] = createDownStreamId(customStreams[i]);
    }

    for (int target : customTargets) {
      assertTrue(isCustomPartitioned(env.getStreamGraph().getStreamEdge(first.getId(), target)));
    }
    for (DataStream stream : customStreams) {
      assertFalse(isGrouped(stream));
    }

    // --- ConnectedDataStream: groupBy ---
    // Each downstream id is requested right after its stream is built, before the next stream
    // is created.
    ConnectedDataStream[] coGrouped = new ConnectedDataStream[5];
    Integer[] coGroupedTargets = new Integer[coGrouped.length];

    coGrouped[0] = both.groupBy(0, 0);
    coGroupedTargets[0] = createDownStreamId(coGrouped[0]);

    coGrouped[1] = both.groupBy(new int[] {0}, new int[] {0});
    coGroupedTargets[1] = createDownStreamId(coGrouped[1]);

    coGrouped[2] = both.groupBy("f0", "f0");
    coGroupedTargets[2] = createDownStreamId(coGrouped[2]);

    coGrouped[3] = both.groupBy(new String[] {"f0"}, new String[] {"f0"});
    coGroupedTargets[3] = createDownStreamId(coGrouped[3]);

    coGrouped[4] = both.groupBy(new FirstSelector(), new FirstSelector());
    coGroupedTargets[4] = createDownStreamId(coGrouped[4]);

    // The edge from each of the two sources is checked for every downstream operator.
    for (Integer target : coGroupedTargets) {
      assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(first.getId(), target)));
      assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(second.getId(), target)));
    }
    for (ConnectedDataStream stream : coGrouped) {
      assertTrue(isGrouped(stream));
    }

    // --- ConnectedDataStream: partitionByHash ---
    ConnectedDataStream[] coHashed = new ConnectedDataStream[5];
    Integer[] coHashedTargets = new Integer[coHashed.length];

    coHashed[0] = both.partitionByHash(0, 0);
    coHashedTargets[0] = createDownStreamId(coHashed[0]);

    coHashed[1] = both.partitionByHash(new int[] {0}, new int[] {0});
    coHashedTargets[1] = createDownStreamId(coHashed[1]);

    coHashed[2] = both.partitionByHash("f0", "f0");
    coHashedTargets[2] = createDownStreamId(coHashed[2]);

    coHashed[3] = both.partitionByHash(new String[] {"f0"}, new String[] {"f0"});
    coHashedTargets[3] = createDownStreamId(coHashed[3]);

    coHashed[4] = both.partitionByHash(new FirstSelector(), new FirstSelector());
    coHashedTargets[4] = createDownStreamId(coHashed[4]);

    for (Integer target : coHashedTargets) {
      assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(first.getId(), target)));
      assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(second.getId(), target)));
    }
    for (ConnectedDataStream stream : coHashed) {
      assertFalse(isGrouped(stream));
    }
  }