Ejemplo n.º 1
0
  private static Integer createDownStreamId(ConnectedDataStream dataStream) {
    SingleOutputStreamOperator coMap =
        dataStream.map(
            new CoMapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Object>() {
              @Override
              public Object map1(Tuple2<Long, Long> value) {
                return null;
              }

              @Override
              public Object map2(Tuple2<Long, Long> value) {
                return null;
              }
            });
    coMap.addSink(new NoOpSink());
    return coMap.getId();
  }
Ejemplo n.º 2
0
 private static boolean isGrouped(ConnectedDataStream dataStream) {
   return (dataStream.getFirst() instanceof GroupedDataStream
       && dataStream.getSecond() instanceof GroupedDataStream);
 }
Ejemplo n.º 3
0
  @Test
  public void operatorTest() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStreamSource<Long> src = env.generateSequence(0, 0);

    MapFunction<Long, Integer> mapFunction =
        new MapFunction<Long, Integer>() {
          @Override
          public Integer map(Long value) throws Exception {
            return null;
          }
        };
    DataStream<Integer> map = src.map(mapFunction);
    map.addSink(new NoOpSink<Integer>());
    assertEquals(mapFunction, getFunctionForDataStream(map));

    FlatMapFunction<Long, Integer> flatMapFunction =
        new FlatMapFunction<Long, Integer>() {
          @Override
          public void flatMap(Long value, Collector<Integer> out) throws Exception {}
        };
    DataStream<Integer> flatMap = src.flatMap(flatMapFunction);
    flatMap.addSink(new NoOpSink<Integer>());
    assertEquals(flatMapFunction, getFunctionForDataStream(flatMap));

    FilterFunction<Integer> filterFunction =
        new FilterFunction<Integer>() {
          @Override
          public boolean filter(Integer value) throws Exception {
            return false;
          }
        };

    DataStream<Integer> unionFilter = map.union(flatMap).filter(filterFunction);

    unionFilter.addSink(new NoOpSink<Integer>());

    assertEquals(filterFunction, getFunctionForDataStream(unionFilter));

    try {
      env.getStreamGraph().getStreamEdge(map.getId(), unionFilter.getId());
    } catch (RuntimeException e) {
      fail(e.getMessage());
    }

    try {
      env.getStreamGraph().getStreamEdge(flatMap.getId(), unionFilter.getId());
    } catch (RuntimeException e) {
      fail(e.getMessage());
    }

    OutputSelector<Integer> outputSelector =
        new OutputSelector<Integer>() {
          @Override
          public Iterable<String> select(Integer value) {
            return null;
          }
        };

    SplitDataStream<Integer> split = unionFilter.split(outputSelector);
    split.select("dummy").addSink(new NoOpSink<Integer>());
    List<OutputSelector<?>> outputSelectors =
        env.getStreamGraph().getStreamNode(unionFilter.getId()).getOutputSelectors();
    assertEquals(1, outputSelectors.size());
    assertEquals(outputSelector, outputSelectors.get(0));

    DataStream<Integer> select = split.select("a");
    DataStreamSink<Integer> sink = select.print();

    StreamEdge splitEdge =
        env.getStreamGraph().getStreamEdge(unionFilter.getId(), sink.getTransformation().getId());
    assertEquals("a", splitEdge.getSelectedNames().get(0));

    ConnectedDataStream<Integer, Integer> connect = map.connect(flatMap);
    CoMapFunction<Integer, Integer, String> coMapper =
        new CoMapFunction<Integer, Integer, String>() {
          @Override
          public String map1(Integer value) {
            return null;
          }

          @Override
          public String map2(Integer value) {
            return null;
          }
        };
    DataStream<String> coMap = connect.map(coMapper);
    coMap.addSink(new NoOpSink<String>());
    assertEquals(coMapper, getFunctionForDataStream(coMap));

    try {
      env.getStreamGraph().getStreamEdge(map.getId(), coMap.getId());
    } catch (RuntimeException e) {
      fail(e.getMessage());
    }

    try {
      env.getStreamGraph().getStreamEdge(flatMap.getId(), coMap.getId());
    } catch (RuntimeException e) {
      fail(e.getMessage());
    }
  }
Ejemplo n.º 4
0
  /**
   * Tests that {@link DataStream#groupBy} and {@link DataStream#partitionByHash} result in
   * different and correct topologies. Does the some for the {@link ConnectedDataStream}.
   */
  @Test
  @SuppressWarnings("unchecked")
  public void testPartitioning() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream src1 = env.fromElements(new Tuple2<Long, Long>(0L, 0L));
    DataStream src2 = env.fromElements(new Tuple2<Long, Long>(0L, 0L));
    ConnectedDataStream connected = src1.connect(src2);

    // Testing DataStream grouping
    DataStream group1 = src1.groupBy(0);
    DataStream group2 = src1.groupBy(1, 0);
    DataStream group3 = src1.groupBy("f0");
    DataStream group4 = src1.groupBy(new FirstSelector());

    int id1 = createDownStreamId(group1);
    int id2 = createDownStreamId(group2);
    int id3 = createDownStreamId(group3);
    int id4 = createDownStreamId(group4);

    assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), id1)));
    assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), id2)));
    assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), id3)));
    assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), id4)));

    assertTrue(isGrouped(group1));
    assertTrue(isGrouped(group2));
    assertTrue(isGrouped(group3));
    assertTrue(isGrouped(group4));

    // Testing DataStream partitioning
    DataStream partition1 = src1.partitionByHash(0);
    DataStream partition2 = src1.partitionByHash(1, 0);
    DataStream partition3 = src1.partitionByHash("f0");
    DataStream partition4 = src1.partitionByHash(new FirstSelector());

    int pid1 = createDownStreamId(partition1);
    int pid2 = createDownStreamId(partition2);
    int pid3 = createDownStreamId(partition3);
    int pid4 = createDownStreamId(partition4);

    assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), pid1)));
    assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), pid2)));
    assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), pid3)));
    assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), pid4)));

    assertFalse(isGrouped(partition1));
    assertFalse(isGrouped(partition3));
    assertFalse(isGrouped(partition2));
    assertFalse(isGrouped(partition4));

    // Testing DataStream custom partitioning
    Partitioner<Long> longPartitioner =
        new Partitioner<Long>() {
          @Override
          public int partition(Long key, int numPartitions) {
            return 100;
          }
        };

    DataStream customPartition1 = src1.partitionCustom(longPartitioner, 0);
    DataStream customPartition3 = src1.partitionCustom(longPartitioner, "f0");
    DataStream customPartition4 = src1.partitionCustom(longPartitioner, new FirstSelector());

    int cid1 = createDownStreamId(customPartition1);
    int cid2 = createDownStreamId(customPartition3);
    int cid3 = createDownStreamId(customPartition4);

    assertTrue(isCustomPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), cid1)));
    assertTrue(isCustomPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), cid2)));
    assertTrue(isCustomPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), cid3)));

    assertFalse(isGrouped(customPartition1));
    assertFalse(isGrouped(customPartition3));
    assertFalse(isGrouped(customPartition4));

    // Testing ConnectedDataStream grouping
    ConnectedDataStream connectedGroup1 = connected.groupBy(0, 0);
    Integer downStreamId1 = createDownStreamId(connectedGroup1);

    ConnectedDataStream connectedGroup2 = connected.groupBy(new int[] {0}, new int[] {0});
    Integer downStreamId2 = createDownStreamId(connectedGroup2);

    ConnectedDataStream connectedGroup3 = connected.groupBy("f0", "f0");
    Integer downStreamId3 = createDownStreamId(connectedGroup3);

    ConnectedDataStream connectedGroup4 =
        connected.groupBy(new String[] {"f0"}, new String[] {"f0"});
    Integer downStreamId4 = createDownStreamId(connectedGroup4);

    ConnectedDataStream connectedGroup5 =
        connected.groupBy(new FirstSelector(), new FirstSelector());
    Integer downStreamId5 = createDownStreamId(connectedGroup5);

    assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), downStreamId1)));
    assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src2.getId(), downStreamId1)));

    assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), downStreamId2)));
    assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src2.getId(), downStreamId2)));

    assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), downStreamId3)));
    assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src2.getId(), downStreamId3)));

    assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), downStreamId4)));
    assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src2.getId(), downStreamId4)));

    assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), downStreamId5)));
    assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src2.getId(), downStreamId5)));

    assertTrue(isGrouped(connectedGroup1));
    assertTrue(isGrouped(connectedGroup2));
    assertTrue(isGrouped(connectedGroup3));
    assertTrue(isGrouped(connectedGroup4));
    assertTrue(isGrouped(connectedGroup5));

    // Testing ConnectedDataStream partitioning
    ConnectedDataStream connectedPartition1 = connected.partitionByHash(0, 0);
    Integer connectDownStreamId1 = createDownStreamId(connectedPartition1);

    ConnectedDataStream connectedPartition2 =
        connected.partitionByHash(new int[] {0}, new int[] {0});
    Integer connectDownStreamId2 = createDownStreamId(connectedPartition2);

    ConnectedDataStream connectedPartition3 = connected.partitionByHash("f0", "f0");
    Integer connectDownStreamId3 = createDownStreamId(connectedPartition3);

    ConnectedDataStream connectedPartition4 =
        connected.partitionByHash(new String[] {"f0"}, new String[] {"f0"});
    Integer connectDownStreamId4 = createDownStreamId(connectedPartition4);

    ConnectedDataStream connectedPartition5 =
        connected.partitionByHash(new FirstSelector(), new FirstSelector());
    Integer connectDownStreamId5 = createDownStreamId(connectedPartition5);

    assertTrue(
        isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), connectDownStreamId1)));
    assertTrue(
        isPartitioned(env.getStreamGraph().getStreamEdge(src2.getId(), connectDownStreamId1)));

    assertTrue(
        isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), connectDownStreamId2)));
    assertTrue(
        isPartitioned(env.getStreamGraph().getStreamEdge(src2.getId(), connectDownStreamId2)));

    assertTrue(
        isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), connectDownStreamId3)));
    assertTrue(
        isPartitioned(env.getStreamGraph().getStreamEdge(src2.getId(), connectDownStreamId3)));

    assertTrue(
        isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), connectDownStreamId4)));
    assertTrue(
        isPartitioned(env.getStreamGraph().getStreamEdge(src2.getId(), connectDownStreamId4)));

    assertTrue(
        isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), connectDownStreamId5)));
    assertTrue(
        isPartitioned(env.getStreamGraph().getStreamEdge(src2.getId(), connectDownStreamId5)));

    assertFalse(isGrouped(connectedPartition1));
    assertFalse(isGrouped(connectedPartition2));
    assertFalse(isGrouped(connectedPartition3));
    assertFalse(isGrouped(connectedPartition4));
    assertFalse(isGrouped(connectedPartition5));
  }