/**
   * Tests that offsets are properly committed to ZooKeeper and initial offsets are read from
   * ZooKeeper.
   *
   * <p>This test is only applicable if the Flink Kafka Consumer uses the ZooKeeperOffsetHandler.
   */
  @Test(timeout = 60000)
  public void testOffsetInZookeeper() throws Exception {
    final int parallelism = 3;

    // write a sequence from 0 to 99 to each of the 3 partitions.
    final String topicName = writeSequence("testOffsetInZK", 100, parallelism, 1);

    StreamExecutionEnvironment env1 =
        StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env1.getConfig().disableSysoutLogging();
    env1.enableCheckpointing(50);
    env1.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    env1.setParallelism(parallelism);

    StreamExecutionEnvironment env2 =
        StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env2.getConfig().disableSysoutLogging();
    env2.enableCheckpointing(50);
    env2.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    env2.setParallelism(parallelism);

    readSequence(env1, standardProps, parallelism, topicName, 100, 0);

    CuratorFramework curatorClient = ((KafkaTestEnvironmentImpl) kafkaServer).createCuratorClient();

    Long o1 =
        ZookeeperOffsetHandler.getOffsetFromZooKeeper(
            curatorClient, standardProps.getProperty("group.id"), topicName, 0);
    Long o2 =
        ZookeeperOffsetHandler.getOffsetFromZooKeeper(
            curatorClient, standardProps.getProperty("group.id"), topicName, 1);
    Long o3 =
        ZookeeperOffsetHandler.getOffsetFromZooKeeper(
            curatorClient, standardProps.getProperty("group.id"), topicName, 2);

    LOG.info("Got final offsets from zookeeper o1={}, o2={}, o3={}", o1, o2, o3);

    assertTrue(o1 == null || (o1 >= 0 && o1 <= 100));
    assertTrue(o2 == null || (o2 >= 0 && o2 <= 100));
    assertTrue(o3 == null || (o3 >= 0 && o3 <= 100));

    LOG.info("Manipulating offsets");

    // set the committed offset to 49 for the three partitions, so that the second read starts at record 50
    ZookeeperOffsetHandler.setOffsetInZooKeeper(
        curatorClient, standardProps.getProperty("group.id"), topicName, 0, 49);
    ZookeeperOffsetHandler.setOffsetInZooKeeper(
        curatorClient, standardProps.getProperty("group.id"), topicName, 1, 49);
    ZookeeperOffsetHandler.setOffsetInZooKeeper(
        curatorClient, standardProps.getProperty("group.id"), topicName, 2, 49);

    curatorClient.close();

    // read the remaining 50 records (50 to 99) with the second environment
    readSequence(env2, standardProps, parallelism, topicName, 50, 50);

    deleteTestTopic(topicName);
  }
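  /*
   * For reference: the ZooKeeper reads and writes above target the conventional Kafka 0.8 offset
   * path /consumers/<group>/offsets/<topic>/<partition>. The class below is a minimal,
   * hypothetical sketch of such an offset handler built directly on Curator; it is illustrative
   * only and not the actual ZookeeperOffsetHandler implementation.
   */
  public static class ZkOffsetSketch {

    private static String offsetPath(String groupId, String topic, int partition) {
      return "/consumers/" + groupId + "/offsets/" + topic + "/" + partition;
    }

    /** Returns the committed offset for a partition, or null if nothing has been committed yet. */
    public static Long readOffset(
        org.apache.curator.framework.CuratorFramework client,
        String groupId,
        String topic,
        int partition)
        throws Exception {
      final String path = offsetPath(groupId, topic, partition);
      if (client.checkExists().forPath(path) == null) {
        return null;
      }
      final byte[] data = client.getData().forPath(path);
      return data == null
          ? null
          : Long.valueOf(new String(data, java.nio.charset.StandardCharsets.UTF_8));
    }

    /** Writes (or overwrites) the committed offset for a partition. */
    public static void writeOffset(
        org.apache.curator.framework.CuratorFramework client,
        String groupId,
        String topic,
        int partition,
        long offset)
        throws Exception {
      final String path = offsetPath(groupId, topic, partition);
      final byte[] data = Long.toString(offset).getBytes(java.nio.charset.StandardCharsets.UTF_8);
      if (client.checkExists().forPath(path) == null) {
        client.create().creatingParentsIfNeeded().forPath(path, data);
      } else {
        client.setData().forPath(path, data);
      }
    }
  }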
  private static JobGraph createJobGraphWithKeyedState(
      int parallelism,
      int maxParallelism,
      int numberKeys,
      int numberElements,
      boolean terminateAfterEmission,
      int checkpointingInterval) {

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);
    env.getConfig().setMaxParallelism(maxParallelism);
    env.enableCheckpointing(checkpointingInterval);
    env.setRestartStrategy(RestartStrategies.noRestart());

    DataStream<Integer> input =
        env.addSource(new SubtaskIndexSource(numberKeys, numberElements, terminateAfterEmission))
            .keyBy(
                new KeySelector<Integer, Integer>() {
                  private static final long serialVersionUID = -7952298871120320940L;

                  @Override
                  public Integer getKey(Integer value) throws Exception {
                    return value;
                  }
                });

    SubtaskIndexFlatMapper.workCompletedLatch = new CountDownLatch(numberKeys);

    DataStream<Tuple2<Integer, Integer>> result =
        input.flatMap(new SubtaskIndexFlatMapper(numberElements));

    result.addSink(new CollectionSink<Tuple2<Integer, Integer>>());

    return env.getStreamGraph().getJobGraph();
  }
  /**
   * Note: this test fails if we don't have the synchronized block in {@link
   * org.apache.flink.streaming.runtime.tasks.SourceStreamTask.SourceOutput}.
   */
  @Test
  public void testOneInputOperatorWithoutChaining() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);

    DataStream<String> source = env.addSource(new InfiniteTestSource());

    source.transform(
        "Custom Operator",
        BasicTypeInfo.STRING_TYPE_INFO,
        new TimerOperator(StreamOperator.ChainingStrategy.NEVER));

    boolean testSuccess = false;
    try {
      env.execute("Timer test");
    } catch (JobExecutionException e) {
      if (e.getCause() instanceof TimerException) {
        TimerException te = (TimerException) e.getCause();
        if (te.getCause() instanceof RuntimeException) {
          RuntimeException re = (RuntimeException) te.getCause();
          if (re.getMessage().equals("TEST SUCCESS")) {
            testSuccess = true;
          } else {
            throw e;
          }
        } else {
          throw e;
        }
      } else {
        throw e;
      }
    }
    Assert.assertTrue(testSuccess);
  }
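  /*
   * The nested instanceof checks above walk the exception cause chain by hand. A small helper
   * along the following lines could express the same check more compactly; this is a sketch with
   * an illustrative name, not an existing Flink test utility. Note that it accepts the expected
   * message anywhere in the chain, which is slightly looser than the explicit TimerException
   * check above.
   */
  private static boolean causeChainContainsRuntimeException(Throwable root, String expectedMessage) {
    // walk root -> cause -> cause ... until the chain ends
    for (Throwable t = root; t != null; t = t.getCause()) {
      if (t instanceof RuntimeException && expectedMessage.equals(t.getMessage())) {
        return true;
      }
    }
    return false;
  }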
  /** This tests {@link StringWriter} with non-rolling output. */
  @Test
  public void testNonRollingStringWriter() throws Exception {
    final int NUM_ELEMENTS = 20;
    final int PARALLELISM = 2;
    final String outPath = hdfsURI + "/string-non-rolling-out";
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(PARALLELISM);

    DataStream<Tuple2<Integer, String>> source =
        env.addSource(new TestSourceFunction(NUM_ELEMENTS)).broadcast().filter(new OddEvenFilter());

    RollingSink<String> sink =
        new RollingSink<String>(outPath)
            .setBucketer(new NonRollingBucketer())
            .setPartPrefix("part")
            .setPendingPrefix("")
            .setPendingSuffix("");

    source
        .map(
            new MapFunction<Tuple2<Integer, String>, String>() {
              private static final long serialVersionUID = 1L;

              @Override
              public String map(Tuple2<Integer, String> value) throws Exception {
                return value.f1;
              }
            })
        .addSink(sink);

    env.execute("RollingSink String Write Test");

    FSDataInputStream inStream = dfs.open(new Path(outPath + "/part-0-0"));

    BufferedReader br = new BufferedReader(new InputStreamReader(inStream));

    for (int i = 0; i < NUM_ELEMENTS; i += 2) {
      String line = br.readLine();
      Assert.assertEquals("message #" + i, line);
    }

    inStream.close();

    inStream = dfs.open(new Path(outPath + "/part-1-0"));

    br = new BufferedReader(new InputStreamReader(inStream));

    for (int i = 1; i < NUM_ELEMENTS; i += 2) {
      String line = br.readLine();
      Assert.assertEquals("message #" + i, line);
    }

    inStream.close();
  }
  @Test(timeout = 60000)
  public void testOffsetAutocommitTest() throws Exception {
    final int parallelism = 3;

    // write a sequence from 0 to 99 to each of the 3 partitions.
    final String topicName = writeSequence("testOffsetAutocommit", 100, parallelism, 1);

    StreamExecutionEnvironment env =
        StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    // NOTE: We are not enabling the checkpointing!
    env.getConfig().disableSysoutLogging();
    env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    env.setParallelism(parallelism);

    // the readSequence operation sleeps for 20 ms between records.
    // with a commit interval of 25 * 20 ms = 500 ms we therefore commit
    // roughly 3-4 times while reading the 100 records, but at least once.
    Properties readProps = new Properties();
    readProps.putAll(standardProps);
    readProps.setProperty("auto.commit.interval.ms", "500");

    // read so that the offset can be committed to ZK
    readSequence(env, readProps, parallelism, topicName, 100, 0);

    // get the committed offsets from ZooKeeper
    CuratorFramework curatorFramework =
        ((KafkaTestEnvironmentImpl) kafkaServer).createCuratorClient();

    Long o1 =
        ZookeeperOffsetHandler.getOffsetFromZooKeeper(
            curatorFramework, standardProps.getProperty("group.id"), topicName, 0);
    Long o2 =
        ZookeeperOffsetHandler.getOffsetFromZooKeeper(
            curatorFramework, standardProps.getProperty("group.id"), topicName, 1);
    Long o3 =
        ZookeeperOffsetHandler.getOffsetFromZooKeeper(
            curatorFramework, standardProps.getProperty("group.id"), topicName, 2);
    curatorFramework.close();
    LOG.info("Got final offsets from zookeeper o1={}, o2={}, o3={}", o1, o2, o3);

    // ensure that the offset has been committed
    boolean atLeastOneOffsetSet =
        (o1 != null && o1 > 0 && o1 <= 100)
            || (o2 != null && o2 > 0 && o2 <= 100)
            || (o3 != null && o3 > 0 && o3 <= 100);
    assertTrue(
        "Expecting at least one offset to be set o1=" + o1 + " o2=" + o2 + " o3=" + o3,
        atLeastOneOffsetSet);

    deleteTestTopic(topicName);
  }
  public static void generateLongStringTupleSequence(
      StreamExecutionEnvironment env,
      String brokerConnection,
      String topic,
      int numPartitions,
      final int from,
      final int to)
      throws Exception {

    TypeInformation<Tuple2<Integer, Integer>> resultType =
        TypeInfoParser.parse("Tuple2<Integer, Integer>");

    env.setParallelism(numPartitions);
    env.getConfig().disableSysoutLogging();
    env.setNumberOfExecutionRetries(0);

    DataStream<Tuple2<Integer, Integer>> stream =
        env.addSource(
            new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

              private volatile boolean running = true;

              @Override
              public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
                int cnt = from;
                int partition = getRuntimeContext().getIndexOfThisSubtask();

                while (running && cnt <= to) {
                  ctx.collect(new Tuple2<Integer, Integer>(partition, cnt));
                  cnt++;
                }
              }

              @Override
              public void cancel() {
                running = false;
              }
            });

    stream.addSink(
        new FlinkKafkaProducer<>(
            topic,
            new TypeInformationSerializationSchema<>(resultType, env.getConfig()),
            FlinkKafkaProducer.getPropertiesFromBrokerList(brokerConnection),
            new Tuple2Partitioner(numPartitions)));

    env.execute("Data generator (Int, Int) stream to topic " + topic);
  }
  private static JobGraph createJobGraphWithOperatorState(
      int parallelism, int maxParallelism, boolean partitionedOperatorState) {

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);
    env.getConfig().setMaxParallelism(maxParallelism);
    env.enableCheckpointing(Long.MAX_VALUE);
    env.setRestartStrategy(RestartStrategies.noRestart());

    StateSourceBase.workStartedLatch = new CountDownLatch(1);

    DataStream<Integer> input =
        env.addSource(
            partitionedOperatorState
                ? new PartitionedStateSource()
                : new NonPartitionedStateSource());

    input.addSink(new DiscardingSink<Integer>());

    return env.getStreamGraph().getJobGraph();
  }
  /**
   * Verify that the user-specified state backend is used even if checkpointing is disabled.
   *
   * @throws Exception
   */
  @Test
  public void testStateBackendWithoutCheckpointing() throws Exception {

    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(1);

    see.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    see.setStateBackend(new FailingStateBackend());

    see.fromElements(new Tuple2<>("Hello", 1))
        .keyBy(0)
        .map(
            new RichMapFunction<Tuple2<String, Integer>, String>() {
              private static final long serialVersionUID = 1L;

              @Override
              public void open(Configuration parameters) throws Exception {
                super.open(parameters);
                getRuntimeContext().getKeyValueState("test", String.class, "");
              }

              @Override
              public String map(Tuple2<String, Integer> value) throws Exception {
                return value.f0;
              }
            })
        .print();

    try {
      see.execute();
      fail();
    } catch (JobExecutionException e) {
      Throwable t = e.getCause();
      if (!(t != null && t.getCause() instanceof SuccessException)) {
        throw e;
      }
    }
  }
  @Test
  public void testTumblingTimeWindowWithKVState() {
    final int NUM_ELEMENTS_PER_KEY = 3000;
    final int WINDOW_SIZE = 100;
    final int NUM_KEYS = 100;
    FailingSource.reset();

    try {
      StreamExecutionEnvironment env =
          StreamExecutionEnvironment.createRemoteEnvironment(
              "localhost", cluster.getLeaderRPCPort());

      env.setParallelism(PARALLELISM);
      env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
      env.enableCheckpointing(100);
      env.setNumberOfExecutionRetries(3);
      env.getConfig().disableSysoutLogging();

      env.addSource(new FailingSource(NUM_KEYS, NUM_ELEMENTS_PER_KEY, NUM_ELEMENTS_PER_KEY / 3))
          .rebalance()
          .keyBy(0)
          .timeWindow(Time.of(WINDOW_SIZE, MILLISECONDS))
          .apply(
              new RichWindowFunction<
                  Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, Tuple, TimeWindow>() {

                private boolean open = false;

                private OperatorState<Integer> count;

                @Override
                public void open(Configuration parameters) {
                  assertEquals(PARALLELISM, getRuntimeContext().getNumberOfParallelSubtasks());
                  open = true;
                  count = getRuntimeContext().getKeyValueState("count", Integer.class, 0);
                }

                @Override
                public void apply(
                    Tuple tuple,
                    TimeWindow window,
                    Iterable<Tuple2<Long, IntType>> values,
                    Collector<Tuple4<Long, Long, Long, IntType>> out)
                    throws Exception {

                  // the window count state starts with the key, so that we get
                  // different count results for each key
                  if (count.value() == 0) {
                    count.update(tuple.<Long>getField(0).intValue());
                  }

                  // validate that the function has been opened properly
                  assertTrue(open);

                  count.update(count.value() + 1);
                  out.collect(
                      new Tuple4<>(
                          tuple.<Long>getField(0),
                          window.getStart(),
                          window.getEnd(),
                          new IntType(count.value())));
                }
              })
          .addSink(new CountValidatingSink(NUM_KEYS, NUM_ELEMENTS_PER_KEY / WINDOW_SIZE))
          .setParallelism(1);

      tryExecute(env, "Tumbling Window Test");
    } catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }
  /** Creates a Flink program that uses the specified spouts and bolts. */
  private void translateTopology() {

    unprocessdInputsPerBolt.clear();
    outputStreams.clear();
    declarers.clear();
    availableInputs.clear();

    // Storm defaults to parallelism 1
    env.setParallelism(1);

    /* Translation of topology */

    for (final Entry<String, IRichSpout> spout : spouts.entrySet()) {
      final String spoutId = spout.getKey();
      final IRichSpout userSpout = spout.getValue();

      final FlinkOutputFieldsDeclarer declarer = new FlinkOutputFieldsDeclarer();
      userSpout.declareOutputFields(declarer);
      final HashMap<String, Fields> sourceStreams = declarer.outputStreams;
      this.outputStreams.put(spoutId, sourceStreams);
      declarers.put(spoutId, declarer);

      final HashMap<String, DataStream<Tuple>> outputStreams =
          new HashMap<String, DataStream<Tuple>>();
      final DataStreamSource<?> source;

      if (sourceStreams.size() == 1) {
        final SpoutWrapper<Tuple> spoutWrapperSingleOutput =
            new SpoutWrapper<Tuple>(userSpout, spoutId, null, null);
        spoutWrapperSingleOutput.setStormTopology(stormTopology);

        final String outputStreamId = (String) sourceStreams.keySet().toArray()[0];

        DataStreamSource<Tuple> src =
            env.addSource(
                spoutWrapperSingleOutput, spoutId, declarer.getOutputType(outputStreamId));

        outputStreams.put(outputStreamId, src);
        source = src;
      } else {
        final SpoutWrapper<SplitStreamType<Tuple>> spoutWrapperMultipleOutputs =
            new SpoutWrapper<SplitStreamType<Tuple>>(userSpout, spoutId, null, null);
        spoutWrapperMultipleOutputs.setStormTopology(stormTopology);

        @SuppressWarnings({"unchecked", "rawtypes"})
        DataStreamSource<SplitStreamType<Tuple>> multiSource =
            env.addSource(
                spoutWrapperMultipleOutputs,
                spoutId,
                (TypeInformation) TypeExtractor.getForClass(SplitStreamType.class));

        SplitStream<SplitStreamType<Tuple>> splitSource =
            multiSource.split(new StormStreamSelector<Tuple>());
        for (String streamId : sourceStreams.keySet()) {
          SingleOutputStreamOperator<Tuple, ?> outStream =
              splitSource.select(streamId).map(new SplitStreamMapper<Tuple>());
          outStream.getTransformation().setOutputType(declarer.getOutputType(streamId));
          outputStreams.put(streamId, outStream);
        }
        source = multiSource;
      }
      availableInputs.put(spoutId, outputStreams);

      final ComponentCommon common = stormTopology.get_spouts().get(spoutId).get_common();
      if (common.is_set_parallelism_hint()) {
        int dop = common.get_parallelism_hint();
        source.setParallelism(dop);
      } else {
        common.set_parallelism_hint(1);
      }
    }

    /*
     * 1. Connect all spout streams with bolt streams.
     * 2. Then proceed with the bolt streams that are already connected.
     *
     * Because we do not know the order in which an iterator steps over a set, we might process a
     * consumer before its producer; thus, we might need to repeat the loop multiple times.
     */
    boolean makeProgress = true;
    while (bolts.size() > 0) {
      if (!makeProgress) {
        StringBuilder strBld = new StringBuilder();
        strBld.append("Unable to build Topology. Could not connect the following bolts:");
        for (String boltId : bolts.keySet()) {
          strBld.append("\n  ");
          strBld.append(boltId);
          strBld.append(": missing input streams [");
          for (Entry<GlobalStreamId, Grouping> streams : unprocessdInputsPerBolt.get(boltId)) {
            strBld.append("'");
            strBld.append(streams.getKey().get_streamId());
            strBld.append("' from '");
            strBld.append(streams.getKey().get_componentId());
            strBld.append("'; ");
          }
          strBld.append("]");
        }

        throw new RuntimeException(strBld.toString());
      }
      makeProgress = false;

      final Iterator<Entry<String, IRichBolt>> boltsIterator = bolts.entrySet().iterator();
      while (boltsIterator.hasNext()) {

        final Entry<String, IRichBolt> bolt = boltsIterator.next();
        final String boltId = bolt.getKey();
        final IRichBolt userBolt = copyObject(bolt.getValue());

        final ComponentCommon common = stormTopology.get_bolts().get(boltId).get_common();

        Set<Entry<GlobalStreamId, Grouping>> unprocessedBoltInputs =
            unprocessdInputsPerBolt.get(boltId);
        if (unprocessedBoltInputs == null) {
          unprocessedBoltInputs = new HashSet<>();
          unprocessedBoltInputs.addAll(common.get_inputs().entrySet());
          unprocessdInputsPerBolt.put(boltId, unprocessedBoltInputs);
        }

        // check if all inputs are available
        final int numberOfInputs = unprocessedBoltInputs.size();
        int inputsAvailable = 0;
        for (Entry<GlobalStreamId, Grouping> entry : unprocessedBoltInputs) {
          final String producerId = entry.getKey().get_componentId();
          final String streamId = entry.getKey().get_streamId();
          final HashMap<String, DataStream<Tuple>> streams = availableInputs.get(producerId);
          if (streams != null && streams.get(streamId) != null) {
            inputsAvailable++;
          }
        }

        if (inputsAvailable != numberOfInputs) {
          // traverse other bolts first until inputs are available
          continue;
        } else {
          makeProgress = true;
          boltsIterator.remove();
        }

        final Map<GlobalStreamId, DataStream<Tuple>> inputStreams = new HashMap<>(numberOfInputs);

        for (Entry<GlobalStreamId, Grouping> input : unprocessedBoltInputs) {
          final GlobalStreamId streamId = input.getKey();
          final Grouping grouping = input.getValue();

          final String producerId = streamId.get_componentId();

          final Map<String, DataStream<Tuple>> producer = availableInputs.get(producerId);

          inputStreams.put(streamId, processInput(boltId, userBolt, streamId, grouping, producer));
        }

        final SingleOutputStreamOperator<?, ?> outputStream =
            createOutput(boltId, userBolt, inputStreams);

        if (common.is_set_parallelism_hint()) {
          int dop = common.get_parallelism_hint();
          outputStream.setParallelism(dop);
        } else {
          common.set_parallelism_hint(1);
        }
      }
    }
  }
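  /*
   * The bolt-connection loop above uses a simple fixed-point strategy: in each pass, translate
   * every bolt whose inputs are already available, and repeat until all bolts are connected or a
   * full pass makes no progress (which means the topology is broken). Stripped of the Storm and
   * Flink types, the pattern looks roughly like the sketch below; the Node interface and all
   * names are illustrative assumptions, not API from either project.
   */
  static final class TopologyConnectorSketch {

    /** A node that can only be translated once all of its producers have been translated. */
    interface Node {
      String id();

      java.util.List<String> inputIds();
    }

    static void connectAll(java.util.Set<Node> pending) {
      final java.util.Set<String> connected = new java.util.HashSet<>();
      boolean madeProgress = true;

      while (!pending.isEmpty()) {
        if (!madeProgress) {
          // a full pass connected nothing: the remaining nodes reference unavailable producers
          throw new RuntimeException(
              "Unable to build topology; " + pending.size() + " nodes could not be connected.");
        }
        madeProgress = false;

        final java.util.Iterator<Node> it = pending.iterator();
        while (it.hasNext()) {
          final Node node = it.next();
          if (connected.containsAll(node.inputIds())) {
            // all producers are available: translate the node and mark its output as available
            connected.add(node.id());
            it.remove();
            madeProgress = true;
          }
          // otherwise leave it pending; a later pass may succeed once its producers are connected
        }
      }
    }
  }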
  /**
   * This test ensures that when the consumers retrieve the start offset from Kafka (earliest,
   * latest), this offset is committed to ZooKeeper, even if some partitions are not read.
   *
   * <p>Test: - Create a topic with 3 partitions and write 50 messages (0 to 49) into each
   * partition. - Start three consumers with auto.offset.reset='latest' and wait until they have
   * committed to ZK. - Check that the offsets in ZK are eventually set to 49 for all three
   * partitions.
   *
   * <p>See FLINK-3440 as well.
   */
  @Test(timeout = 60000)
  public void testKafkaOffsetRetrievalToZookeeper() throws Exception {
    final int parallelism = 3;

    // write a sequence from 0 to 49 to each of the 3 partitions.
    final String topicName = writeSequence("testKafkaOffsetToZk", 50, parallelism, 1);

    final StreamExecutionEnvironment env2 =
        StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env2.getConfig().disableSysoutLogging();
    env2.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    env2.setParallelism(parallelism);
    env2.enableCheckpointing(200);

    Properties readProps = new Properties();
    readProps.putAll(standardProps);
    readProps.setProperty("auto.offset.reset", "latest");

    DataStream<String> stream =
        env2.addSource(kafkaServer.getConsumer(topicName, new SimpleStringSchema(), readProps));
    stream.addSink(new DiscardingSink<String>());

    final AtomicReference<Throwable> errorRef = new AtomicReference<>();
    final Thread runner =
        new Thread("runner") {
          @Override
          public void run() {
            try {
              env2.execute();
            } catch (Throwable t) {
              if (!(t.getCause() instanceof JobCancellationException)) {
                errorRef.set(t);
              }
            }
          }
        };
    runner.start();

    final CuratorFramework curatorFramework =
        ((KafkaTestEnvironmentImpl) kafkaServer).createCuratorClient();
    final Long l49 = 49L;

    final long deadline = 30000 + System.currentTimeMillis();
    do {
      Long o1 =
          ZookeeperOffsetHandler.getOffsetFromZooKeeper(
              curatorFramework, standardProps.getProperty("group.id"), topicName, 0);
      Long o2 =
          ZookeeperOffsetHandler.getOffsetFromZooKeeper(
              curatorFramework, standardProps.getProperty("group.id"), topicName, 1);
      Long o3 =
          ZookeeperOffsetHandler.getOffsetFromZooKeeper(
              curatorFramework, standardProps.getProperty("group.id"), topicName, 2);

      if (l49.equals(o1) && l49.equals(o2) && l49.equals(o3)) {
        break;
      }

      Thread.sleep(100);
    } while (System.currentTimeMillis() < deadline);

    // cancel the job
    JobManagerCommunicationUtils.cancelCurrentJob(flink.getLeaderGateway(timeout));

    final Throwable t = errorRef.get();
    if (t != null) {
      throw new RuntimeException("Job failed with an exception", t);
    }

    // check that the offsets in ZK are correct
    Long o1 =
        ZookeeperOffsetHandler.getOffsetFromZooKeeper(
            curatorFramework, standardProps.getProperty("group.id"), topicName, 0);
    Long o2 =
        ZookeeperOffsetHandler.getOffsetFromZooKeeper(
            curatorFramework, standardProps.getProperty("group.id"), topicName, 1);
    Long o3 =
        ZookeeperOffsetHandler.getOffsetFromZooKeeper(
            curatorFramework, standardProps.getProperty("group.id"), topicName, 2);
    Assert.assertEquals(Long.valueOf(49L), o1);
    Assert.assertEquals(Long.valueOf(49L), o2);
    Assert.assertEquals(Long.valueOf(49L), o3);

    curatorFramework.close();
  }
  @Test
  public void testSimpleKeyedPatternEventTime() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(2);

    // (Event, timestamp)
    DataStream<Event> input =
        env.fromElements(
                Tuple2.of(new Event(1, "start", 1.0), 5L),
                Tuple2.of(new Event(1, "middle", 2.0), 1L),
                Tuple2.of(new Event(2, "middle", 2.0), 4L),
                Tuple2.of(new Event(2, "start", 2.0), 3L),
                Tuple2.of(new Event(1, "end", 3.0), 3L),
                Tuple2.of(new Event(3, "start", 4.1), 5L),
                Tuple2.of(new Event(1, "end", 4.0), 10L),
                Tuple2.of(new Event(2, "end", 2.0), 8L),
                Tuple2.of(new Event(1, "middle", 5.0), 7L),
                Tuple2.of(new Event(3, "middle", 6.0), 9L),
                Tuple2.of(new Event(3, "end", 7.0), 7L),
                // last element for high final watermark
                Tuple2.of(new Event(3, "end", 7.0), 100L))
            .assignTimestampsAndWatermarks(
                new AssignerWithPunctuatedWatermarks<Tuple2<Event, Long>>() {

                  @Override
                  public long extractTimestamp(Tuple2<Event, Long> element, long currentTimestamp) {
                    return element.f1;
                  }

                  @Override
                  public Watermark checkAndGetNextWatermark(
                      Tuple2<Event, Long> lastElement, long extractedTimestamp) {
                    return new Watermark(lastElement.f1 - 5);
                  }
                })
            .map(
                new MapFunction<Tuple2<Event, Long>, Event>() {

                  @Override
                  public Event map(Tuple2<Event, Long> value) throws Exception {
                    return value.f0;
                  }
                })
            .keyBy(
                new KeySelector<Event, Integer>() {

                  @Override
                  public Integer getKey(Event value) throws Exception {
                    return value.getId();
                  }
                });

    Pattern<Event, ?> pattern =
        Pattern.<Event>begin("start")
            .where(
                new FilterFunction<Event>() {

                  @Override
                  public boolean filter(Event value) throws Exception {
                    return value.getName().equals("start");
                  }
                })
            .followedBy("middle")
            .where(
                new FilterFunction<Event>() {

                  @Override
                  public boolean filter(Event value) throws Exception {
                    return value.getName().equals("middle");
                  }
                })
            .followedBy("end")
            .where(
                new FilterFunction<Event>() {

                  @Override
                  public boolean filter(Event value) throws Exception {
                    return value.getName().equals("end");
                  }
                });

    DataStream<String> result =
        CEP.pattern(input, pattern)
            .select(
                new PatternSelectFunction<Event, String>() {

                  @Override
                  public String select(Map<String, Event> pattern) {
                    StringBuilder builder = new StringBuilder();

                    builder
                        .append(pattern.get("start").getId())
                        .append(",")
                        .append(pattern.get("middle").getId())
                        .append(",")
                        .append(pattern.get("end").getId());

                    return builder.toString();
                  }
                });

    result.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);

    // the expected sequences of matching event ids
    expected = "1,1,1\n2,2,2";

    env.execute();
  }
  /**
   * This uses {@link org.apache.flink.streaming.connectors.fs.DateTimeBucketer} to produce rolling
   * files. The clock of DateTimeBucketer is set to {@link ModifyableClock}, so that the time can
   * be advanced in lockstep with the processing of elements, using latches.
   */
  @Test
  public void testDateTimeRollingStringWriter() throws Exception {
    final int NUM_ELEMENTS = 20;
    final int PARALLELISM = 2;
    final String outPath = hdfsURI + "/rolling-out";
    DateTimeBucketer.setClock(new ModifyableClock());
    ModifyableClock.setCurrentTime(0);

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(PARALLELISM);

    DataStream<Tuple2<Integer, String>> source =
        env.addSource(new WaitingTestSourceFunction(NUM_ELEMENTS)).broadcast();

    // the parallel flatMap is chained to the sink, so when it has seen 5 elements it can
    // fire the latch
    DataStream<String> mapped =
        source.flatMap(
            new RichFlatMapFunction<Tuple2<Integer, String>, String>() {
              private static final long serialVersionUID = 1L;

              int count = 0;

              @Override
              public void flatMap(Tuple2<Integer, String> value, Collector<String> out)
                  throws Exception {
                out.collect(value.f1);
                count++;
                if (count >= 5) {
                  if (getRuntimeContext().getIndexOfThisSubtask() == 0) {
                    latch1.trigger();
                  } else {
                    latch2.trigger();
                  }
                  count = 0;
                }
              }
            });

    RollingSink<String> sink =
        new RollingSink<String>(outPath)
            .setBucketer(new DateTimeBucketer("ss"))
            .setPartPrefix("part")
            .setPendingPrefix("")
            .setPendingSuffix("");

    mapped.addSink(sink);

    env.execute("RollingSink String Write Test");

    RemoteIterator<LocatedFileStatus> files = dfs.listFiles(new Path(outPath), true);

    // we should have 8 rolling files, 4 time intervals and parallelism of 2
    int numFiles = 0;
    while (files.hasNext()) {
      LocatedFileStatus file = files.next();
      numFiles++;
      if (file.getPath().toString().contains("rolling-out/00")) {
        FSDataInputStream inStream = dfs.open(file.getPath());

        BufferedReader br = new BufferedReader(new InputStreamReader(inStream));

        for (int i = 0; i < 5; i++) {
          String line = br.readLine();
          Assert.assertEquals("message #" + i, line);
        }

        inStream.close();
      } else if (file.getPath().toString().contains("rolling-out/05")) {
        FSDataInputStream inStream = dfs.open(file.getPath());

        BufferedReader br = new BufferedReader(new InputStreamReader(inStream));

        for (int i = 5; i < 10; i++) {
          String line = br.readLine();
          Assert.assertEquals("message #" + i, line);
        }

        inStream.close();
      } else if (file.getPath().toString().contains("rolling-out/10")) {
        FSDataInputStream inStream = dfs.open(file.getPath());

        BufferedReader br = new BufferedReader(new InputStreamReader(inStream));

        for (int i = 10; i < 15; i++) {
          String line = br.readLine();
          Assert.assertEquals("message #" + i, line);
        }

        inStream.close();
      } else if (file.getPath().toString().contains("rolling-out/15")) {
        FSDataInputStream inStream = dfs.open(file.getPath());

        BufferedReader br = new BufferedReader(new InputStreamReader(inStream));

        for (int i = 15; i < 20; i++) {
          String line = br.readLine();
          Assert.assertEquals("message #" + i, line);
        }

        inStream.close();
      } else {
        Assert.fail("File " + file + " does not match any expected roll pattern.");
      }
    }

    Assert.assertEquals(8, numFiles);
  }
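  /*
   * The four verification blocks above differ only in the file and the expected message range; a
   * single helper could cover all of them, e.g. assertFileContainsMessages(file.getPath(), 0, 5)
   * for the first bucket. This is a minimal sketch that assumes the same dfs handle and the
   * "message #<i>" format produced by the test source; the helper name and signature are
   * illustrative only.
   */
  private void assertFileContainsMessages(Path file, int firstMessage, int count) throws Exception {
    try (FSDataInputStream in = dfs.open(file);
        BufferedReader br = new BufferedReader(new InputStreamReader(in))) {
      for (int i = firstMessage; i < firstMessage + count; i++) {
        Assert.assertEquals("message #" + i, br.readLine());
      }
    }
  }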
  /** This tests {@link SequenceFileWriter} with non-rolling output but with compression. */
  @Test
  public void testNonRollingSequenceFileWithCompressionWriter() throws Exception {
    final int NUM_ELEMENTS = 20;
    final int PARALLELISM = 2;
    final String outPath = hdfsURI + "/seq-non-rolling-out";
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(PARALLELISM);

    DataStream<Tuple2<Integer, String>> source =
        env.addSource(new TestSourceFunction(NUM_ELEMENTS)).broadcast().filter(new OddEvenFilter());

    DataStream<Tuple2<IntWritable, Text>> mapped =
        source.map(
            new MapFunction<Tuple2<Integer, String>, Tuple2<IntWritable, Text>>() {
              private static final long serialVersionUID = 1L;

              @Override
              public Tuple2<IntWritable, Text> map(Tuple2<Integer, String> value) throws Exception {
                return Tuple2.of(new IntWritable(value.f0), new Text(value.f1));
              }
            });

    RollingSink<Tuple2<IntWritable, Text>> sink =
        new RollingSink<Tuple2<IntWritable, Text>>(outPath)
            .setWriter(
                new SequenceFileWriter<IntWritable, Text>(
                    "Default", SequenceFile.CompressionType.BLOCK))
            .setBucketer(new NonRollingBucketer())
            .setPartPrefix("part")
            .setPendingPrefix("")
            .setPendingSuffix("");

    mapped.addSink(sink);

    env.execute("RollingSink String Write Test");

    FSDataInputStream inStream = dfs.open(new Path(outPath + "/part-0-0"));

    SequenceFile.Reader reader =
        new SequenceFile.Reader(inStream, 1000, 0, 100000, new Configuration());

    IntWritable intWritable = new IntWritable();
    Text txt = new Text();

    for (int i = 0; i < NUM_ELEMENTS; i += 2) {
      reader.next(intWritable, txt);
      Assert.assertEquals(i, intWritable.get());
      Assert.assertEquals("message #" + i, txt.toString());
    }

    reader.close();
    inStream.close();

    inStream = dfs.open(new Path(outPath + "/part-1-0"));

    reader = new SequenceFile.Reader(inStream, 1000, 0, 100000, new Configuration());

    for (int i = 1; i < NUM_ELEMENTS; i += 2) {
      reader.next(intWritable, txt);
      Assert.assertEquals(i, intWritable.get());
      Assert.assertEquals("message #" + i, txt.toString());
    }

    reader.close();
    inStream.close();
  }
  public static void generateRandomizedIntegerSequence(
      StreamExecutionEnvironment env,
      String brokerConnection,
      String topic,
      final int numPartitions,
      final int numElements,
      final boolean randomizeOrder)
      throws Exception {
    env.setParallelism(numPartitions);
    env.getConfig().disableSysoutLogging();
    env.setNumberOfExecutionRetries(0);

    DataStream<Integer> stream =
        env.addSource(
            new RichParallelSourceFunction<Integer>() {

              private volatile boolean running = true;

              @Override
              public void run(SourceContext<Integer> ctx) {
                // create a sequence
                int[] elements = new int[numElements];
                for (int i = 0, val = getRuntimeContext().getIndexOfThisSubtask();
                    i < numElements;
                    i++, val += getRuntimeContext().getNumberOfParallelSubtasks()) {

                  elements[i] = val;
                }

                // scramble the sequence
                if (randomizeOrder) {
                  Random rnd = new Random();
                  for (int i = 0; i < elements.length; i++) {
                    int otherPos = rnd.nextInt(elements.length);

                    int tmp = elements[i];
                    elements[i] = elements[otherPos];
                    elements[otherPos] = tmp;
                  }
                }

                // emit the sequence
                int pos = 0;
                while (running && pos < elements.length) {
                  ctx.collect(elements[pos++]);
                }
              }

              @Override
              public void cancel() {
                running = false;
              }
            });

    stream
        .rebalance()
        .addSink(
            new FlinkKafkaProducer<>(
                topic,
                new TypeInformationSerializationSchema<>(
                    BasicTypeInfo.INT_TYPE_INFO, env.getConfig()),
                FlinkKafkaProducer.getPropertiesFromBrokerList(brokerConnection),
                new KafkaPartitioner() {
                  @Override
                  public int partition(Object key, int numPartitions) {
                    return ((Integer) key) % numPartitions;
                  }
                }));

    env.execute("Scrambles int sequence generator");
  }
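  /*
   * A side note on the scramble step above: swapping every position with a fully random index
   * yields a permutation, but not a uniformly distributed one. That is irrelevant for this
   * generator, but if uniform shuffling mattered, the classic Fisher-Yates shuffle would be the
   * usual choice; a minimal sketch:
   */
  static void fisherYatesShuffle(int[] elements, java.util.Random rnd) {
    // swap each position with a uniformly chosen index at or before it
    for (int i = elements.length - 1; i > 0; i--) {
      final int j = rnd.nextInt(i + 1);
      final int tmp = elements[i];
      elements[i] = elements[j];
      elements[j] = tmp;
    }
  }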
  @Test
  public void testTumblingTimeWindow() {
    final int NUM_ELEMENTS_PER_KEY = 3000;
    final int WINDOW_SIZE = 100;
    final int NUM_KEYS = 100;
    FailingSource.reset();

    try {
      StreamExecutionEnvironment env =
          StreamExecutionEnvironment.createRemoteEnvironment(
              "localhost", cluster.getLeaderRPCPort());

      env.setParallelism(PARALLELISM);
      env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
      env.enableCheckpointing(100);
      env.setNumberOfExecutionRetries(3);
      env.getConfig().disableSysoutLogging();

      env.addSource(new FailingSource(NUM_KEYS, NUM_ELEMENTS_PER_KEY, NUM_ELEMENTS_PER_KEY / 3))
          .rebalance()
          .keyBy(0)
          .timeWindow(Time.of(WINDOW_SIZE, MILLISECONDS))
          .apply(
              new RichWindowFunction<
                  Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, Tuple, TimeWindow>() {

                private boolean open = false;

                @Override
                public void open(Configuration parameters) {
                  assertEquals(PARALLELISM, getRuntimeContext().getNumberOfParallelSubtasks());
                  open = true;
                }

                @Override
                public void apply(
                    Tuple tuple,
                    TimeWindow window,
                    Iterable<Tuple2<Long, IntType>> values,
                    Collector<Tuple4<Long, Long, Long, IntType>> out) {

                  // validate that the function has been opened properly
                  assertTrue(open);

                  int sum = 0;
                  long key = -1;

                  for (Tuple2<Long, IntType> value : values) {
                    sum += value.f1.value;
                    key = value.f0;
                  }
                  out.collect(
                      new Tuple4<>(key, window.getStart(), window.getEnd(), new IntType(sum)));
                }
              })
          .addSink(new ValidatingSink(NUM_KEYS, NUM_ELEMENTS_PER_KEY / WINDOW_SIZE))
          .setParallelism(1);

      tryExecute(env, "Tumbling Window Test");
    } catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }
  @Test
  public void testSimpleKeyedPatternCEP() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(2);

    DataStream<Event> input =
        env.fromElements(
                new Event(1, "barfoo", 1.0),
                new Event(2, "start", 2.0),
                new Event(3, "start", 2.1),
                new Event(3, "foobar", 3.0),
                new SubEvent(4, "foo", 4.0, 1.0),
                new SubEvent(3, "middle", 3.2, 1.0),
                new Event(42, "start", 3.1),
                new SubEvent(42, "middle", 3.3, 1.2),
                new Event(5, "middle", 5.0),
                new SubEvent(2, "middle", 6.0, 2.0),
                new SubEvent(7, "bar", 3.0, 3.0),
                new Event(42, "42", 42.0),
                new Event(3, "end", 2.0),
                new Event(2, "end", 1.0),
                new Event(42, "end", 42.0))
            .keyBy(
                new KeySelector<Event, Integer>() {

                  @Override
                  public Integer getKey(Event value) throws Exception {
                    return value.getId();
                  }
                });

    Pattern<Event, ?> pattern =
        Pattern.<Event>begin("start")
            .where(
                new FilterFunction<Event>() {

                  @Override
                  public boolean filter(Event value) throws Exception {
                    return value.getName().equals("start");
                  }
                })
            .followedBy("middle")
            .subtype(SubEvent.class)
            .where(
                new FilterFunction<SubEvent>() {

                  @Override
                  public boolean filter(SubEvent value) throws Exception {
                    return value.getName().equals("middle");
                  }
                })
            .followedBy("end")
            .where(
                new FilterFunction<Event>() {

                  @Override
                  public boolean filter(Event value) throws Exception {
                    return value.getName().equals("end");
                  }
                });

    DataStream<String> result =
        CEP.pattern(input, pattern)
            .select(
                new PatternSelectFunction<Event, String>() {

                  @Override
                  public String select(Map<String, Event> pattern) {
                    StringBuilder builder = new StringBuilder();

                    builder
                        .append(pattern.get("start").getId())
                        .append(",")
                        .append(pattern.get("middle").getId())
                        .append(",")
                        .append(pattern.get("end").getId());

                    return builder.toString();
                  }
                });

    result.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);

    // the expected sequences of matching event ids
    expected = "2,2,2\n3,3,3\n42,42,42";

    env.execute();
  }
  /** Tests whether parallelism gets set. */
  @Test
  public void testParallelism() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStreamSource<Tuple2<Long, Long>> src = env.fromElements(new Tuple2<>(0L, 0L));
    env.setParallelism(10);

    SingleOutputStreamOperator<Long, ?> map =
        src.map(
                new MapFunction<Tuple2<Long, Long>, Long>() {
                  @Override
                  public Long map(Tuple2<Long, Long> value) throws Exception {
                    return null;
                  }
                })
            .name("MyMap");

    DataStream<Long> windowed =
        map.windowAll(GlobalWindows.create())
            .trigger(PurgingTrigger.of(CountTrigger.of(10)))
            .fold(
                0L,
                new FoldFunction<Long, Long>() {
                  @Override
                  public Long fold(Long accumulator, Long value) throws Exception {
                    return null;
                  }
                });

    windowed.addSink(new NoOpSink<Long>());

    DataStreamSink<Long> sink =
        map.addSink(
            new SinkFunction<Long>() {
              private static final long serialVersionUID = 1L;

              @Override
              public void invoke(Long value) throws Exception {}
            });

    assertEquals(1, env.getStreamGraph().getStreamNode(src.getId()).getParallelism());
    assertEquals(10, env.getStreamGraph().getStreamNode(map.getId()).getParallelism());
    assertEquals(1, env.getStreamGraph().getStreamNode(windowed.getId()).getParallelism());
    assertEquals(
        10, env.getStreamGraph().getStreamNode(sink.getTransformation().getId()).getParallelism());

    env.setParallelism(7);

    // Some parts, such as windowing, rely on the fact that previous operators have a parallelism
    // set when instantiating the Discretizer. This would break if we dynamically changed
    // the parallelism of operations when changing the setting on the Execution Environment.
    assertEquals(1, env.getStreamGraph().getStreamNode(src.getId()).getParallelism());
    assertEquals(10, env.getStreamGraph().getStreamNode(map.getId()).getParallelism());
    assertEquals(1, env.getStreamGraph().getStreamNode(windowed.getId()).getParallelism());
    assertEquals(
        10, env.getStreamGraph().getStreamNode(sink.getTransformation().getId()).getParallelism());

    try {
      src.setParallelism(3);
      fail();
    } catch (IllegalArgumentException success) {
      // do nothing
    }

    DataStreamSource<Long> parallelSource = env.generateSequence(0, 0);
    parallelSource.addSink(new NoOpSink<Long>());
    assertEquals(7, env.getStreamGraph().getStreamNode(parallelSource.getId()).getParallelism());

    parallelSource.setParallelism(3);
    assertEquals(3, env.getStreamGraph().getStreamNode(parallelSource.getId()).getParallelism());

    map.setParallelism(2);
    assertEquals(2, env.getStreamGraph().getStreamNode(map.getId()).getParallelism());

    sink.setParallelism(4);
    assertEquals(
        4, env.getStreamGraph().getStreamNode(sink.getTransformation().getId()).getParallelism());
  }
  /**
   * Runs the following program:
   *
   * <pre>
   *     [ (source)->(filter)->(map) ] -> [ (map) ] -> [ (groupBy/reduce)->(sink) ]
   * </pre>
   */
  @Test
  public void runCheckpointedProgram() {

    final long NUM_STRINGS = 10000000L;
    assertTrue("Broken test setup", NUM_STRINGS % 40 == 0);

    try {
      StreamExecutionEnvironment env =
          StreamExecutionEnvironment.createRemoteEnvironment(
              "localhost", cluster.getJobManagerRPCPort());
      env.setParallelism(PARALLELISM);
      env.enableCheckpointing(500);
      env.getConfig().disableSysoutLogging();

      DataStream<String> stream = env.addSource(new StringGeneratingSourceFunction(NUM_STRINGS));

      stream
          // -------------- first vertex, chained to the source ----------------
          .filter(new StringRichFilterFunction())

          // -------------- seconds vertex - the stateful one that also fails ----------------
          .map(new StringPrefixCountRichMapFunction())
          .startNewChain()
          .map(new StatefulCounterFunction())

          // -------------- third vertex - reducer and the sink ----------------
          .groupBy("prefix")
          .reduce(new OnceFailingReducer(NUM_STRINGS))
          .addSink(
              new RichSinkFunction<PrefixCount>() {

                private Map<Character, Long> counts = new HashMap<Character, Long>();

                @Override
                public void invoke(PrefixCount value) {
                  Character first = value.prefix.charAt(0);
                  Long previous = counts.get(first);
                  if (previous == null) {
                    counts.put(first, value.count);
                  } else {
                    counts.put(first, Math.max(previous, value.count));
                  }
                }

                //						@Override
                //						public void close() {
                //							for (Long count : counts.values()) {
                //								assertEquals(NUM_STRINGS / 40, count.longValue());
                //							}
                //						}
              });

      env.execute();

      long filterSum = 0;
      for (long l : StringRichFilterFunction.counts) {
        filterSum += l;
      }

      long mapSum = 0;
      for (long l : StringPrefixCountRichMapFunction.counts) {
        mapSum += l;
      }

      long countSum = 0;
      for (long l : StatefulCounterFunction.counts) {
        countSum += l;
      }

      // verify that we counted exactly right

      // this line should be uncommented once the "exactly one off by one" is fixed
      // if this fails we see at which point the count is off
      assertEquals(NUM_STRINGS, filterSum);
      assertEquals(NUM_STRINGS, mapSum);
      assertEquals(NUM_STRINGS, countSum);
    } catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }
  /**
   * Tests union functionality. This ensures that self-unions and unions of streams with differing
   * parallelism work.
   *
   * @throws Exception
   */
  @Test
  public void testUnion() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);

    DataStream<Long> input1 =
        env.generateSequence(0, 0)
            .map(
                new MapFunction<Long, Long>() {
                  @Override
                  public Long map(Long value) throws Exception {
                    return null;
                  }
                });

    DataStream<Long> selfUnion =
        input1
            .union(input1)
            .map(
                new MapFunction<Long, Long>() {
                  @Override
                  public Long map(Long value) throws Exception {
                    return null;
                  }
                });

    DataStream<Long> input6 =
        env.generateSequence(0, 0)
            .map(
                new MapFunction<Long, Long>() {
                  @Override
                  public Long map(Long value) throws Exception {
                    return null;
                  }
                });

    DataStream<Long> selfUnionDifferentPartition =
        input6
            .broadcast()
            .union(input6)
            .map(
                new MapFunction<Long, Long>() {
                  @Override
                  public Long map(Long value) throws Exception {
                    return null;
                  }
                });

    DataStream<Long> input2 =
        env.generateSequence(0, 0)
            .map(
                new MapFunction<Long, Long>() {
                  @Override
                  public Long map(Long value) throws Exception {
                    return null;
                  }
                })
            .setParallelism(4);

    DataStream<Long> input3 =
        env.generateSequence(0, 0)
            .map(
                new MapFunction<Long, Long>() {
                  @Override
                  public Long map(Long value) throws Exception {
                    return null;
                  }
                })
            .setParallelism(2);

    DataStream<Long> unionDifferingParallelism =
        input2
            .union(input3)
            .map(
                new MapFunction<Long, Long>() {
                  @Override
                  public Long map(Long value) throws Exception {
                    return null;
                  }
                })
            .setParallelism(4);

    DataStream<Long> input4 =
        env.generateSequence(0, 0)
            .map(
                new MapFunction<Long, Long>() {
                  @Override
                  public Long map(Long value) throws Exception {
                    return null;
                  }
                })
            .setParallelism(2);

    DataStream<Long> input5 =
        env.generateSequence(0, 0)
            .map(
                new MapFunction<Long, Long>() {
                  @Override
                  public Long map(Long value) throws Exception {
                    return null;
                  }
                })
            .setParallelism(4);

    DataStream<Long> unionDifferingPartitioning =
        input4
            .broadcast()
            .union(input5)
            .map(
                new MapFunction<Long, Long>() {
                  @Override
                  public Long map(Long value) throws Exception {
                    return null;
                  }
                })
            .setParallelism(4);

    StreamGraph streamGraph = env.getStreamGraph();

    // verify self union
    assertTrue(streamGraph.getStreamNode(selfUnion.getId()).getInEdges().size() == 2);
    for (StreamEdge edge : streamGraph.getStreamNode(selfUnion.getId()).getInEdges()) {
      assertTrue(edge.getPartitioner() instanceof ForwardPartitioner);
    }

    // verify self union with different partitioners
    assertTrue(
        streamGraph.getStreamNode(selfUnionDifferentPartition.getId()).getInEdges().size() == 2);
    boolean hasForward = false;
    boolean hasBroadcast = false;
    for (StreamEdge edge :
        streamGraph.getStreamNode(selfUnionDifferentPartition.getId()).getInEdges()) {
      if (edge.getPartitioner() instanceof ForwardPartitioner) {
        hasForward = true;
      }
      if (edge.getPartitioner() instanceof BroadcastPartitioner) {
        hasBroadcast = true;
      }
    }
    assertTrue(hasForward && hasBroadcast);

    // verify union of streams with differing parallelism
    assertTrue(
        streamGraph.getStreamNode(unionDifferingParallelism.getId()).getInEdges().size() == 2);
    for (StreamEdge edge :
        streamGraph.getStreamNode(unionDifferingParallelism.getId()).getInEdges()) {
      if (edge.getSourceId() == input2.getId()) {
        assertTrue(edge.getPartitioner() instanceof ForwardPartitioner);
      } else if (edge.getSourceId() == input3.getId()) {
        assertTrue(edge.getPartitioner() instanceof RebalancePartitioner);
      } else {
        fail("Wrong input edge.");
      }
    }

    // verify union of streams with differing partitionings
    assertTrue(
        streamGraph.getStreamNode(unionDifferingPartitioning.getId()).getInEdges().size() == 2);
    for (StreamEdge edge :
        streamGraph.getStreamNode(unionDifferingPartitioning.getId()).getInEdges()) {
      if (edge.getSourceId() == input4.getId()) {
        assertTrue(edge.getPartitioner() instanceof BroadcastPartitioner);
      } else if (edge.getSourceId() == input5.getId()) {
        assertTrue(edge.getPartitioner() instanceof ForwardPartitioner);
      } else {
        fail("Wrong input edge.");
      }
    }
  }