Example #1
0
  /**
   * Tests that offsets are properly committed to ZooKeeper and initial offsets are read from
   * ZooKeeper.
   *
   * <p>The test reads the complete sequence once, checks that every offset found in ZooKeeper is
   * either absent or within {@code [0, 100]}, overwrites the stored offsets of all three
   * partitions with 49 and then runs a second read job against the same topic.
   *
   * <p>This test is only applicable if the Flink Kafka Consumer uses the ZooKeeperOffsetHandler.
   *
   * @throws Exception if writing, reading, or any ZooKeeper access fails
   */
  @Test(timeout = 60000)
  public void testOffsetInZookeeper() throws Exception {
    final int parallelism = 3;

    // write a sequence from 0 to 99 to each of the 3 partitions.
    final String topicName = writeSequence("testOffsetInZK", 100, parallelism, 1);

    StreamExecutionEnvironment env1 =
        StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env1.getConfig().disableSysoutLogging();
    env1.enableCheckpointing(50);
    env1.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    env1.setParallelism(parallelism);

    StreamExecutionEnvironment env2 =
        StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env2.getConfig().disableSysoutLogging();
    env2.enableCheckpointing(50);
    env2.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    env2.setParallelism(parallelism);

    readSequence(env1, standardProps, parallelism, topicName, 100, 0);

    final String groupId = standardProps.getProperty("group.id");

    CuratorFramework curatorClient = ((KafkaTestEnvironmentImpl) kafkaServer).createCuratorClient();
    try {
      Long o1 = ZookeeperOffsetHandler.getOffsetFromZooKeeper(curatorClient, groupId, topicName, 0);
      Long o2 = ZookeeperOffsetHandler.getOffsetFromZooKeeper(curatorClient, groupId, topicName, 1);
      Long o3 = ZookeeperOffsetHandler.getOffsetFromZooKeeper(curatorClient, groupId, topicName, 2);

      LOG.info("Got final offsets from zookeeper o1={}, o2={}, o3={}", o1, o2, o3);

      // an offset may be missing entirely; if present it must lie within the written range
      assertTrue(o1 == null || (o1 >= 0 && o1 <= 100));
      assertTrue(o2 == null || (o2 >= 0 && o2 <= 100));
      assertTrue(o3 == null || (o3 >= 0 && o3 <= 100));

      LOG.info("Manipulating offsets");

      // overwrite the stored offset of all three partitions with 49
      // (presumably the last consumed offset, so that the next read resumes at 50)
      ZookeeperOffsetHandler.setOffsetInZooKeeper(curatorClient, groupId, topicName, 0, 49);
      ZookeeperOffsetHandler.setOffsetInZooKeeper(curatorClient, groupId, topicName, 1, 49);
      ZookeeperOffsetHandler.setOffsetInZooKeeper(curatorClient, groupId, topicName, 2, 49);
    } finally {
      // release the ZooKeeper connection even when a lookup or an assertion above fails
      curatorClient.close();
    }

    // second job, running on the fresh environment, against the manipulated offsets
    readSequence(env2, standardProps, parallelism, topicName, 50, 50);

    deleteTestTopic(topicName);
  }
Example #2
0
  /**
   * Builds a job graph consisting of a {@code SubtaskIndexSource}, a key-by on the element value,
   * a {@code SubtaskIndexFlatMapper} and a {@code CollectionSink}.
   *
   * <p>As a side effect, the static {@code SubtaskIndexFlatMapper.workCompletedLatch} is replaced
   * by a new latch with a count of {@code numberKeys}.
   *
   * @param parallelism parallelism set on the execution environment
   * @param maxParallelism max parallelism set on the execution config
   * @param numberKeys forwarded to the source; also the count of the completion latch
   * @param numberElements forwarded to both the source and the flat mapper
   * @param terminateAfterEmission forwarded to the source
   * @param checkpointingInterval checkpointing interval passed to the environment
   * @return the job graph generated from the stream graph of the assembled pipeline
   */
  private static JobGraph createJobGraphWithKeyedState(
      int parallelism,
      int maxParallelism,
      int numberKeys,
      int numberElements,
      boolean terminateAfterEmission,
      int checkpointingInterval) {

    final StreamExecutionEnvironment environment =
        StreamExecutionEnvironment.getExecutionEnvironment();
    environment.setParallelism(parallelism);
    environment.getConfig().setMaxParallelism(maxParallelism);
    environment.enableCheckpointing(checkpointingInterval);
    environment.setRestartStrategy(RestartStrategies.noRestart());

    // every element is its own key
    final KeySelector<Integer, Integer> identityKey =
        new KeySelector<Integer, Integer>() {
          private static final long serialVersionUID = -7952298871120320940L;

          @Override
          public Integer getKey(Integer value) throws Exception {
            return value;
          }
        };

    final DataStream<Integer> keyedInput =
        environment
            .addSource(new SubtaskIndexSource(numberKeys, numberElements, terminateAfterEmission))
            .keyBy(identityKey);

    // fresh latch for this job graph, sized to the number of keys
    SubtaskIndexFlatMapper.workCompletedLatch = new CountDownLatch(numberKeys);

    keyedInput
        .flatMap(new SubtaskIndexFlatMapper(numberElements))
        .addSink(new CollectionSink<Tuple2<Integer, Integer>>());

    return environment.getStreamGraph().getJobGraph();
  }
Example #3
0
  /**
   * Tests that offsets are committed to ZooKeeper through the periodic auto-commit when
   * checkpointing is not enabled.
   *
   * <p>The sequence is read once with {@code auto.commit.interval.ms} set to 500; afterwards at
   * least one of the three partitions must have an offset in {@code (0, 100]} stored in
   * ZooKeeper.
   *
   * @throws Exception if writing, reading, or any ZooKeeper access fails
   */
  @Test(timeout = 60000)
  public void testOffsetAutocommitTest() throws Exception {
    final int parallelism = 3;

    // write a sequence from 0 to 99 to each of the 3 partitions.
    final String topicName = writeSequence("testOffsetAutocommit", 100, parallelism, 1);

    StreamExecutionEnvironment env =
        StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    // NOTE: We are not enabling the checkpointing!
    env.getConfig().disableSysoutLogging();
    env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    env.setParallelism(parallelism);

    // the readSequence operation sleeps for 20 ms between each record.
    // setting a delay of 25*20 = 500 for the commit interval makes
    // sure that we commit roughly 3-4 times while reading, however
    // at least once.
    Properties readProps = new Properties();
    readProps.putAll(standardProps);
    readProps.setProperty("auto.commit.interval.ms", "500");

    // read so that the offset can be committed to ZK
    readSequence(env, readProps, parallelism, topicName, 100, 0);

    final String groupId = standardProps.getProperty("group.id");

    // get the offsets
    final Long o1;
    final Long o2;
    final Long o3;
    CuratorFramework curatorFramework =
        ((KafkaTestEnvironmentImpl) kafkaServer).createCuratorClient();
    try {
      o1 = ZookeeperOffsetHandler.getOffsetFromZooKeeper(curatorFramework, groupId, topicName, 0);
      o2 = ZookeeperOffsetHandler.getOffsetFromZooKeeper(curatorFramework, groupId, topicName, 1);
      o3 = ZookeeperOffsetHandler.getOffsetFromZooKeeper(curatorFramework, groupId, topicName, 2);
    } finally {
      // release the ZooKeeper connection even when one of the lookups throws
      curatorFramework.close();
    }
    LOG.info("Got final offsets from zookeeper o1={}, o2={}, o3={}", o1, o2, o3);

    // ensure that the offset has been committed
    boolean atLeastOneOffsetSet =
        (o1 != null && o1 > 0 && o1 <= 100)
            || (o2 != null && o2 > 0 && o2 <= 100)
            || (o3 != null && o3 > 0 && o3 <= 100);
    assertTrue(
        "Expecting at least one offset to be set o1=" + o1 + " o2=" + o2 + " o3=" + o3,
        atLeastOneOffsetSet);

    deleteTestTopic(topicName);
  }
Example #4
0
  /**
   * Builds a job graph that connects one stateful source directly to a {@code DiscardingSink}.
   *
   * <p>As a side effect, the static {@code StateSourceBase.workStartedLatch} is replaced by a new
   * latch with a count of one.
   *
   * @param parallelism parallelism set on the execution environment
   * @param maxParallelism max parallelism set on the execution config
   * @param partitionedOperatorState if {@code true} a {@code PartitionedStateSource} is used,
   *     otherwise a {@code NonPartitionedStateSource}
   * @return the job graph generated from the stream graph of the assembled pipeline
   */
  private static JobGraph createJobGraphWithOperatorState(
      int parallelism, int maxParallelism, boolean partitionedOperatorState) {

    final StreamExecutionEnvironment environment =
        StreamExecutionEnvironment.getExecutionEnvironment();
    environment.setParallelism(parallelism);
    environment.getConfig().setMaxParallelism(maxParallelism);
    // checkpointing is enabled with the largest possible interval
    // (presumably so that no periodic checkpoint fires on its own)
    environment.enableCheckpointing(Long.MAX_VALUE);
    environment.setRestartStrategy(RestartStrategies.noRestart());

    // fresh single-count latch for this job graph
    StateSourceBase.workStartedLatch = new CountDownLatch(1);

    final DataStream<Integer> sourceStream =
        environment.addSource(
            partitionedOperatorState
                ? new PartitionedStateSource()
                : new NonPartitionedStateSource());

    sourceStream.addSink(new DiscardingSink<Integer>());

    return environment.getStreamGraph().getJobGraph();
  }
Example #5
0
  /**
   * Checks that a state backend configured by the user is used even when checkpointing has not
   * been enabled.
   *
   * <p>The job runs on an environment configured with a {@code FailingStateBackend} and requests
   * key/value state in {@code open()}. The test passes only if execution fails with a {@code
   * JobExecutionException} whose cause's cause is a {@code SuccessException}.
   *
   * @throws Exception if the job fails for any reason other than the expected one
   */
  @Test
  public void testStateBackendWithoutCheckpointing() throws Exception {

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);

    env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    env.setStateBackend(new FailingStateBackend());

    // requesting keyed state in open() is what touches the configured state backend
    final RichMapFunction<Tuple2<String, Integer>, String> stateAccessingMapper =
        new RichMapFunction<Tuple2<String, Integer>, String>() {
          private static final long serialVersionUID = 1L;

          @Override
          public void open(Configuration parameters) throws Exception {
            super.open(parameters);
            getRuntimeContext().getKeyValueState("test", String.class, "");
          }

          @Override
          public String map(Tuple2<String, Integer> value) throws Exception {
            return value.f0;
          }
        };

    env.fromElements(new Tuple2<>("Hello", 1)).keyBy(0).map(stateAccessingMapper).print();

    try {
      env.execute();
      // a successful run means the configured backend was never used
      fail();
    } catch (JobExecutionException e) {
      final Throwable cause = e.getCause();
      // anything but the expected SuccessException two levels down is a genuine failure
      if (cause == null || !(cause.getCause() instanceof SuccessException)) {
        throw e;
      }
    }
  }
Example #6
0
  /**
   * This test ensures that when the consumers retrieve some start offset from Kafka (earliest,
   * latest), this offset is committed to ZooKeeper, even if some partitions are not read.
   *
   * <p>Test:
   *
   * <ul>
   *   <li>Create a topic with 3 partitions and write 50 messages into each.
   *   <li>Start three consumers with auto.offset.reset='latest' and wait (up to 30 seconds) until
   *       they committed into ZK.
   *   <li>Check that the offsets in ZK are set to 49 for the three partitions.
   * </ul>
   *
   * <p>See FLINK-3440 as well.
   *
   * @throws Exception if the job fails with anything but a cancellation, or if any ZooKeeper
   *     access fails
   */
  @Test(timeout = 60000)
  public void testKafkaOffsetRetrievalToZookeeper() throws Exception {
    final int parallelism = 3;

    // write a sequence from 0 to 49 to each of the 3 partitions.
    final String topicName = writeSequence("testKafkaOffsetToZk", 50, parallelism, 1);

    final StreamExecutionEnvironment env2 =
        StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env2.getConfig().disableSysoutLogging();
    env2.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    env2.setParallelism(parallelism);
    env2.enableCheckpointing(200);

    Properties readProps = new Properties();
    readProps.putAll(standardProps);
    readProps.setProperty("auto.offset.reset", "latest");

    DataStream<String> stream =
        env2.addSource(kafkaServer.getConsumer(topicName, new SimpleStringSchema(), readProps));
    stream.addSink(new DiscardingSink<String>());

    // the job is run on a separate thread; it only ends when it is cancelled below
    final AtomicReference<Throwable> errorRef = new AtomicReference<>();
    final Thread runner =
        new Thread("runner") {
          @Override
          public void run() {
            try {
              env2.execute();
            } catch (Throwable t) {
              // the cancellation triggered by this test is expected and not an error
              if (!(t.getCause() instanceof JobCancellationException)) {
                errorRef.set(t);
              }
            }
          }
        };
    runner.start();

    final String groupId = standardProps.getProperty("group.id");
    final Long l49 = 49L;

    final CuratorFramework curatorFramework =
        ((KafkaTestEnvironmentImpl) kafkaServer).createCuratorClient();
    try {
      // poll ZooKeeper until all three partitions report offset 49 or the deadline passes
      final long deadline = 30000 + System.currentTimeMillis();
      do {
        Long o1 =
            ZookeeperOffsetHandler.getOffsetFromZooKeeper(curatorFramework, groupId, topicName, 0);
        Long o2 =
            ZookeeperOffsetHandler.getOffsetFromZooKeeper(curatorFramework, groupId, topicName, 1);
        Long o3 =
            ZookeeperOffsetHandler.getOffsetFromZooKeeper(curatorFramework, groupId, topicName, 2);

        if (l49.equals(o1) && l49.equals(o2) && l49.equals(o3)) {
          break;
        }

        Thread.sleep(100);
      } while (System.currentTimeMillis() < deadline);

      // cancel the job
      JobManagerCommunicationUtils.cancelCurrentJob(flink.getLeaderGateway(timeout));

      final Throwable t = errorRef.get();
      if (t != null) {
        throw new RuntimeException("Job failed with an exception", t);
      }

      // check if offsets are correctly in ZK
      Long o1 =
          ZookeeperOffsetHandler.getOffsetFromZooKeeper(curatorFramework, groupId, topicName, 0);
      Long o2 =
          ZookeeperOffsetHandler.getOffsetFromZooKeeper(curatorFramework, groupId, topicName, 1);
      Long o3 =
          ZookeeperOffsetHandler.getOffsetFromZooKeeper(curatorFramework, groupId, topicName, 2);
      Assert.assertEquals(Long.valueOf(49L), o1);
      Assert.assertEquals(Long.valueOf(49L), o2);
      Assert.assertEquals(Long.valueOf(49L), o3);
    } finally {
      // release the ZooKeeper connection even when polling, cancelling or an assertion fails
      curatorFramework.close();
    }
  }