/**
 * Tests that offsets are properly committed to ZooKeeper and initial offsets are read from
 * ZooKeeper.
 *
 * <p>This test is only applicable if the Flink Kafka Consumer uses the ZooKeeperOffsetHandler.
 */
@Test(timeout = 60000)
public void testOffsetInZookeeper() throws Exception {
    final int parallelism = 3;

    // write a sequence from 0 to 99 to each of the 3 partitions.
    final String topicName = writeSequence("testOffsetInZK", 100, parallelism, 1);

    StreamExecutionEnvironment env1 =
            StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env1.getConfig().disableSysoutLogging();
    env1.enableCheckpointing(50);
    env1.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    env1.setParallelism(parallelism);

    StreamExecutionEnvironment env2 =
            StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env2.getConfig().disableSysoutLogging();
    env2.enableCheckpointing(50);
    env2.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    env2.setParallelism(parallelism);

    readSequence(env1, standardProps, parallelism, topicName, 100, 0);

    CuratorFramework curatorClient =
            ((KafkaTestEnvironmentImpl) kafkaServer).createCuratorClient();

    Long o1 =
            ZookeeperOffsetHandler.getOffsetFromZooKeeper(
                    curatorClient, standardProps.getProperty("group.id"), topicName, 0);
    Long o2 =
            ZookeeperOffsetHandler.getOffsetFromZooKeeper(
                    curatorClient, standardProps.getProperty("group.id"), topicName, 1);
    Long o3 =
            ZookeeperOffsetHandler.getOffsetFromZooKeeper(
                    curatorClient, standardProps.getProperty("group.id"), topicName, 2);

    LOG.info("Got final offsets from zookeeper o1={}, o2={}, o3={}", o1, o2, o3);

    assertTrue(o1 == null || (o1 >= 0 && o1 <= 100));
    assertTrue(o2 == null || (o2 >= 0 && o2 <= 100));
    assertTrue(o3 == null || (o3 >= 0 && o3 <= 100));

    LOG.info("Manipulating offsets");

    // set the committed offset of each of the three partitions to 49,
    // so that reading resumes at record 50
    ZookeeperOffsetHandler.setOffsetInZooKeeper(
            curatorClient, standardProps.getProperty("group.id"), topicName, 0, 49);
    ZookeeperOffsetHandler.setOffsetInZooKeeper(
            curatorClient, standardProps.getProperty("group.id"), topicName, 1, 49);
    ZookeeperOffsetHandler.setOffsetInZooKeeper(
            curatorClient, standardProps.getProperty("group.id"), topicName, 2, 49);

    curatorClient.close();

    // read the remaining 50 records (starting from value 50) with the manipulated offsets
    readSequence(env2, standardProps, parallelism, topicName, 50, 50);

    deleteTestTopic(topicName);
}
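// Illustrative sketch (not part of the test suite): roughly what
// getOffsetFromZooKeeper reads under the hood, assuming the standard
// Kafka 0.8 ZooKeeper layout /consumers/<group>/offsets/<topic>/<partition>.
// The real ZookeeperOffsetHandler handles more edge cases; method name is
// hypothetical.
static Long readCommittedOffsetSketch(
        CuratorFramework curator, String groupId, String topic, int partition)
        throws Exception {
    String path = "/consumers/" + groupId + "/offsets/" + topic + "/" + partition;
    if (curator.checkExists().forPath(path) == null) {
        return null; // nothing committed yet for this partition
    }
    byte[] data = curator.getData().forPath(path);
    // the offset is stored as a plain decimal string
    return (data == null || data.length == 0) ? null : Long.valueOf(new String(data));
}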
private static JobGraph createJobGraphWithKeyedState(
        int parallelism,
        int maxParallelism,
        int numberKeys,
        int numberElements,
        boolean terminateAfterEmission,
        int checkpointingInterval) {

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);
    env.getConfig().setMaxParallelism(maxParallelism);
    env.enableCheckpointing(checkpointingInterval);
    env.setRestartStrategy(RestartStrategies.noRestart());

    DataStream<Integer> input =
            env.addSource(
                            new SubtaskIndexSource(
                                    numberKeys, numberElements, terminateAfterEmission))
                    .keyBy(
                            new KeySelector<Integer, Integer>() {
                                private static final long serialVersionUID =
                                        -7952298871120320940L;

                                @Override
                                public Integer getKey(Integer value) throws Exception {
                                    return value;
                                }
                            });

    SubtaskIndexFlatMapper.workCompletedLatch = new CountDownLatch(numberKeys);

    DataStream<Tuple2<Integer, Integer>> result =
            input.flatMap(new SubtaskIndexFlatMapper(numberElements));

    result.addSink(new CollectionSink<Tuple2<Integer, Integer>>());

    return env.getStreamGraph().getJobGraph();
}
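// Context sketch (illustrative, not from the test suite): why setMaxParallelism
// matters above. Keyed state is sliced into maxParallelism key groups, and
// rescaling only remaps whole key groups to subtasks. This mirrors Flink's
// KeyGroupRangeAssignment, but the murmur-hash spreading step is simplified to
// a plain hashCode() here -- an assumption made for brevity.
final class KeyGroupSketch {

    /** Key -> key group; stable across rescalings (depends only on maxParallelism). */
    static int assignToKeyGroup(Object key, int maxParallelism) {
        return Math.abs(key.hashCode() % maxParallelism); // simplified spreading
    }

    /** Key group -> subtask index at the current parallelism. */
    static int operatorIndexForKeyGroup(int maxParallelism, int parallelism, int keyGroupId) {
        return keyGroupId * parallelism / maxParallelism;
    }

    public static void main(String[] args) {
        int maxParallelism = 128;
        int keyGroup = assignToKeyGroup(42, maxParallelism);
        for (int parallelism : new int[] {2, 4}) {
            // the key group of key 42 never changes; only its owning subtask does
            System.out.printf(
                    "key=42 keyGroup=%d subtask(p=%d)=%d%n",
                    keyGroup,
                    parallelism,
                    operatorIndexForKeyGroup(maxParallelism, parallelism, keyGroup));
        }
    }
}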
@Test(timeout = 60000)
public void testOffsetAutocommitTest() throws Exception {
    final int parallelism = 3;

    // write a sequence from 0 to 99 to each of the 3 partitions.
    final String topicName = writeSequence("testOffsetAutocommit", 100, parallelism, 1);

    StreamExecutionEnvironment env =
            StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    // NOTE: We are not enabling checkpointing!
    env.getConfig().disableSysoutLogging();
    env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    env.setParallelism(parallelism);

    // the readSequence operation sleeps for 20 ms between each record.
    // setting the commit interval to 25 * 20 ms = 500 ms makes sure that
    // we commit roughly 3-4 times while reading, but at least once.
    Properties readProps = new Properties();
    readProps.putAll(standardProps);
    readProps.setProperty("auto.commit.interval.ms", "500");

    // read so that the offset can be committed to ZK
    readSequence(env, readProps, parallelism, topicName, 100, 0);

    // get the offsets
    CuratorFramework curatorFramework =
            ((KafkaTestEnvironmentImpl) kafkaServer).createCuratorClient();

    Long o1 =
            ZookeeperOffsetHandler.getOffsetFromZooKeeper(
                    curatorFramework, standardProps.getProperty("group.id"), topicName, 0);
    Long o2 =
            ZookeeperOffsetHandler.getOffsetFromZooKeeper(
                    curatorFramework, standardProps.getProperty("group.id"), topicName, 1);
    Long o3 =
            ZookeeperOffsetHandler.getOffsetFromZooKeeper(
                    curatorFramework, standardProps.getProperty("group.id"), topicName, 2);
    curatorFramework.close();

    LOG.info("Got final offsets from zookeeper o1={}, o2={}, o3={}", o1, o2, o3);

    // ensure that at least one offset has been committed
    boolean atLeastOneOffsetSet =
            (o1 != null && o1 > 0 && o1 <= 100)
                    || (o2 != null && o2 > 0 && o2 <= 100)
                    || (o3 != null && o3 > 0 && o3 <= 100);
    assertTrue(
            "Expecting at least one offset to be set o1=" + o1 + " o2=" + o2 + " o3=" + o3,
            atLeastOneOffsetSet);

    deleteTestTopic(topicName);
}
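// Sketch (illustrative, not part of the test): the two offset-commit modes of
// the Kafka 0.8 consumer, contrasted. Property names are the standard Kafka 0.8
// consumer settings; the group id and addresses are hypothetical.
static Properties autocommitPropsSketch() {
    Properties props = new Properties();
    props.setProperty("group.id", "my-group");                // hypothetical
    props.setProperty("zookeeper.connect", "localhost:2181"); // hypothetical
    // Mode 1: without Flink checkpointing (as in the test above), the
    // consumer's periodic autocommit thread writes offsets to ZooKeeper
    // every auto.commit.interval.ms.
    props.setProperty("auto.commit.enable", "true");
    props.setProperty("auto.commit.interval.ms", "500");
    // Mode 2: with env.enableCheckpointing(...), Flink instead commits
    // offsets when a checkpoint completes, tying commits to the
    // exactly-once machinery rather than to wall-clock time.
    return props;
}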
private static JobGraph createJobGraphWithOperatorState(
        int parallelism, int maxParallelism, boolean partitionedOperatorState) {

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);
    env.getConfig().setMaxParallelism(maxParallelism);
    env.enableCheckpointing(Long.MAX_VALUE);
    env.setRestartStrategy(RestartStrategies.noRestart());

    StateSourceBase.workStartedLatch = new CountDownLatch(1);

    DataStream<Integer> input =
            env.addSource(
                    partitionedOperatorState
                            ? new PartitionedStateSource()
                            : new NonPartitionedStateSource());

    input.addSink(new DiscardingSink<Integer>());

    return env.getStreamGraph().getJobGraph();
}
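// PartitionedStateSource and NonPartitionedStateSource are defined elsewhere in
// the suite and are not shown here. The following is an illustrative stand-in
// (class and state names hypothetical) showing what a source with
// redistributable, list-style operator state looks like via the
// CheckpointedFunction API -- the kind of state this job graph exercises.
//
// import org.apache.flink.api.common.state.ListState;
// import org.apache.flink.api.common.state.ListStateDescriptor;
// import org.apache.flink.runtime.state.FunctionInitializationContext;
// import org.apache.flink.runtime.state.FunctionSnapshotContext;
// import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction;
// import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction;
public class CounterSourceSketch extends RichParallelSourceFunction<Integer>
        implements CheckpointedFunction {

    private volatile boolean running = true;
    private int counter;
    private transient ListState<Integer> checkpointedCounter;

    @Override
    public void run(SourceContext<Integer> ctx) throws Exception {
        while (running) {
            // emit and mutate state under the checkpoint lock for consistency
            synchronized (ctx.getCheckpointLock()) {
                ctx.collect(counter++);
            }
        }
    }

    @Override
    public void cancel() {
        running = false;
    }

    @Override
    public void snapshotState(FunctionSnapshotContext context) throws Exception {
        checkpointedCounter.clear();
        checkpointedCounter.add(counter);
    }

    @Override
    public void initializeState(FunctionInitializationContext context) throws Exception {
        checkpointedCounter =
                context.getOperatorStateStore()
                        .getListState(new ListStateDescriptor<>("counter", Integer.class));
        // list state is redistributed across subtasks on rescaling; sum the pieces
        for (Integer c : checkpointedCounter.get()) {
            counter += c;
        }
    }
}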
/** Verify that the user-specified state backend is used even if checkpointing is disabled. */
@Test
public void testStateBackendWithoutCheckpointing() throws Exception {
    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(1);
    see.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    see.setStateBackend(new FailingStateBackend());

    see.fromElements(new Tuple2<>("Hello", 1))
            .keyBy(0)
            .map(
                    new RichMapFunction<Tuple2<String, Integer>, String>() {
                        private static final long serialVersionUID = 1L;

                        @Override
                        public void open(Configuration parameters) throws Exception {
                            super.open(parameters);
                            // touching state forces the backend to be used
                            getRuntimeContext().getKeyValueState("test", String.class, "");
                        }

                        @Override
                        public String map(Tuple2<String, Integer> value) throws Exception {
                            return value.f0;
                        }
                    })
            .print();

    try {
        see.execute();
        fail("Execution should have failed with a SuccessException from the state backend");
    } catch (JobExecutionException e) {
        // expected: the FailingStateBackend throws a SuccessException when used,
        // which proves the backend was picked up despite checkpointing being disabled
        Throwable t = e.getCause();
        if (!(t != null && t.getCause() instanceof SuccessException)) {
            throw e;
        }
    }
}
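// Sketch (illustrative): the marker-exception pattern behind FailingStateBackend
// and SuccessException, whose real implementations live elsewhere in the suite
// (the exact state-backend methods to override depend on the Flink version).
// The idea: a stub "fails" with a marker exception as soon as the runtime
// exercises it, so catching that marker proves the component was actually used.

/** Marker exception: reaching this code path is the test's success condition. */
static class SuccessExceptionSketch extends Exception {
    private static final long serialVersionUID = 1L;
}

/** Hypothetical stub that throws the marker on first use. */
static class FailingBackendStub {
    Object createState() throws SuccessExceptionSketch {
        // if this throws, the caller really did route state creation through us
        throw new SuccessExceptionSketch();
    }
}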
/**
 * This test ensures that when the consumers retrieve their start offset from Kafka (earliest,
 * latest), that offset is also committed to ZooKeeper, even if some partitions are not read.
 *
 * <p>Test:
 *
 * <ul>
 *   <li>Create a topic with 3 partitions and write 50 messages into each partition.
 *   <li>Start three consumers with auto.offset.reset='latest' and wait until they have
 *       committed into ZK.
 *   <li>Check that the offsets in ZK are set to 49 (the last written record) for all three
 *       partitions.
 * </ul>
 *
 * <p>See FLINK-3440 as well.
 */
@Test(timeout = 60000)
public void testKafkaOffsetRetrievalToZookeeper() throws Exception {
    final int parallelism = 3;

    // write a sequence from 0 to 49 to each of the 3 partitions.
    final String topicName = writeSequence("testKafkaOffsetToZk", 50, parallelism, 1);

    final StreamExecutionEnvironment env2 =
            StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env2.getConfig().disableSysoutLogging();
    env2.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    env2.setParallelism(parallelism);
    env2.enableCheckpointing(200);

    Properties readProps = new Properties();
    readProps.putAll(standardProps);
    readProps.setProperty("auto.offset.reset", "latest");

    DataStream<String> stream =
            env2.addSource(
                    kafkaServer.getConsumer(topicName, new SimpleStringSchema(), readProps));
    stream.addSink(new DiscardingSink<String>());

    final AtomicReference<Throwable> errorRef = new AtomicReference<>();
    final Thread runner =
            new Thread("runner") {
                @Override
                public void run() {
                    try {
                        env2.execute();
                    } catch (Throwable t) {
                        if (!(t.getCause() instanceof JobCancellationException)) {
                            errorRef.set(t);
                        }
                    }
                }
            };
    runner.start();

    final CuratorFramework curatorFramework =
            ((KafkaTestEnvironmentImpl) kafkaServer).createCuratorClient();
    final Long l49 = 49L;

    // poll ZooKeeper until all three partitions show the expected committed
    // offset, or the 30 s deadline expires
    final long deadline = 30000 + System.currentTimeMillis();
    do {
        Long o1 =
                ZookeeperOffsetHandler.getOffsetFromZooKeeper(
                        curatorFramework, standardProps.getProperty("group.id"), topicName, 0);
        Long o2 =
                ZookeeperOffsetHandler.getOffsetFromZooKeeper(
                        curatorFramework, standardProps.getProperty("group.id"), topicName, 1);
        Long o3 =
                ZookeeperOffsetHandler.getOffsetFromZooKeeper(
                        curatorFramework, standardProps.getProperty("group.id"), topicName, 2);

        if (l49.equals(o1) && l49.equals(o2) && l49.equals(o3)) {
            break;
        }

        Thread.sleep(100);
    } while (System.currentTimeMillis() < deadline);

    // cancel the job
    JobManagerCommunicationUtils.cancelCurrentJob(flink.getLeaderGateway(timeout));

    final Throwable t = errorRef.get();
    if (t != null) {
        throw new RuntimeException("Job failed with an exception", t);
    }

    // check that the offsets are correctly in ZK
    Long o1 =
            ZookeeperOffsetHandler.getOffsetFromZooKeeper(
                    curatorFramework, standardProps.getProperty("group.id"), topicName, 0);
    Long o2 =
            ZookeeperOffsetHandler.getOffsetFromZooKeeper(
                    curatorFramework, standardProps.getProperty("group.id"), topicName, 1);
    Long o3 =
            ZookeeperOffsetHandler.getOffsetFromZooKeeper(
                    curatorFramework, standardProps.getProperty("group.id"), topicName, 2);
    Assert.assertEquals(Long.valueOf(49L), o1);
    Assert.assertEquals(Long.valueOf(49L), o2);
    Assert.assertEquals(Long.valueOf(49L), o3);

    curatorFramework.close();
}
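// Sketch (illustrative): the poll-until-deadline loop above, extracted into a
// reusable helper. The names are hypothetical; the test inlines this logic.
//
// import java.util.concurrent.Callable;
final class PollingSketch {
    /** Repeatedly evaluates {@code condition} until it holds or the timeout elapses. */
    static boolean waitUntil(Callable<Boolean> condition, long timeoutMillis, long pollMillis)
            throws Exception {
        final long deadline = System.currentTimeMillis() + timeoutMillis;
        while (System.currentTimeMillis() < deadline) {
            if (condition.call()) {
                return true; // condition satisfied before the deadline
            }
            Thread.sleep(pollMillis);
        }
        return false; // deadline expired; caller decides whether that fails the test
    }
}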