@Override
public <T> DataStreamSink<T> produceIntoKafka(
        DataStream<T> stream,
        String topic,
        KeyedSerializationSchema<T> serSchema,
        Properties props,
        KafkaPartitioner<T> partitioner) {

    FlinkKafkaProducer08<T> prod =
            new FlinkKafkaProducer08<>(topic, serSchema, props, partitioner);
    prod.setFlushOnCheckpoint(true);
    return stream.addSink(prod);
}
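// Illustrative usage sketch (not part of the original test base): how produceIntoKafka() might be
// driven from a test. The topic name, broker address, and the null partitioner (assumed to fall
// back to the producer's default partitioning) are assumptions, not taken from the code above.
private void exampleProduceIntoKafkaUsage() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<String> stream = env.fromElements("a", "b", "c");

    Properties producerProps = new Properties();
    producerProps.setProperty("bootstrap.servers", "localhost:9092"); // assumed broker address

    // kafkaServer is the KafkaTestEnvironment handle used elsewhere in these tests
    kafkaServer.produceIntoKafka(
            stream,
            "example-topic", // hypothetical topic name
            new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()),
            producerProps,
            null);

    env.execute("example write to Kafka 0.8");
}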
private static JobGraph createJobGraphWithOperatorState(
        int parallelism, int maxParallelism, boolean partitionedOperatorState) {

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);
    env.getConfig().setMaxParallelism(maxParallelism);
    env.enableCheckpointing(Long.MAX_VALUE);
    env.setRestartStrategy(RestartStrategies.noRestart());

    StateSourceBase.workStartedLatch = new CountDownLatch(1);

    DataStream<Integer> input =
            env.addSource(
                    partitionedOperatorState
                            ? new PartitionedStateSource()
                            : new NonPartitionedStateSource());

    input.addSink(new DiscardingSink<Integer>());

    return env.getStreamGraph().getJobGraph();
}
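// Minimal sketch (assumption, not the actual PartitionedStateSource/NonPartitionedStateSource):
// the job graph above only differs in whether its source keeps re-distributable ("partitioned")
// list-style operator state. Assuming the ListCheckpointed interface is available in this Flink
// version, such a source could look roughly like this; class and field names are hypothetical.
private static class ExamplePartitionedStateSource extends RichParallelSourceFunction<Integer>
        implements ListCheckpointed<Integer> {

    private static final long serialVersionUID = 1L;

    private volatile boolean running = true;
    private int counter;

    @Override
    public void run(SourceContext<Integer> ctx) throws Exception {
        while (running) {
            synchronized (ctx.getCheckpointLock()) {
                ctx.collect(counter++);
            }
            Thread.sleep(1);
        }
    }

    @Override
    public void cancel() {
        running = false;
    }

    @Override
    public List<Integer> snapshotState(long checkpointId, long timestamp) {
        // list state can be re-distributed across subtasks when the parallelism changes
        return Collections.singletonList(counter);
    }

    @Override
    public void restoreState(List<Integer> state) {
        for (Integer value : state) {
            counter += value;
        }
    }
}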
/**
 * This test ensures that when the consumers retrieve a start offset from Kafka (earliest,
 * latest), this offset is committed to ZooKeeper, even if some partitions are not read.
 *
 * <p>Test:
 * - Create a topic with 3 partitions
 * - write 50 messages into each partition.
 * - Start consumers (parallelism 3) with auto.offset.reset='latest' and wait until they have
 *   committed offsets into ZK.
 * - Check that the offsets in ZK are set to 49 for the three partitions
 *
 * <p>See FLINK-3440 as well.
 */
@Test(timeout = 60000)
public void testKafkaOffsetRetrievalToZookeeper() throws Exception {
    final int parallelism = 3;

    // write a sequence from 0 to 49 to each of the 3 partitions.
    final String topicName = writeSequence("testKafkaOffsetToZk", 50, parallelism, 1);

    final StreamExecutionEnvironment env2 =
            StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env2.getConfig().disableSysoutLogging();
    env2.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    env2.setParallelism(parallelism);
    env2.enableCheckpointing(200);

    Properties readProps = new Properties();
    readProps.putAll(standardProps);
    readProps.setProperty("auto.offset.reset", "latest");

    DataStream<String> stream =
            env2.addSource(kafkaServer.getConsumer(topicName, new SimpleStringSchema(), readProps));
    stream.addSink(new DiscardingSink<String>());

    final AtomicReference<Throwable> errorRef = new AtomicReference<>();
    final Thread runner =
            new Thread("runner") {
                @Override
                public void run() {
                    try {
                        env2.execute();
                    } catch (Throwable t) {
                        if (!(t.getCause() instanceof JobCancellationException)) {
                            errorRef.set(t);
                        }
                    }
                }
            };
    runner.start();

    final CuratorFramework curatorFramework =
            ((KafkaTestEnvironmentImpl) kafkaServer).createCuratorClient();
    final Long l49 = 49L;

    final long deadline = 30000 + System.currentTimeMillis();
    do {
        Long o1 =
                ZookeeperOffsetHandler.getOffsetFromZooKeeper(
                        curatorFramework, standardProps.getProperty("group.id"), topicName, 0);
        Long o2 =
                ZookeeperOffsetHandler.getOffsetFromZooKeeper(
                        curatorFramework, standardProps.getProperty("group.id"), topicName, 1);
        Long o3 =
                ZookeeperOffsetHandler.getOffsetFromZooKeeper(
                        curatorFramework, standardProps.getProperty("group.id"), topicName, 2);

        if (l49.equals(o1) && l49.equals(o2) && l49.equals(o3)) {
            break;
        }

        Thread.sleep(100);
    } while (System.currentTimeMillis() < deadline);

    // cancel the job
    JobManagerCommunicationUtils.cancelCurrentJob(flink.getLeaderGateway(timeout));

    final Throwable t = errorRef.get();
    if (t != null) {
        throw new RuntimeException("Job failed with an exception", t);
    }

    // check if offsets are correctly in ZK
    Long o1 =
            ZookeeperOffsetHandler.getOffsetFromZooKeeper(
                    curatorFramework, standardProps.getProperty("group.id"), topicName, 0);
    Long o2 =
            ZookeeperOffsetHandler.getOffsetFromZooKeeper(
                    curatorFramework, standardProps.getProperty("group.id"), topicName, 1);
    Long o3 =
            ZookeeperOffsetHandler.getOffsetFromZooKeeper(
                    curatorFramework, standardProps.getProperty("group.id"), topicName, 2);
    Assert.assertEquals(Long.valueOf(49L), o1);
    Assert.assertEquals(Long.valueOf(49L), o2);
    Assert.assertEquals(Long.valueOf(49L), o3);

    curatorFramework.close();
}
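// For reference, a sketch of what ZookeeperOffsetHandler.getOffsetFromZooKeeper() is expected to
// read. This is an assumption based on the Kafka 0.8 high-level consumer layout, where committed
// offsets live under /consumers/<group.id>/offsets/<topic>/<partition>; the method name below is
// hypothetical.
private static Long readOffsetDirectlyFromZk(
        CuratorFramework curator, String groupId, String topic, int partition) throws Exception {
    final String path = "/consumers/" + groupId + "/offsets/" + topic + "/" + partition;
    if (curator.checkExists().forPath(path) == null) {
        return null; // nothing committed yet
    }
    byte[] data = curator.getData().forPath(path);
    return (data == null || data.length == 0) ? null : Long.valueOf(new String(data, "UTF-8"));
}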
@Test
public void operatorTest() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStreamSource<Long> src = env.generateSequence(0, 0);

    MapFunction<Long, Integer> mapFunction =
            new MapFunction<Long, Integer>() {
                @Override
                public Integer map(Long value) throws Exception {
                    return null;
                }
            };

    DataStream<Integer> map = src.map(mapFunction);
    map.addSink(new NoOpSink<Integer>());
    assertEquals(mapFunction, getFunctionForDataStream(map));

    FlatMapFunction<Long, Integer> flatMapFunction =
            new FlatMapFunction<Long, Integer>() {
                private static final long serialVersionUID = 1L;

                @Override
                public void flatMap(Long value, Collector<Integer> out) throws Exception {}
            };

    DataStream<Integer> flatMap = src.flatMap(flatMapFunction);
    flatMap.addSink(new NoOpSink<Integer>());
    assertEquals(flatMapFunction, getFunctionForDataStream(flatMap));

    FilterFunction<Integer> filterFunction =
            new FilterFunction<Integer>() {
                @Override
                public boolean filter(Integer value) throws Exception {
                    return false;
                }
            };

    DataStream<Integer> unionFilter = map.union(flatMap).filter(filterFunction);
    unionFilter.addSink(new NoOpSink<Integer>());
    assertEquals(filterFunction, getFunctionForDataStream(unionFilter));

    try {
        env.getStreamGraph().getStreamEdge(map.getId(), unionFilter.getId());
    } catch (RuntimeException e) {
        fail(e.getMessage());
    }

    try {
        env.getStreamGraph().getStreamEdge(flatMap.getId(), unionFilter.getId());
    } catch (RuntimeException e) {
        fail(e.getMessage());
    }

    OutputSelector<Integer> outputSelector =
            new OutputSelector<Integer>() {
                @Override
                public Iterable<String> select(Integer value) {
                    return null;
                }
            };

    SplitStream<Integer> split = unionFilter.split(outputSelector);
    split.select("dummy").addSink(new NoOpSink<Integer>());
    List<OutputSelector<?>> outputSelectors =
            env.getStreamGraph().getStreamNode(unionFilter.getId()).getOutputSelectors();
    assertEquals(1, outputSelectors.size());
    assertEquals(outputSelector, outputSelectors.get(0));

    DataStream<Integer> select = split.select("a");
    DataStreamSink<Integer> sink = select.print();

    StreamEdge splitEdge =
            env.getStreamGraph().getStreamEdge(unionFilter.getId(), sink.getTransformation().getId());
    assertEquals("a", splitEdge.getSelectedNames().get(0));

    ConnectedStreams<Integer, Integer> connect = map.connect(flatMap);
    CoMapFunction<Integer, Integer, String> coMapper =
            new CoMapFunction<Integer, Integer, String>() {
                private static final long serialVersionUID = 1L;

                @Override
                public String map1(Integer value) {
                    return null;
                }

                @Override
                public String map2(Integer value) {
                    return null;
                }
            };

    DataStream<String> coMap = connect.map(coMapper);
    coMap.addSink(new NoOpSink<String>());
    assertEquals(coMapper, getFunctionForDataStream(coMap));

    try {
        env.getStreamGraph().getStreamEdge(map.getId(), coMap.getId());
    } catch (RuntimeException e) {
        fail(e.getMessage());
    }

    try {
        env.getStreamGraph().getStreamEdge(flatMap.getId(), coMap.getId());
    } catch (RuntimeException e) {
        fail(e.getMessage());
    }
}
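// The getFunctionForDataStream() helper used above is not shown in this excerpt. A plausible
// implementation (a sketch, assuming the node holds a user-function operator) pulls the user
// function out of the stream graph like this:
private static Function getFunctionForDataStream(DataStream<?> dataStream) {
    StreamOperator<?> operator =
            dataStream.getExecutionEnvironment()
                    .getStreamGraph()
                    .getStreamNode(dataStream.getId())
                    .getOperator();
    return ((AbstractUdfStreamOperator<?, ?>) operator).getUserFunction();
}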
/** Tests whether parallelism gets set. */
@Test
public void testParallelism() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStreamSource<Tuple2<Long, Long>> src = env.fromElements(new Tuple2<>(0L, 0L));
    env.setParallelism(10);

    SingleOutputStreamOperator<Long, ?> map =
            src.map(
                    new MapFunction<Tuple2<Long, Long>, Long>() {
                        @Override
                        public Long map(Tuple2<Long, Long> value) throws Exception {
                            return null;
                        }
                    })
                    .name("MyMap");

    DataStream<Long> windowed =
            map.windowAll(GlobalWindows.create())
                    .trigger(PurgingTrigger.of(CountTrigger.of(10)))
                    .fold(
                            0L,
                            new FoldFunction<Long, Long>() {
                                @Override
                                public Long fold(Long accumulator, Long value) throws Exception {
                                    return null;
                                }
                            });

    windowed.addSink(new NoOpSink<Long>());

    DataStreamSink<Long> sink =
            map.addSink(
                    new SinkFunction<Long>() {
                        private static final long serialVersionUID = 1L;

                        @Override
                        public void invoke(Long value) throws Exception {}
                    });

    assertEquals(1, env.getStreamGraph().getStreamNode(src.getId()).getParallelism());
    assertEquals(10, env.getStreamGraph().getStreamNode(map.getId()).getParallelism());
    assertEquals(1, env.getStreamGraph().getStreamNode(windowed.getId()).getParallelism());
    assertEquals(
            10, env.getStreamGraph().getStreamNode(sink.getTransformation().getId()).getParallelism());

    env.setParallelism(7);

    // Some parts, such as windowing, rely on the fact that previous operators have a parallelism
    // set when instantiating the Discretizer. This would break if we dynamically changed
    // the parallelism of operations when changing the setting on the Execution Environment.
    assertEquals(1, env.getStreamGraph().getStreamNode(src.getId()).getParallelism());
    assertEquals(10, env.getStreamGraph().getStreamNode(map.getId()).getParallelism());
    assertEquals(1, env.getStreamGraph().getStreamNode(windowed.getId()).getParallelism());
    assertEquals(
            10, env.getStreamGraph().getStreamNode(sink.getTransformation().getId()).getParallelism());

    try {
        src.setParallelism(3);
        fail();
    } catch (IllegalArgumentException success) {
        // expected: the parallelism of a non-parallel source cannot be changed
    }

    DataStreamSource<Long> parallelSource = env.generateSequence(0, 0);
    parallelSource.addSink(new NoOpSink<Long>());
    assertEquals(7, env.getStreamGraph().getStreamNode(parallelSource.getId()).getParallelism());

    parallelSource.setParallelism(3);
    assertEquals(3, env.getStreamGraph().getStreamNode(parallelSource.getId()).getParallelism());

    map.setParallelism(2);
    assertEquals(2, env.getStreamGraph().getStreamNode(map.getId()).getParallelism());

    sink.setParallelism(4);
    assertEquals(
            4, env.getStreamGraph().getStreamNode(sink.getTransformation().getId()).getParallelism());
}
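// NoOpSink, used throughout the tests above, is a test utility that simply discards every record
// so the graph can be built and inspected without producing output. A minimal sketch (an
// assumption; the real helper may differ):
private static class NoOpSink<T> extends RichSinkFunction<T> {
    private static final long serialVersionUID = 1L;

    @Override
    public void invoke(T value) {
        // intentionally ignore the record
    }
}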
/**
 * This uses {@link org.apache.flink.streaming.connectors.fs.DateTimeBucketer} to produce rolling
 * files. The clock of DateTimeBucketer is set to {@link ModifyableClock} to keep the time in
 * lockstep with the processing of elements using latches.
 */
@Test
public void testDateTimeRollingStringWriter() throws Exception {
    final int NUM_ELEMENTS = 20;
    final int PARALLELISM = 2;
    final String outPath = hdfsURI + "/rolling-out";

    DateTimeBucketer.setClock(new ModifyableClock());
    ModifyableClock.setCurrentTime(0);

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(PARALLELISM);

    DataStream<Tuple2<Integer, String>> source =
            env.addSource(new WaitingTestSourceFunction(NUM_ELEMENTS)).broadcast();

    // the parallel flatMap is chained to the sink, so when it has seen 5 elements it can
    // fire the latch
    DataStream<String> mapped =
            source.flatMap(
                    new RichFlatMapFunction<Tuple2<Integer, String>, String>() {
                        private static final long serialVersionUID = 1L;

                        int count = 0;

                        @Override
                        public void flatMap(Tuple2<Integer, String> value, Collector<String> out)
                                throws Exception {
                            out.collect(value.f1);
                            count++;
                            if (count >= 5) {
                                if (getRuntimeContext().getIndexOfThisSubtask() == 0) {
                                    latch1.trigger();
                                } else {
                                    latch2.trigger();
                                }
                                count = 0;
                            }
                        }
                    });

    RollingSink<String> sink =
            new RollingSink<String>(outPath)
                    .setBucketer(new DateTimeBucketer("ss"))
                    .setPartPrefix("part")
                    .setPendingPrefix("")
                    .setPendingSuffix("");

    mapped.addSink(sink);

    env.execute("RollingSink String Write Test");

    RemoteIterator<LocatedFileStatus> files = dfs.listFiles(new Path(outPath), true);

    // we should have 8 rolling files, 4 time intervals and parallelism of 2
    int numFiles = 0;
    while (files.hasNext()) {
        LocatedFileStatus file = files.next();
        numFiles++;
        if (file.getPath().toString().contains("rolling-out/00")) {
            FSDataInputStream inStream = dfs.open(file.getPath());
            BufferedReader br = new BufferedReader(new InputStreamReader(inStream));

            for (int i = 0; i < 5; i++) {
                String line = br.readLine();
                Assert.assertEquals("message #" + i, line);
            }

            inStream.close();
        } else if (file.getPath().toString().contains("rolling-out/05")) {
            FSDataInputStream inStream = dfs.open(file.getPath());
            BufferedReader br = new BufferedReader(new InputStreamReader(inStream));

            for (int i = 5; i < 10; i++) {
                String line = br.readLine();
                Assert.assertEquals("message #" + i, line);
            }

            inStream.close();
        } else if (file.getPath().toString().contains("rolling-out/10")) {
            FSDataInputStream inStream = dfs.open(file.getPath());
            BufferedReader br = new BufferedReader(new InputStreamReader(inStream));

            for (int i = 10; i < 15; i++) {
                String line = br.readLine();
                Assert.assertEquals("message #" + i, line);
            }

            inStream.close();
        } else if (file.getPath().toString().contains("rolling-out/15")) {
            FSDataInputStream inStream = dfs.open(file.getPath());
            BufferedReader br = new BufferedReader(new InputStreamReader(inStream));

            for (int i = 15; i < 20; i++) {
                String line = br.readLine();
                Assert.assertEquals("message #" + i, line);
            }

            inStream.close();
        } else {
            Assert.fail("File " + file + " does not match any expected roll pattern.");
        }
    }

    Assert.assertEquals(8, numFiles);
}
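// ModifyableClock, referenced above, is a test clock whose time only advances when the test says
// so, which is what keeps the bucketing in lockstep with the latches. A minimal sketch (an
// assumption; the real class may differ), based on the rolling sink's Clock interface:
private static class ModifyableClock implements Clock {

    private static volatile long currentTime = 0;

    public static void setCurrentTime(long time) {
        currentTime = time;
    }

    @Override
    public long currentTimeMillis() {
        return currentTime;
    }
}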