@Override
 public <T> DataStreamSink<T> produceIntoKafka(
     DataStream<T> stream,
     String topic,
     KeyedSerializationSchema<T> serSchema,
     Properties props,
     KafkaPartitioner<T> partitioner) {
   FlinkKafkaProducer08<T> prod = new FlinkKafkaProducer08<>(topic, serSchema, props, partitioner);
   prod.setFlushOnCheckpoint(true);
   return stream.addSink(prod);
 }
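For orientation, a hedged usage sketch of the helper above, written as it might appear inside a test that holds a KafkaTestEnvironment field named kafkaServer (the same field Example #3 uses). The topic name, the broker address and the null partitioner are illustrative assumptions; a concrete KafkaPartitioner can be passed instead if the producer version requires one.

  // Hedged usage sketch, not part of the original example. Assumes a test-scoped
  // KafkaTestEnvironment field `kafkaServer`, a broker at localhost:9092 and an existing
  // topic "test-topic"; the null partitioner assumes the producer falls back to its
  // default partitioning.
  public void writeSampleRecords() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<String> stream = env.fromElements("a", "b", "c");

    Properties props = new Properties();
    props.setProperty("bootstrap.servers", "localhost:9092");

    kafkaServer.produceIntoKafka(
        stream,
        "test-topic",
        new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()),
        props,
        null);

    env.execute("write strings into Kafka");
  }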
Example #2
  private static JobGraph createJobGraphWithOperatorState(
      int parallelism, int maxParallelism, boolean partitionedOperatorState) {

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);
    env.getConfig().setMaxParallelism(maxParallelism);
    env.enableCheckpointing(Long.MAX_VALUE);
    env.setRestartStrategy(RestartStrategies.noRestart());

    StateSourceBase.workStartedLatch = new CountDownLatch(1);

    DataStream<Integer> input =
        env.addSource(
            partitionedOperatorState
                ? new PartitionedStateSource()
                : new NonPartitionedStateSource());

    input.addSink(new DiscardingSink<Integer>());

    return env.getStreamGraph().getJobGraph();
  }
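For context, a hedged sketch of how a rescaling test might drive this helper; the driver below is an illustrative assumption, not code from the original test class.

  // Hypothetical driver: build the "before" graph with partitioned operator state at
  // parallelism 2, then (after taking a savepoint of the running job) build the "after"
  // graph the same way at a higher parallelism, still bounded by maxParallelism = 4.
  JobGraph jobGraphBeforeRescale = createJobGraphWithOperatorState(2, 4, true);
  JobGraph jobGraphAfterRescale = createJobGraphWithOperatorState(4, 4, true);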
Example #3
  /**
   * This test ensures that when the consumers retrieve a start offset from Kafka (earliest,
   * latest), this offset is committed to ZooKeeper, even if some partitions are not read.
   *
   * <p>Test: - Create a topic with 3 partitions and write 50 messages into each partition. - Start
   * three consumers with auto.offset.reset='latest' and wait until they have committed offsets to
   * ZK. - Check that the offsets in ZK are set to 49 (the last read offset) for all three
   * partitions.
   *
   * <p>See FLINK-3440 as well.
   */
  @Test(timeout = 60000)
  public void testKafkaOffsetRetrievalToZookeeper() throws Exception {
    final int parallelism = 3;

    // write a sequence from 0 to 49 to each of the 3 partitions.
    final String topicName = writeSequence("testKafkaOffsetToZk", 50, parallelism, 1);

    final StreamExecutionEnvironment env2 =
        StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env2.getConfig().disableSysoutLogging();
    env2.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    env2.setParallelism(parallelism);
    env2.enableCheckpointing(200);

    Properties readProps = new Properties();
    readProps.putAll(standardProps);
    readProps.setProperty("auto.offset.reset", "latest");

    DataStream<String> stream =
        env2.addSource(kafkaServer.getConsumer(topicName, new SimpleStringSchema(), readProps));
    stream.addSink(new DiscardingSink<String>());

    final AtomicReference<Throwable> errorRef = new AtomicReference<>();
    final Thread runner =
        new Thread("runner") {
          @Override
          public void run() {
            try {
              env2.execute();
            } catch (Throwable t) {
              if (!(t.getCause() instanceof JobCancellationException)) {
                errorRef.set(t);
              }
            }
          }
        };
    runner.start();

    final CuratorFramework curatorFramework =
        ((KafkaTestEnvironmentImpl) kafkaServer).createCuratorClient();
    final Long l49 = 49L;

    final long deadline = 30000 + System.currentTimeMillis();
    do {
      Long o1 =
          ZookeeperOffsetHandler.getOffsetFromZooKeeper(
              curatorFramework, standardProps.getProperty("group.id"), topicName, 0);
      Long o2 =
          ZookeeperOffsetHandler.getOffsetFromZooKeeper(
              curatorFramework, standardProps.getProperty("group.id"), topicName, 1);
      Long o3 =
          ZookeeperOffsetHandler.getOffsetFromZooKeeper(
              curatorFramework, standardProps.getProperty("group.id"), topicName, 2);

      if (l49.equals(o1) && l49.equals(o2) && l49.equals(o3)) {
        break;
      }

      Thread.sleep(100);
    } while (System.currentTimeMillis() < deadline);

    // cancel the job
    JobManagerCommunicationUtils.cancelCurrentJob(flink.getLeaderGateway(timeout));

    final Throwable t = errorRef.get();
    if (t != null) {
      throw new RuntimeException("Job failed with an exception", t);
    }

    // check that the offsets in ZK are correct
    Long o1 =
        ZookeeperOffsetHandler.getOffsetFromZooKeeper(
            curatorFramework, standardProps.getProperty("group.id"), topicName, 0);
    Long o2 =
        ZookeeperOffsetHandler.getOffsetFromZooKeeper(
            curatorFramework, standardProps.getProperty("group.id"), topicName, 1);
    Long o3 =
        ZookeeperOffsetHandler.getOffsetFromZooKeeper(
            curatorFramework, standardProps.getProperty("group.id"), topicName, 2);
    Assert.assertEquals(Long.valueOf(49L), o1);
    Assert.assertEquals(Long.valueOf(49L), o2);
    Assert.assertEquals(Long.valueOf(49L), o3);

    curatorFramework.close();
  }
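The standardProps used above come from the shared Kafka test base and are not shown in this example. Below is a hedged sketch of the minimum they would need for this test; the property names are the standard Kafka 0.8 consumer/ZooKeeper settings, the values are placeholders.

  // Hedged sketch for illustration only; the real standardProps is built by the Kafka test base.
  Properties standardProps = new Properties();
  standardProps.setProperty("zookeeper.connect", "localhost:2181");
  standardProps.setProperty("bootstrap.servers", "localhost:9092");
  standardProps.setProperty("group.id", "flink-tests");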
Example #4
  @Test
  public void operatorTest() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStreamSource<Long> src = env.generateSequence(0, 0);

    MapFunction<Long, Integer> mapFunction =
        new MapFunction<Long, Integer>() {
          @Override
          public Integer map(Long value) throws Exception {
            return null;
          }
        };
    DataStream<Integer> map = src.map(mapFunction);
    map.addSink(new NoOpSink<Integer>());
    assertEquals(mapFunction, getFunctionForDataStream(map));

    FlatMapFunction<Long, Integer> flatMapFunction =
        new FlatMapFunction<Long, Integer>() {
          private static final long serialVersionUID = 1L;

          @Override
          public void flatMap(Long value, Collector<Integer> out) throws Exception {}
        };
    DataStream<Integer> flatMap = src.flatMap(flatMapFunction);
    flatMap.addSink(new NoOpSink<Integer>());
    assertEquals(flatMapFunction, getFunctionForDataStream(flatMap));

    FilterFunction<Integer> filterFunction =
        new FilterFunction<Integer>() {
          @Override
          public boolean filter(Integer value) throws Exception {
            return false;
          }
        };

    DataStream<Integer> unionFilter = map.union(flatMap).filter(filterFunction);

    unionFilter.addSink(new NoOpSink<Integer>());

    assertEquals(filterFunction, getFunctionForDataStream(unionFilter));

    try {
      env.getStreamGraph().getStreamEdge(map.getId(), unionFilter.getId());
    } catch (RuntimeException e) {
      fail(e.getMessage());
    }

    try {
      env.getStreamGraph().getStreamEdge(flatMap.getId(), unionFilter.getId());
    } catch (RuntimeException e) {
      fail(e.getMessage());
    }

    OutputSelector<Integer> outputSelector =
        new OutputSelector<Integer>() {
          @Override
          public Iterable<String> select(Integer value) {
            return null;
          }
        };

    SplitStream<Integer> split = unionFilter.split(outputSelector);
    split.select("dummy").addSink(new NoOpSink<Integer>());
    List<OutputSelector<?>> outputSelectors =
        env.getStreamGraph().getStreamNode(unionFilter.getId()).getOutputSelectors();
    assertEquals(1, outputSelectors.size());
    assertEquals(outputSelector, outputSelectors.get(0));

    DataStream<Integer> select = split.select("a");
    DataStreamSink<Integer> sink = select.print();

    StreamEdge splitEdge =
        env.getStreamGraph().getStreamEdge(unionFilter.getId(), sink.getTransformation().getId());
    assertEquals("a", splitEdge.getSelectedNames().get(0));

    ConnectedStreams<Integer, Integer> connect = map.connect(flatMap);
    CoMapFunction<Integer, Integer, String> coMapper =
        new CoMapFunction<Integer, Integer, String>() {
          private static final long serialVersionUID = 1L;

          @Override
          public String map1(Integer value) {
            return null;
          }

          @Override
          public String map2(Integer value) {
            return null;
          }
        };
    DataStream<String> coMap = connect.map(coMapper);
    coMap.addSink(new NoOpSink<String>());
    assertEquals(coMapper, getFunctionForDataStream(coMap));

    try {
      env.getStreamGraph().getStreamEdge(map.getId(), coMap.getId());
    } catch (RuntimeException e) {
      fail(e.getMessage());
    }

    try {
      env.getStreamGraph().getStreamEdge(flatMap.getId(), coMap.getId());
    } catch (RuntimeException e) {
      fail(e.getMessage());
    }
  }
Example #5
  /** Tests whether parallelism gets set. */
  @Test
  public void testParallelism() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStreamSource<Tuple2<Long, Long>> src = env.fromElements(new Tuple2<>(0L, 0L));
    env.setParallelism(10);

    SingleOutputStreamOperator<Long, ?> map =
        src.map(
                new MapFunction<Tuple2<Long, Long>, Long>() {
                  @Override
                  public Long map(Tuple2<Long, Long> value) throws Exception {
                    return null;
                  }
                })
            .name("MyMap");

    DataStream<Long> windowed =
        map.windowAll(GlobalWindows.create())
            .trigger(PurgingTrigger.of(CountTrigger.of(10)))
            .fold(
                0L,
                new FoldFunction<Long, Long>() {
                  @Override
                  public Long fold(Long accumulator, Long value) throws Exception {
                    return null;
                  }
                });

    windowed.addSink(new NoOpSink<Long>());

    DataStreamSink<Long> sink =
        map.addSink(
            new SinkFunction<Long>() {
              private static final long serialVersionUID = 1L;

              @Override
              public void invoke(Long value) throws Exception {}
            });

    assertEquals(1, env.getStreamGraph().getStreamNode(src.getId()).getParallelism());
    assertEquals(10, env.getStreamGraph().getStreamNode(map.getId()).getParallelism());
    assertEquals(1, env.getStreamGraph().getStreamNode(windowed.getId()).getParallelism());
    assertEquals(
        10, env.getStreamGraph().getStreamNode(sink.getTransformation().getId()).getParallelism());

    env.setParallelism(7);

    // Some parts, such as windowing, rely on the fact that previous operators have a parallelism
    // set when the Discretizer is instantiated. This would break if we dynamically changed
    // the parallelism of operations when changing the setting on the execution environment.
    assertEquals(1, env.getStreamGraph().getStreamNode(src.getId()).getParallelism());
    assertEquals(10, env.getStreamGraph().getStreamNode(map.getId()).getParallelism());
    assertEquals(1, env.getStreamGraph().getStreamNode(windowed.getId()).getParallelism());
    assertEquals(
        10, env.getStreamGraph().getStreamNode(sink.getTransformation().getId()).getParallelism());

    try {
      src.setParallelism(3);
      fail();
    } catch (IllegalArgumentException success) {
      // do nothing
    }

    DataStreamSource<Long> parallelSource = env.generateSequence(0, 0);
    parallelSource.addSink(new NoOpSink<Long>());
    assertEquals(7, env.getStreamGraph().getStreamNode(parallelSource.getId()).getParallelism());

    parallelSource.setParallelism(3);
    assertEquals(3, env.getStreamGraph().getStreamNode(parallelSource.getId()).getParallelism());

    map.setParallelism(2);
    assertEquals(2, env.getStreamGraph().getStreamNode(map.getId()).getParallelism());

    sink.setParallelism(4);
    assertEquals(
        4, env.getStreamGraph().getStreamNode(sink.getTransformation().getId()).getParallelism());
  }
Example #6
  /**
   * This uses {@link org.apache.flink.streaming.connectors.fs.DateTimeBucketer} to produce rolling
   * files. The clock of DateTimeBucketer is set to a {@link ModifyableClock}, and latches are
   * used to keep the time in lockstep with the processing of elements.
   */
  @Test
  public void testDateTimeRollingStringWriter() throws Exception {
    final int NUM_ELEMENTS = 20;
    final int PARALLELISM = 2;
    final String outPath = hdfsURI + "/rolling-out";
    DateTimeBucketer.setClock(new ModifyableClock());
    ModifyableClock.setCurrentTime(0);

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(PARALLELISM);

    DataStream<Tuple2<Integer, String>> source =
        env.addSource(new WaitingTestSourceFunction(NUM_ELEMENTS)).broadcast();

    // the parallel flatMap is chained to the sink, so when it has seen 5 elements it can
    // fire the latch
    DataStream<String> mapped =
        source.flatMap(
            new RichFlatMapFunction<Tuple2<Integer, String>, String>() {
              private static final long serialVersionUID = 1L;

              int count = 0;

              @Override
              public void flatMap(Tuple2<Integer, String> value, Collector<String> out)
                  throws Exception {
                out.collect(value.f1);
                count++;
                if (count >= 5) {
                  if (getRuntimeContext().getIndexOfThisSubtask() == 0) {
                    latch1.trigger();
                  } else {
                    latch2.trigger();
                  }
                  count = 0;
                }
              }
            });

    RollingSink<String> sink =
        new RollingSink<String>(outPath)
            .setBucketer(new DateTimeBucketer("ss"))
            .setPartPrefix("part")
            .setPendingPrefix("")
            .setPendingSuffix("");

    mapped.addSink(sink);

    env.execute("RollingSink String Write Test");

    RemoteIterator<LocatedFileStatus> files = dfs.listFiles(new Path(outPath), true);

    // we should have 8 rolling files: 4 time intervals times a parallelism of 2
    int numFiles = 0;
    while (files.hasNext()) {
      LocatedFileStatus file = files.next();
      numFiles++;
      if (file.getPath().toString().contains("rolling-out/00")) {
        FSDataInputStream inStream = dfs.open(file.getPath());

        BufferedReader br = new BufferedReader(new InputStreamReader(inStream));

        for (int i = 0; i < 5; i++) {
          String line = br.readLine();
          Assert.assertEquals("message #" + i, line);
        }

        inStream.close();
      } else if (file.getPath().toString().contains("rolling-out/05")) {
        FSDataInputStream inStream = dfs.open(file.getPath());

        BufferedReader br = new BufferedReader(new InputStreamReader(inStream));

        for (int i = 5; i < 10; i++) {
          String line = br.readLine();
          Assert.assertEquals("message #" + i, line);
        }

        inStream.close();
      } else if (file.getPath().toString().contains("rolling-out/10")) {
        FSDataInputStream inStream = dfs.open(file.getPath());

        BufferedReader br = new BufferedReader(new InputStreamReader(inStream));

        for (int i = 10; i < 15; i++) {
          String line = br.readLine();
          Assert.assertEquals("message #" + i, line);
        }

        inStream.close();
      } else if (file.getPath().toString().contains("rolling-out/15")) {
        FSDataInputStream inStream = dfs.open(file.getPath());

        BufferedReader br = new BufferedReader(new InputStreamReader(inStream));

        for (int i = 15; i < 20; i++) {
          String line = br.readLine();
          Assert.assertEquals("message #" + i, line);
        }

        inStream.close();
      } else {
        Assert.fail("File " + file + " does not match any expected roll pattern.");
      }
    }

    Assert.assertEquals(8, numFiles);
  }
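The ModifyableClock referenced above is defined elsewhere in the test class. A minimal sketch of such a settable clock, assuming the connector's Clock interface exposes a single currentTimeMillis() method:

  // Hedged reconstruction for illustration; the real ModifyableClock is a nested class of the
  // test and may differ in detail. It reports a time that the test advances by hand via
  // ModifyableClock.setCurrentTime(...), which is what keeps DateTimeBucketer's buckets in
  // lockstep with the latches.
  private static class ModifyableClock implements Clock {

    private static volatile long currentTime = 0;

    public static void setCurrentTime(long time) {
      currentTime = time;
    }

    @Override
    public long currentTimeMillis() {
      return currentTime;
    }
  }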