Example #1
0
  /**
   * Writes 20 messages into a fresh topic, stores the offset 1234 in ZooKeeper for the configured
   * consumer group and then reads the sequence back through {@code readSequence}, expecting 20
   * values starting from 0.
   *
   * @throws Exception if writing, the ZooKeeper access or reading fails
   */
  @Test(timeout = 60000)
  public void testInvalidOffset() throws Exception {

    final int parallelism = 1;
    final int valuesCount = 20;
    final int startFrom = 0;

    // write 20 messages into topic:
    final String topic = writeSequence("invalidOffsetTopic", valuesCount, parallelism, 1);

    // set invalid offset; the client is closed in a finally block so that the ZooKeeper
    // connection is released even when storing the offset fails
    CuratorFramework curatorClient = ((KafkaTestEnvironmentImpl) kafkaServer).createCuratorClient();
    try {
      ZookeeperOffsetHandler.setOffsetInZooKeeper(
          curatorClient, standardProps.getProperty("group.id"), topic, 0, 1234);
    } finally {
      curatorClient.close();
    }

    // read from topic
    final StreamExecutionEnvironment env =
        StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env.getConfig().disableSysoutLogging();

    readSequence(env, standardProps, parallelism, topic, valuesCount, startFrom);

    deleteTestTopic(topic);
  }
  /**
   * Entry point of the word count program whose tokenizer is a Storm bolt wrapped in a {@code
   * BoltWrapper}.
   *
   * @param args command line arguments, validated by {@code parseParameters}
   * @throws Exception if the job execution fails
   */
  public static void main(final String[] args) throws Exception {

    // nothing to do when the arguments were rejected
    final boolean parametersAccepted = parseParameters(args);
    if (!parametersAccepted) {
      return;
    }

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    final DataStream<String> lines = getTextDataStream(env);

    // the bolt is wrapped accordingly so that it can be used as a stream operator; it splits
    // the lines in pairs (2-tuples) containing: (word,1)
    final BoltWrapper<String, Tuple2<String, Integer>> wrappedTokenizer =
        new BoltWrapper<String, Tuple2<String, Integer>>(new BoltTokenizer());

    final DataStream<Tuple2<String, Integer>> wordCounts =
        lines
            .transform(
                "BoltTokenizer",
                TypeExtractor.getForObject(new Tuple2<String, Integer>("", 0)),
                wrappedTokenizer)
            // key on tuple field "0" (the word) and add up tuple field "1" (the count)
            .keyBy(0)
            .sum(1);

    // either print the result or write it to the configured output path
    if (!fileOutput) {
      wordCounts.print();
    } else {
      wordCounts.writeAsText(outputPath);
    }

    env.execute("Streaming WordCount with bolt tokenizer");
  }
  /**
   * Note: this test fails if we don't have the synchronized block in {@link
   * org.apache.flink.streaming.runtime.tasks.SourceStreamTask.SourceOutput}
   *
   * <p>The job is expected to fail with a {@code JobExecutionException} whose cause is a {@code
   * TimerException} wrapping a {@code RuntimeException} with the message "TEST SUCCESS". Any
   * other {@code JobExecutionException} is rethrown unchanged.
   *
   * @throws Exception if the job fails in any way other than the expected one
   */
  @Test
  public void testOneInputOperatorWithoutChaining() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);

    DataStream<String> source = env.addSource(new InfiniteTestSource());

    source.transform(
        "Custom Operator",
        BasicTypeInfo.STRING_TYPE_INFO,
        new TimerOperator(StreamOperator.ChainingStrategy.NEVER));

    boolean testSuccess = false;
    try {
      env.execute("Timer test");
    } catch (JobExecutionException e) {
      Throwable timerFailure = e.getCause();
      Throwable reported = timerFailure instanceof TimerException ? timerFailure.getCause() : null;

      // constant-first comparison: a RuntimeException without a message must cause the original
      // exception to be rethrown instead of a NullPointerException that would hide it
      if (reported instanceof RuntimeException && "TEST SUCCESS".equals(reported.getMessage())) {
        testSuccess = true;
      } else {
        throw e;
      }
    }
    Assert.assertTrue(testSuccess);
  }
Example #4
0
  /**
   * Runs a single-state pattern named "start" that accepts tuples whose second field equals 1
   * over the elements (0,1) and (0,2), and writes the selected tuples to {@code resultPath}. The
   * expected output is "(0,1)".
   *
   * @throws Exception if the job execution fails
   */
  @Test
  public void testSimplePatternWithSingleState() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    expected = "(0,1)";

    DataStream<Tuple2<Integer, Integer>> elements =
        env.fromElements(new Tuple2<>(0, 1), new Tuple2<>(0, 2));

    // condition of the only pattern state: second tuple field equals 1
    FilterFunction<Tuple2<Integer, Integer>> secondFieldIsOne =
        new FilterFunction<Tuple2<Integer, Integer>>() {
          @Override
          public boolean filter(Tuple2<Integer, Integer> value) throws Exception {
            return value.f1 == 1;
          }
        };

    Pattern<Tuple2<Integer, Integer>, ?> singleState =
        Pattern.<Tuple2<Integer, Integer>>begin("start").where(secondFieldIsOne);

    // every match is reduced to the event bound to the "start" state
    PatternSelectFunction<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> pickStart =
        new PatternSelectFunction<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>>() {
          @Override
          public Tuple2<Integer, Integer> select(Map<String, Tuple2<Integer, Integer>> match)
              throws Exception {
            return match.get("start");
          }
        };

    PatternStream<Tuple2<Integer, Integer>> matches = CEP.pattern(elements, singleState);
    DataStream<Tuple2<Integer, Integer>> selected = matches.select(pickStart);

    selected.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);

    env.execute();
  }
  /**
   * Creates the input stream of text lines.
   *
   * <p>NOTE(review): the decision is based on the {@code fileOutput} flag; presumably that flag is
   * set together with {@code textPath} when arguments are parsed. Confirm against {@code
   * parseParameters}.
   *
   * @param env the environment used to create the stream
   * @return the lines of the file at {@code textPath} when {@code fileOutput} is set, otherwise
   *     the built-in {@code WordCountData.WORDS}
   */
  private static DataStream<String> getTextDataStream(final StreamExecutionEnvironment env) {
    if (!fileOutput) {
      // no file configured: fall back to the bundled sample data
      return env.fromElements(WordCountData.WORDS);
    }

    // read the text file from given input path
    return env.readTextFile(textPath);
  }
  /**
   * This tests {@link StringWriter} with non-rolling output.
   *
   * <p>A source of 20 elements is broadcast to two parallel subtasks and filtered by {@code
   * OddEvenFilter}; the string field of every remaining tuple is written through a {@code
   * RollingSink}. Afterwards the file "part-0-0" must contain the even-numbered messages and
   * "part-1-0" the odd-numbered ones, in ascending order.
   *
   * @throws Exception if the job or reading the result files fails
   */
  @Test
  public void testNonRollingStringWriter() throws Exception {
    final int numElements = 20;
    final int parallelism = 2;
    final String outPath = hdfsURI + "/string-non-rolling-out";
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);

    DataStream<Tuple2<Integer, String>> source =
        env.addSource(new TestSourceFunction(numElements)).broadcast().filter(new OddEvenFilter());

    RollingSink<String> sink =
        new RollingSink<String>(outPath)
            .setBucketer(new NonRollingBucketer())
            .setPartPrefix("part")
            .setPendingPrefix("")
            .setPendingSuffix("");

    source
        .map(
            new MapFunction<Tuple2<Integer, String>, String>() {
              private static final long serialVersionUID = 1L;

              @Override
              public String map(Tuple2<Integer, String> value) throws Exception {
                return value.f1;
              }
            })
        .addSink(sink);

    env.execute("RollingSink String Write Test");

    // try-with-resources releases the stream and reader even when an assertion or a read fails
    try (FSDataInputStream inStream = dfs.open(new Path(outPath + "/part-0-0"));
        BufferedReader br = new BufferedReader(new InputStreamReader(inStream))) {
      for (int i = 0; i < numElements; i += 2) {
        String line = br.readLine();
        Assert.assertEquals("message #" + i, line);
      }
    }

    try (FSDataInputStream inStream = dfs.open(new Path(outPath + "/part-1-0"));
        BufferedReader br = new BufferedReader(new InputStreamReader(inStream))) {
      for (int i = 1; i < numElements; i += 2) {
        String line = br.readLine();
        Assert.assertEquals("message #" + i, line);
      }
    }
  }
Example #7
0
  /**
   * Reads a topic without checkpointing enabled and verifies afterwards that an offset between 1
   * and 100 was stored in ZooKeeper for at least one of the three lookups (indices 0, 1 and 2).
   *
   * @throws Exception if writing, reading or the ZooKeeper access fails
   */
  @Test(timeout = 60000)
  public void testOffsetAutocommitTest() throws Exception {
    final int parallelism = 3;

    // write a sequence from 0 to 99 to each of the 3 partitions.
    final String topicName = writeSequence("testOffsetAutocommit", 100, parallelism, 1);

    StreamExecutionEnvironment env =
        StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    // NOTE: We are not enabling the checkpointing!
    env.getConfig().disableSysoutLogging();
    env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    env.setParallelism(parallelism);

    // the readSequence operation sleeps for 20 ms between each record.
    // setting a delay of 25*20 = 500 for the commit interval makes
    // sure that we commit roughly 3-4 times while reading, however
    // at least once.
    Properties readProps = new Properties();
    readProps.putAll(standardProps);
    readProps.setProperty("auto.commit.interval.ms", "500");

    // read so that the offset can be committed to ZK
    readSequence(env, readProps, parallelism, topicName, 100, 0);

    // get the offsets; the client is closed in a finally block so that the ZooKeeper connection
    // is released even when one of the lookups fails
    final String groupId = standardProps.getProperty("group.id");
    CuratorFramework curatorFramework =
        ((KafkaTestEnvironmentImpl) kafkaServer).createCuratorClient();

    Long o1;
    Long o2;
    Long o3;
    try {
      o1 = ZookeeperOffsetHandler.getOffsetFromZooKeeper(curatorFramework, groupId, topicName, 0);
      o2 = ZookeeperOffsetHandler.getOffsetFromZooKeeper(curatorFramework, groupId, topicName, 1);
      o3 = ZookeeperOffsetHandler.getOffsetFromZooKeeper(curatorFramework, groupId, topicName, 2);
    } finally {
      curatorFramework.close();
    }
    LOG.info("Got final offsets from zookeeper o1={}, o2={}, o3={}", o1, o2, o3);

    // ensure that the offset has been committed
    boolean atLeastOneOffsetSet =
        (o1 != null && o1 > 0 && o1 <= 100)
            || (o2 != null && o2 > 0 && o2 <= 100)
            || (o3 != null && o3 > 0 && o3 <= 100);
    assertTrue(
        "Expecting at least one offset to be set o1=" + o1 + " o2=" + o2 + " o3=" + o3,
        atLeastOneOffsetSet);

    deleteTestTopic(topicName);
  }
  /**
   * Counts the words of {@code WordCountData.TEXT} and writes the resulting (word, count) tuples
   * as text to {@code resultPath}.
   *
   * @throws Exception if the job execution fails
   */
  @Override
  protected void testProgram() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // tokenize, key on the word (field 0) and sum the counts (field 1)
    final DataStream<Tuple2<String, Integer>> wordCounts =
        env.fromElements(WordCountData.TEXT)
            .flatMap(new CsvOutputFormatITCase.Tokenizer())
            .keyBy(0)
            .sum(1);

    wordCounts.writeAsText(resultPath);

    env.execute("WriteAsTextTest");
  }
Example #9
0
  /**
   * Entry point of the streaming iteration example: integer pairs are fed into an iteration that
   * repeatedly applies {@code Step} until the output selector routes a tuple to the "output"
   * channel.
   *
   * @param args command line arguments, validated by {@code parseParameters}
   * @throws Exception if the job execution fails
   */
  public static void main(String[] args) throws Exception {

    final boolean parametersAccepted = parseParameters(args);
    if (!parametersAccepted) {
      return;
    }

    // a buffer timeout of 1 enables continuous flushing of the output buffers (lowest latency)
    StreamExecutionEnvironment env =
        StreamExecutionEnvironment.getExecutionEnvironment().setBufferTimeout(1);

    // the integer pairs either come from a text file or from a random source
    DataStream<Tuple2<Integer, Integer>> pairs;
    if (!fileInput) {
      pairs = env.addSource(new RandomFibonacciSource());
    } else {
      pairs = env.readTextFile(inputPath).map(new FibonacciInputMap());
    }

    // open the iteration on the mapped input with 5 second timeout
    IterativeStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> iteration =
        pairs.map(new InputMap()).iterate(5000);

    // one iteration step, followed by the output selector that splits the stream into the
    // 'iterate' and 'output' channels
    SplitStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> stepResult =
        iteration.map(new Step()).split(new MySelector());

    // tuples on the 'iterate' channel are fed back into the iteration
    iteration.closeWith(stepResult.select("iterate"));

    // tuples on the 'output' channel are mapped to the final result format
    DataStream<Tuple2<Tuple2<Integer, Integer>, Integer>> results =
        stepResult.select("output").map(new OutputMap());

    // emit results
    if (!fileOutput) {
      results.print();
    } else {
      results.writeAsText(outputPath, 1);
    }

    env.execute("Streaming Iteration Example");
  }
Example #10
0
  /**
   * Checks the type information reported by a sequence source, a map, a window function applied
   * on a global count-triggered window and a fold on such a window.
   */
  @Test
  public void testTypeInfo() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // source of longs
    DataStream<Long> sequence = env.generateSequence(0, 0);
    assertEquals(TypeExtractor.getForClass(Long.class), sequence.getType());

    // map: Long -> Tuple2<Integer, String>
    MapFunction<Long, Tuple2<Integer, String>> toTuple =
        new MapFunction<Long, Tuple2<Integer, String>>() {
          @Override
          public Tuple2<Integer, String> map(Long value) throws Exception {
            return null;
          }
        };
    DataStream<Tuple2<Integer, String>> mapped = sequence.map(toTuple);
    assertEquals(TypeExtractor.getForObject(new Tuple2<>(0, "")), mapped.getType());

    // window function: Tuple2<Integer, String> -> String
    AllWindowFunction<Tuple2<Integer, String>, String, GlobalWindow> toStrings =
        new AllWindowFunction<Tuple2<Integer, String>, String, GlobalWindow>() {
          @Override
          public void apply(
              GlobalWindow globalWindow,
              Iterable<Tuple2<Integer, String>> values,
              Collector<String> out)
              throws Exception {}
        };
    DataStream<String> windowed =
        mapped
            .windowAll(GlobalWindows.create())
            .trigger(PurgingTrigger.of(CountTrigger.of(5)))
            .apply(toStrings);
    assertEquals(TypeExtractor.getForClass(String.class), windowed.getType());

    // fold: String -> CustomPOJO
    FoldFunction<String, CustomPOJO> toPojo =
        new FoldFunction<String, CustomPOJO>() {
          private static final long serialVersionUID = 1L;

          @Override
          public CustomPOJO fold(CustomPOJO accumulator, String value) throws Exception {
            return null;
          }
        };
    DataStream<CustomPOJO> folded =
        windowed
            .windowAll(GlobalWindows.create())
            .trigger(PurgingTrigger.of(CountTrigger.of(5)))
            .fold(new CustomPOJO(), toPojo);
    assertEquals(TypeExtractor.getForClass(CustomPOJO.class), folded.getType());
  }
Example #11
0
  /**
   * Builds the job graph of a pipeline consisting of a {@code SubtaskIndexSource}, a key-by on the
   * element itself, a {@code SubtaskIndexFlatMapper} and a {@code CollectionSink}.
   *
   * <p>As a side effect the static {@code SubtaskIndexFlatMapper.workCompletedLatch} is replaced
   * by a new latch counting down from {@code numberKeys}.
   *
   * @param parallelism parallelism set on the environment
   * @param maxParallelism maximum parallelism set on the execution config
   * @param numberKeys passed to the source; also the initial count of the completion latch
   * @param numberElements passed to the source and to the flat mapper
   * @param terminateAfterEmission passed to the source
   * @param checkpointingInterval interval handed to {@code enableCheckpointing}
   * @return the job graph generated from the environment's stream graph
   */
  private static JobGraph createJobGraphWithKeyedState(
      int parallelism,
      int maxParallelism,
      int numberKeys,
      int numberElements,
      boolean terminateAfterEmission,
      int checkpointingInterval) {

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);
    env.getConfig().setMaxParallelism(maxParallelism);
    env.enableCheckpointing(checkpointingInterval);
    env.setRestartStrategy(RestartStrategies.noRestart());

    // every element is its own key
    KeySelector<Integer, Integer> identityKey =
        new KeySelector<Integer, Integer>() {
          private static final long serialVersionUID = -7952298871120320940L;

          @Override
          public Integer getKey(Integer value) throws Exception {
            return value;
          }
        };

    DataStream<Integer> keyedInput =
        env.addSource(new SubtaskIndexSource(numberKeys, numberElements, terminateAfterEmission))
            .keyBy(identityKey);

    // the latch has to be in place before the flat mapper is created
    SubtaskIndexFlatMapper.workCompletedLatch = new CountDownLatch(numberKeys);

    DataStream<Tuple2<Integer, Integer>> mapped =
        keyedInput.flatMap(new SubtaskIndexFlatMapper(numberElements));
    mapped.addSink(new CollectionSink<Tuple2<Integer, Integer>>());

    return env.getStreamGraph().getJobGraph();
  }
 /**
  * Windows this {@code DataStream} into sliding time windows.
  *
  * <p>This is a shortcut for either {@code .window(SlidingEventTimeWindows.of(size, slide))} or
  * {@code .window(SlidingProcessingTimeWindows.of(size, slide))} depending on the time
  * characteristic set using {@link
  * org.apache.flink.streaming.api.environment.StreamExecutionEnvironment#setStreamTimeCharacteristic(org.apache.flink.streaming.api.TimeCharacteristic)}
  *
  * <p>Note: This operation can be inherently non-parallel since all elements have to pass through
  * the same operator instance. (Only for special cases, such as aligned time windows is it
  * possible to perform this operation in parallel).
  *
  * @param size The size of the window.
  * @param slide The slide of the window, handed to the window assigner together with {@code size}.
  * @return The stream windowed by the assigner matching the time characteristic.
  */
 public AllWindowedStream<T, TimeWindow> timeWindowAll(Time size, Time slide) {
   final boolean processingTime =
       environment.getStreamTimeCharacteristic() == TimeCharacteristic.ProcessingTime;

   // every characteristic other than processing time uses event-time windows
   if (!processingTime) {
     return windowAll(SlidingEventTimeWindows.of(size, slide));
   }
   return windowAll(SlidingProcessingTimeWindows.of(size, slide));
 }
  /**
   * Creates the input stream of single-field tuples.
   *
   * <p>NOTE(review): the line delimiter is passed twice to {@code CsvInputFormat}; presumably the
   * second occurrence takes the place of the field delimiter so that a line is not split into
   * several fields. Confirm against the {@code CsvInputFormat} constructor.
   *
   * @param env the environment used to create the stream
   * @return the content of the file at {@code textPath} when {@code fileOutput} is set, otherwise
   *     the built-in {@code WordCountDataTuple.TUPLES}
   */
  private static DataStream<Tuple1<String>> getTextDataStream(
      final StreamExecutionEnvironment env) {
    if (!fileOutput) {
      // no file configured: fall back to the bundled sample data
      return env.fromElements(WordCountDataTuple.TUPLES);
    }

    // read the text file from given input path
    TupleTypeInfo<Tuple1<String>> tupleType =
        (TupleTypeInfo<Tuple1<String>>) TypeExtractor.getForObject(new Tuple1<String>(""));
    CsvInputFormat<Tuple1<String>> fileFormat =
        new CsvInputFormat<Tuple1<String>>(
            new Path(textPath),
            CsvInputFormat.DEFAULT_LINE_DELIMITER,
            CsvInputFormat.DEFAULT_LINE_DELIMITER,
            tupleType);
    return env.createInput(fileFormat, tupleType);
  }
Example #14
0
  /**
   * Checks the type information reported by a sequence source, a map, a window map on a count
   * window and a flattened window fold.
   */
  @Test
  public void testTypeInfo() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // source of longs
    DataStream<Long> sequence = env.generateSequence(0, 0);
    assertEquals(TypeExtractor.getForClass(Long.class), sequence.getType());

    // map: Long -> Tuple2<Integer, String>
    MapFunction<Long, Tuple2<Integer, String>> toTuple =
        new MapFunction<Long, Tuple2<Integer, String>>() {
          @Override
          public Tuple2<Integer, String> map(Long value) throws Exception {
            return null;
          }
        };
    DataStream<Tuple2<Integer, String>> mapped = sequence.map(toTuple);
    assertEquals(TypeExtractor.getForObject(new Tuple2<Integer, String>(0, "")), mapped.getType());

    // window map: Tuple2<Integer, String> -> String
    WindowMapFunction<Tuple2<Integer, String>, String> toStrings =
        new WindowMapFunction<Tuple2<Integer, String>, String>() {
          @Override
          public void mapWindow(Iterable<Tuple2<Integer, String>> values, Collector<String> out)
              throws Exception {}
        };
    WindowedDataStream<String> windowed = mapped.window(Count.of(5)).mapWindow(toStrings);
    assertEquals(TypeExtractor.getForClass(String.class), windowed.getType());

    // window fold: String -> CustomPOJO
    FoldFunction<String, CustomPOJO> toPojo =
        new FoldFunction<String, CustomPOJO>() {
          @Override
          public CustomPOJO fold(CustomPOJO accumulator, String value) throws Exception {
            return null;
          }
        };
    DataStream<CustomPOJO> flattened = windowed.foldWindow(new CustomPOJO(), toPojo).flatten();
    assertEquals(TypeExtractor.getForClass(CustomPOJO.class), flattened.getType());
  }
Example #15
0
  /**
   * Entry point of the car top speed windowing example.
   *
   * <p>The car tuples are grouped by field 0, windowed by time ({@code evictionSec} seconds, with
   * timestamps taken from {@code CarTimestamp}) and evaluated whenever field 2 has grown by
   * {@code triggerMeters}; the tuple with the maximum of field 1 is emitted per window.
   *
   * @param args command line arguments, validated by {@code parseParameters}
   * @throws Exception if the job execution fails
   */
  public static void main(String[] args) throws Exception {

    final boolean parametersAccepted = parseParameters(args);
    if (!parametersAccepted) {
      return;
    }

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // the car data either comes from a text file or from a generating source
    @SuppressWarnings({"rawtypes", "serial"})
    DataStream<Tuple4<Integer, Integer, Double, Long>> cars;
    if (!fileInput) {
      cars = env.addSource(CarSource.create(numOfCars));
    } else {
      cars = env.readTextFile(inputPath).map(new ParseCarData());
    }

    // difference of field 2 between the data point of the last trigger and the current one
    DeltaFunction<Tuple4<Integer, Integer, Double, Long>> deltaOfThirdField =
        new DeltaFunction<Tuple4<Integer, Integer, Double, Long>>() {
          private static final long serialVersionUID = 1L;

          @Override
          public double getDelta(
              Tuple4<Integer, Integer, Double, Long> oldDataPoint,
              Tuple4<Integer, Integer, Double, Long> newDataPoint) {
            return newDataPoint.f2 - oldDataPoint.f2;
          }
        };

    DataStream<Tuple4<Integer, Integer, Double, Long>> fastest =
        cars.groupBy(0)
            .window(Time.of(evictionSec * 1000, new CarTimestamp()))
            .every(
                Delta.of(
                    triggerMeters,
                    deltaOfThirdField,
                    new Tuple4<Integer, Integer, Double, Long>(0, 0, 0d, 0L)))
            .local()
            .maxBy(1)
            .flatten();

    if (!fileOutput) {
      fastest.print();
    } else {
      fastest.writeAsText(outputPath);
    }

    env.execute("CarTopSpeedWindowingExample");
  }
Example #16
0
  /**
   * Checks that each partitioning call on a stream (broadcast, shuffle, forward, rebalance,
   * global) puts the matching partitioner on the edge between the source and the print sink.
   */
  @Test
  public void testChannelSelectors() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStreamSource<Long> src = env.generateSequence(0, 0);

    assertTrue(
        partitionerBetween(env, src, src.broadcast().print()) instanceof BroadcastPartitioner);
    assertTrue(partitionerBetween(env, src, src.shuffle().print()) instanceof ShufflePartitioner);
    assertTrue(partitionerBetween(env, src, src.forward().print()) instanceof ForwardPartitioner);
    assertTrue(
        partitionerBetween(env, src, src.rebalance().print()) instanceof RebalancePartitioner);
    assertTrue(partitionerBetween(env, src, src.global().print()) instanceof GlobalPartitioner);
  }

  /** Returns the partitioner of the first stream edge that connects {@code src} with {@code sink}. */
  private static StreamPartitioner<?> partitionerBetween(
      StreamExecutionEnvironment env, DataStreamSource<Long> src, DataStreamSink<Long> sink) {
    return env.getStreamGraph()
        .getStreamEdges(src.getId(), sink.getTransformation().getId())
        .get(0)
        .getPartitioner();
  }
Example #17
0
  /**
   * Creates the topology from the given builder: the Storm topology is created, the fields
   * "_spouts" and "_bolts" are read through {@code getPrivateField}, an execution environment is
   * obtained and the translation is started right away.
   *
   * @param builder the builder describing the topology
   */
  private FlinkTopology(TopologyBuilder builder) {
    this.builder = builder;
    stormTopology = builder.createTopology();

    // spouts and bolts are taken from the fields named "_spouts" and "_bolts"
    spouts = getPrivateField("_spouts");
    bolts = getPrivateField("_bolts");

    env = StreamExecutionEnvironment.getExecutionEnvironment();

    // the translation is not deferred, it runs as part of the construction
    translateTopology();
  }
  /**
   * Consumes the Kafka topic "demo" with a hard-coded broker, ZooKeeper address and group id and
   * prints every message.
   *
   * @param args command line arguments (not used)
   * @throws Exception if the job execution fails
   */
  public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // connection settings are fixed instead of being parsed from the arguments
    Properties consumerConfig = new Properties();
    consumerConfig.setProperty("bootstrap.servers", "node2:9092");
    consumerConfig.setProperty("zookeeper.connect", "node2:2181");
    consumerConfig.setProperty("group.id", "1");

    FlinkKafkaConsumer082<String> consumer =
        new FlinkKafkaConsumer082<>("demo", new SimpleStringSchema(), consumerConfig);
    DataStream<String> messages = env.addSource(consumer);

    // print() will write the contents of the stream to the TaskManager's standard out stream
    messages.print();
    // prints the string form of the stream object itself, not its records
    System.out.print(messages + " Hello\n");

    env.execute("kafka consumer");
  }
Example #19
0
  /**
   * Applies a user-defined two-input operator to the two input streams.
   *
   * <p>When both inputs are {@code KeyedStream}s their key types have to be equal; in that case
   * the key selectors and the key type are registered on the transformation.
   *
   * @param functionName name handed to the transformation
   * @param outTypeInfo the output type of the operator
   * @param operator the object containing the transformation logic
   * @param <R> type of the return stream
   * @return the data stream constructed
   * @throws UnsupportedOperationException if both inputs are keyed and their key types differ
   */
  @PublicEvolving
  public <R> SingleOutputStreamOperator<R> transform(
      String functionName,
      TypeInformation<R> outTypeInfo,
      TwoInputStreamOperator<IN1, IN2, R> operator) {

    // read the output type of the input Transforms to coax out errors about MissingTypeInfo
    inputStream1.getType();
    inputStream2.getType();

    TwoInputTransformation<IN1, IN2, R> twoInputTransform =
        new TwoInputTransformation<>(
            inputStream1.getTransformation(),
            inputStream2.getTransformation(),
            functionName,
            operator,
            outTypeInfo,
            environment.getParallelism());

    final boolean bothKeyed =
        inputStream1 instanceof KeyedStream && inputStream2 instanceof KeyedStream;
    if (bothKeyed) {
      KeyedStream<IN1, ?> firstKeyed = (KeyedStream<IN1, ?>) inputStream1;
      KeyedStream<IN2, ?> secondKeyed = (KeyedStream<IN2, ?>) inputStream2;

      TypeInformation<?> firstKeyType = firstKeyed.getKeyType();
      TypeInformation<?> secondKeyType = secondKeyed.getKeyType();

      final boolean sameKeyType =
          firstKeyType.canEqual(secondKeyType) && firstKeyType.equals(secondKeyType);
      if (!sameKeyType) {
        throw new UnsupportedOperationException(
            "Key types if input KeyedStreams don't match: "
                + firstKeyType
                + " and "
                + secondKeyType
                + ".");
      }

      twoInputTransform.setStateKeySelectors(
          firstKeyed.getKeySelector(), secondKeyed.getKeySelector());
      twoInputTransform.setStateKeyType(firstKeyType);
    }

    @SuppressWarnings({"unchecked", "rawtypes"})
    SingleOutputStreamOperator<R> resultStream =
        new SingleOutputStreamOperator(environment, twoInputTransform);

    getExecutionEnvironment().addOperator(twoInputTransform);

    return resultStream;
  }
  /**
   * Executes the job and treats a failure as success when a {@code SuccessException} is found in
   * the cause chain of the thrown exception.
   *
   * <p>The chain is followed for at most 20 causes; when it ends or the limit is reached without
   * finding a {@code SuccessException}, the stack trace is printed and the test is failed.
   *
   * @param env the environment to execute
   * @param jobName the name of the job
   * @throws Exception any exception of the execution that is not handled here
   */
  public static void tryExecute(StreamExecutionEnvironment env, String jobName) throws Exception {
    try {
      env.execute(jobName);
    } catch (ProgramInvocationException | JobExecutionException root) {
      // walk down the cause chain until a SuccessException shows up
      int depth = 0;
      for (Throwable cause = root.getCause(); !(cause instanceof SuccessException); ) {
        if (cause != null && depth++ != 20) {
          cause = cause.getCause();
        } else {
          root.printStackTrace();
          fail("Test failed: " + root.getMessage());
        }
      }
    }
  }
  /**
   * Applies a user-defined one-input operator, together with its output type information, to
   * this DataStream.
   *
   * @param operatorName name of the operator, for logging purposes
   * @param outTypeInfo the output type of the operator
   * @param operator the object containing the transformation logic
   * @param <R> type of the return stream
   * @return the data stream constructed
   */
  @PublicEvolving
  public <R> SingleOutputStreamOperator<R> transform(
      String operatorName, TypeInformation<R> outTypeInfo, OneInputStreamOperator<T, R> operator) {

    // read the output type of the input Transform to coax out errors about MissingTypeInfo
    transformation.getOutputType();

    final int parallelism = environment.getParallelism();
    OneInputTransformation<T, R> operatorTransform =
        new OneInputTransformation<>(
            transformation, operatorName, operator, outTypeInfo, parallelism);

    @SuppressWarnings({"unchecked", "rawtypes"})
    SingleOutputStreamOperator<R> resultStream =
        new SingleOutputStreamOperator(environment, operatorTransform);

    // register the new transformation with the environment
    getExecutionEnvironment().addOperator(operatorTransform);

    return resultStream;
  }
Example #22
0
  /**
   * Builds the job graph of a pipeline consisting of a stateful source and a discarding sink.
   *
   * <p>As a side effect the static {@code StateSourceBase.workStartedLatch} is replaced by a new
   * latch with a count of one.
   *
   * @param parallelism parallelism set on the environment
   * @param maxParallelism maximum parallelism set on the execution config
   * @param partitionedOperatorState {@code true} to use a {@code PartitionedStateSource}, {@code
   *     false} to use a {@code NonPartitionedStateSource}
   * @return the job graph generated from the environment's stream graph
   */
  private static JobGraph createJobGraphWithOperatorState(
      int parallelism, int maxParallelism, boolean partitionedOperatorState) {

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);
    env.getConfig().setMaxParallelism(maxParallelism);
    // checkpointing is enabled with an interval of Long.MAX_VALUE
    env.enableCheckpointing(Long.MAX_VALUE);
    env.setRestartStrategy(RestartStrategies.noRestart());

    // the latch has to be in place before the source is created
    StateSourceBase.workStartedLatch = new CountDownLatch(1);

    final DataStream<Integer> source;
    if (partitionedOperatorState) {
      source = env.addSource(new PartitionedStateSource());
    } else {
      source = env.addSource(new NonPartitionedStateSource());
    }

    source.addSink(new DiscardingSink<Integer>());

    return env.getStreamGraph().getJobGraph();
  }
Example #23
0
  /**
   * Runs a job that writes the tuples (subtask index, counter) for every counter value from
   * {@code from} to {@code to} (both inclusive) to the given Kafka topic. Despite the method
   * name, the generated tuples consist of two integers.
   *
   * @param env the environment the job is built on; its parallelism is set to {@code
   *     numPartitions}
   * @param brokerConnection broker list used to derive the producer properties
   * @param topic the topic to write to
   * @param numPartitions parallelism of the job, also handed to the {@code Tuple2Partitioner}
   * @param from first counter value emitted by every source subtask
   * @param to last counter value emitted by every source subtask
   * @throws Exception if the job execution fails
   */
  public static void generateLongStringTupleSequence(
      StreamExecutionEnvironment env,
      String brokerConnection,
      String topic,
      int numPartitions,
      final int from,
      final int to)
      throws Exception {

    TypeInformation<Tuple2<Integer, Integer>> tupleType =
        TypeInfoParser.parse("Tuple2<Integer, Integer>");

    env.setParallelism(numPartitions);
    env.getConfig().disableSysoutLogging();
    env.setNumberOfExecutionRetries(0);

    DataStream<Tuple2<Integer, Integer>> sequence =
        env.addSource(
            new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

              private volatile boolean running = true;

              @Override
              public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
                final int subtask = getRuntimeContext().getIndexOfThisSubtask();

                // emit until the range is exhausted or the source is cancelled
                for (int value = from; running && value <= to; value++) {
                  ctx.collect(new Tuple2<Integer, Integer>(subtask, value));
                }
              }

              @Override
              public void cancel() {
                running = false;
              }
            });

    FlinkKafkaProducer<Tuple2<Integer, Integer>> producer =
        new FlinkKafkaProducer<>(
            topic,
            new TypeInformationSerializationSchema<>(tupleType, env.getConfig()),
            FlinkKafkaProducer.getPropertiesFromBrokerList(brokerConnection),
            new Tuple2Partitioner(numPartitions));
    sequence.addSink(producer);

    env.execute("Data generator (Int, Int) stream to topic " + topic);
  }
Example #24
0
  /**
   * Verify that the user-specified state backend is used even if checkpointing is disabled.
   *
   * <p>The job runs with a {@code FailingStateBackend} and a map function that requests
   * key/value state when it is opened. The test passes only when the job fails with a {@code
   * JobExecutionException} whose cause is itself caused by a {@code SuccessException}.
   *
   * @throws Exception if the job fails in any other way
   */
  @Test
  public void testStateBackendWithoutCheckpointing() throws Exception {

    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(1);

    see.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    see.setStateBackend(new FailingStateBackend());

    // requesting key/value state in open() is what involves the state backend
    RichMapFunction<Tuple2<String, Integer>, String> stateRequestingMapper =
        new RichMapFunction<Tuple2<String, Integer>, String>() {
          private static final long serialVersionUID = 1L;

          @Override
          public void open(Configuration parameters) throws Exception {
            super.open(parameters);
            getRuntimeContext().getKeyValueState("test", String.class, "");
          }

          @Override
          public String map(Tuple2<String, Integer> value) throws Exception {
            return value.f0;
          }
        };

    see.fromElements(new Tuple2<>("Hello", 1)).keyBy(0).map(stateRequestingMapper).print();

    try {
      see.execute();
      fail();
    } catch (JobExecutionException e) {
      Throwable cause = e.getCause();
      // anything but a nested SuccessException is a real failure
      if (cause == null || !(cause.getCause() instanceof SuccessException)) {
        throw e;
      }
    }
  }
 /**
  * Returns the execution config of the {@code environment} field, as obtained from {@code
  * environment.getConfig()}.
  *
  * @return the config object returned by the environment
  */
 public ExecutionConfig getExecutionConfig() {
   return environment.getConfig();
 }
Example #26
0
  /**
   * Runs the following program:
   *
   * <pre>
   *     [ (source)->(filter)->(map) ] -> [ (map) ] -> [ (groupBy/reduce)->(sink) ]
   * </pre>
   *
   * <p>After the job has finished, the sums of the static counters of the filter, the prefix
   * count map and the stateful counter each have to equal the number of generated strings. Any
   * exception fails the test.
   */
  @Test
  public void runCheckpointedProgram() {

    final long numStrings = 10000000L;
    assertTrue("Broken test setup", numStrings % 40 == 0);

    try {
      StreamExecutionEnvironment env =
          StreamExecutionEnvironment.createRemoteEnvironment(
              "localhost", cluster.getJobManagerRPCPort());
      env.setParallelism(PARALLELISM);
      env.enableCheckpointing(500);
      env.getConfig().disableSysoutLogging();

      // sink that remembers the highest count seen per first character of the prefix
      RichSinkFunction<PrefixCount> maxCountSink =
          new RichSinkFunction<PrefixCount>() {

            private Map<Character, Long> counts = new HashMap<Character, Long>();

            @Override
            public void invoke(PrefixCount value) {
              Character first = value.prefix.charAt(0);
              Long previous = counts.get(first);
              if (previous != null) {
                counts.put(first, Math.max(previous, value.count));
              } else {
                counts.put(first, value.count);
              }
            }
          };

      DataStream<String> strings = env.addSource(new StringGeneratingSourceFunction(numStrings));

      strings
          // -------------- first vertex, chained to the source ----------------
          .filter(new StringRichFilterFunction())

          // -------------- second vertex - the stateful one that also fails ----------------
          .map(new StringPrefixCountRichMapFunction())
          .startNewChain()
          .map(new StatefulCounterFunction())

          // -------------- third vertex - reducer and the sink ----------------
          .groupBy("prefix")
          .reduce(new OnceFailingReducer(numStrings))
          .addSink(maxCountSink);

      env.execute();

      // add up the per-instance counters of the three counting functions
      long filterSum = 0;
      for (long count : StringRichFilterFunction.counts) {
        filterSum += count;
      }

      long mapSum = 0;
      for (long count : StringPrefixCountRichMapFunction.counts) {
        mapSum += count;
      }

      long countSum = 0;
      for (long count : StatefulCounterFunction.counts) {
        countSum += count;
      }

      // verify that we counted exactly right;
      // if this fails we see at which point the count is off
      assertEquals(numStrings, filterSum);
      assertEquals(numStrings, mapSum);
      assertEquals(numStrings, countSum);
    } catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }
Example #27
0
  /**
   * Verifies that a "start" event, followed by a "middle" {@code SubEvent}, followed by an "end"
   * event is recognized in a fixed input sequence. Each match is written to {@code resultPath} as
   * the comma-separated ids of the three matched events.
   *
   * @throws Exception if building or executing the job fails
   */
  @Test
  public void testSimplePatternCEP() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    final DataStream<Event> input =
        env.fromElements(
            new Event(1, "barfoo", 1.0),
            new Event(2, "start", 2.0),
            new Event(3, "foobar", 3.0),
            new SubEvent(4, "foo", 4.0, 1.0),
            new Event(5, "middle", 5.0),
            new SubEvent(6, "middle", 6.0, 2.0),
            new SubEvent(7, "bar", 3.0, 3.0),
            new Event(42, "42", 42.0),
            new Event(8, "end", 1.0));

    // accepts events named "start"
    final FilterFunction<Event> startFilter =
        new FilterFunction<Event>() {

          @Override
          public boolean filter(Event value) throws Exception {
            return value.getName().equals("start");
          }
        };

    // accepts sub-events named "middle"
    final FilterFunction<SubEvent> middleFilter =
        new FilterFunction<SubEvent>() {

          @Override
          public boolean filter(SubEvent value) throws Exception {
            return value.getName().equals("middle");
          }
        };

    // accepts events named "end"
    final FilterFunction<Event> endFilter =
        new FilterFunction<Event>() {

          @Override
          public boolean filter(Event value) throws Exception {
            return value.getName().equals("end");
          }
        };

    final Pattern<Event, ?> pattern =
        Pattern.<Event>begin("start")
            .where(startFilter)
            .followedBy("middle")
            .subtype(SubEvent.class)
            .where(middleFilter)
            .followedBy("end")
            .where(endFilter);

    // renders a match as "<start id>,<middle id>,<end id>"
    final PatternSelectFunction<Event, String> idsOfMatch =
        new PatternSelectFunction<Event, String>() {

          @Override
          public String select(Map<String, Event> match) {
            return match.get("start").getId()
                + ","
                + match.get("middle").getId()
                + ","
                + match.get("end").getId();
          }
        };

    final DataStream<String> result = CEP.pattern(input, pattern).select(idsOfMatch);

    result.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);

    // expected sequence of matching event ids
    expected = "2,6,8";

    env.execute();
  }
Example #28
0
  /**
   * Verifies start/middle/end pattern detection on a stream keyed by event id when running with
   * the event-time characteristic and a parallelism of 2.
   *
   * <p>Every input element is paired with an explicit timestamp; the emitted watermark trails each
   * element's timestamp by 5. Each match is written to {@code resultPath} as the comma-separated
   * ids of the three matched events.
   *
   * @throws Exception if building or executing the job fails
   */
  @Test
  public void testSimpleKeyedPatternEventTime() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(2);

    // uses the second tuple field as timestamp and emits a watermark after every element
    final AssignerWithPunctuatedWatermarks<Tuple2<Event, Long>> timestampAssigner =
        new AssignerWithPunctuatedWatermarks<Tuple2<Event, Long>>() {

          @Override
          public long extractTimestamp(Tuple2<Event, Long> element, long currentTimestamp) {
            return element.f1;
          }

          @Override
          public Watermark checkAndGetNextWatermark(
              Tuple2<Event, Long> lastElement, long extractedTimestamp) {
            return new Watermark(lastElement.f1 - 5);
          }
        };

    // drops the timestamp again and keeps only the event
    final MapFunction<Tuple2<Event, Long>, Event> eventOnly =
        new MapFunction<Tuple2<Event, Long>, Event>() {

          @Override
          public Event map(Tuple2<Event, Long> value) throws Exception {
            return value.f0;
          }
        };

    // keys the stream by the event id
    final KeySelector<Event, Integer> byEventId =
        new KeySelector<Event, Integer>() {

          @Override
          public Integer getKey(Event value) throws Exception {
            return value.getId();
          }
        };

    // (Event, timestamp)
    final DataStream<Event> input =
        env.fromElements(
                Tuple2.of(new Event(1, "start", 1.0), 5L),
                Tuple2.of(new Event(1, "middle", 2.0), 1L),
                Tuple2.of(new Event(2, "middle", 2.0), 4L),
                Tuple2.of(new Event(2, "start", 2.0), 3L),
                Tuple2.of(new Event(1, "end", 3.0), 3L),
                Tuple2.of(new Event(3, "start", 4.1), 5L),
                Tuple2.of(new Event(1, "end", 4.0), 10L),
                Tuple2.of(new Event(2, "end", 2.0), 8L),
                Tuple2.of(new Event(1, "middle", 5.0), 7L),
                Tuple2.of(new Event(3, "middle", 6.0), 9L),
                Tuple2.of(new Event(3, "end", 7.0), 7L),
                // last element for high final watermark
                Tuple2.of(new Event(3, "end", 7.0), 100L))
            .assignTimestampsAndWatermarks(timestampAssigner)
            .map(eventOnly)
            .keyBy(byEventId);

    // accepts events named "start"
    final FilterFunction<Event> startFilter =
        new FilterFunction<Event>() {

          @Override
          public boolean filter(Event value) throws Exception {
            return value.getName().equals("start");
          }
        };

    // accepts events named "middle"
    final FilterFunction<Event> middleFilter =
        new FilterFunction<Event>() {

          @Override
          public boolean filter(Event value) throws Exception {
            return value.getName().equals("middle");
          }
        };

    // accepts events named "end"
    final FilterFunction<Event> endFilter =
        new FilterFunction<Event>() {

          @Override
          public boolean filter(Event value) throws Exception {
            return value.getName().equals("end");
          }
        };

    final Pattern<Event, ?> pattern =
        Pattern.<Event>begin("start")
            .where(startFilter)
            .followedBy("middle")
            .where(middleFilter)
            .followedBy("end")
            .where(endFilter);

    // renders a match as "<start id>,<middle id>,<end id>"
    final PatternSelectFunction<Event, String> idsOfMatch =
        new PatternSelectFunction<Event, String>() {

          @Override
          public String select(Map<String, Event> match) {
            return match.get("start").getId()
                + ","
                + match.get("middle").getId()
                + ","
                + match.get("end").getId();
          }
        };

    final DataStream<String> result = CEP.pattern(input, pattern).select(idsOfMatch);

    result.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);

    // the expected sequences of matching event ids
    expected = "1,1,1\n2,2,2";

    env.execute();
  }
  /**
   * Runs a keyed tumbling time-window job on the remote test cluster with checkpointing enabled
   * and up to three execution retries.
   *
   * <p>The stream of a {@code FailingSource} is rebalanced, keyed by field 0 and windowed with
   * {@code WINDOW_SIZE} milliseconds. The window function sums {@code f1.value} of all elements in
   * a window and emits (key, window start, window end, sum) to a {@code ValidatingSink} running
   * with parallelism 1. Any exception is printed and reported as a test failure.
   */
  @Test
  public void testTumblingTimeWindow() {
    final int NUM_ELEMENTS_PER_KEY = 3000;
    final int WINDOW_SIZE = 100;
    final int NUM_KEYS = 100;
    FailingSource.reset();

    try {
      final RichWindowFunction<
              Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, Tuple, TimeWindow>
          summingFunction =
              new RichWindowFunction<
                  Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, Tuple, TimeWindow>() {

                // set once open() has run; checked on every window evaluation
                private boolean opened = false;

                @Override
                public void open(Configuration parameters) {
                  assertEquals(PARALLELISM, getRuntimeContext().getNumberOfParallelSubtasks());
                  opened = true;
                }

                @Override
                public void apply(
                    Tuple tuple,
                    TimeWindow window,
                    Iterable<Tuple2<Long, IntType>> values,
                    Collector<Tuple4<Long, Long, Long, IntType>> out) {

                  // validate that the function has been opened properly
                  assertTrue(opened);

                  // the key is taken from the last element seen; it stays -1 for an empty window
                  long lastKey = -1;
                  int total = 0;

                  for (Tuple2<Long, IntType> element : values) {
                    total += element.f1.value;
                    lastKey = element.f0;
                  }

                  out.collect(
                      new Tuple4<Long, Long, Long, IntType>(
                          lastKey, window.getStart(), window.getEnd(), new IntType(total)));
                }
              };

      StreamExecutionEnvironment remoteEnv =
          StreamExecutionEnvironment.createRemoteEnvironment(
              "localhost", cluster.getLeaderRPCPort());

      remoteEnv.setParallelism(PARALLELISM);
      remoteEnv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
      remoteEnv.enableCheckpointing(100);
      remoteEnv.setNumberOfExecutionRetries(3);
      remoteEnv.getConfig().disableSysoutLogging();

      remoteEnv
          .addSource(new FailingSource(NUM_KEYS, NUM_ELEMENTS_PER_KEY, NUM_ELEMENTS_PER_KEY / 3))
          .rebalance()
          .keyBy(0)
          .timeWindow(Time.of(WINDOW_SIZE, MILLISECONDS))
          .apply(summingFunction)
          .addSink(new ValidatingSink(NUM_KEYS, NUM_ELEMENTS_PER_KEY / WINDOW_SIZE))
          .setParallelism(1);

      tryExecute(remoteEnv, "Tumbling Window Test");
    } catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }
  /**
   * Runs a keyed tumbling time-window job whose window function keeps a counter in key/value
   * state, on the remote test cluster with checkpointing enabled and up to three execution
   * retries.
   *
   * <p>The stream of a {@code FailingSource} is rebalanced, keyed by field 0 and windowed with
   * {@code WINDOW_SIZE} milliseconds. On every window evaluation the state named "count" is
   * incremented and (key, window start, window end, count) is emitted to a {@code
   * CountValidatingSink} running with parallelism 1. Any exception is printed and reported as a
   * test failure.
   */
  @Test
  public void testTumblingTimeWindowWithKVState() {
    final int NUM_ELEMENTS_PER_KEY = 3000;
    final int WINDOW_SIZE = 100;
    final int NUM_KEYS = 100;
    FailingSource.reset();

    try {
      final RichWindowFunction<
              Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, Tuple, TimeWindow>
          countingFunction =
              new RichWindowFunction<
                  Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, Tuple, TimeWindow>() {

                // set once open() has run; checked on every window evaluation
                private boolean opened = false;

                // counter state obtained from the runtime context in open()
                private OperatorState<Integer> windowCount;

                @Override
                public void open(Configuration parameters) {
                  assertEquals(PARALLELISM, getRuntimeContext().getNumberOfParallelSubtasks());
                  opened = true;
                  windowCount = getRuntimeContext().getKeyValueState("count", Integer.class, 0);
                }

                @Override
                public void apply(
                    Tuple tuple,
                    TimeWindow window,
                    Iterable<Tuple2<Long, IntType>> values,
                    Collector<Tuple4<Long, Long, Long, IntType>> out)
                    throws Exception {

                  // the window count state starts with the key, so that we get
                  // different count results for each key
                  if (windowCount.value() == 0) {
                    windowCount.update(tuple.<Long>getField(0).intValue());
                  }

                  // validate that the function has been opened properly
                  assertTrue(opened);

                  windowCount.update(windowCount.value() + 1);

                  out.collect(
                      new Tuple4<Long, Long, Long, IntType>(
                          tuple.<Long>getField(0),
                          window.getStart(),
                          window.getEnd(),
                          new IntType(windowCount.value())));
                }
              };

      StreamExecutionEnvironment remoteEnv =
          StreamExecutionEnvironment.createRemoteEnvironment(
              "localhost", cluster.getLeaderRPCPort());

      remoteEnv.setParallelism(PARALLELISM);
      remoteEnv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
      remoteEnv.enableCheckpointing(100);
      remoteEnv.setNumberOfExecutionRetries(3);
      remoteEnv.getConfig().disableSysoutLogging();

      remoteEnv
          .addSource(new FailingSource(NUM_KEYS, NUM_ELEMENTS_PER_KEY, NUM_ELEMENTS_PER_KEY / 3))
          .rebalance()
          .keyBy(0)
          .timeWindow(Time.of(WINDOW_SIZE, MILLISECONDS))
          .apply(countingFunction)
          .addSink(new CountValidatingSink(NUM_KEYS, NUM_ELEMENTS_PER_KEY / WINDOW_SIZE))
          .setParallelism(1);

      tryExecute(remoteEnv, "Tumbling Window Test");
    } catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }