Example #1
0
  /**
   * Assembles a checkpointed streaming job with keyed state and returns its {@link JobGraph}: a
   * {@code SubtaskIndexSource} keyed by the emitted value, a {@code SubtaskIndexFlatMapper}, and a
   * collection sink.
   *
   * @param parallelism parallelism of the job
   * @param maxParallelism maximum parallelism of the job
   * @param numberKeys number of keys, also used to size the completion latch
   * @param numberElements number of elements handed to the source and flat-mapper
   * @param terminateAfterEmission whether the source should terminate after emitting its data
   * @param checkpointingInterval interval passed to {@code enableCheckpointing}
   * @return the job graph of the assembled pipeline
   */
  private static JobGraph createJobGraphWithKeyedState(
      int parallelism,
      int maxParallelism,
      int numberKeys,
      int numberElements,
      boolean terminateAfterEmission,
      int checkpointingInterval) {

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);
    env.getConfig().setMaxParallelism(maxParallelism);
    env.enableCheckpointing(checkpointingInterval);
    // No restarts: a failure should surface to the test instead of being retried.
    env.setRestartStrategy(RestartStrategies.noRestart());

    // Key by the record value itself, so each distinct integer forms its own key.
    DataStream<Integer> input =
        env.addSource(new SubtaskIndexSource(numberKeys, numberElements, terminateAfterEmission))
            .keyBy(
                new KeySelector<Integer, Integer>() {
                  private static final long serialVersionUID = -7952298871120320940L;

                  @Override
                  public Integer getKey(Integer value) throws Exception {
                    return value;
                  }
                });

    // Reset the shared static latch, sized to the number of keys, before the job is built.
    SubtaskIndexFlatMapper.workCompletedLatch = new CountDownLatch(numberKeys);

    DataStream<Tuple2<Integer, Integer>> result =
        input.flatMap(new SubtaskIndexFlatMapper(numberElements));

    result.addSink(new CollectionSink<Tuple2<Integer, Integer>>());

    return env.getStreamGraph().getJobGraph();
  }
  /**
   * Note: this test fails if we don't have the synchronized block in {@link
   * org.apache.flink.streaming.runtime.tasks.SourceStreamTask.SourceOutput}
   *
   * <p>The {@code TimerOperator} signals success by throwing a {@code RuntimeException} with the
   * message {@code "TEST SUCCESS"} from a timer; the job is expected to fail with that exception
   * wrapped in a {@link TimerException}.
   */
  @Test
  public void testOneInputOperatorWithoutChaining() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);

    DataStream<String> source = env.addSource(new InfiniteTestSource());

    // ChainingStrategy.NEVER keeps the operator in its own task, separate from the source.
    source.transform(
        "Custom Operator",
        BasicTypeInfo.STRING_TYPE_INFO,
        new TimerOperator(StreamOperator.ChainingStrategy.NEVER));

    boolean testSuccess = false;
    try {
      env.execute("Timer test");
    } catch (JobExecutionException e) {
      Throwable cause = e.getCause();
      // Flattened check for the expected TimerException(RuntimeException("TEST SUCCESS")) chain.
      // Constant-first equals avoids an NPE (which would mask the real failure) when the
      // RuntimeException carries no message.
      if (cause instanceof TimerException
          && cause.getCause() instanceof RuntimeException
          && "TEST SUCCESS".equals(cause.getCause().getMessage())) {
        testSuccess = true;
      } else {
        // Any other failure cause is a genuine error; rethrow it.
        throw e;
      }
    }
    Assert.assertTrue(testSuccess);
  }
Example #3
0
  @Test
  public void testSimplePatternWithSingleState() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // Two-element input; only (0,1) satisfies the pattern's filter.
    DataStream<Tuple2<Integer, Integer>> events =
        env.fromElements(new Tuple2<>(0, 1), new Tuple2<>(0, 2));

    // Single-state pattern: accept records whose second field equals 1.
    Pattern<Tuple2<Integer, Integer>, ?> singleState =
        Pattern.<Tuple2<Integer, Integer>>begin("start")
            .where(
                new FilterFunction<Tuple2<Integer, Integer>>() {
                  @Override
                  public boolean filter(Tuple2<Integer, Integer> rec) throws Exception {
                    return rec.f1 == 1;
                  }
                });

    // Select each match's "start" event as the result.
    DataStream<Tuple2<Integer, Integer>> matches =
        CEP.pattern(events, singleState)
            .select(
                new PatternSelectFunction<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>>() {
                  @Override
                  public Tuple2<Integer, Integer> select(
                      Map<String, Tuple2<Integer, Integer>> pattern) throws Exception {
                    return pattern.get("start");
                  }
                });

    matches.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);

    expected = "(0,1)";

    env.execute();
  }
  /**
   * Runs the streaming WordCount whose tokenizer is a Storm bolt wrapped via {@code BoltWrapper}:
   * lines are split into (word, 1) pairs, grouped by word and the counts summed. Writes results to
   * a file or to stdout depending on the parsed parameters.
   *
   * @param args command line arguments; see {@code parseParameters}
   * @throws Exception if the program execution fails
   */
  public static void main(final String[] args) throws Exception {

    if (!parseParameters(args)) {
      return;
    }

    // set up the execution environment
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // get input data
    final DataStream<String> text = getTextDataStream(env);

    final DataStream<Tuple2<String, Integer>> counts =
        text
            // split up the lines in pairs (2-tuples) containing: (word,1)
            // this is done by a bolt that is wrapped accordingly
            .transform(
                "BoltTokenizer",
                TypeExtractor.getForObject(new Tuple2<String, Integer>("", 0)),
                new BoltWrapper<String, Tuple2<String, Integer>>(new BoltTokenizer()))
            // group by the tuple field "0" and sum up tuple field "1"
            .keyBy(0)
            .sum(1);

    // emit result
    if (fileOutput) {
      counts.writeAsText(outputPath);
    } else {
      counts.print();
    }

    // execute program
    env.execute("Streaming WordCount with bolt tokenizer");
  }
  /** This tests {@link StringWriter} with non-rolling output. */
  @Test
  public void testNonRollingStringWriter() throws Exception {
    final int NUM_ELEMENTS = 20;
    final int PARALLELISM = 2;
    final String outPath = hdfsURI + "/string-non-rolling-out";
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(PARALLELISM);

    // Broadcast the elements to both subtasks; the filter then splits them odd/even per subtask.
    DataStream<Tuple2<Integer, String>> source =
        env.addSource(new TestSourceFunction(NUM_ELEMENTS)).broadcast().filter(new OddEvenFilter());

    RollingSink<String> sink =
        new RollingSink<String>(outPath)
            .setBucketer(new NonRollingBucketer())
            .setPartPrefix("part")
            .setPendingPrefix("")
            .setPendingSuffix("");

    // Write only the String payload of each tuple to the sink.
    source
        .map(
            new MapFunction<Tuple2<Integer, String>, String>() {
              private static final long serialVersionUID = 1L;

              @Override
              public String map(Tuple2<Integer, String> value) throws Exception {
                return value.f1;
              }
            })
        .addSink(sink);

    env.execute("RollingSink String Write Test");

    // Part file 0 should hold the even-numbered messages, part file 1 the odd-numbered ones.
    verifyPartFile(outPath + "/part-0-0", 0, NUM_ELEMENTS);
    verifyPartFile(outPath + "/part-1-0", 1, NUM_ELEMENTS);
  }

  /**
   * Asserts that the part file at {@code path} contains the lines {@code "message #first"},
   * {@code "message #(first + 2)"}, ... up to (exclusive) {@code numElements}.
   */
  private void verifyPartFile(String path, int first, int numElements) throws Exception {
    // try-with-resources closes the reader (and the wrapped HDFS stream) even when an assertion
    // fails; the original code leaked both on mismatch and never closed the BufferedReader.
    try (FSDataInputStream inStream = dfs.open(new Path(path));
        BufferedReader br = new BufferedReader(new InputStreamReader(inStream))) {
      for (int i = first; i < numElements; i += 2) {
        String line = br.readLine();
        Assert.assertEquals("message #" + i, line);
      }
    }
  }
Example #6
0
  /**
   * Creates a Flink topology from a Storm {@code TopologyBuilder} and immediately translates it
   * into a Flink streaming program on a freshly obtained execution environment.
   *
   * @param builder the Storm topology builder to translate
   */
  private FlinkTopology(TopologyBuilder builder) {
    this.builder = builder;
    this.stormTopology = builder.createTopology();
    // extract the spouts and bolts (presumably via reflection on the builder's private
    // "_spouts"/"_bolts" fields — see getPrivateField)
    this.spouts = getPrivateField("_spouts");
    this.bolts = getPrivateField("_bolts");

    this.env = StreamExecutionEnvironment.getExecutionEnvironment();

    // Kick off the translation immediately
    translateTopology();
  }
  @Override
  protected void testProgram() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // Word count over the static test text: tokenize, group by word, sum the counts.
    DataStream<Tuple2<String, Integer>> wordCounts =
        env.fromElements(WordCountData.TEXT)
            .flatMap(new CsvOutputFormatITCase.Tokenizer())
            .keyBy(0)
            .sum(1);

    wordCounts.writeAsText(resultPath);

    env.execute("WriteAsTextTest");
  }
Example #8
0
  /**
   * Verifies that each explicit channel-selection call on a {@link DataStream} (broadcast,
   * shuffle, forward, rebalance, global) installs the matching {@link StreamPartitioner} on the
   * stream edge leading to a downstream sink.
   */
  @Test
  public void testChannelSelectors() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStreamSource<Long> src = env.generateSequence(0, 0);

    // The original repeated the sink/edge-lookup boilerplate five times; it is factored into
    // sinkPartitioner below.
    assertTrue(sinkPartitioner(env, src, src.broadcast()) instanceof BroadcastPartitioner);
    assertTrue(sinkPartitioner(env, src, src.shuffle()) instanceof ShufflePartitioner);
    assertTrue(sinkPartitioner(env, src, src.forward()) instanceof ForwardPartitioner);
    assertTrue(sinkPartitioner(env, src, src.rebalance()) instanceof RebalancePartitioner);
    assertTrue(sinkPartitioner(env, src, src.global()) instanceof GlobalPartitioner);
  }

  /**
   * Attaches a print sink to {@code stream} and returns the partitioner of the stream edge from
   * {@code src} to that sink.
   */
  private static StreamPartitioner<?> sinkPartitioner(
      StreamExecutionEnvironment env, DataStreamSource<Long> src, DataStream<Long> stream) {
    DataStreamSink<Long> sink = stream.print();
    return env.getStreamGraph()
        .getStreamEdges(src.getId(), sink.getTransformation().getId())
        .get(0)
        .getPartitioner();
  }
Example #9
0
  /**
   * Entry point of the streaming Fibonacci iteration example: input integer pairs seed an
   * iterative stream that keeps producing the next Fibonacci number until the output selector
   * routes a tuple to the 'output' channel.
   *
   * @param args command line arguments; see {@code parseParameters}
   * @throws Exception if the program execution fails
   */
  public static void main(String[] args) throws Exception {

    if (!parseParameters(args)) {
      return;
    }

    // set up input for the stream of integer pairs

    // obtain execution environment and set setBufferTimeout to 1 to enable
    // continuous flushing of the output buffers (lowest latency)
    StreamExecutionEnvironment env =
        StreamExecutionEnvironment.getExecutionEnvironment().setBufferTimeout(1);

    // create input stream of integer pairs, either from a file or from a random source
    DataStream<Tuple2<Integer, Integer>> inputStream;
    if (fileInput) {
      inputStream = env.readTextFile(inputPath).map(new FibonacciInputMap());
    } else {
      inputStream = env.addSource(new RandomFibonacciSource());
    }

    // create an iterative data stream from the input with 5 second timeout
    IterativeStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> it =
        inputStream.map(new InputMap()).iterate(5000);

    // apply the step function to get the next Fibonacci number
    // increment the counter and split the output with the output selector
    SplitStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> step =
        it.map(new Step()).split(new MySelector());

    // close the iteration by selecting the tuples that were directed to the
    // 'iterate' channel in the output selector
    it.closeWith(step.select("iterate"));

    // to produce the final output select the tuples directed to the
    // 'output' channel then get the input pairs that have the greatest iteration counter
    // on a 1 second sliding window
    DataStream<Tuple2<Tuple2<Integer, Integer>, Integer>> numbers =
        step.select("output").map(new OutputMap());

    // emit results
    if (fileOutput) {
      numbers.writeAsText(outputPath, 1);
    } else {
      numbers.print();
    }

    // execute the program
    env.execute("Streaming Iteration Example");
  }
Example #10
0
  /**
   * Checks that type information is propagated correctly through a chain of map, windowAll/apply
   * and windowAll/fold transformations.
   */
  @Test
  public void testTypeInfo() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // Source type comes straight from the generated sequence.
    DataStream<Long> src1 = env.generateSequence(0, 0);
    assertEquals(TypeExtractor.getForClass(Long.class), src1.getType());

    DataStream<Tuple2<Integer, String>> map =
        src1.map(
            new MapFunction<Long, Tuple2<Integer, String>>() {
              @Override
              public Tuple2<Integer, String> map(Long value) throws Exception {
                return null;
              }
            });

    // Map output type must match the declared Tuple2<Integer, String>.
    assertEquals(TypeExtractor.getForObject(new Tuple2<>(0, "")), map.getType());

    DataStream<String> window =
        map.windowAll(GlobalWindows.create())
            .trigger(PurgingTrigger.of(CountTrigger.of(5)))
            .apply(
                new AllWindowFunction<Tuple2<Integer, String>, String, GlobalWindow>() {
                  @Override
                  public void apply(
                      GlobalWindow window,
                      Iterable<Tuple2<Integer, String>> values,
                      Collector<String> out)
                      throws Exception {}
                });

    // Window apply declares String output.
    assertEquals(TypeExtractor.getForClass(String.class), window.getType());

    // Fold result type is derived from the CustomPOJO initial accumulator.
    DataStream<CustomPOJO> flatten =
        window
            .windowAll(GlobalWindows.create())
            .trigger(PurgingTrigger.of(CountTrigger.of(5)))
            .fold(
                new CustomPOJO(),
                new FoldFunction<String, CustomPOJO>() {
                  private static final long serialVersionUID = 1L;

                  @Override
                  public CustomPOJO fold(CustomPOJO accumulator, String value) throws Exception {
                    return null;
                  }
                });

    assertEquals(TypeExtractor.getForClass(CustomPOJO.class), flatten.getType());
  }
Example #11
0
  /**
   * Checks that type information is propagated correctly through a chain of map, window/mapWindow
   * and foldWindow/flatten transformations (legacy windowing API).
   */
  @Test
  public void testTypeInfo() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // Source type comes straight from the generated sequence.
    DataStream<Long> src1 = env.generateSequence(0, 0);
    assertEquals(TypeExtractor.getForClass(Long.class), src1.getType());

    DataStream<Tuple2<Integer, String>> map =
        src1.map(
            new MapFunction<Long, Tuple2<Integer, String>>() {
              @Override
              public Tuple2<Integer, String> map(Long value) throws Exception {
                return null;
              }
            });

    // Map output type must match the declared Tuple2<Integer, String>.
    assertEquals(TypeExtractor.getForObject(new Tuple2<Integer, String>(0, "")), map.getType());

    WindowedDataStream<String> window =
        map.window(Count.of(5))
            .mapWindow(
                new WindowMapFunction<Tuple2<Integer, String>, String>() {
                  @Override
                  public void mapWindow(
                      Iterable<Tuple2<Integer, String>> values, Collector<String> out)
                      throws Exception {}
                });

    // mapWindow declares String output.
    assertEquals(TypeExtractor.getForClass(String.class), window.getType());

    // foldWindow result type is derived from the CustomPOJO initial accumulator.
    DataStream<CustomPOJO> flatten =
        window
            .foldWindow(
                new CustomPOJO(),
                new FoldFunction<String, CustomPOJO>() {
                  @Override
                  public CustomPOJO fold(CustomPOJO accumulator, String value) throws Exception {
                    return null;
                  }
                })
            .flatten();

    assertEquals(TypeExtractor.getForClass(CustomPOJO.class), flatten.getType());
  }
Example #12
0
  /**
   * Entry point of the car top-speed windowing example: reads 4-field car tuples, windows them
   * per car (field 0) on the timestamp with a delta-based trigger, and emits the tuple with the
   * maximum value of field 1 per window.
   *
   * @param args command line arguments; see {@code parseParameters}
   * @throws Exception if the program execution fails
   */
  public static void main(String[] args) throws Exception {

    if (!parseParameters(args)) {
      return;
    }

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    @SuppressWarnings({"rawtypes", "serial"})
    DataStream<Tuple4<Integer, Integer, Double, Long>> carData;
    if (fileInput) {
      carData = env.readTextFile(inputPath).map(new ParseCarData());
    } else {
      carData = env.addSource(CarSource.create(numOfCars));
    }
    DataStream<Tuple4<Integer, Integer, Double, Long>> topSpeeds =
        carData
            .groupBy(0)
            .window(Time.of(evictionSec * 1000, new CarTimestamp()))
            .every(
                Delta.of(
                    triggerMeters,
                    new DeltaFunction<Tuple4<Integer, Integer, Double, Long>>() {
                      private static final long serialVersionUID = 1L;

                      @Override
                      public double getDelta(
                          Tuple4<Integer, Integer, Double, Long> oldDataPoint,
                          Tuple4<Integer, Integer, Double, Long> newDataPoint) {
                        return newDataPoint.f2 - oldDataPoint.f2;
                      }
                    },
                    // Uppercase 'L' suffix: the original '0l' is easily misread as the digit 01.
                    new Tuple4<Integer, Integer, Double, Long>(0, 0, 0d, 0L)))
            .local()
            .maxBy(1)
            .flatten();
    if (fileOutput) {
      topSpeeds.writeAsText(outputPath);
    } else {
      topSpeeds.print();
    }

    env.execute("CarTopSpeedWindowingExample");
  }
Example #13
0
  /**
   * Assembles a job with (partitioned or non-partitioned) operator state and returns its
   * {@link JobGraph}. Periodic checkpointing is configured with an interval of
   * {@code Long.MAX_VALUE}, and restarts are disabled.
   */
  private static JobGraph createJobGraphWithOperatorState(
      int parallelism, int maxParallelism, boolean partitionedOperatorState) {

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);
    env.getConfig().setMaxParallelism(maxParallelism);
    env.enableCheckpointing(Long.MAX_VALUE);
    env.setRestartStrategy(RestartStrategies.noRestart());

    // Reset the shared latch the sources count down once they have started working.
    StateSourceBase.workStartedLatch = new CountDownLatch(1);

    // Pick the source flavour under test; its output is irrelevant and simply discarded.
    DataStream<Integer> stateStream;
    if (partitionedOperatorState) {
      stateStream = env.addSource(new PartitionedStateSource());
    } else {
      stateStream = env.addSource(new NonPartitionedStateSource());
    }

    stateStream.addSink(new DiscardingSink<Integer>());

    return env.getStreamGraph().getJobGraph();
  }
Example #14
0
  /**
   * Verify that the user-specified state backend is used even if checkpointing is disabled.
   *
   * <p>The test passes only when {@code see.execute()} fails with a {@code SuccessException}
   * nested in the {@link JobExecutionException} cause chain; any other outcome fails the test.
   *
   * @throws Exception if the job fails for any reason other than the expected SuccessException
   */
  @Test
  public void testStateBackendWithoutCheckpointing() throws Exception {

    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(1);

    see.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    see.setStateBackend(new FailingStateBackend());

    see.fromElements(new Tuple2<>("Hello", 1))
        .keyBy(0)
        .map(
            new RichMapFunction<Tuple2<String, Integer>, String>() {
              private static final long serialVersionUID = 1L;

              @Override
              public void open(Configuration parameters) throws Exception {
                super.open(parameters);
                // Touching keyed state forces the configured state backend to be initialized.
                getRuntimeContext().getKeyValueState("test", String.class, "");
              }

              @Override
              public String map(Tuple2<String, Integer> value) throws Exception {
                return value.f0;
              }
            })
        .print();

    try {
      see.execute();
      // A message on fail() explains the outcome; the original bare fail() did not.
      fail("Job should have failed because the FailingStateBackend was used");
    } catch (JobExecutionException e) {
      Throwable t = e.getCause();
      // Anything other than a nested SuccessException is a genuine failure; rethrow it.
      if (t == null || !(t.getCause() instanceof SuccessException)) {
        throw e;
      }
    }
  }
  /**
   * Starts a streaming job that consumes the "demo" Kafka topic with hard-coded connection
   * settings and prints every message.
   *
   * @param args command line arguments (unused; the commented-out code shows the intended
   *     ParameterTool-based configuration)
   * @throws Exception if the program execution fails
   */
  public static void main(String[] args) throws Exception {
    // create execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // parse user parameters
    //		ParameterTool parameterTool = ParameterTool.fromArgs(args);

    //		DataStream<String> messageStream = env.addSource(new
    // FlinkKafkaConsumer(parameterTool.getRequired("topic"), new SimpleStringSchema(),
    // parameterTool.getProperties()));

    // Hard-coded Kafka/ZooKeeper connection settings.
    Properties properties = new Properties();
    properties.setProperty("bootstrap.servers", "node2:9092");
    properties.setProperty("zookeeper.connect", "node2:2181");
    properties.setProperty("group.id", "1");
    DataStream<String> messageStream =
        env.addSource(
            new FlinkKafkaConsumer082<>("demo", new SimpleStringSchema(), properties)); // print();

    messageStream.print();
    // NOTE(review): this prints the DataStream object's toString at graph-construction time, not
    // the stream's elements — likely a debugging leftover.
    System.out.print(messageStream + " Hello\n");

    // print() will write the contents of the stream to the TaskManager's standard out stream
    // the rebalance call is causing a repartitioning of the data so that all machines
    // see the messages (for example in cases when "num kafka partitions" < "num flink operators"
    //		messageStream.rebalance().map(new MapFunction<String, String>() {
    //			private static final long serialVersionUID = -6867736771747690202L;

    //			@Override
    //			public String map(String value) throws Exception {
    //				return "Kafka and Flink says: " + value;
    //			}
    //		}).print();

    env.execute("kafka consumer");
  }
Example #16
0
  /**
   * Tests that {@link DataStream#keyBy} and {@link DataStream#partitionByHash} result in different
   * and correct topologies. Does the same for the {@link ConnectedStreams}.
   *
   * <p>For every variant of specifying keys (positions, field expressions, key selectors), the
   * resulting stream edge must be partitioned, and only the keyBy variants may produce keyed
   * streams.
   */
  @Test
  @SuppressWarnings("unchecked")
  public void testPartitioning() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Tuple2<Long, Long>> src1 = env.fromElements(new Tuple2<>(0L, 0L));
    DataStream<Tuple2<Long, Long>> src2 = env.fromElements(new Tuple2<>(0L, 0L));
    ConnectedStreams<Tuple2<Long, Long>, Tuple2<Long, Long>> connected = src1.connect(src2);

    // Testing DataStream grouping: keyBy via position(s), field expression and key selector.
    DataStream<Tuple2<Long, Long>> group1 = src1.keyBy(0);
    DataStream<Tuple2<Long, Long>> group2 = src1.keyBy(1, 0);
    DataStream<Tuple2<Long, Long>> group3 = src1.keyBy("f0");
    DataStream<Tuple2<Long, Long>> group4 = src1.keyBy(new FirstSelector());

    int id1 = createDownStreamId(group1);
    int id2 = createDownStreamId(group2);
    int id3 = createDownStreamId(group3);
    int id4 = createDownStreamId(group4);

    // Every keyBy variant must install a partitioned edge ...
    assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), id1)));
    assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), id2)));
    assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), id3)));
    assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), id4)));

    // ... and produce a keyed stream.
    assertTrue(isKeyed(group1));
    assertTrue(isKeyed(group2));
    assertTrue(isKeyed(group3));
    assertTrue(isKeyed(group4));

    // Testing DataStream partitioning: partitionByHash via the same key-specification variants.
    DataStream<Tuple2<Long, Long>> partition1 = src1.partitionByHash(0);
    DataStream<Tuple2<Long, Long>> partition2 = src1.partitionByHash(1, 0);
    DataStream<Tuple2<Long, Long>> partition3 = src1.partitionByHash("f0");
    DataStream<Tuple2<Long, Long>> partition4 = src1.partitionByHash(new FirstSelector());

    int pid1 = createDownStreamId(partition1);
    int pid2 = createDownStreamId(partition2);
    int pid3 = createDownStreamId(partition3);
    int pid4 = createDownStreamId(partition4);

    assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), pid1)));
    assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), pid2)));
    assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), pid3)));
    assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), pid4)));

    // Unlike keyBy, partitionByHash must NOT produce keyed streams.
    assertFalse(isKeyed(partition1));
    assertFalse(isKeyed(partition3));
    assertFalse(isKeyed(partition2));
    assertFalse(isKeyed(partition4));

    // Testing DataStream custom partitioning
    Partitioner<Long> longPartitioner =
        new Partitioner<Long>() {
          @Override
          public int partition(Long key, int numPartitions) {
            return 100;
          }
        };

    DataStream<Tuple2<Long, Long>> customPartition1 = src1.partitionCustom(longPartitioner, 0);
    DataStream<Tuple2<Long, Long>> customPartition3 = src1.partitionCustom(longPartitioner, "f0");
    DataStream<Tuple2<Long, Long>> customPartition4 =
        src1.partitionCustom(longPartitioner, new FirstSelector());

    int cid1 = createDownStreamId(customPartition1);
    int cid2 = createDownStreamId(customPartition3);
    int cid3 = createDownStreamId(customPartition4);

    assertTrue(isCustomPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), cid1)));
    assertTrue(isCustomPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), cid2)));
    assertTrue(isCustomPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), cid3)));

    assertFalse(isKeyed(customPartition1));
    assertFalse(isKeyed(customPartition3));
    assertFalse(isKeyed(customPartition4));

    // Testing ConnectedStreams grouping: both inputs keyed, edge from each source partitioned.
    ConnectedStreams<Tuple2<Long, Long>, Tuple2<Long, Long>> connectedGroup1 =
        connected.keyBy(0, 0);
    Integer downStreamId1 = createDownStreamId(connectedGroup1);

    ConnectedStreams<Tuple2<Long, Long>, Tuple2<Long, Long>> connectedGroup2 =
        connected.keyBy(new int[] {0}, new int[] {0});
    Integer downStreamId2 = createDownStreamId(connectedGroup2);

    ConnectedStreams<Tuple2<Long, Long>, Tuple2<Long, Long>> connectedGroup3 =
        connected.keyBy("f0", "f0");
    Integer downStreamId3 = createDownStreamId(connectedGroup3);

    ConnectedStreams<Tuple2<Long, Long>, Tuple2<Long, Long>> connectedGroup4 =
        connected.keyBy(new String[] {"f0"}, new String[] {"f0"});
    Integer downStreamId4 = createDownStreamId(connectedGroup4);

    ConnectedStreams<Tuple2<Long, Long>, Tuple2<Long, Long>> connectedGroup5 =
        connected.keyBy(new FirstSelector(), new FirstSelector());
    Integer downStreamId5 = createDownStreamId(connectedGroup5);

    assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), downStreamId1)));
    assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src2.getId(), downStreamId1)));

    assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), downStreamId2)));
    assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src2.getId(), downStreamId2)));

    assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), downStreamId3)));
    assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src2.getId(), downStreamId3)));

    assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), downStreamId4)));
    assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src2.getId(), downStreamId4)));

    assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), downStreamId5)));
    assertTrue(isPartitioned(env.getStreamGraph().getStreamEdge(src2.getId(), downStreamId5)));

    assertTrue(isKeyed(connectedGroup1));
    assertTrue(isKeyed(connectedGroup2));
    assertTrue(isKeyed(connectedGroup3));
    assertTrue(isKeyed(connectedGroup4));
    assertTrue(isKeyed(connectedGroup5));

    // Testing ConnectedStreams partitioning: edges partitioned, but streams not keyed.
    ConnectedStreams<Tuple2<Long, Long>, Tuple2<Long, Long>> connectedPartition1 =
        connected.partitionByHash(0, 0);
    Integer connectDownStreamId1 = createDownStreamId(connectedPartition1);

    ConnectedStreams<Tuple2<Long, Long>, Tuple2<Long, Long>> connectedPartition2 =
        connected.partitionByHash(new int[] {0}, new int[] {0});
    Integer connectDownStreamId2 = createDownStreamId(connectedPartition2);

    ConnectedStreams<Tuple2<Long, Long>, Tuple2<Long, Long>> connectedPartition3 =
        connected.partitionByHash("f0", "f0");
    Integer connectDownStreamId3 = createDownStreamId(connectedPartition3);

    ConnectedStreams<Tuple2<Long, Long>, Tuple2<Long, Long>> connectedPartition4 =
        connected.partitionByHash(new String[] {"f0"}, new String[] {"f0"});
    Integer connectDownStreamId4 = createDownStreamId(connectedPartition4);

    ConnectedStreams<Tuple2<Long, Long>, Tuple2<Long, Long>> connectedPartition5 =
        connected.partitionByHash(new FirstSelector(), new FirstSelector());
    Integer connectDownStreamId5 = createDownStreamId(connectedPartition5);

    assertTrue(
        isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), connectDownStreamId1)));
    assertTrue(
        isPartitioned(env.getStreamGraph().getStreamEdge(src2.getId(), connectDownStreamId1)));

    assertTrue(
        isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), connectDownStreamId2)));
    assertTrue(
        isPartitioned(env.getStreamGraph().getStreamEdge(src2.getId(), connectDownStreamId2)));

    assertTrue(
        isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), connectDownStreamId3)));
    assertTrue(
        isPartitioned(env.getStreamGraph().getStreamEdge(src2.getId(), connectDownStreamId3)));

    assertTrue(
        isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), connectDownStreamId4)));
    assertTrue(
        isPartitioned(env.getStreamGraph().getStreamEdge(src2.getId(), connectDownStreamId4)));

    assertTrue(
        isPartitioned(env.getStreamGraph().getStreamEdge(src1.getId(), connectDownStreamId5)));
    assertTrue(
        isPartitioned(env.getStreamGraph().getStreamEdge(src2.getId(), connectDownStreamId5)));

    assertFalse(isKeyed(connectedPartition1));
    assertFalse(isKeyed(connectedPartition2));
    assertFalse(isKeyed(connectedPartition3));
    assertFalse(isKeyed(connectedPartition4));
    assertFalse(isKeyed(connectedPartition5));
  }
Example #17
0
  /**
   * Checks that a certain event sequence is recognized.
   *
   * <p>The pattern "start" followedBy "middle" (restricted to {@code SubEvent}) followedBy "end"
   * is applied to a fixed input; the matching event ids are written out and expected to be
   * {@code "2,6,8"}.
   *
   * @throws Exception if the program execution fails
   */
  @Test
  public void testSimplePatternCEP() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // Fixed input mixing plain Events and SubEvents; names drive the pattern matching.
    DataStream<Event> input =
        env.fromElements(
            new Event(1, "barfoo", 1.0),
            new Event(2, "start", 2.0),
            new Event(3, "foobar", 3.0),
            new SubEvent(4, "foo", 4.0, 1.0),
            new Event(5, "middle", 5.0),
            new SubEvent(6, "middle", 6.0, 2.0),
            new SubEvent(7, "bar", 3.0, 3.0),
            new Event(42, "42", 42.0),
            new Event(8, "end", 1.0));

    // Three-state pattern; the "middle" state additionally requires the SubEvent subtype.
    Pattern<Event, ?> pattern =
        Pattern.<Event>begin("start")
            .where(
                new FilterFunction<Event>() {

                  @Override
                  public boolean filter(Event value) throws Exception {
                    return value.getName().equals("start");
                  }
                })
            .followedBy("middle")
            .subtype(SubEvent.class)
            .where(
                new FilterFunction<SubEvent>() {

                  @Override
                  public boolean filter(SubEvent value) throws Exception {
                    return value.getName().equals("middle");
                  }
                })
            .followedBy("end")
            .where(
                new FilterFunction<Event>() {

                  @Override
                  public boolean filter(Event value) throws Exception {
                    return value.getName().equals("end");
                  }
                });

    // Render each match as "startId,middleId,endId".
    DataStream<String> result =
        CEP.pattern(input, pattern)
            .select(
                new PatternSelectFunction<Event, String>() {

                  @Override
                  public String select(Map<String, Event> pattern) {
                    StringBuilder builder = new StringBuilder();

                    builder
                        .append(pattern.get("start").getId())
                        .append(",")
                        .append(pattern.get("middle").getId())
                        .append(",")
                        .append(pattern.get("end").getId());

                    return builder.toString();
                  }
                });

    result.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);

    // expected sequence of matching event ids
    expected = "2,6,8";

    env.execute();
  }
Example #18
0
  @Test
  public void testSimpleKeyedPatternEventTime() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(2);

    // (Event, timestamp) pairs; timestamps are deliberately out of order so the
    // punctuated watermark assigner below actually has to reorder by event time.
    DataStream<Event> input =
        env.fromElements(
                Tuple2.of(new Event(1, "start", 1.0), 5L),
                Tuple2.of(new Event(1, "middle", 2.0), 1L),
                Tuple2.of(new Event(2, "middle", 2.0), 4L),
                Tuple2.of(new Event(2, "start", 2.0), 3L),
                Tuple2.of(new Event(1, "end", 3.0), 3L),
                Tuple2.of(new Event(3, "start", 4.1), 5L),
                Tuple2.of(new Event(1, "end", 4.0), 10L),
                Tuple2.of(new Event(2, "end", 2.0), 8L),
                Tuple2.of(new Event(1, "middle", 5.0), 7L),
                Tuple2.of(new Event(3, "middle", 6.0), 9L),
                Tuple2.of(new Event(3, "end", 7.0), 7L),
                // last element for high final watermark
                Tuple2.of(new Event(3, "end", 7.0), 100L))
            .assignTimestampsAndWatermarks(
                new AssignerWithPunctuatedWatermarks<Tuple2<Event, Long>>() {

                  @Override
                  public long extractTimestamp(Tuple2<Event, Long> element, long currentTimestamp) {
                    return element.f1;
                  }

                  @Override
                  public Watermark checkAndGetNextWatermark(
                      Tuple2<Event, Long> lastElement, long extractedTimestamp) {
                    // Watermark trails the latest timestamp by 5 to tolerate the
                    // out-of-order elements above.
                    return new Watermark(lastElement.f1 - 5);
                  }
                })
            .map(
                new MapFunction<Tuple2<Event, Long>, Event>() {

                  @Override
                  public Event map(Tuple2<Event, Long> value) throws Exception {
                    // Drop the explicit timestamp; it now lives in the stream records.
                    return value.f0;
                  }
                })
            .keyBy(
                new KeySelector<Event, Integer>() {

                  @Override
                  public Integer getKey(Event value) throws Exception {
                    return value.getId();
                  }
                });

    // start -> middle -> end, matched per key (= event id).
    Pattern<Event, ?> pattern =
        Pattern.<Event>begin("start")
            .where(eventNameIs("start"))
            .followedBy("middle")
            .where(eventNameIs("middle"))
            .followedBy("end")
            .where(eventNameIs("end"));

    DataStream<String> result =
        CEP.pattern(input, pattern)
            .select(
                new PatternSelectFunction<Event, String>() {

                  @Override
                  public String select(Map<String, Event> pattern) {
                    // All three matched events share the key, so each match
                    // renders as "k,k,k".
                    return pattern.get("start").getId()
                        + ","
                        + pattern.get("middle").getId()
                        + ","
                        + pattern.get("end").getId();
                  }
                });

    result.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);

    // the expected sequences of matching event ids
    expected = "1,1,1\n2,2,2";

    env.execute();
  }

  /** Returns a filter that accepts exactly the events whose name equals {@code name}. */
  private static FilterFunction<Event> eventNameIs(final String name) {
    return new FilterFunction<Event>() {

      @Override
      public boolean filter(Event value) throws Exception {
        return value.getName().equals(name);
      }
    };
  }
  /** This tests {@link SequenceFileWriter} with non-rolling output but with compression. */
  @Test
  public void testNonRollingSequenceFileWithCompressionWriter() throws Exception {
    final int NUM_ELEMENTS = 20;
    final int PARALLELISM = 2;
    final String outPath = hdfsURI + "/seq-non-rolling-out";
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(PARALLELISM);

    // Broadcast the source and filter by parity so that subtask 0 deterministically
    // gets the even elements and subtask 1 the odd ones -> two predictable part files.
    DataStream<Tuple2<Integer, String>> source =
        env.addSource(new TestSourceFunction(NUM_ELEMENTS)).broadcast().filter(new OddEvenFilter());

    DataStream<Tuple2<IntWritable, Text>> mapped =
        source.map(
            new MapFunction<Tuple2<Integer, String>, Tuple2<IntWritable, Text>>() {
              private static final long serialVersionUID = 1L;

              @Override
              public Tuple2<IntWritable, Text> map(Tuple2<Integer, String> value) throws Exception {
                return Tuple2.of(new IntWritable(value.f0), new Text(value.f1));
              }
            });

    RollingSink<Tuple2<IntWritable, Text>> sink =
        new RollingSink<Tuple2<IntWritable, Text>>(outPath)
            .setWriter(
                new SequenceFileWriter<IntWritable, Text>(
                    "Default", SequenceFile.CompressionType.BLOCK))
            .setBucketer(new NonRollingBucketer())
            .setPartPrefix("part")
            .setPendingPrefix("")
            .setPendingSuffix("");

    mapped.addSink(sink);

    env.execute("RollingSink String Write Test");

    // part-0-0 holds the even elements, part-1-0 the odd ones.
    verifySequenceFilePart(outPath + "/part-0-0", 0, NUM_ELEMENTS);
    verifySequenceFilePart(outPath + "/part-1-0", 1, NUM_ELEMENTS);
  }

  /**
   * Opens one written sequence-file part and asserts that it contains exactly the messages with
   * indices {@code first, first + 2, ...} below {@code numElements}.
   *
   * <p>Reader and stream are closed in a finally block so they do not leak when an assertion
   * fails (the original inline code left them open on failure).
   */
  private void verifySequenceFilePart(String partPath, int first, int numElements)
      throws Exception {
    FSDataInputStream inStream = dfs.open(new Path(partPath));
    SequenceFile.Reader reader =
        new SequenceFile.Reader(inStream, 1000, 0, 100000, new Configuration());
    try {
      IntWritable intWritable = new IntWritable();
      Text txt = new Text();
      for (int i = first; i < numElements; i += 2) {
        reader.next(intWritable, txt);
        Assert.assertEquals(i, intWritable.get());
        Assert.assertEquals("message #" + i, txt.toString());
      }
    } finally {
      reader.close();
      inStream.close();
    }
  }
  /**
   * This uses {@link org.apache.flink.streaming.connectors.fs.DateTimeBucketer} to produce rolling
   * files. The clock of DateTimeBucketer is set to {@link ModifyableClock} to keep the time in
   * lockstep with the processing of elements using latches.
   */
  @Test
  public void testDateTimeRollingStringWriter() throws Exception {
    final int NUM_ELEMENTS = 20;
    final int PARALLELISM = 2;
    final String outPath = hdfsURI + "/rolling-out";
    DateTimeBucketer.setClock(new ModifyableClock());
    ModifyableClock.setCurrentTime(0);

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(PARALLELISM);

    DataStream<Tuple2<Integer, String>> source =
        env.addSource(new WaitingTestSourceFunction(NUM_ELEMENTS)).broadcast();

    // the parallel flatMap is chained to the sink, so when it has seen 5 elements it can
    // fire the latch
    DataStream<String> mapped =
        source.flatMap(
            new RichFlatMapFunction<Tuple2<Integer, String>, String>() {
              private static final long serialVersionUID = 1L;

              int count = 0;

              @Override
              public void flatMap(Tuple2<Integer, String> value, Collector<String> out)
                  throws Exception {
                out.collect(value.f1);
                count++;
                if (count >= 5) {
                  if (getRuntimeContext().getIndexOfThisSubtask() == 0) {
                    latch1.trigger();
                  } else {
                    latch2.trigger();
                  }
                  count = 0;
                }
              }
            });

    RollingSink<String> sink =
        new RollingSink<String>(outPath)
            .setBucketer(new DateTimeBucketer("ss"))
            .setPartPrefix("part")
            .setPendingPrefix("")
            .setPendingSuffix("");

    mapped.addSink(sink);

    env.execute("RollingSink String Write Test");

    RemoteIterator<LocatedFileStatus> files = dfs.listFiles(new Path(outPath), true);

    // we should have 8 rolling files, 4 time intervals and parallelism of 2
    int numFiles = 0;
    while (files.hasNext()) {
      LocatedFileStatus file = files.next();
      numFiles++;
      // The "ss" bucketer names directories after the seconds value, so each
      // 5-element batch lands under 00/, 05/, 10/ or 15/.
      String path = file.getPath().toString();
      if (path.contains("rolling-out/00")) {
        verifyRollingFileContents(file, 0);
      } else if (path.contains("rolling-out/05")) {
        verifyRollingFileContents(file, 5);
      } else if (path.contains("rolling-out/10")) {
        verifyRollingFileContents(file, 10);
      } else if (path.contains("rolling-out/15")) {
        verifyRollingFileContents(file, 15);
      } else {
        Assert.fail("File " + file + " does not match any expected roll pattern.");
      }
    }

    Assert.assertEquals(8, numFiles);
  }

  /**
   * Asserts that {@code file} contains exactly the five lines {@code "message #i"} for {@code i}
   * in {@code [firstMessageIndex, firstMessageIndex + 5)}.
   *
   * <p>The reader (and through it the underlying stream) is closed in a finally block so nothing
   * leaks when an assertion fails; the original inline code also never closed the
   * {@link BufferedReader} itself.
   */
  private void verifyRollingFileContents(LocatedFileStatus file, int firstMessageIndex)
      throws Exception {
    FSDataInputStream inStream = dfs.open(file.getPath());
    BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
    try {
      for (int i = firstMessageIndex; i < firstMessageIndex + 5; i++) {
        String line = br.readLine();
        Assert.assertEquals("message #" + i, line);
      }
    } finally {
      // Closing the reader also closes the wrapped HDFS input stream.
      br.close();
    }
  }
Example #21
0
  /**
   * Tests {@link SingleOutputStreamOperator#name(String)} functionality by building a small
   * topology with named operators and checking that every name shows up in the generated
   * execution plan.
   *
   * @throws Exception on plan-generation failure
   */
  @Test
  public void testNaming() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Long> dataStream1 =
        env.generateSequence(0, 0)
            .name("testSource1")
            .map(
                new MapFunction<Long, Long>() {
                  @Override
                  public Long map(Long value) throws Exception {
                    return null;
                  }
                })
            .name("testMap");

    DataStream<Long> dataStream2 =
        env.generateSequence(0, 0)
            .name("testSource2")
            .map(
                new MapFunction<Long, Long>() {
                  @Override
                  public Long map(Long value) throws Exception {
                    return null;
                  }
                })
            .name("testMap");

    // The sink returned here is not needed afterwards; the chain is built purely for
    // its side effect of adding named nodes to the stream graph. (An unused local
    // previously held this value.)
    dataStream1
        .connect(dataStream2)
        .flatMap(
            new CoFlatMapFunction<Long, Long, Long>() {
              private static final long serialVersionUID = 1L;

              @Override
              public void flatMap1(Long value, Collector<Long> out) throws Exception {}

              @Override
              public void flatMap2(Long value, Collector<Long> out) throws Exception {}
            })
        .name("testCoFlatMap")
        .windowAll(GlobalWindows.create())
        .trigger(PurgingTrigger.of(CountTrigger.of(10)))
        .fold(
            0L,
            new FoldFunction<Long, Long>() {
              private static final long serialVersionUID = 1L;

              @Override
              public Long fold(Long accumulator, Long value) throws Exception {
                return null;
              }
            })
        .name("testWindowFold")
        .print();

    // test functionality through the operator names in the execution plan
    String plan = env.getExecutionPlan();

    assertTrue(plan.contains("testSource1"));
    assertTrue(plan.contains("testSource2"));
    // Both map operators share the name "testMap"; one containment check covers them
    // (the original asserted the identical condition twice).
    assertTrue(plan.contains("testMap"));
    assertTrue(plan.contains("testCoFlatMap"));
    assertTrue(plan.contains("testWindowFold"));
  }
Example #22
0
  @Test
  public void sinkKeyTest() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // A plain print() sink: no state partitioner, input connected via forwarding.
    DataStreamSink<Long> sink = env.generateSequence(1, 100).print();
    int plainSinkId = sink.getTransformation().getId();
    assertTrue(env.getStreamGraph().getStreamNode(plainSinkId).getStatePartitioner() == null);
    assertTrue(
        env.getStreamGraph().getStreamNode(plainSinkId).getInEdges().get(0).getPartitioner()
            instanceof ForwardPartitioner);

    KeySelector<Long, Long> key1 =
        new KeySelector<Long, Long>() {

          private static final long serialVersionUID = 1L;

          @Override
          public Long getKey(Long value) throws Exception {
            return (long) 0;
          }
        };

    DataStreamSink<Long> sink2 = env.generateSequence(1, 100).keyBy(key1).print();
    int keyedSinkId1 = sink2.getTransformation().getId();

    // After keyBy, the sink node must carry exactly the key selector that was used,
    // and its input edge must be hash-partitioned.
    assertTrue(env.getStreamGraph().getStreamNode(keyedSinkId1).getStatePartitioner() != null);
    assertEquals(key1, env.getStreamGraph().getStreamNode(keyedSinkId1).getStatePartitioner());
    assertTrue(
        env.getStreamGraph().getStreamNode(keyedSinkId1).getInEdges().get(0).getPartitioner()
            instanceof HashPartitioner);

    KeySelector<Long, Long> key2 =
        new KeySelector<Long, Long>() {

          private static final long serialVersionUID = 1L;

          @Override
          public Long getKey(Long value) throws Exception {
            return (long) 0;
          }
        };

    DataStreamSink<Long> sink3 = env.generateSequence(1, 100).keyBy(key2).print();
    int keyedSinkId2 = sink3.getTransformation().getId();

    // Same checks for an independent second keyed sink with its own selector instance.
    assertTrue(env.getStreamGraph().getStreamNode(keyedSinkId2).getStatePartitioner() != null);
    assertEquals(key2, env.getStreamGraph().getStreamNode(keyedSinkId2).getStatePartitioner());
    assertTrue(
        env.getStreamGraph().getStreamNode(keyedSinkId2).getInEdges().get(0).getPartitioner()
            instanceof HashPartitioner);
  }
Example #23
0
  /**
   * Builds a small topology using map / flatMap / filter / union / split-select / connect-coMap
   * and verifies, through the {@link StreamGraph}, that each node carries the exact function
   * instance that was registered and that the expected edges exist.
   *
   * <p>NOTE(review): the assertions rely on node IDs being assigned in creation order, so the
   * order of the transformations below must not be changed.
   */
  @Test
  public void operatorTest() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStreamSource<Long> src = env.generateSequence(0, 0);

    MapFunction<Long, Integer> mapFunction =
        new MapFunction<Long, Integer>() {
          @Override
          public Integer map(Long value) throws Exception {
            return null;
          }
        };
    DataStream<Integer> map = src.map(mapFunction);
    map.addSink(new NoOpSink<Integer>());
    // The graph node must hold the very same function instance passed to map().
    assertEquals(mapFunction, getFunctionForDataStream(map));

    FlatMapFunction<Long, Integer> flatMapFunction =
        new FlatMapFunction<Long, Integer>() {
          private static final long serialVersionUID = 1L;

          @Override
          public void flatMap(Long value, Collector<Integer> out) throws Exception {}
        };
    DataStream<Integer> flatMap = src.flatMap(flatMapFunction);
    flatMap.addSink(new NoOpSink<Integer>());
    assertEquals(flatMapFunction, getFunctionForDataStream(flatMap));

    FilterFunction<Integer> filterFunction =
        new FilterFunction<Integer>() {
          @Override
          public boolean filter(Integer value) throws Exception {
            return false;
          }
        };

    // A filter applied to the union of both branches; the filter node must be
    // reachable from both the map and the flatMap node.
    DataStream<Integer> unionFilter = map.union(flatMap).filter(filterFunction);

    unionFilter.addSink(new NoOpSink<Integer>());

    assertEquals(filterFunction, getFunctionForDataStream(unionFilter));

    // getStreamEdge throws a RuntimeException when the edge does not exist, so the
    // absence of an exception is the actual assertion here.
    try {
      env.getStreamGraph().getStreamEdge(map.getId(), unionFilter.getId());
    } catch (RuntimeException e) {
      fail(e.getMessage());
    }

    try {
      env.getStreamGraph().getStreamEdge(flatMap.getId(), unionFilter.getId());
    } catch (RuntimeException e) {
      fail(e.getMessage());
    }

    OutputSelector<Integer> outputSelector =
        new OutputSelector<Integer>() {
          @Override
          public Iterable<String> select(Integer value) {
            return null;
          }
        };

    // Splitting must register exactly the one output selector on the source node.
    SplitStream<Integer> split = unionFilter.split(outputSelector);
    split.select("dummy").addSink(new NoOpSink<Integer>());
    List<OutputSelector<?>> outputSelectors =
        env.getStreamGraph().getStreamNode(unionFilter.getId()).getOutputSelectors();
    assertEquals(1, outputSelectors.size());
    assertEquals(outputSelector, outputSelectors.get(0));

    // Selecting a named side output must record the selected name on the edge.
    DataStream<Integer> select = split.select("a");
    DataStreamSink<Integer> sink = select.print();

    StreamEdge splitEdge =
        env.getStreamGraph().getStreamEdge(unionFilter.getId(), sink.getTransformation().getId());
    assertEquals("a", splitEdge.getSelectedNames().get(0));

    // connect + co-map: same function-identity and edge-existence checks as above.
    ConnectedStreams<Integer, Integer> connect = map.connect(flatMap);
    CoMapFunction<Integer, Integer, String> coMapper =
        new CoMapFunction<Integer, Integer, String>() {
          private static final long serialVersionUID = 1L;

          @Override
          public String map1(Integer value) {
            return null;
          }

          @Override
          public String map2(Integer value) {
            return null;
          }
        };
    DataStream<String> coMap = connect.map(coMapper);
    coMap.addSink(new NoOpSink<String>());
    assertEquals(coMapper, getFunctionForDataStream(coMap));

    try {
      env.getStreamGraph().getStreamEdge(map.getId(), coMap.getId());
    } catch (RuntimeException e) {
      fail(e.getMessage());
    }

    try {
      env.getStreamGraph().getStreamEdge(flatMap.getId(), coMap.getId());
    } catch (RuntimeException e) {
      fail(e.getMessage());
    }
  }
Example #24
0
  /**
   * Tests whether parallelism gets set on the stream-graph nodes: the default environment
   * parallelism at creation time, explicit per-operator overrides, and the rule that a later
   * change of the environment default does not retroactively alter already-created operators.
   */
  @Test
  public void testParallelism() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // Created BEFORE setParallelism(10), so the source keeps parallelism 1.
    DataStreamSource<Tuple2<Long, Long>> src = env.fromElements(new Tuple2<>(0L, 0L));
    env.setParallelism(10);

    SingleOutputStreamOperator<Long, ?> map =
        src.map(
                new MapFunction<Tuple2<Long, Long>, Long>() {
                  @Override
                  public Long map(Tuple2<Long, Long> value) throws Exception {
                    return null;
                  }
                })
            .name("MyMap");

    // windowAll is a non-parallel operation, so the windowed operator stays at 1.
    DataStream<Long> windowed =
        map.windowAll(GlobalWindows.create())
            .trigger(PurgingTrigger.of(CountTrigger.of(10)))
            .fold(
                0L,
                new FoldFunction<Long, Long>() {
                  @Override
                  public Long fold(Long accumulator, Long value) throws Exception {
                    return null;
                  }
                });

    windowed.addSink(new NoOpSink<Long>());

    DataStreamSink<Long> sink =
        map.addSink(
            new SinkFunction<Long>() {
              private static final long serialVersionUID = 1L;

              @Override
              public void invoke(Long value) throws Exception {}
            });

    assertEquals(1, env.getStreamGraph().getStreamNode(src.getId()).getParallelism());
    assertEquals(10, env.getStreamGraph().getStreamNode(map.getId()).getParallelism());
    assertEquals(1, env.getStreamGraph().getStreamNode(windowed.getId()).getParallelism());
    assertEquals(
        10, env.getStreamGraph().getStreamNode(sink.getTransformation().getId()).getParallelism());

    env.setParallelism(7);

    // Some parts, such as windowing rely on the fact that previous operators have a parallelism
    // set when instantiating the Discretizer. This would break if we dynamically changed
    // the parallelism of operations when changing the setting on the Execution Environment.
    assertEquals(1, env.getStreamGraph().getStreamNode(src.getId()).getParallelism());
    assertEquals(10, env.getStreamGraph().getStreamNode(map.getId()).getParallelism());
    assertEquals(1, env.getStreamGraph().getStreamNode(windowed.getId()).getParallelism());
    assertEquals(
        10, env.getStreamGraph().getStreamNode(sink.getTransformation().getId()).getParallelism());

    // A fromElements source is inherently non-parallel, so overriding its parallelism
    // must be rejected.
    try {
      src.setParallelism(3);
      fail();
    } catch (IllegalArgumentException success) {
      // do nothing
    }

    // A generateSequence source IS parallel: it picks up the current default (7)
    // and accepts an explicit override afterwards.
    DataStreamSource<Long> parallelSource = env.generateSequence(0, 0);
    parallelSource.addSink(new NoOpSink<Long>());
    assertEquals(7, env.getStreamGraph().getStreamNode(parallelSource.getId()).getParallelism());

    parallelSource.setParallelism(3);
    assertEquals(3, env.getStreamGraph().getStreamNode(parallelSource.getId()).getParallelism());

    map.setParallelism(2);
    assertEquals(2, env.getStreamGraph().getStreamNode(map.getId()).getParallelism());

    sink.setParallelism(4);
    assertEquals(
        4, env.getStreamGraph().getStreamNode(sink.getTransformation().getId()).getParallelism());
  }
Example #25
0
  /**
   * Tests union functionality. This ensures that self-unions and unions of streams with differing
   * parallelism and differing partitioning work, by inspecting the input edges of the downstream
   * map node in the {@link StreamGraph}.
   *
   * <p>NOTE(review): the edge checks compare source IDs, which follow creation order; do not
   * reorder the stream definitions below.
   *
   * @throws Exception on graph-construction failure
   */
  @Test
  public void testUnion() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);

    DataStream<Long> input1 =
        env.generateSequence(0, 0)
            .map(
                new MapFunction<Long, Long>() {
                  @Override
                  public Long map(Long value) throws Exception {
                    return null;
                  }
                });

    // A stream unioned with itself: the consumer should see two input edges.
    DataStream<Long> selfUnion =
        input1
            .union(input1)
            .map(
                new MapFunction<Long, Long>() {
                  @Override
                  public Long map(Long value) throws Exception {
                    return null;
                  }
                });

    DataStream<Long> input6 =
        env.generateSequence(0, 0)
            .map(
                new MapFunction<Long, Long>() {
                  @Override
                  public Long map(Long value) throws Exception {
                    return null;
                  }
                });

    // Self-union where one side is broadcast: one forward edge, one broadcast edge.
    DataStream<Long> selfUnionDifferentPartition =
        input6
            .broadcast()
            .union(input6)
            .map(
                new MapFunction<Long, Long>() {
                  @Override
                  public Long map(Long value) throws Exception {
                    return null;
                  }
                });

    // Parallelism 4 matches the consumer below, so this side can use forwarding.
    DataStream<Long> input2 =
        env.generateSequence(0, 0)
            .map(
                new MapFunction<Long, Long>() {
                  @Override
                  public Long map(Long value) throws Exception {
                    return null;
                  }
                })
            .setParallelism(4);

    // Parallelism 2 differs from the consumer's 4, which forces a rebalance edge.
    DataStream<Long> input3 =
        env.generateSequence(0, 0)
            .map(
                new MapFunction<Long, Long>() {
                  @Override
                  public Long map(Long value) throws Exception {
                    return null;
                  }
                })
            .setParallelism(2);

    DataStream<Long> unionDifferingParallelism =
        input2
            .union(input3)
            .map(
                new MapFunction<Long, Long>() {
                  @Override
                  public Long map(Long value) throws Exception {
                    return null;
                  }
                })
            .setParallelism(4);

    DataStream<Long> input4 =
        env.generateSequence(0, 0)
            .map(
                new MapFunction<Long, Long>() {
                  @Override
                  public Long map(Long value) throws Exception {
                    return null;
                  }
                })
            .setParallelism(2);

    DataStream<Long> input5 =
        env.generateSequence(0, 0)
            .map(
                new MapFunction<Long, Long>() {
                  @Override
                  public Long map(Long value) throws Exception {
                    return null;
                  }
                })
            .setParallelism(4);

    // Union of an explicitly broadcast stream with a matching-parallelism stream.
    DataStream<Long> unionDifferingPartitioning =
        input4
            .broadcast()
            .union(input5)
            .map(
                new MapFunction<Long, Long>() {
                  @Override
                  public Long map(Long value) throws Exception {
                    return null;
                  }
                })
            .setParallelism(4);

    StreamGraph streamGraph = env.getStreamGraph();

    // verify self union: two incoming edges, both forwarding
    assertTrue(streamGraph.getStreamNode(selfUnion.getId()).getInEdges().size() == 2);
    for (StreamEdge edge : streamGraph.getStreamNode(selfUnion.getId()).getInEdges()) {
      assertTrue(edge.getPartitioner() instanceof ForwardPartitioner);
    }

    // verify self union with different partitioners: one forward edge and one broadcast edge
    assertTrue(
        streamGraph.getStreamNode(selfUnionDifferentPartition.getId()).getInEdges().size() == 2);
    boolean hasForward = false;
    boolean hasBroadcast = false;
    for (StreamEdge edge :
        streamGraph.getStreamNode(selfUnionDifferentPartition.getId()).getInEdges()) {
      if (edge.getPartitioner() instanceof ForwardPartitioner) {
        hasForward = true;
      }
      if (edge.getPartitioner() instanceof BroadcastPartitioner) {
        hasBroadcast = true;
      }
    }
    assertTrue(hasForward && hasBroadcast);

    // verify union of streams with differing parallelism: matching side forwards,
    // mismatching side rebalances
    assertTrue(
        streamGraph.getStreamNode(unionDifferingParallelism.getId()).getInEdges().size() == 2);
    for (StreamEdge edge :
        streamGraph.getStreamNode(unionDifferingParallelism.getId()).getInEdges()) {
      if (edge.getSourceId() == input2.getId()) {
        assertTrue(edge.getPartitioner() instanceof ForwardPartitioner);
      } else if (edge.getSourceId() == input3.getId()) {
        assertTrue(edge.getPartitioner() instanceof RebalancePartitioner);
      } else {
        fail("Wrong input edge.");
      }
    }

    // verify union of streams with differing partitionings: explicit broadcast is
    // preserved, the other side forwards
    assertTrue(
        streamGraph.getStreamNode(unionDifferingPartitioning.getId()).getInEdges().size() == 2);
    for (StreamEdge edge :
        streamGraph.getStreamNode(unionDifferingPartitioning.getId()).getInEdges()) {
      if (edge.getSourceId() == input4.getId()) {
        assertTrue(edge.getPartitioner() instanceof BroadcastPartitioner);
      } else if (edge.getSourceId() == input5.getId()) {
        assertTrue(edge.getPartitioner() instanceof ForwardPartitioner);
      } else {
        fail("Wrong input edge.");
      }
    }
  }