Example #1
  public static void main(final String[] args) throws Exception {

    if (!parseParameters(args)) {
      return;
    }

    // set up the execution environment
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // get input data
    final DataStream<String> text = getTextDataStream(env);

    final DataStream<Tuple2<String, Integer>> counts =
        text
            // split up the lines in pairs (2-tuples) containing: (word,1)
            // this is done by a bolt that is wrapped accordingly
            .transform(
                "BoltTokenizer",
                TypeExtractor.getForObject(new Tuple2<String, Integer>("", 0)),
                new BoltWrapper<String, Tuple2<String, Integer>>(new BoltTokenizer()))
            // group by the tuple field "0" and sum up tuple field "1"
            .keyBy(0)
            .sum(1);

    // emit result
    if (fileOutput) {
      counts.writeAsText(outputPath);
    } else {
      counts.print();
    }

    // execute program
    env.execute("Streaming WordCount with bolt tokenizer");
  }
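For comparison, the wrapped bolt above can be replaced by a plain Flink FlatMapFunction. This is a minimal sketch reusing the text stream from the example; the splitting logic is an illustrative assumption, not the actual BoltTokenizer implementation:

    // hypothetical plain-Flink equivalent of the wrapped BoltTokenizer
    final DataStream<Tuple2<String, Integer>> plainCounts =
        text.flatMap(
                new FlatMapFunction<String, Tuple2<String, Integer>>() {
                  @Override
                  public void flatMap(String line, Collector<Tuple2<String, Integer>> out) {
                    // emit a (word, 1) pair for every token in the line
                    for (String word : line.toLowerCase().split("\\W+")) {
                      if (!word.isEmpty()) {
                        out.collect(new Tuple2<>(word, 1));
                      }
                    }
                  }
                })
            .keyBy(0)
            .sum(1);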
Example #2
    /**
     * Completes the co-group operation with the user function that is executed for windowed groups.
     */
    public <T> DataStream<T> apply(
        CoGroupFunction<T1, T2, T> function, TypeInformation<T> resultType) {
      // clean the closure
      function = input1.getExecutionEnvironment().clean(function);

      UnionTypeInfo<T1, T2> unionType = new UnionTypeInfo<>(input1.getType(), input2.getType());
      UnionKeySelector<T1, T2, KEY> unionKeySelector =
          new UnionKeySelector<>(keySelector1, keySelector2);

      DataStream<TaggedUnion<T1, T2>> taggedInput1 =
          input1.map(new Input1Tagger<T1, T2>()).returns(unionType);
      DataStream<TaggedUnion<T1, T2>> taggedInput2 =
          input2.map(new Input2Tagger<T1, T2>()).returns(unionType);

      DataStream<TaggedUnion<T1, T2>> unionStream = taggedInput1.union(taggedInput2);

      // we explicitly create the keyed stream to manually pass the key type information in
      WindowedStream<TaggedUnion<T1, T2>, KEY, W> windowOp =
          new KeyedStream<TaggedUnion<T1, T2>, KEY>(unionStream, unionKeySelector, keyType)
              .window(windowAssigner);

      if (trigger != null) {
        windowOp.trigger(trigger);
      }
      if (evictor != null) {
        windowOp.evictor(evictor);
      }

      return windowOp.apply(new CoGroupWindowFunction<T1, T2, T, KEY, W>(function), resultType);
    }
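Example #3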
  /**
   * Note: this test fails if we don't have the synchronized block in {@link
   * org.apache.flink.streaming.runtime.tasks.SourceStreamTask.SourceOutput}
   */
  @Test
  public void testOneInputOperatorWithoutChaining() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);

    DataStream<String> source = env.addSource(new InfiniteTestSource());

    source.transform(
        "Custom Operator",
        BasicTypeInfo.STRING_TYPE_INFO,
        new TimerOperator(StreamOperator.ChainingStrategy.NEVER));

    boolean testSuccess = false;
    try {
      env.execute("Timer test");
    } catch (JobExecutionException e) {
      if (e.getCause() instanceof TimerException) {
        TimerException te = (TimerException) e.getCause();
        if (te.getCause() instanceof RuntimeException) {
          RuntimeException re = (RuntimeException) te.getCause();
          if (re.getMessage().equals("TEST SUCCESS")) {
            testSuccess = true;
          } else {
            throw e;
          }
        } else {
          throw e;
        }
      } else {
        throw e;
      }
    }
    Assert.assertTrue(testSuccess);
  }
Example #4
  private static JobGraph createJobGraphWithKeyedState(
      int parallelism,
      int maxParallelism,
      int numberKeys,
      int numberElements,
      boolean terminateAfterEmission,
      int checkpointingInterval) {

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);
    env.getConfig().setMaxParallelism(maxParallelism);
    env.enableCheckpointing(checkpointingInterval);
    env.setRestartStrategy(RestartStrategies.noRestart());

    DataStream<Integer> input =
        env.addSource(new SubtaskIndexSource(numberKeys, numberElements, terminateAfterEmission))
            .keyBy(
                new KeySelector<Integer, Integer>() {
                  private static final long serialVersionUID = -7952298871120320940L;

                  @Override
                  public Integer getKey(Integer value) throws Exception {
                    return value;
                  }
                });

    SubtaskIndexFlatMapper.workCompletedLatch = new CountDownLatch(numberKeys);

    DataStream<Tuple2<Integer, Integer>> result =
        input.flatMap(new SubtaskIndexFlatMapper(numberElements));

    result.addSink(new CollectionSink<Tuple2<Integer, Integer>>());

    return env.getStreamGraph().getJobGraph();
  }
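A possible call site for this factory; the argument values below are illustrative assumptions, not taken from the surrounding test:

    // hypothetical invocation of the factory above
    JobGraph jobGraph =
        createJobGraphWithKeyedState(
            2,     // parallelism
            8,     // maxParallelism
            100,   // numberKeys
            10,    // numberElements
            true,  // terminateAfterEmission
            1000); // checkpointingInterval in milliseconds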
Example #5
 /**
  * Applies an aggregation that gives the maximum value of every window of the data stream at the
  * given position.
  *
  * @param positionToMax The position to maximize
  * @return The transformed DataStream.
  */
 public SingleOutputStreamOperator<T, ?> max(int positionToMax) {
   return aggregate(
       new ComparableAggregator<>(
           positionToMax,
           input.getType(),
           AggregationFunction.AggregationType.MAX,
           input.getExecutionConfig()));
 }
Example #6
 /**
  * Applies an aggregation that gives the maximum element of the pojo data stream by the given
  * field expression for every window. A field expression is either the name of a public field or a
  * getter method with parentheses of the {@link DataStream}'s underlying type. A dot can be used to
  * drill down into objects, as in {@code "field1.getInnerField2()" }.
  *
  * @param field The field expression based on which the aggregation will be applied.
  * @param first If true, the first object is returned in case of field equality
  * @return The transformed DataStream.
  */
 public SingleOutputStreamOperator<T, ?> maxBy(String field, boolean first) {
   return aggregate(
       new ComparableAggregator<>(
           field,
           input.getType(),
           AggregationFunction.AggregationType.MAXBY,
           first,
           input.getExecutionConfig()));
 }
Example #7
 /**
  * Applies an aggregation that gives the minimum element of every window of the data stream by the
  * given position. If multiple elements share the minimum value, the operator returns either the
  * first or the last one, depending on the parameter setting.
  *
  * @param positionToMinBy The position to minimize
  * @param first If true, the operator returns the first element with the minimum value,
  *     otherwise it returns the last one
  * @return The transformed DataStream.
  */
 public SingleOutputStreamOperator<T, ?> minBy(int positionToMinBy, boolean first) {
   return aggregate(
       new ComparableAggregator<>(
           positionToMinBy,
           input.getType(),
           AggregationFunction.AggregationType.MINBY,
           first,
           input.getExecutionConfig()));
 }
Example #8
 /**
  * Applies an aggregation that gives the maximum value of the pojo data stream at the given
  * field expression for every window. A field expression is either the name of a public field or a
  * getter method with parentheses of the {@link DataStream DataStream's} underlying type. A dot can
  * be used to drill down into objects, as in {@code "field1.getInnerField2()" }.
  *
  * @param field The field expression based on which the aggregation will be applied.
  * @return The transformed DataStream.
  */
 public SingleOutputStreamOperator<T, ?> max(String field) {
   return aggregate(
       new ComparableAggregator<>(
           field,
           input.getType(),
           AggregationFunction.AggregationType.MAX,
           false,
           input.getExecutionConfig()));
 }
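The aggregations in Examples #5 through #8 differ in what they return: max only maximizes the selected field (the remaining fields are taken from the first element seen), while maxBy and minBy return the whole element that carries the extreme value. A minimal usage sketch, assuming a Tuple2<String, Integer> stream and the keyed time-window API; the window size is an arbitrary choice:

    // hypothetical usage of the window aggregations
    DataStream<Tuple2<String, Integer>> pairs =
        env.fromElements(new Tuple2<>("a", 1), new Tuple2<>("a", 3), new Tuple2<>("b", 2));

    pairs.keyBy(0)
        .timeWindow(Time.seconds(5))
        .max(1); // maximum of tuple field 1; other fields come from the first element

    pairs.keyBy(0)
        .timeWindow(Time.seconds(5))
        .maxBy("f1", true); // whole element with the maximum "f1", first one on ties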
Example #9
  /**
    * Applies the given fold function to each window. The fold function is called for each
    * evaluation of the window for each key individually. The output of the fold function is
    * interpreted as a regular non-windowed stream.
   *
   * @param function The fold function.
   * @return The data stream that is the result of applying the fold function to the window.
   */
  public <R> SingleOutputStreamOperator<R, ?> fold(R initialValue, FoldFunction<T, R> function) {
    // clean the closure
    function = input.getExecutionEnvironment().clean(function);

    TypeInformation<R> resultType =
        TypeExtractor.getFoldReturnTypes(
            function, input.getType(), Utils.getCallLocationName(), true);

    return apply(new FoldAllWindowFunction<W, T, R>(initialValue, function), resultType);
  }
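A minimal call sketch for this fold, modeled on the usage in Example #23 below; the stream of words and the concatenation logic are illustrative:

    // hypothetical fold: concatenate the strings of each count window
    DataStream<String> folded =
        words
            .windowAll(GlobalWindows.create())
            .trigger(PurgingTrigger.of(CountTrigger.of(5)))
            .fold(
                "",
                new FoldFunction<String, String>() {
                  @Override
                  public String fold(String accumulator, String value) {
                    return accumulator.isEmpty() ? value : accumulator + " " + value;
                  }
                });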
Example #10
  /**
   * Applies a reduce function to the window. The window function is called for each evaluation of
   * the window for each key individually. The output of the reduce function is interpreted as a
   * regular non-windowed stream.
   *
    * <p>This window will try to pre-aggregate data as much as the window policies permit. For
   * example, tumbling time windows can perfectly pre-aggregate the data, meaning that only one
   * element per key is stored. Sliding time windows will pre-aggregate on the granularity of the
   * slide interval, so a few elements are stored per key (one per slide interval). Custom windows
   * may not be able to pre-aggregate, or may need to store extra values in an aggregation tree.
   *
   * @param function The reduce function.
   * @return The data stream that is the result of applying the reduce function to the window.
   */
  public SingleOutputStreamOperator<T, ?> reduce(ReduceFunction<T> function) {
    if (function instanceof RichFunction) {
      throw new UnsupportedOperationException(
          "ReduceFunction of reduce can not be a RichFunction. "
              + "Please use apply(ReduceFunction, WindowFunction) instead.");
    }

    // clean the closure
    function = input.getExecutionEnvironment().clean(function);

    String callLocation = Utils.getCallLocationName();
    String udfName = "Reduce at " + callLocation;

    SingleOutputStreamOperator<T, ?> result =
        createFastTimeOperatorIfValid(function, input.getType(), udfName);
    if (result != null) {
      return result;
    }

    String opName =
        "NonParallelTriggerWindow(" + windowAssigner + ", " + trigger + ", " + udfName + ")";

    OneInputStreamOperator<T, T> operator;

    boolean setProcessingTime =
        input.getExecutionEnvironment().getStreamTimeCharacteristic()
            == TimeCharacteristic.ProcessingTime;

    if (evictor != null) {
      operator =
          new EvictingNonKeyedWindowOperator<>(
                  windowAssigner,
                  windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()),
                  new HeapWindowBuffer.Factory<T>(),
                  new ReduceIterableAllWindowFunction<W, T>(function),
                  trigger,
                  evictor)
              .enableSetProcessingTime(setProcessingTime);

    } else {
      operator =
          new NonKeyedWindowOperator<>(
                  windowAssigner,
                  windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()),
                  new PreAggregatingHeapWindowBuffer.Factory<>(function),
                  new ReduceIterableAllWindowFunction<W, T>(function),
                  trigger)
              .enableSetProcessingTime(setProcessingTime);
    }

    return input.transform(opName, input.getType(), operator).setParallelism(1);
  }
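Example #11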
  @Override
  protected void testProgram() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<String> text = env.fromElements(WordCountData.TEXT);

    DataStream<Tuple2<String, Integer>> counts =
        text.flatMap(new CsvOutputFormatITCase.Tokenizer()).keyBy(0).sum(1);

    counts.writeAsText(resultPath);

    env.execute("WriteAsTextTest");
  }
Example #12
  @Test
  public void testChannelSelectors() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStreamSource<Long> src = env.generateSequence(0, 0);

    DataStream<Long> broadcast = src.broadcast();
    DataStreamSink<Long> broadcastSink = broadcast.print();
    StreamPartitioner<?> broadcastPartitioner =
        env.getStreamGraph()
            .getStreamEdges(src.getId(), broadcastSink.getTransformation().getId())
            .get(0)
            .getPartitioner();
    assertTrue(broadcastPartitioner instanceof BroadcastPartitioner);

    DataStream<Long> shuffle = src.shuffle();
    DataStreamSink<Long> shuffleSink = shuffle.print();
    StreamPartitioner<?> shufflePartitioner =
        env.getStreamGraph()
            .getStreamEdges(src.getId(), shuffleSink.getTransformation().getId())
            .get(0)
            .getPartitioner();
    assertTrue(shufflePartitioner instanceof ShufflePartitioner);

    DataStream<Long> forward = src.forward();
    DataStreamSink<Long> forwardSink = forward.print();
    StreamPartitioner<?> forwardPartitioner =
        env.getStreamGraph()
            .getStreamEdges(src.getId(), forwardSink.getTransformation().getId())
            .get(0)
            .getPartitioner();
    assertTrue(forwardPartitioner instanceof ForwardPartitioner);

    DataStream<Long> rebalance = src.rebalance();
    DataStreamSink<Long> rebalanceSink = rebalance.print();
    StreamPartitioner<?> rebalancePartitioner =
        env.getStreamGraph()
            .getStreamEdges(src.getId(), rebalanceSink.getTransformation().getId())
            .get(0)
            .getPartitioner();
    assertTrue(rebalancePartitioner instanceof RebalancePartitioner);

    DataStream<Long> global = src.global();
    DataStreamSink<Long> globalSink = global.print();
    StreamPartitioner<?> globalPartitioner =
        env.getStreamGraph()
            .getStreamEdges(src.getId(), globalSink.getTransformation().getId())
            .get(0)
            .getPartitioner();
    assertTrue(globalPartitioner instanceof GlobalPartitioner);
  }
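The five checks above are identical except for the partitioning call and the expected partitioner class, so a small helper could collapse the repetition. A sketch; the helper name is an assumption:

  // hypothetical helper that asserts the partitioner type of a single stream edge
  private static void assertPartitioner(
      StreamExecutionEnvironment env,
      DataStreamSource<Long> src,
      DataStream<Long> partitioned,
      Class<?> expectedPartitioner) {
    DataStreamSink<Long> sink = partitioned.print();
    StreamPartitioner<?> partitioner =
        env.getStreamGraph()
            .getStreamEdges(src.getId(), sink.getTransformation().getId())
            .get(0)
            .getPartitioner();
    assertTrue(expectedPartitioner.isInstance(partitioner));
  }

Each case then reduces to a one-liner such as assertPartitioner(env, src, src.broadcast(), BroadcastPartitioner.class).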
Example #13
    /**
     * Completes the co-group operation with the user function that is executed for windowed groups.
     */
    public <T> DataStream<T> apply(CoGroupFunction<T1, T2, T> function) {

      TypeInformation<T> resultType =
          TypeExtractor.getBinaryOperatorReturnType(
              function,
              CoGroupFunction.class,
              true,
              true,
              input1.getType(),
              input2.getType(),
              "CoGroup",
              false);

      return apply(function, resultType);
    }
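A possible call site for the co-group API completed by the apply overloads above. The input streams, key selectors, and window assigner below are illustrative assumptions (the assigner class name varies across Flink versions):

      // hypothetical co-group of two Tuple2 streams on their String key field
      DataStream<String> joined =
          left.coGroup(right)
              .where(
                  new KeySelector<Tuple2<String, Integer>, String>() {
                    @Override
                    public String getKey(Tuple2<String, Integer> value) {
                      return value.f0;
                    }
                  })
              .equalTo(
                  new KeySelector<Tuple2<String, Long>, String>() {
                    @Override
                    public String getKey(Tuple2<String, Long> value) {
                      return value.f0;
                    }
                  })
              .window(TumblingEventTimeWindows.of(Time.seconds(10)))
              .apply(
                  new CoGroupFunction<Tuple2<String, Integer>, Tuple2<String, Long>, String>() {
                    @Override
                    public void coGroup(
                        Iterable<Tuple2<String, Integer>> first,
                        Iterable<Tuple2<String, Long>> second,
                        Collector<String> out) {
                      // emit one summary record per key and window
                      int leftCount = 0;
                      for (Tuple2<String, Integer> ignored : first) {
                        leftCount++;
                      }
                      int rightCount = 0;
                      for (Tuple2<String, Long> ignored : second) {
                        rightCount++;
                      }
                      out.collect(leftCount + " vs " + rightCount);
                    }
                  });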
Example #14
  private DataStream<Tuple> processInput(
      String boltId,
      IRichBolt userBolt,
      GlobalStreamId streamId,
      Grouping grouping,
      Map<String, DataStream<Tuple>> producer) {

    assert (userBolt != null);
    assert (boltId != null);
    assert (streamId != null);
    assert (grouping != null);
    assert (producer != null);

    final String producerId = streamId.get_componentId();
    final String inputStreamId = streamId.get_streamId();

    DataStream<Tuple> inputStream = producer.get(inputStreamId);

    final FlinkOutputFieldsDeclarer declarer = new FlinkOutputFieldsDeclarer();
    declarers.put(boltId, declarer);
    userBolt.declareOutputFields(declarer);
    this.outputStreams.put(boltId, declarer.outputStreams);

    // translate the Storm grouping into the corresponding Flink partitioning
    if (grouping.is_set_shuffle()) {
      // Storm uses a round-robin shuffle strategy
      inputStream = inputStream.rebalance();
    } else if (grouping.is_set_fields()) {
      // global grouping is emulated in Storm via an empty fields grouping list
      final List<String> fields = grouping.get_fields();
      if (fields.size() > 0) {
        FlinkOutputFieldsDeclarer prodDeclarer = this.declarers.get(producerId);
        inputStream =
            inputStream.keyBy(
                prodDeclarer.getGroupingFieldIndexes(inputStreamId, grouping.get_fields()));
      } else {
        inputStream = inputStream.global();
      }
    } else if (grouping.is_set_all()) {
      inputStream = inputStream.broadcast();
    } else if (!grouping.is_set_local_or_shuffle()) {
      throw new UnsupportedOperationException(
          "Flink only supports (local-or-)shuffle, fields, all, and global grouping");
    }

    return inputStream;
  }
Example #15
  @PublicEvolving
  public <R> SingleOutputStreamOperator<R> transform(
      String functionName,
      TypeInformation<R> outTypeInfo,
      TwoInputStreamOperator<IN1, IN2, R> operator) {

    // read the output type of the input Transforms to coax out errors about MissingTypeInfo
    inputStream1.getType();
    inputStream2.getType();

    TwoInputTransformation<IN1, IN2, R> transform =
        new TwoInputTransformation<>(
            inputStream1.getTransformation(),
            inputStream2.getTransformation(),
            functionName,
            operator,
            outTypeInfo,
            environment.getParallelism());

    if (inputStream1 instanceof KeyedStream && inputStream2 instanceof KeyedStream) {
      KeyedStream<IN1, ?> keyedInput1 = (KeyedStream<IN1, ?>) inputStream1;
      KeyedStream<IN2, ?> keyedInput2 = (KeyedStream<IN2, ?>) inputStream2;

      TypeInformation<?> keyType1 = keyedInput1.getKeyType();
      TypeInformation<?> keyType2 = keyedInput2.getKeyType();
      if (!(keyType1.canEqual(keyType2) && keyType1.equals(keyType2))) {
        throw new UnsupportedOperationException(
            "Key types if input KeyedStreams "
                + "don't match: "
                + keyType1
                + " and "
                + keyType2
                + ".");
      }

      transform.setStateKeySelectors(keyedInput1.getKeySelector(), keyedInput2.getKeySelector());
      transform.setStateKeyType(keyType1);
    }

    @SuppressWarnings({"unchecked", "rawtypes"})
    SingleOutputStreamOperator<R> returnStream =
        new SingleOutputStreamOperator(environment, transform);

    getExecutionEnvironment().addOperator(transform);

    return returnStream;
  }
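A possible call site; MyTwoInputOperator stands in for a user-defined TwoInputStreamOperator and is hypothetical:

    // hypothetical usage: wire a custom two-input operator between two connected streams
    SingleOutputStreamOperator<String> out =
        stream1
            .connect(stream2)
            .transform(
                "MyTwoInput",
                BasicTypeInfo.STRING_TYPE_INFO,
                new MyTwoInputOperator()); // hypothetical TwoInputStreamOperator<IN1, IN2, String>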
Example #16
  /**
   * Applies the given window function to each window. The window function is called for each
   * evaluation of the window for each key individually. The output of the window function is
   * interpreted as a regular non-windowed stream.
   *
   * <p>Arriving data is pre-aggregated using the given pre-aggregation reducer.
   *
   * @param preAggregator The reduce function that is used for pre-aggregation
   * @param function The window function.
   * @param resultType Type information for the result type of the window function
   * @return The data stream that is the result of applying the window function to the window.
   */
  public <R> SingleOutputStreamOperator<R, ?> apply(
      ReduceFunction<T> preAggregator,
      AllWindowFunction<T, R, W> function,
      TypeInformation<R> resultType) {
    if (preAggregator instanceof RichFunction) {
      throw new UnsupportedOperationException("Pre-aggregator of apply can not be a RichFunction.");
    }

    // clean the closures
    function = input.getExecutionEnvironment().clean(function);
    preAggregator = input.getExecutionEnvironment().clean(preAggregator);

    String callLocation = Utils.getCallLocationName();
    String udfName = "WindowApply at " + callLocation;

    String opName = "TriggerWindow(" + windowAssigner + ", " + trigger + ", " + udfName + ")";

    OneInputStreamOperator<T, R> operator;

    boolean setProcessingTime =
        input.getExecutionEnvironment().getStreamTimeCharacteristic()
            == TimeCharacteristic.ProcessingTime;

    if (evictor != null) {
      operator =
          new EvictingNonKeyedWindowOperator<>(
                  windowAssigner,
                  windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()),
                  new HeapWindowBuffer.Factory<T>(),
                  new ReduceApplyAllWindowFunction<>(preAggregator, function),
                  trigger,
                  evictor)
              .enableSetProcessingTime(setProcessingTime);

    } else {
      operator =
          new NonKeyedWindowOperator<>(
                  windowAssigner,
                  windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()),
                  new PreAggregatingHeapWindowBuffer.Factory<>(preAggregator),
                  new ReduceApplyAllWindowFunction<>(preAggregator, function),
                  trigger)
              .enableSetProcessingTime(setProcessingTime);
    }

    return input.transform(opName, resultType, operator).setParallelism(1);
  }
Example #17
  /**
   * Applies the given window function to each window. The window function is called for each
   * evaluation of the window for each key individually. The output of the window function is
   * interpreted as a regular non-windowed stream.
   *
   * <p>Note that this function requires that all data in the windows is buffered until the window is
   * evaluated, as the function provides no means of pre-aggregation.
   *
   * @param function The window function.
   * @return The data stream that is the result of applying the window function to the window.
   */
  public <R> SingleOutputStreamOperator<R, ?> apply(
      AllWindowFunction<Iterable<T>, R, W> function, TypeInformation<R> resultType) {
    // clean the closure
    function = input.getExecutionEnvironment().clean(function);

    String callLocation = Utils.getCallLocationName();
    String udfName = "WindowApply at " + callLocation;

    SingleOutputStreamOperator<R, ?> result =
        createFastTimeOperatorIfValid(function, resultType, udfName);
    if (result != null) {
      return result;
    }

    String opName = "TriggerWindow(" + windowAssigner + ", " + trigger + ", " + udfName + ")";

    NonKeyedWindowOperator<T, R, W> operator;

    boolean setProcessingTime =
        input.getExecutionEnvironment().getStreamTimeCharacteristic()
            == TimeCharacteristic.ProcessingTime;

    if (evictor != null) {
      operator =
          new EvictingNonKeyedWindowOperator<>(
                  windowAssigner,
                  windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()),
                  new HeapWindowBuffer.Factory<T>(),
                  function,
                  trigger,
                  evictor)
              .enableSetProcessingTime(setProcessingTime);

    } else {
      operator =
          new NonKeyedWindowOperator<>(
                  windowAssigner,
                  windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()),
                  new HeapWindowBuffer.Factory<T>(),
                  function,
                  trigger)
              .enableSetProcessingTime(setProcessingTime);
    }

    return input.transform(opName, resultType, operator).setParallelism(1);
  }
Example #18
  /**
   * Creates a new {@link DataStream} by merging {@link DataStream} outputs of the same type with
   * each other. The DataStreams merged using this operator will be transformed simultaneously.
   *
   * @param streams The DataStreams to union output with.
   * @return The {@link DataStream}.
   */
  @SafeVarargs
  public final DataStream<T> union(DataStream<T>... streams) {
    List<StreamTransformation<T>> unionedTransforms = new ArrayList<>();
    unionedTransforms.add(this.transformation);

    for (DataStream<T> newStream : streams) {
      if (!getType().equals(newStream.getType())) {
        throw new IllegalArgumentException(
            "Cannot union streams of different types: "
                + getType()
                + " and "
                + newStream.getType());
      }

      unionedTransforms.add(newStream.getTransformation());
    }
    return new DataStream<>(this.environment, new UnionTransformation<>(unionedTransforms));
  }
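A minimal usage sketch; the three single-element sources are illustrative:

    // union of three streams of the same type
    DataStream<String> first = env.fromElements("a");
    DataStream<String> second = env.fromElements("b");
    DataStream<String> third = env.fromElements("c");

    DataStream<String> all = first.union(second, third);
    all.print(); // downstream operators see the elements of all three streams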
Example #19
  /**
   * Applies the given window function to each window. The window function is called for each
   * evaluation of the window for each key individually. The output of the window function is
   * interpreted as a regular non-windowed stream.
   *
   * <p>Arriving data is pre-aggregated using the given pre-aggregation reducer.
   *
   * @param preAggregator The reduce function that is used for pre-aggregation
   * @param function The window function.
   * @return The data stream that is the result of applying the window function to the window.
   */
  public <R> SingleOutputStreamOperator<R, ?> apply(
      ReduceFunction<T> preAggregator, AllWindowFunction<T, R, W> function) {
    TypeInformation<T> inType = input.getType();
    TypeInformation<R> resultType =
        TypeExtractor.getUnaryOperatorReturnType(
            function, AllWindowFunction.class, true, true, inType, null, false);

    return apply(preAggregator, function, resultType);
  }
Example #20
  @Test
  public void testTypeInfo() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Long> src1 = env.generateSequence(0, 0);
    assertEquals(TypeExtractor.getForClass(Long.class), src1.getType());

    DataStream<Tuple2<Integer, String>> map =
        src1.map(
            new MapFunction<Long, Tuple2<Integer, String>>() {
              @Override
              public Tuple2<Integer, String> map(Long value) throws Exception {
                return null;
              }
            });

    assertEquals(TypeExtractor.getForObject(new Tuple2<Integer, String>(0, "")), map.getType());

    WindowedDataStream<String> window =
        map.window(Count.of(5))
            .mapWindow(
                new WindowMapFunction<Tuple2<Integer, String>, String>() {
                  @Override
                  public void mapWindow(
                      Iterable<Tuple2<Integer, String>> values, Collector<String> out)
                      throws Exception {}
                });

    assertEquals(TypeExtractor.getForClass(String.class), window.getType());

    DataStream<CustomPOJO> flatten =
        window
            .foldWindow(
                new CustomPOJO(),
                new FoldFunction<String, CustomPOJO>() {
                  @Override
                  public CustomPOJO fold(CustomPOJO accumulator, String value) throws Exception {
                    return null;
                  }
                })
            .flatten();

    assertEquals(TypeExtractor.getForClass(CustomPOJO.class), flatten.getType());
  }
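Example #21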
 @Override
 public <T> DataStreamSink<T> produceIntoKafka(
     DataStream<T> stream,
     String topic,
     KeyedSerializationSchema<T> serSchema,
     Properties props,
     KafkaPartitioner<T> partitioner) {
   FlinkKafkaProducer08<T> prod = new FlinkKafkaProducer08<>(topic, serSchema, props, partitioner);
   prod.setFlushOnCheckpoint(true);
   return stream.addSink(prod);
 }
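A possible call, assuming a DataStream<String> and the connector's KeyedSerializationSchemaWrapper; the topic name and properties are assumptions:

  // hypothetical call site for the producer factory above
  Properties props = new Properties();
  props.setProperty("bootstrap.servers", "localhost:9092");

  produceIntoKafka(
      stream, // assumed DataStream<String>
      "output-topic",
      new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()),
      props,
      null); // null partitioner: fall back to the connector's default partitioning (an assumption)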
Example #22
  private static JobGraph createJobGraphWithOperatorState(
      int parallelism, int maxParallelism, boolean partitionedOperatorState) {

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);
    env.getConfig().setMaxParallelism(maxParallelism);
    env.enableCheckpointing(Long.MAX_VALUE);
    env.setRestartStrategy(RestartStrategies.noRestart());

    StateSourceBase.workStartedLatch = new CountDownLatch(1);

    DataStream<Integer> input =
        env.addSource(
            partitionedOperatorState
                ? new PartitionedStateSource()
                : new NonPartitionedStateSource());

    input.addSink(new DiscardingSink<Integer>());

    return env.getStreamGraph().getJobGraph();
  }
Example #23
  @Test
  public void testTypeInfo() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Long> src1 = env.generateSequence(0, 0);
    assertEquals(TypeExtractor.getForClass(Long.class), src1.getType());

    DataStream<Tuple2<Integer, String>> map =
        src1.map(
            new MapFunction<Long, Tuple2<Integer, String>>() {
              @Override
              public Tuple2<Integer, String> map(Long value) throws Exception {
                return null;
              }
            });

    assertEquals(TypeExtractor.getForObject(new Tuple2<>(0, "")), map.getType());

    DataStream<String> window =
        map.windowAll(GlobalWindows.create())
            .trigger(PurgingTrigger.of(CountTrigger.of(5)))
            .apply(
                new AllWindowFunction<Tuple2<Integer, String>, String, GlobalWindow>() {
                  @Override
                  public void apply(
                      GlobalWindow window,
                      Iterable<Tuple2<Integer, String>> values,
                      Collector<String> out)
                      throws Exception {}
                });

    assertEquals(TypeExtractor.getForClass(String.class), window.getType());

    DataStream<CustomPOJO> flatten =
        window
            .windowAll(GlobalWindows.create())
            .trigger(PurgingTrigger.of(CountTrigger.of(5)))
            .fold(
                new CustomPOJO(),
                new FoldFunction<String, CustomPOJO>() {
                  private static final long serialVersionUID = 1L;

                  @Override
                  public CustomPOJO fold(CustomPOJO accumulator, String value) throws Exception {
                    return null;
                  }
                });

    assertEquals(TypeExtractor.getForClass(CustomPOJO.class), flatten.getType());
  }
Example #24
  /**
   * Applies a CoMap transformation on a {@link ConnectedStreams} and maps the output to a common
   * type. The transformation calls a {@link CoMapFunction#map1} for each element of the first input
   * and {@link CoMapFunction#map2} for each element of the second input. Each CoMapFunction call
   * returns exactly one element.
   *
   * @param coMapper The CoMapFunction used to jointly transform the two input DataStreams
   * @return The transformed {@link DataStream}
   */
  public <R> SingleOutputStreamOperator<R> map(CoMapFunction<IN1, IN2, R> coMapper) {

    TypeInformation<R> outTypeInfo =
        TypeExtractor.getBinaryOperatorReturnType(
            coMapper,
            CoMapFunction.class,
            false,
            true,
            getType1(),
            getType2(),
            Utils.getCallLocationName(),
            true);

    return transform("Co-Map", outTypeInfo, new CoStreamMap<>(inputStream1.clean(coMapper)));
  }
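A minimal CoMapFunction sketch, assuming an Integer stream connected with a Long stream:

    // hypothetical co-map: merge two differently typed streams into Strings
    DataStream<String> merged =
        ints.connect(longs)
            .map(
                new CoMapFunction<Integer, Long, String>() {
                  @Override
                  public String map1(Integer value) {
                    return "int: " + value; // called for each element of the first input
                  }

                  @Override
                  public String map2(Long value) {
                    return "long: " + value; // called for each element of the second input
                  }
                });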
Example #25
  public static void main(String[] args) throws Exception {
    // create execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // parse user parameters
    //		ParameterTool parameterTool = ParameterTool.fromArgs(args);

    //		DataStream<String> messageStream = env.addSource(new
    // FlinkKafkaConsumer(parameterTool.getRequired("topic"), new SimpleStringSchema(),
    // parameterTool.getProperties()));

    Properties properties = new Properties();
    properties.setProperty("bootstrap.servers", "node2:9092");
    properties.setProperty("zookeeper.connect", "node2:2181");
    properties.setProperty("group.id", "1");
    DataStream<String> messageStream =
        env.addSource(
            new FlinkKafkaConsumer082<>("demo", new SimpleStringSchema(), properties));

    messageStream.print();
    System.out.print(messageStream + " Hello\n");

    // print() will write the contents of the stream to the TaskManager's standard out stream.
    // The rebalance call causes a repartitioning of the data so that all machines
    // see the messages (for example, in cases when "num kafka partitions" < "num flink operators").
    //		messageStream.rebalance().map(new MapFunction<String, String>() {
    //			private static final long serialVersionUID = -6867736771747690202L;

    //			@Override
    //			public String map(String value) throws Exception {
    //				return "Kafka and Flink says: " + value;
    //			}
    //		}).print();

    env.execute("kafka consumer");
  }
Example #26
 /**
  * KeyBy operation for connected data stream. Assigns keys to the elements of input1 and input2
  * using keySelector1 and keySelector2.
  *
  * @param keySelector1 The {@link KeySelector} used for grouping the first input
  * @param keySelector2 The {@link KeySelector} used for grouping the second input
  * @return The partitioned {@link ConnectedStreams}
  */
 public ConnectedStreams<IN1, IN2> keyBy(
     KeySelector<IN1, ?> keySelector1, KeySelector<IN2, ?> keySelector2) {
   return new ConnectedStreams<>(
       environment, inputStream1.keyBy(keySelector1), inputStream2.keyBy(keySelector2));
 }
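A minimal sketch with hypothetical POJO types Order and Payment, keyed by a shared String id:

   // hypothetical usage: key both inputs of a connected stream by the same logical key
   ConnectedStreams<Order, Payment> keyed =
       orders
           .connect(payments)
           .keyBy(
               new KeySelector<Order, String>() {
                 @Override
                 public String getKey(Order order) {
                   return order.orderId; // assumed public field
                 }
               },
               new KeySelector<Payment, String>() {
                 @Override
                 public String getKey(Payment payment) {
                   return payment.orderId; // assumed public field
                 }
               });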
Example #27
  /**
   * Runs the following program:
   *
   * <pre>
   *     [ (source)->(filter)->(map) ] -> [ (map) ] -> [ (groupBy/reduce)->(sink) ]
   * </pre>
   */
  @Test
  public void runCheckpointedProgram() {

    final long NUM_STRINGS = 10000000L;
    assertTrue("Broken test setup", NUM_STRINGS % 40 == 0);

    try {
      StreamExecutionEnvironment env =
          StreamExecutionEnvironment.createRemoteEnvironment(
              "localhost", cluster.getJobManagerRPCPort());
      env.setParallelism(PARALLELISM);
      env.enableCheckpointing(500);
      env.getConfig().disableSysoutLogging();

      DataStream<String> stream = env.addSource(new StringGeneratingSourceFunction(NUM_STRINGS));

      stream
          // -------------- first vertex, chained to the source ----------------
          .filter(new StringRichFilterFunction())

          // -------------- second vertex - the stateful one that also fails ----------------
          .map(new StringPrefixCountRichMapFunction())
          .startNewChain()
          .map(new StatefulCounterFunction())

          // -------------- third vertex - reducer and the sink ----------------
          .groupBy("prefix")
          .reduce(new OnceFailingReducer(NUM_STRINGS))
          .addSink(
              new RichSinkFunction<PrefixCount>() {

                private Map<Character, Long> counts = new HashMap<Character, Long>();

                @Override
                public void invoke(PrefixCount value) {
                  Character first = value.prefix.charAt(0);
                  Long previous = counts.get(first);
                  if (previous == null) {
                    counts.put(first, value.count);
                  } else {
                    counts.put(first, Math.max(previous, value.count));
                  }
                }

                //						@Override
                //						public void close() {
                //							for (Long count : counts.values()) {
                //								assertEquals(NUM_STRINGS / 40, count.longValue());
                //							}
                //						}
              });

      env.execute();

      long filterSum = 0;
      for (long l : StringRichFilterFunction.counts) {
        filterSum += l;
      }

      long mapSum = 0;
      for (long l : StringPrefixCountRichMapFunction.counts) {
        mapSum += l;
      }

      long countSum = 0;
      for (long l : StatefulCounterFunction.counts) {
        countSum += l;
      }

      // verify that we counted exactly right

      // this line should be uncommented once the "exactly one off by one" is fixed
      // if this fails we see at which point the count is off
      assertEquals(NUM_STRINGS, filterSum);
      assertEquals(NUM_STRINGS, mapSum);
      assertEquals(NUM_STRINGS, countSum);
    } catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }
Example #28
  /**
   * Checks that a certain event sequence is recognized
   *
   * @throws Exception
   */
  @Test
  public void testSimplePatternCEP() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Event> input =
        env.fromElements(
            new Event(1, "barfoo", 1.0),
            new Event(2, "start", 2.0),
            new Event(3, "foobar", 3.0),
            new SubEvent(4, "foo", 4.0, 1.0),
            new Event(5, "middle", 5.0),
            new SubEvent(6, "middle", 6.0, 2.0),
            new SubEvent(7, "bar", 3.0, 3.0),
            new Event(42, "42", 42.0),
            new Event(8, "end", 1.0));

    Pattern<Event, ?> pattern =
        Pattern.<Event>begin("start")
            .where(
                new FilterFunction<Event>() {

                  @Override
                  public boolean filter(Event value) throws Exception {
                    return value.getName().equals("start");
                  }
                })
            .followedBy("middle")
            .subtype(SubEvent.class)
            .where(
                new FilterFunction<SubEvent>() {

                  @Override
                  public boolean filter(SubEvent value) throws Exception {
                    return value.getName().equals("middle");
                  }
                })
            .followedBy("end")
            .where(
                new FilterFunction<Event>() {

                  @Override
                  public boolean filter(Event value) throws Exception {
                    return value.getName().equals("end");
                  }
                });

    DataStream<String> result =
        CEP.pattern(input, pattern)
            .select(
                new PatternSelectFunction<Event, String>() {

                  @Override
                  public String select(Map<String, Event> pattern) {
                    StringBuilder builder = new StringBuilder();

                    builder
                        .append(pattern.get("start").getId())
                        .append(",")
                        .append(pattern.get("middle").getId())
                        .append(",")
                        .append(pattern.get("end").getId());

                    return builder.toString();
                  }
                });

    result.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);

    // expected sequence of matching event ids
    expected = "2,6,8";

    env.execute();
  }
Example #29
  @Test
  public void testSimpleKeyedPatternEventTime() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(2);

    // (Event, timestamp)
    DataStream<Event> input =
        env.fromElements(
                Tuple2.of(new Event(1, "start", 1.0), 5L),
                Tuple2.of(new Event(1, "middle", 2.0), 1L),
                Tuple2.of(new Event(2, "middle", 2.0), 4L),
                Tuple2.of(new Event(2, "start", 2.0), 3L),
                Tuple2.of(new Event(1, "end", 3.0), 3L),
                Tuple2.of(new Event(3, "start", 4.1), 5L),
                Tuple2.of(new Event(1, "end", 4.0), 10L),
                Tuple2.of(new Event(2, "end", 2.0), 8L),
                Tuple2.of(new Event(1, "middle", 5.0), 7L),
                Tuple2.of(new Event(3, "middle", 6.0), 9L),
                Tuple2.of(new Event(3, "end", 7.0), 7L),
                // last element for high final watermark
                Tuple2.of(new Event(3, "end", 7.0), 100L))
            .assignTimestampsAndWatermarks(
                new AssignerWithPunctuatedWatermarks<Tuple2<Event, Long>>() {

                  @Override
                  public long extractTimestamp(Tuple2<Event, Long> element, long currentTimestamp) {
                    return element.f1;
                  }

                  @Override
                  public Watermark checkAndGetNextWatermark(
                      Tuple2<Event, Long> lastElement, long extractedTimestamp) {
                    return new Watermark(lastElement.f1 - 5);
                  }
                })
            .map(
                new MapFunction<Tuple2<Event, Long>, Event>() {

                  @Override
                  public Event map(Tuple2<Event, Long> value) throws Exception {
                    return value.f0;
                  }
                })
            .keyBy(
                new KeySelector<Event, Integer>() {

                  @Override
                  public Integer getKey(Event value) throws Exception {
                    return value.getId();
                  }
                });

    Pattern<Event, ?> pattern =
        Pattern.<Event>begin("start")
            .where(
                new FilterFunction<Event>() {

                  @Override
                  public boolean filter(Event value) throws Exception {
                    return value.getName().equals("start");
                  }
                })
            .followedBy("middle")
            .where(
                new FilterFunction<Event>() {

                  @Override
                  public boolean filter(Event value) throws Exception {
                    return value.getName().equals("middle");
                  }
                })
            .followedBy("end")
            .where(
                new FilterFunction<Event>() {

                  @Override
                  public boolean filter(Event value) throws Exception {
                    return value.getName().equals("end");
                  }
                });

    DataStream<String> result =
        CEP.pattern(input, pattern)
            .select(
                new PatternSelectFunction<Event, String>() {

                  @Override
                  public String select(Map<String, Event> pattern) {
                    StringBuilder builder = new StringBuilder();

                    builder
                        .append(pattern.get("start").getId())
                        .append(",")
                        .append(pattern.get("middle").getId())
                        .append(",")
                        .append(pattern.get("end").getId());

                    return builder.toString();
                  }
                });

    result.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);

    // the expected sequences of matching event ids
    expected = "1,1,1\n2,2,2";

    env.execute();
  }
Example #30
 /**
  * Gets the type of the first input
  *
  * @return The type of the first input
  */
 public TypeInformation<IN1> getType1() {
   return inputStream1.getType();
 }