Example No. 1
  @Test
  public void testTypeInfo() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Long> src1 = env.generateSequence(0, 0);
    assertEquals(TypeExtractor.getForClass(Long.class), src1.getType());

    DataStream<Tuple2<Integer, String>> map =
        src1.map(
            new MapFunction<Long, Tuple2<Integer, String>>() {
              @Override
              public Tuple2<Integer, String> map(Long value) throws Exception {
                return null;
              }
            });

    assertEquals(TypeExtractor.getForObject(new Tuple2<>(0, "")), map.getType());

    DataStream<String> window =
        map.windowAll(GlobalWindows.create())
            .trigger(PurgingTrigger.of(CountTrigger.of(5)))
            .apply(
                new AllWindowFunction<Tuple2<Integer, String>, String, GlobalWindow>() {
                  @Override
                  public void apply(
                      GlobalWindow window,
                      Iterable<Tuple2<Integer, String>> values,
                      Collector<String> out)
                      throws Exception {}
                });

    assertEquals(TypeExtractor.getForClass(String.class), window.getType());

    DataStream<CustomPOJO> flatten =
        window
            .windowAll(GlobalWindows.create())
            .trigger(PurgingTrigger.of(CountTrigger.of(5)))
            .fold(
                new CustomPOJO(),
                new FoldFunction<String, CustomPOJO>() {
                  private static final long serialVersionUID = 1L;

                  @Override
                  public CustomPOJO fold(CustomPOJO accumulator, String value) throws Exception {
                    return null;
                  }
                });

    assertEquals(TypeExtractor.getForClass(CustomPOJO.class), flatten.getType());
  }
Example No. 2
  @Test
  public void testAllReduceDriverImmutableEmpty() {
    try {
      TestTaskContext<ReduceFunction<Tuple2<String, Integer>>, Tuple2<String, Integer>> context =
          new TestTaskContext<ReduceFunction<Tuple2<String, Integer>>, Tuple2<String, Integer>>();

      List<Tuple2<String, Integer>> data = DriverTestData.createReduceImmutableData();
      TypeInformation<Tuple2<String, Integer>> typeInfo = TypeExtractor.getForObject(data.get(0));
      MutableObjectIterator<Tuple2<String, Integer>> input = EmptyMutableObjectIterator.get();
      context.setDriverStrategy(DriverStrategy.ALL_REDUCE);

      context.setInput1(input, typeInfo.createSerializer());
      context.setCollector(new DiscardingOutputCollector<Tuple2<String, Integer>>());

      AllReduceDriver<Tuple2<String, Integer>> driver =
          new AllReduceDriver<Tuple2<String, Integer>>();
      driver.setup(context);
      driver.prepare();
      driver.run();
    } catch (Exception e) {
      System.err.println(e.getMessage());
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }
Example No. 3
    @Override
    public <E> void validateCustomPartitioner(
        Partitioner<E> partitioner, TypeInformation<E> typeInfo) {
      if (keyFields.size() != 1) {
        throw new InvalidProgramException(
            "Custom partitioners can only be used with keys that have one key field.");
      }

      if (typeInfo == null) {
        try {
          typeInfo = TypeExtractor.getPartitionerTypes(partitioner);
        } catch (Throwable t) {
          // best effort check, so we ignore exceptions
        }
      }

      if (typeInfo != null && !(typeInfo instanceof GenericTypeInfo)) {
        TypeInformation<?> keyType = keyFields.get(0).getType();
        if (!keyType.equals(typeInfo)) {
          throw new InvalidProgramException(
              "The partitioner is incompatible with the key type. "
                  + "Partitioner type: "
                  + typeInfo
                  + " , key type: "
                  + keyType);
        }
      }
    }
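For context, this check runs when a custom partitioner is attached to a single-field key, for example via partitionCustom on the DataSet API. A minimal, hedged sketch (the ExecutionEnvironment env and the data are illustrative):

  // A single-field key passes the validation above; a composite key (e.g.
  // fields 0 and 1) would throw InvalidProgramException.
  DataSet<Tuple2<String, Integer>> data = env.fromElements(new Tuple2<>("a", 1));
  data.partitionCustom(
      new Partitioner<String>() {
        @Override
        public int partition(String key, int numPartitions) {
          return Math.abs(key.hashCode()) % numPartitions;
        }
      },
      0);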
Example No. 4
  public static void main(final String[] args) throws Exception {

    if (!parseParameters(args)) {
      return;
    }

    // set up the execution environment
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // get input data
    final DataStream<String> text = getTextDataStream(env);

    final DataStream<Tuple2<String, Integer>> counts =
        text
            // split up the lines in pairs (2-tuples) containing: (word,1)
            // this is done by a bolt that is wrapped accordingly
            .transform(
                "BoltTokenizer",
                TypeExtractor.getForObject(new Tuple2<String, Integer>("", 0)),
                new BoltWrapper<String, Tuple2<String, Integer>>(new BoltTokenizer()))
            // group by the tuple field "0" and sum up tuple field "1"
            .keyBy(0)
            .sum(1);

    // emit result
    if (fileOutput) {
      counts.writeAsText(outputPath);
    } else {
      counts.print();
    }

    // execute program
    env.execute("Streaming WordCount with bolt tokenizer");
  }
Example No. 5
  /**
   * Applies a FlatMap transformation on a {@link DataStream}. The transformation calls a {@link
   * FlatMapFunction} for each element of the DataStream. Each FlatMapFunction call can return any
   * number of elements including none. The user can also extend {@link RichFlatMapFunction} to gain
   * access to other features provided by the {@link
   * org.apache.flink.api.common.functions.RichFunction} interface.
   *
   * @param flatMapper The FlatMapFunction that is called for each element of the DataStream
   * @param <R> output type
   * @return The transformed {@link DataStream}.
   */
  public <R> SingleOutputStreamOperator<R> flatMap(FlatMapFunction<T, R> flatMapper) {

    TypeInformation<R> outType =
        TypeExtractor.getFlatMapReturnTypes(
            clean(flatMapper), getType(), Utils.getCallLocationName(), true);

    return transform("Flat Map", outType, new StreamFlatMap<>(clean(flatMapper)));
  }
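A minimal usage sketch, assuming a StreamExecutionEnvironment named env: tokenizing lines into words shows why each call may emit zero or more elements.

  DataStream<String> lines = env.fromElements("to be", "or not to be");
  DataStream<String> words =
      lines.flatMap(
          new FlatMapFunction<String, String>() {
            @Override
            public void flatMap(String line, Collector<String> out) {
              // one input line fans out into any number of words
              for (String word : line.split(" ")) {
                out.collect(word);
              }
            }
          });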
Example No. 6
  /**
   * Applies a window function to the window. The window function is called for each evaluation of
   * the window. The output of the window function is interpreted as a regular non-windowed stream.
   *
   * <p>Note that this function requires that all data in the windows is buffered until the window
   * is evaluated, as the function provides no means of pre-aggregation.
   *
   * @param function The window function.
   * @return The data stream that is the result of applying the window function to the window.
   */
  public <R> SingleOutputStreamOperator<R, ?> apply(AllWindowFunction<Iterable<T>, R, W> function) {
    @SuppressWarnings("unchecked, rawtypes")
    TypeInformation<Iterable<T>> iterTypeInfo = new GenericTypeInfo<>((Class) Iterable.class);
    TypeInformation<R> resultType =
        TypeExtractor.getUnaryOperatorReturnType(
            function, AllWindowFunction.class, true, true, iterTypeInfo, null, false);

    return apply(function, resultType);
  }
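A hedged usage sketch against the signature above (note its IN parameter is the full Iterable<T>, unlike the newer interface used in Example No. 1): summing each count-triggered global window, with env assumed.

  DataStream<Long> input = env.generateSequence(1, 100);
  DataStream<Long> sums =
      input
          .windowAll(GlobalWindows.create())
          .trigger(PurgingTrigger.of(CountTrigger.of(10)))
          .apply(
              new AllWindowFunction<Iterable<Long>, Long, GlobalWindow>() {
                @Override
                public void apply(
                    GlobalWindow window, Iterable<Long> values, Collector<Long> out) {
                  long sum = 0;
                  for (Long value : values) {
                    sum += value;
                  }
                  out.collect(sum);
                }
              });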
Example No. 7
  /**
   * Applies the given window function to each window. The window function is called for each
   * evaluation of the window. The output of the window function is interpreted as a regular
   * non-windowed stream.
   *
   * <p>Arriving data is pre-aggregated using the given pre-aggregation reducer.
   *
   * @param preAggregator The reduce function that is used for pre-aggregation
   * @param function The window function.
   * @return The data stream that is the result of applying the window function to the window.
   */
  public <R> SingleOutputStreamOperator<R, ?> apply(
      ReduceFunction<T> preAggregator, AllWindowFunction<T, R, W> function) {
    TypeInformation<T> inType = input.getType();
    TypeInformation<R> resultType =
        TypeExtractor.getUnaryOperatorReturnType(
            function, AllWindowFunction.class, true, true, inType, null, false);

    return apply(preAggregator, function, resultType);
  }
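A hedged sketch of the pre-aggregated variant: because the reducer combines elements eagerly, the window function receives a single pre-reduced value of type T per evaluation (env is assumed, as before).

  DataStream<Long> input = env.generateSequence(1, 100);
  DataStream<String> maxima =
      input
          .windowAll(GlobalWindows.create())
          .trigger(PurgingTrigger.of(CountTrigger.of(10)))
          .apply(
              new ReduceFunction<Long>() {
                @Override
                public Long reduce(Long a, Long b) {
                  return Math.max(a, b);
                }
              },
              new AllWindowFunction<Long, String, GlobalWindow>() {
                @Override
                public void apply(GlobalWindow window, Long max, Collector<String> out) {
                  out.collect("window max: " + max);
                }
              });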
Example No. 8
  @Test
  public void testTypeInfo() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Long> src1 = env.generateSequence(0, 0);
    assertEquals(TypeExtractor.getForClass(Long.class), src1.getType());

    DataStream<Tuple2<Integer, String>> map =
        src1.map(
            new MapFunction<Long, Tuple2<Integer, String>>() {
              @Override
              public Tuple2<Integer, String> map(Long value) throws Exception {
                return null;
              }
            });

    assertEquals(TypeExtractor.getForObject(new Tuple2<Integer, String>(0, "")), map.getType());

    WindowedDataStream<String> window =
        map.window(Count.of(5))
            .mapWindow(
                new WindowMapFunction<Tuple2<Integer, String>, String>() {
                  @Override
                  public void mapWindow(
                      Iterable<Tuple2<Integer, String>> values, Collector<String> out)
                      throws Exception {}
                });

    assertEquals(TypeExtractor.getForClass(String.class), window.getType());

    DataStream<CustomPOJO> flatten =
        window
            .foldWindow(
                new CustomPOJO(),
                new FoldFunction<String, CustomPOJO>() {
                  @Override
                  public CustomPOJO fold(CustomPOJO accumulator, String value) throws Exception {
                    return null;
                  }
                })
            .flatten();

    assertEquals(TypeExtractor.getForClass(CustomPOJO.class), flatten.getType());
  }
Example No. 9
  /**
   * Applies the given fold function to each window. The fold function is called for each
   * evaluation of the window. The output of the fold function is interpreted as a regular
   * non-windowed stream.
   *
   * @param function The fold function.
   * @return The data stream that is the result of applying the fold function to the window.
   */
  public <R> SingleOutputStreamOperator<R, ?> fold(R initialValue, FoldFunction<T, R> function) {
    // clean the closure
    function = input.getExecutionEnvironment().clean(function);

    TypeInformation<R> resultType =
        TypeExtractor.getFoldReturnTypes(
            function, input.getType(), Utils.getCallLocationName(), true);

    return apply(new FoldAllWindowFunction<W, T, R>(initialValue, function), resultType);
  }
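A hedged usage sketch, assuming windowedIntegers is a non-keyed windowed stream of Integer elements obtained as in the examples above: fold each window into a String, starting from the initial value "".

  SingleOutputStreamOperator<String, ?> concatenated =
      windowedIntegers.fold(
          "",
          new FoldFunction<Integer, String>() {
            @Override
            public String fold(String accumulator, Integer value) {
              return accumulator + value;
            }
          });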
Example No. 10
  private TypeSerializer<Object> createSerializer(Object key, int pos) {
    if (key == null) {
      throw new NullKeyFieldException(pos);
    }
    try {
      TypeInformation<Object> info = TypeExtractor.getForObject(key);
      return info.createSerializer(executionConfig);
    } catch (Throwable t) {
      // report the key's class rather than its value, and preserve the cause
      throw new RuntimeException(
          "Could not create key serializer for type " + key.getClass().getName(), t);
    }
  }
Example No. 11
    /** Specifies a {@link KeySelector} for elements from the second input. */
    public EqualTo equalTo(KeySelector<T2, KEY> keySelector) {
      TypeInformation<KEY> otherKey =
          TypeExtractor.getKeySelectorTypes(keySelector, input2.getType());
      if (!otherKey.equals(this.keyType)) {
        throw new IllegalArgumentException(
            "The keys for the two inputs are not equal: "
                + "first key = "
                + this.keyType
                + " , second key = "
                + otherKey);
      }

      return new EqualTo(input2.clean(keySelector));
    }
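A hedged sketch of the enclosing builder, with hypothetical streams orders (Tuple2<String, Integer>) and payments (Tuple2<String, Long>): both selectors extract a String key, so the equality check above passes; mismatched key types would throw the IllegalArgumentException.

  orders
      .join(payments)
      .where(
          new KeySelector<Tuple2<String, Integer>, String>() {
            @Override
            public String getKey(Tuple2<String, Integer> value) {
              return value.f0;
            }
          })
      .equalTo(
          new KeySelector<Tuple2<String, Long>, String>() {
            @Override
            public String getKey(Tuple2<String, Long> value) {
              return value.f0;
            }
          });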
Example No. 12
    /**
     * Completes the co-group operation with the user function that is executed for windowed groups.
     */
    public <T> DataStream<T> apply(CoGroupFunction<T1, T2, T> function) {

      TypeInformation<T> resultType =
          TypeExtractor.getBinaryOperatorReturnType(
              function,
              CoGroupFunction.class,
              true,
              true,
              input1.getType(),
              input2.getType(),
              "CoGroup",
              false);

      return apply(function, resultType);
    }
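A hedged sketch of the full streaming co-group chain; the streams one and two, selectors key1/key2, and the window assigner name are assumptions for this Flink version (older releases used a different assigner class name):

  DataStream<String> tagged =
      one.coGroup(two)
          .where(key1)
          .equalTo(key2)
          .window(TumblingProcessingTimeWindows.of(Time.seconds(5)))
          .apply(
              new CoGroupFunction<Integer, Integer, String>() {
                @Override
                public void coGroup(
                    Iterable<Integer> first, Iterable<Integer> second, Collector<String> out) {
                  // both sides of the window are handed over together
                  for (Integer left : first) {
                    out.collect("left: " + left);
                  }
                  for (Integer right : second) {
                    out.collect("right: " + right);
                  }
                }
              });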
Example No. 13
  /**
   * Applies a CoMap transformation on a {@link ConnectedStreams} and maps the output to a common
   * type. The transformation calls a {@link CoMapFunction#map1} for each element of the first input
   * and {@link CoMapFunction#map2} for each element of the second input. Each CoMapFunction call
   * returns exactly one element.
   *
   * @param coMapper The CoMapFunction used to jointly transform the two input DataStreams
   * @return The transformed {@link DataStream}
   */
  public <R> SingleOutputStreamOperator<R> map(CoMapFunction<IN1, IN2, R> coMapper) {

    TypeInformation<R> outTypeInfo =
        TypeExtractor.getBinaryOperatorReturnType(
            coMapper,
            CoMapFunction.class,
            false,
            true,
            getType1(),
            getType2(),
            Utils.getCallLocationName(),
            true);

    return transform("Co-Map", outTypeInfo, new CoStreamMap<>(inputStream1.clean(coMapper)));
  }
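A hedged usage sketch (env assumed): unify an Integer stream and a String stream into a common String output.

  DataStream<Integer> numbers = env.fromElements(1, 2, 3);
  DataStream<String> letters = env.fromElements("a", "b");
  DataStream<String> unified =
      numbers
          .connect(letters)
          .map(
              new CoMapFunction<Integer, String, String>() {
                @Override
                public String map1(Integer value) {
                  return String.valueOf(value);
                }

                @Override
                public String map2(String value) {
                  return value;
                }
              });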
Example No. 14
  private static DataStream<Tuple1<String>> getTextDataStream(
      final StreamExecutionEnvironment env) {
    if (fileOutput) {
      // read the text file from given input path
      TupleTypeInfo<Tuple1<String>> sourceType =
          (TupleTypeInfo<Tuple1<String>>) TypeExtractor.getForObject(new Tuple1<String>(""));
      return env.createInput(
          new CsvInputFormat<Tuple1<String>>(
              new Path(textPath),
              CsvInputFormat.DEFAULT_LINE_DELIMITER,
              CsvInputFormat.DEFAULT_LINE_DELIMITER,
              sourceType),
          sourceType);
    }

    return env.fromElements(WordCountDataTuple.TUPLES);
  }
Example No. 15
public class PojoSubclassComparatorTest extends ComparatorTestBase<PojoContainingTuple> {
  TypeInformation<PojoContainingTuple> type = TypeExtractor.getForClass(PojoContainingTuple.class);

  PojoContainingTuple[] data =
      new PojoContainingTuple[] {
        new Subclass(1, 1L, 1L, 17L),
        new Subclass(2, 2L, 2L, 42L),
        new Subclass(8519, 85190L, 85190L, 117L),
        new Subclass(8520, 85191L, 85191L, 93L),
      };

  @Override
  protected TypeComparator<PojoContainingTuple> createComparator(boolean ascending) {
    Assert.assertTrue(type instanceof CompositeType);
    CompositeType<PojoContainingTuple> cType = (CompositeType<PojoContainingTuple>) type;
    ExpressionKeys<PojoContainingTuple> keys =
        new ExpressionKeys<PojoContainingTuple>(new String[] {"theTuple.*"}, cType);
    boolean[] orders = new boolean[keys.getNumberOfKeyFields()];
    Arrays.fill(orders, ascending);
    return cType.createComparator(
        keys.computeLogicalKeyPositions(), orders, 0, new ExecutionConfig());
  }

  @Override
  protected TypeSerializer<PojoContainingTuple> createSerializer() {
    return type.createSerializer(new ExecutionConfig());
  }

  @Override
  protected PojoContainingTuple[] getSortedTestData() {
    return data;
  }

  public static class Subclass extends PojoContainingTuple {

    public long additionalField;

    public Subclass() {}

    public Subclass(int i, long l1, long l2, long additionalField) {
      super(i, l1, l2);
      this.additionalField = additionalField;
    }
  }
}
Example No. 16
    @Override
    public <S extends Serializable> ValueState<S> getKeyValueState(
        String name, Class<S> stateType, S defaultState) {
      requireNonNull(stateType, "The state type class must not be null");

      TypeInformation<S> typeInfo;
      try {
        typeInfo = TypeExtractor.getForClass(stateType);
      } catch (Exception e) {
        throw new RuntimeException(
            "Cannot analyze type '"
                + stateType.getName()
                + "' from the class alone, due to generic type parameters. "
                + "Please specify the TypeInformation directly.",
            e);
      }

      return getKeyValueState(name, typeInfo, defaultState);
    }
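A hedged sketch of a call site (the runtime context variable context is assumed): a non-generic type such as Long can be described by its class alone, while a generic state type would hit the exception branch above and must supply TypeInformation explicitly.

  // value() and update() may throw IOException, so call from a method that declares it
  ValueState<Long> count = context.getKeyValueState("count", Long.class, 0L);
  count.update(count.value() + 1);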
Example No. 17
        /**
         * Finalizes a CoGroup transformation by applying a {@link
         * org.apache.flink.api.common.functions.RichCoGroupFunction} to groups of elements with
         * identical keys.<br>
         * Each CoGroupFunction call returns an arbitrary number of elements.
         *
         * @param function The CoGroupFunction that is called for all groups of elements with
         *     identical keys.
         * @return A CoGroupOperator that represents the co-grouped result DataSet.
         * @see org.apache.flink.api.common.functions.RichCoGroupFunction
         * @see DataSet
         */
        public <R> CoGroupOperator<I1, I2, R> with(CoGroupFunction<I1, I2, R> function) {
          if (function == null) {
            throw new NullPointerException("CoGroup function must not be null.");
          }
          TypeInformation<R> returnType =
              TypeExtractor.getCoGroupReturnTypes(
                  function, input1.getType(), input2.getType(), Utils.getCallLocationName(), true);

          return new CoGroupOperator<>(
              input1,
              input2,
              keys1,
              keys2,
              input1.clean(function),
              returnType,
              groupSortKeyOrderFirst,
              groupSortKeyOrderSecond,
              customPartitioner,
              Utils.getCallLocationName());
        }
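A hedged usage sketch, assuming DataSets ds1 and ds2 of Tuple2<String, Integer>: count how many elements each side contributes per key.

  ds1.coGroup(ds2)
      .where(0)
      .equalTo(0)
      .with(
          new CoGroupFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, String>() {
            @Override
            public void coGroup(
                Iterable<Tuple2<String, Integer>> first,
                Iterable<Tuple2<String, Integer>> second,
                Collector<String> out) {
              int left = 0;
              int right = 0;
              for (Tuple2<String, Integer> ignored : first) {
                left++;
              }
              for (Tuple2<String, Integer> ignored : second) {
                right++;
              }
              out.collect(left + " vs " + right);
            }
          });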
Example No. 18
  @Test
  public void testImmutableEmpty() {
    try {
      TestTaskContext<ReduceFunction<Tuple2<String, Integer>>, Tuple2<String, Integer>> context =
          new TestTaskContext<ReduceFunction<Tuple2<String, Integer>>, Tuple2<String, Integer>>(
              1024 * 1024);
      context.getTaskConfig().setRelativeMemoryDriver(0.5);

      List<Tuple2<String, Integer>> data = DriverTestData.createReduceImmutableData();
      Collections.shuffle(data);

      TupleTypeInfo<Tuple2<String, Integer>> typeInfo =
          (TupleTypeInfo<Tuple2<String, Integer>>) TypeExtractor.getForObject(data.get(0));
      MutableObjectIterator<Tuple2<String, Integer>> input = EmptyMutableObjectIterator.get();

      context.setDriverStrategy(DriverStrategy.SORTED_PARTIAL_REDUCE);
      TypeComparator<Tuple2<String, Integer>> comparator =
          typeInfo.createComparator(new int[] {0}, new boolean[] {true}, 0, new ExecutionConfig());

      GatheringCollector<Tuple2<String, Integer>> result =
          new GatheringCollector<Tuple2<String, Integer>>(
              typeInfo.createSerializer(new ExecutionConfig()));

      context.setInput1(input, typeInfo.createSerializer(new ExecutionConfig()));
      context.setComparator1(comparator);
      context.setCollector(result);

      ReduceCombineDriver<Tuple2<String, Integer>> driver =
          new ReduceCombineDriver<Tuple2<String, Integer>>();
      driver.setup(context);
      driver.prepare();
      driver.run();

      Assert.assertEquals(0, result.getList().size());
    } catch (Exception e) {
      System.err.println(e.getMessage());
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }
Example No. 19
public class JumpingTimePreReducerTest {

  TypeSerializer<Integer> serializer = TypeExtractor.getForObject(1).createSerializer(null);

  ReduceFunction<Integer> reducer = new SumReducer();

  @Test
  public void testEmitWindow() throws Exception {

    TestCollector<StreamWindow<Integer>> collector = new TestCollector<StreamWindow<Integer>>();
    List<StreamWindow<Integer>> collected = collector.getCollected();

    WindowBuffer<Integer> wb =
        new JumpingTimePreReducer<Integer>(
            reducer,
            serializer,
            3,
            2,
            new TimestampWrapper<Integer>(
                new Timestamp<Integer>() {

                  private static final long serialVersionUID = 1L;

                  @Override
                  public long getTimestamp(Integer value) {
                    return value;
                  }
                },
                1));

    wb.store(1);
    wb.store(2);
    wb.store(3);
    wb.evict(1);
    wb.emitWindow(collector);

    assertEquals(1, collected.size());
    assertEquals(StreamWindow.fromElements(5), collected.get(0));

    wb.store(4);
    wb.store(5);

    // Nothing should happen here
    wb.evict(2);

    wb.store(6);

    wb.emitWindow(collector);
    wb.evict(2);
    wb.emitWindow(collector);
    wb.store(12);
    wb.emitWindow(collector);

    assertEquals(3, collected.size());
    assertEquals(StreamWindow.fromElements(11), collected.get(1));
    assertEquals(StreamWindow.fromElements(12), collected.get(2));
  }

  private static class SumReducer implements ReduceFunction<Integer> {

    private static final long serialVersionUID = 1L;

    @Override
    public Integer reduce(Integer value1, Integer value2) throws Exception {
      return value1 + value2;
    }
  }
}
Example No. 20
  @SuppressWarnings({"unchecked", "rawtypes"})
  private SingleOutputStreamOperator<?, ?> createOutput(
      String boltId, IRichBolt bolt, Map<GlobalStreamId, DataStream<Tuple>> inputStreams) {
    assert (boltId != null);
    assert (bolt != null);
    assert (inputStreams != null);

    Iterator<Entry<GlobalStreamId, DataStream<Tuple>>> iterator =
        inputStreams.entrySet().iterator();

    Entry<GlobalStreamId, DataStream<Tuple>> input1 = iterator.next();
    GlobalStreamId streamId1 = input1.getKey();
    String inputStreamId1 = streamId1.get_streamId();
    String inputComponentId1 = streamId1.get_componentId();
    Fields inputSchema1 = this.outputStreams.get(inputComponentId1).get(inputStreamId1);
    DataStream<Tuple> singleInputStream = input1.getValue();

    DataStream<StormTuple<Tuple>> mergedInputStream = null;
    while (iterator.hasNext()) {
      Entry<GlobalStreamId, DataStream<Tuple>> input2 = iterator.next();
      GlobalStreamId streamId2 = input2.getKey();
      DataStream<Tuple> inputStream2 = input2.getValue();

      if (mergedInputStream == null) {
        mergedInputStream =
            singleInputStream
                .connect(inputStream2)
                .flatMap(
                    new TwoFlinkStreamsMerger(
                        streamId1,
                        inputSchema1,
                        streamId2,
                        this.outputStreams
                            .get(streamId2.get_componentId())
                            .get(streamId2.get_streamId())))
                .returns(StormTuple.class);
      } else {
        mergedInputStream =
            mergedInputStream
                .connect(inputStream2)
                .flatMap(
                    new StormFlinkStreamMerger(
                        streamId2,
                        this.outputStreams
                            .get(streamId2.get_componentId())
                            .get(streamId2.get_streamId())))
                .returns(StormTuple.class);
      }
    }

    final HashMap<String, Fields> boltOutputs = this.outputStreams.get(boltId);
    final FlinkOutputFieldsDeclarer declarer = this.declarers.get(boltId);

    final SingleOutputStreamOperator<?, ?> outputStream;

    if (boltOutputs.size() < 2) { // single output stream or sink
      String outputStreamId;
      if (boltOutputs.size() == 1) {
        outputStreamId = (String) boltOutputs.keySet().toArray()[0];
      } else {
        outputStreamId = null;
      }

      final TypeInformation<Tuple> outType = declarer.getOutputType(outputStreamId);

      final SingleOutputStreamOperator<Tuple, ?> outStream;

      // only one input
      if (inputStreams.entrySet().size() == 1) {
        BoltWrapper<Tuple, Tuple> boltWrapper =
            new BoltWrapper<>(bolt, boltId, inputStreamId1, inputComponentId1, inputSchema1, null);
        boltWrapper.setStormTopology(stormTopology);
        outStream = singleInputStream.transform(boltId, outType, boltWrapper);
      } else {
        MergedInputsBoltWrapper<Tuple, Tuple> boltWrapper =
            new MergedInputsBoltWrapper<Tuple, Tuple>(bolt, boltId, null);
        boltWrapper.setStormTopology(stormTopology);
        outStream = mergedInputStream.transform(boltId, outType, boltWrapper);
      }

      if (outType != null) {
        // only for non-sink nodes
        final HashMap<String, DataStream<Tuple>> op = new HashMap<>();
        op.put(outputStreamId, outStream);
        availableInputs.put(boltId, op);
      }
      outputStream = outStream;
    } else {
      final TypeInformation<SplitStreamType<Tuple>> outType =
          (TypeInformation) TypeExtractor.getForClass(SplitStreamType.class);

      final SingleOutputStreamOperator<SplitStreamType<Tuple>, ?> multiStream;

      // only one input
      if (inputStreams.entrySet().size() == 1) {
        final BoltWrapper<Tuple, SplitStreamType<Tuple>> boltWrapperMultipleOutputs =
            new BoltWrapper<>(bolt, boltId, inputStreamId1, inputComponentId1, inputSchema1, null);
        boltWrapperMultipleOutputs.setStormTopology(stormTopology);
        multiStream = singleInputStream.transform(boltId, outType, boltWrapperMultipleOutputs);
      } else {
        final MergedInputsBoltWrapper<Tuple, SplitStreamType<Tuple>> boltWrapperMultipleOutputs =
            new MergedInputsBoltWrapper<Tuple, SplitStreamType<Tuple>>(bolt, boltId, null);
        boltWrapperMultipleOutputs.setStormTopology(stormTopology);
        multiStream = mergedInputStream.transform(boltId, outType, boltWrapperMultipleOutputs);
      }

      final SplitStream<SplitStreamType<Tuple>> splitStream =
          multiStream.split(new StormStreamSelector<Tuple>());

      final HashMap<String, DataStream<Tuple>> op = new HashMap<>();
      for (String outputStreamId : boltOutputs.keySet()) {
        // map each logical output stream back to a plain Tuple stream and register it
        SingleOutputStreamOperator<Tuple, ?> outStream =
            splitStream.select(outputStreamId).map(new SplitStreamMapper<Tuple>());
        outStream.getTransformation().setOutputType(declarer.getOutputType(outputStreamId));
        op.put(outputStreamId, outStream);
      }
      availableInputs.put(boltId, op);
      outputStream = multiStream;
    }

    return outputStream;
  }
Example No. 21
  @Test
  public void testAllReduceDriverImmutable() {
    try {
      {
        TestTaskContext<ReduceFunction<Tuple2<String, Integer>>, Tuple2<String, Integer>> context =
            new TestTaskContext<ReduceFunction<Tuple2<String, Integer>>, Tuple2<String, Integer>>();

        List<Tuple2<String, Integer>> data = DriverTestData.createReduceImmutableData();
        TypeInformation<Tuple2<String, Integer>> typeInfo = TypeExtractor.getForObject(data.get(0));
        MutableObjectIterator<Tuple2<String, Integer>> input =
            new RegularToMutableObjectIterator<Tuple2<String, Integer>>(
                data.iterator(), typeInfo.createSerializer());

        GatheringCollector<Tuple2<String, Integer>> result =
            new GatheringCollector<Tuple2<String, Integer>>(typeInfo.createSerializer());

        context.setDriverStrategy(DriverStrategy.ALL_REDUCE);
        context.setInput1(input, typeInfo.createSerializer());
        context.setCollector(result);
        context.setUdf(new ConcatSumFirstReducer());

        AllReduceDriver<Tuple2<String, Integer>> driver =
            new AllReduceDriver<Tuple2<String, Integer>>();
        driver.setup(context);
        driver.prepare();
        driver.run();

        Tuple2<String, Integer> res = result.getList().get(0);

        char[] foundString = res.f0.toCharArray();
        Arrays.sort(foundString);

        char[] expectedString = "abcddeeeffff".toCharArray();
        Arrays.sort(expectedString);

        Assert.assertArrayEquals(expectedString, foundString);
        Assert.assertEquals(78, res.f1.intValue());
      }

      {
        TestTaskContext<ReduceFunction<Tuple2<String, Integer>>, Tuple2<String, Integer>> context =
            new TestTaskContext<ReduceFunction<Tuple2<String, Integer>>, Tuple2<String, Integer>>();

        List<Tuple2<String, Integer>> data = DriverTestData.createReduceImmutableData();
        TypeInformation<Tuple2<String, Integer>> typeInfo = TypeExtractor.getForObject(data.get(0));
        MutableObjectIterator<Tuple2<String, Integer>> input =
            new RegularToMutableObjectIterator<Tuple2<String, Integer>>(
                data.iterator(), typeInfo.createSerializer());

        GatheringCollector<Tuple2<String, Integer>> result =
            new GatheringCollector<Tuple2<String, Integer>>(typeInfo.createSerializer());

        context.setDriverStrategy(DriverStrategy.ALL_REDUCE);
        context.setInput1(input, typeInfo.createSerializer());
        context.setCollector(result);
        context.setUdf(new ConcatSumSecondReducer());

        AllReduceDriver<Tuple2<String, Integer>> driver =
            new AllReduceDriver<Tuple2<String, Integer>>();
        driver.setup(context);
        driver.prepare();
        driver.run();

        Tuple2<String, Integer> res = result.getList().get(0);

        char[] foundString = res.f0.toCharArray();
        Arrays.sort(foundString);

        char[] expectedString = "abcddeeeffff".toCharArray();
        Arrays.sort(expectedString);

        Assert.assertArrayEquals(expectedString, foundString);
        Assert.assertEquals(78, res.f1.intValue());
      }
    } catch (Exception e) {
      System.err.println(e.getMessage());
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }
Example No. 22
public class SlidingTimePreReducerTest {

  TypeSerializer<Integer> serializer = TypeExtractor.getForObject(1).createSerializer(null);
  TypeInformation<Tuple2<Integer, Integer>> tupleType =
      TypeInfoParser.parse("Tuple2<Integer,Integer>");

  ReduceFunction<Integer> reducer = new SumReducer();
  ReduceFunction<Tuple2<Integer, Integer>> tupleReducer = new TupleSumReducer();

  @Test
  @SuppressWarnings("unchecked")
  public void testPreReduce1() throws Exception {
    // This ensures that the buffer is properly cleared after a burst of elements by
    // replaying the same sequence of elements with a later timestamp and expecting the same
    // result.

    TestOutput<StreamWindow<Tuple2<Integer, Integer>>> collector =
        new TestOutput<StreamWindow<Tuple2<Integer, Integer>>>();

    SlidingTimePreReducer<Tuple2<Integer, Integer>> preReducer =
        new SlidingTimePreReducer<Tuple2<Integer, Integer>>(
            tupleReducer,
            tupleType.createSerializer(new ExecutionConfig()),
            3,
            2,
            new TimestampWrapper<Tuple2<Integer, Integer>>(
                new Timestamp<Tuple2<Integer, Integer>>() {

                  private static final long serialVersionUID = 1L;

                  @Override
                  public long getTimestamp(Tuple2<Integer, Integer> value) {
                    return value.f0;
                  }
                },
                1));

    int timeOffset = 0;

    preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 1, 1));
    preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 2, 2));
    preReducer.emitWindow(collector);
    preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 3, 3));
    preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 4, 4));
    preReducer.evict(1);
    preReducer.emitWindow(collector);
    preReducer.evict(2);
    preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 5, 5));
    preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 6, 6));
    preReducer.emitWindow(collector);
    preReducer.evict(2);
    preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 7, 7));
    preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 8, 8));
    preReducer.emitWindow(collector);
    preReducer.evict(2);
    preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 9, 9));
    preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 10, 10));
    preReducer.emitWindow(collector);
    preReducer.evict(2);
    preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 11, 11));
    preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 12, 12));
    preReducer.emitWindow(collector);
    preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 13, 13));

    // ensure that everything is cleared out
    preReducer.evict(100);

    timeOffset = 25; // a little while later...

    // Repeat the same sequence, this should produce the same result
    preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 1, 1));
    preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 2, 2));
    preReducer.emitWindow(collector);
    preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 3, 3));
    preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 4, 4));
    preReducer.evict(1);
    preReducer.emitWindow(collector);
    preReducer.evict(2);
    preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 5, 5));
    preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 6, 6));
    preReducer.emitWindow(collector);
    preReducer.evict(2);
    preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 7, 7));
    preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 8, 8));
    preReducer.emitWindow(collector);
    preReducer.evict(2);
    preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 9, 9));
    preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 10, 10));
    preReducer.emitWindow(collector);
    preReducer.evict(2);
    preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 11, 11));
    preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 12, 12));
    preReducer.emitWindow(collector);
    preReducer.store(new Tuple2<Integer, Integer>(timeOffset + 13, 13));

    List<StreamWindow<Tuple2<Integer, Integer>>> expected =
        new ArrayList<StreamWindow<Tuple2<Integer, Integer>>>();
    timeOffset = 0; // rewind ...
    expected.add(StreamWindow.fromElements(new Tuple2<Integer, Integer>(timeOffset + 1, 3)));
    expected.add(StreamWindow.fromElements(new Tuple2<Integer, Integer>(timeOffset + 2, 9)));
    expected.add(StreamWindow.fromElements(new Tuple2<Integer, Integer>(timeOffset + 4, 15)));
    expected.add(StreamWindow.fromElements(new Tuple2<Integer, Integer>(timeOffset + 6, 21)));
    expected.add(StreamWindow.fromElements(new Tuple2<Integer, Integer>(timeOffset + 8, 27)));
    expected.add(StreamWindow.fromElements(new Tuple2<Integer, Integer>(timeOffset + 10, 33)));

    timeOffset = 25; // and back to the future ...
    expected.add(StreamWindow.fromElements(new Tuple2<Integer, Integer>(timeOffset + 1, 3)));
    expected.add(StreamWindow.fromElements(new Tuple2<Integer, Integer>(timeOffset + 2, 9)));
    expected.add(StreamWindow.fromElements(new Tuple2<Integer, Integer>(timeOffset + 4, 15)));
    expected.add(StreamWindow.fromElements(new Tuple2<Integer, Integer>(timeOffset + 6, 21)));
    expected.add(StreamWindow.fromElements(new Tuple2<Integer, Integer>(timeOffset + 8, 27)));
    expected.add(StreamWindow.fromElements(new Tuple2<Integer, Integer>(timeOffset + 10, 33)));

    assertEquals(expected, collector.getCollected());
  }

  @Test
  public void testPreReduce2() throws Exception {
    TestOutput<StreamWindow<Integer>> collector = new TestOutput<StreamWindow<Integer>>();

    SlidingTimePreReducer<Integer> preReducer =
        new SlidingTimePreReducer<Integer>(
            reducer,
            serializer,
            5,
            2,
            new TimestampWrapper<Integer>(
                new Timestamp<Integer>() {

                  private static final long serialVersionUID = 1L;

                  @Override
                  public long getTimestamp(Integer value) {
                    return value;
                  }
                },
                1));

    preReducer.store(1);
    preReducer.store(2);
    preReducer.emitWindow(collector);
    preReducer.store(3);
    preReducer.store(4);
    preReducer.emitWindow(collector);
    preReducer.store(5);
    preReducer.store(6);
    preReducer.evict(1);
    preReducer.emitWindow(collector);
    preReducer.evict(2);
    preReducer.store(7);
    preReducer.store(8);
    preReducer.emitWindow(collector);
    preReducer.evict(2);
    preReducer.store(9);
    preReducer.store(10);
    preReducer.emitWindow(collector);
    preReducer.evict(2);
    preReducer.store(11);
    preReducer.store(12);
    preReducer.emitWindow(collector);
    preReducer.store(13);

    List<StreamWindow<Integer>> expected = new ArrayList<StreamWindow<Integer>>();
    expected.add(StreamWindow.fromElements(3));
    expected.add(StreamWindow.fromElements(10));
    expected.add(StreamWindow.fromElements(20));
    expected.add(StreamWindow.fromElements(30));
    expected.add(StreamWindow.fromElements(40));
    expected.add(StreamWindow.fromElements(50));

    assertEquals(expected, collector.getCollected());
  }

  @Test
  public void testPreReduce3() throws Exception {
    TestOutput<StreamWindow<Integer>> collector = new TestOutput<StreamWindow<Integer>>();

    SlidingTimePreReducer<Integer> preReducer =
        new SlidingTimePreReducer<Integer>(
            reducer,
            serializer,
            6,
            3,
            new TimestampWrapper<Integer>(
                new Timestamp<Integer>() {

                  private static final long serialVersionUID = 1L;

                  @Override
                  public long getTimestamp(Integer value) {
                    return value;
                  }
                },
                1));

    preReducer.store(1);
    preReducer.store(2);
    preReducer.store(3);
    preReducer.emitWindow(collector);
    preReducer.store(4);
    preReducer.store(5);
    preReducer.store(6);
    preReducer.emitWindow(collector);
    preReducer.evict(3);
    preReducer.store(7);
    preReducer.store(8);
    preReducer.store(9);
    preReducer.emitWindow(collector);
    preReducer.evict(3);
    preReducer.store(10);
    preReducer.store(11);
    preReducer.store(12);
    preReducer.emitWindow(collector);
    preReducer.evict(3);
    preReducer.store(13);

    List<StreamWindow<Integer>> expected = new ArrayList<StreamWindow<Integer>>();
    expected.add(StreamWindow.fromElements(6));
    expected.add(StreamWindow.fromElements(21));
    expected.add(StreamWindow.fromElements(39));
    expected.add(StreamWindow.fromElements(57));

    assertEquals(expected, collector.getCollected());
  }

  @Test
  public void testPreReduce4() throws Exception {
    TestOutput<StreamWindow<Integer>> collector = new TestOutput<StreamWindow<Integer>>();

    SlidingTimePreReducer<Integer> preReducer =
        new SlidingTimePreReducer<Integer>(
            reducer,
            serializer,
            3,
            2,
            new TimestampWrapper<Integer>(
                new Timestamp<Integer>() {

                  private static final long serialVersionUID = 1L;

                  @Override
                  public long getTimestamp(Integer value) {
                    return value;
                  }
                },
                1));

    preReducer.store(1);
    preReducer.store(2);
    preReducer.emitWindow(collector);
    preReducer.store(3);
    preReducer.store(4);
    preReducer.evict(1);
    preReducer.emitWindow(collector);
    preReducer.evict(2);
    preReducer.store(5);
    preReducer.store(6);
    preReducer.emitWindow(collector);
    preReducer.evict(2);
    preReducer.store(7);
    preReducer.store(8);
    preReducer.emitWindow(collector);
    preReducer.evict(2);
    preReducer.emitWindow(collector);
    preReducer.emitWindow(collector);
    preReducer.evict(2);
    preReducer.store(14);
    preReducer.emitWindow(collector);
    preReducer.emitWindow(collector);
    preReducer.evict(1);
    preReducer.emitWindow(collector);
    preReducer.emitWindow(collector);
    preReducer.store(21);
    preReducer.emitWindow(collector);
    preReducer.evict(1);
    preReducer.emitWindow(collector);

    preReducer.store(9);

    List<StreamWindow<Integer>> expected = new ArrayList<StreamWindow<Integer>>();
    expected.add(StreamWindow.fromElements(3));
    expected.add(StreamWindow.fromElements(9));
    expected.add(StreamWindow.fromElements(15));
    expected.add(StreamWindow.fromElements(21));
    expected.add(StreamWindow.fromElements(8));
    expected.add(StreamWindow.fromElements(8));
    expected.add(StreamWindow.fromElements(14));
    expected.add(StreamWindow.fromElements(14));
    expected.add(StreamWindow.fromElements(21));

    assertEquals(expected, collector.getCollected());
  }

  private static class SumReducer implements ReduceFunction<Integer> {

    private static final long serialVersionUID = 1L;

    @Override
    public Integer reduce(Integer value1, Integer value2) throws Exception {
      return value1 + value2;
    }
  }

  private static class TupleSumReducer implements ReduceFunction<Tuple2<Integer, Integer>> {

    private static final long serialVersionUID = 1L;

    @Override
    public Tuple2<Integer, Integer> reduce(
        Tuple2<Integer, Integer> value1, Tuple2<Integer, Integer> value2) throws Exception {
      return new Tuple2<Integer, Integer>(value1.f0, value1.f1 + value2.f1);
    }
  }
}
Example No. 23
 /**
  * Continues a CoGroup transformation and defines a {@link KeySelector} function for the
  * second co-grouped {@link DataSet}.<br>
  * The KeySelector function is called for each element of the second DataSet and extracts a
  * single key value on which the DataSet is grouped. <br>
  *
  * @param keyExtractor The KeySelector function which extracts the key values from the second
  *     DataSet on which it is grouped.
  * @return An incomplete CoGroup transformation. Call {@link
  *     org.apache.flink.api.java.operators.CoGroupOperator.CoGroupOperatorSets.CoGroupOperatorSetsPredicate.CoGroupOperatorWithoutFunction#with(org.apache.flink.api.common.functions.CoGroupFunction)}
  *     to finalize the CoGroup transformation.
  */
 public <K> CoGroupOperatorWithoutFunction equalTo(KeySelector<I2, K> keyExtractor) {
   TypeInformation<K> keyType =
       TypeExtractor.getKeySelectorTypes(keyExtractor, input2.getType());
   return createCoGroupOperator(
       new SelectorFunctionKeys<>(keyExtractor, input2.getType(), keyType));
 }
Example No. 24
 /** Specifies a {@link KeySelector} for elements from the first input. */
 public <KEY> Where<KEY> where(KeySelector<T1, KEY> keySelector) {
   TypeInformation<KEY> keyType = TypeExtractor.getKeySelectorTypes(keySelector, input1.getType());
   return new Where<>(input1.clean(keySelector), keyType);
 }
Example No. 25
 /**
  * Continues a CoGroup transformation and defines a {@link KeySelector} function for the first
  * co-grouped {@link DataSet}.<br>
  * The KeySelector function is called for each element of the first DataSet and extracts a
  * single key value on which the DataSet is grouped. <br>
  *
  * @param keyExtractor The KeySelector function which extracts the key values from the DataSet
  *     on which it is grouped.
  * @return An incomplete CoGroup transformation. Call {@link
  *     org.apache.flink.api.java.operators.CoGroupOperator.CoGroupOperatorSets.CoGroupOperatorSetsPredicate#equalTo(int...)}
  *     to continue the CoGroup.
  * @see KeySelector
  * @see DataSet
  */
 public <K> CoGroupOperatorSetsPredicate where(KeySelector<I1, K> keyExtractor) {
   TypeInformation<K> keyType =
       TypeExtractor.getKeySelectorTypes(keyExtractor, input1.getType());
   return new CoGroupOperatorSetsPredicate(
       new SelectorFunctionKeys<>(keyExtractor, input1.getType(), keyType));
 }
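A hedged sketch chaining the two KeySelector overloads above (the DataSets persons and purchases and their tuple types are illustrative); the result still needs with(...) to become a complete CoGroup:

  persons
      .coGroup(purchases)
      .where(
          new KeySelector<Tuple2<Long, String>, Long>() {
            @Override
            public Long getKey(Tuple2<Long, String> person) {
              return person.f0;
            }
          })
      .equalTo(
          new KeySelector<Tuple2<Long, Double>, Long>() {
            @Override
            public Long getKey(Tuple2<Long, Double> purchase) {
              return purchase.f0;
            }
          });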
Example No. 26
  @Override
  public TypeInformation<String> getProducedType() {
    return TypeExtractor.getForClass(String.class);
  }
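A hedged sketch of where such an override typically lives: DeserializationSchema is one interface that declares getProducedType (via ResultTypeQueryable), so a simple schema might look like this.

  public class StringSchema implements DeserializationSchema<String> {

    @Override
    public String deserialize(byte[] message) {
      return new String(message);
    }

    @Override
    public boolean isEndOfStream(String nextElement) {
      return false;
    }

    @Override
    public TypeInformation<String> getProducedType() {
      return TypeExtractor.getForClass(String.class);
    }
  }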
Example No. 27
  /** Creates a Flink program that uses the specified spouts and bolts. */
  private void translateTopology() {

    unprocessdInputsPerBolt.clear();
    outputStreams.clear();
    declarers.clear();
    availableInputs.clear();

    // Storm defaults to parallelism 1
    env.setParallelism(1);

    /* Translation of topology */

    for (final Entry<String, IRichSpout> spout : spouts.entrySet()) {
      final String spoutId = spout.getKey();
      final IRichSpout userSpout = spout.getValue();

      final FlinkOutputFieldsDeclarer declarer = new FlinkOutputFieldsDeclarer();
      userSpout.declareOutputFields(declarer);
      final HashMap<String, Fields> sourceStreams = declarer.outputStreams;
      this.outputStreams.put(spoutId, sourceStreams);
      declarers.put(spoutId, declarer);

      final HashMap<String, DataStream<Tuple>> outputStreams =
          new HashMap<String, DataStream<Tuple>>();
      final DataStreamSource<?> source;

      if (sourceStreams.size() == 1) {
        final SpoutWrapper<Tuple> spoutWrapperSingleOutput =
            new SpoutWrapper<Tuple>(userSpout, spoutId, null, null);
        spoutWrapperSingleOutput.setStormTopology(stormTopology);

        final String outputStreamId = (String) sourceStreams.keySet().toArray()[0];

        DataStreamSource<Tuple> src =
            env.addSource(
                spoutWrapperSingleOutput, spoutId, declarer.getOutputType(outputStreamId));

        outputStreams.put(outputStreamId, src);
        source = src;
      } else {
        final SpoutWrapper<SplitStreamType<Tuple>> spoutWrapperMultipleOutputs =
            new SpoutWrapper<SplitStreamType<Tuple>>(userSpout, spoutId, null, null);
        spoutWrapperMultipleOutputs.setStormTopology(stormTopology);

        @SuppressWarnings({"unchecked", "rawtypes"})
        DataStreamSource<SplitStreamType<Tuple>> multiSource =
            env.addSource(
                spoutWrapperMultipleOutputs,
                spoutId,
                (TypeInformation) TypeExtractor.getForClass(SplitStreamType.class));

        SplitStream<SplitStreamType<Tuple>> splitSource =
            multiSource.split(new StormStreamSelector<Tuple>());
        for (String streamId : sourceStreams.keySet()) {
          SingleOutputStreamOperator<Tuple, ?> outStream =
              splitSource.select(streamId).map(new SplitStreamMapper<Tuple>());
          outStream.getTransformation().setOutputType(declarer.getOutputType(streamId));
          outputStreams.put(streamId, outStream);
        }
        source = multiSource;
      }
      availableInputs.put(spoutId, outputStreams);

      final ComponentCommon common = stormTopology.get_spouts().get(spoutId).get_common();
      if (common.is_set_parallelism_hint()) {
        int dop = common.get_parallelism_hint();
        source.setParallelism(dop);
      } else {
        common.set_parallelism_hint(1);
      }
    }

    /*
     * 1. Connect all spout streams with bolt streams.
     * 2. Then proceed with the bolt streams that are already connected.
     *
     * Because we do not know the order in which an iterator steps over a set, we might process a
     * consumer before its producer; thus, we might need to repeat multiple times.
     */
    boolean makeProgress = true;
    while (bolts.size() > 0) {
      if (!makeProgress) {
        StringBuilder strBld = new StringBuilder();
        strBld.append("Unable to build Topology. Could not connect the following bolts:");
        for (String boltId : bolts.keySet()) {
          strBld.append("\n  ");
          strBld.append(boltId);
          strBld.append(": missing input streams [");
          for (Entry<GlobalStreamId, Grouping> streams : unprocessdInputsPerBolt.get(boltId)) {
            strBld.append("'");
            strBld.append(streams.getKey().get_streamId());
            strBld.append("' from '");
            strBld.append(streams.getKey().get_componentId());
            strBld.append("'; ");
          }
          strBld.append("]");
        }

        throw new RuntimeException(strBld.toString());
      }
      makeProgress = false;

      final Iterator<Entry<String, IRichBolt>> boltsIterator = bolts.entrySet().iterator();
      while (boltsIterator.hasNext()) {

        final Entry<String, IRichBolt> bolt = boltsIterator.next();
        final String boltId = bolt.getKey();
        final IRichBolt userBolt = copyObject(bolt.getValue());

        final ComponentCommon common = stormTopology.get_bolts().get(boltId).get_common();

        Set<Entry<GlobalStreamId, Grouping>> unprocessedBoltInputs =
            unprocessdInputsPerBolt.get(boltId);
        if (unprocessedBoltInputs == null) {
          unprocessedBoltInputs = new HashSet<>();
          unprocessedBoltInputs.addAll(common.get_inputs().entrySet());
          unprocessdInputsPerBolt.put(boltId, unprocessedBoltInputs);
        }

        // check if all inputs are available
        final int numberOfInputs = unprocessedBoltInputs.size();
        int inputsAvailable = 0;
        for (Entry<GlobalStreamId, Grouping> entry : unprocessedBoltInputs) {
          final String producerId = entry.getKey().get_componentId();
          final String streamId = entry.getKey().get_streamId();
          final HashMap<String, DataStream<Tuple>> streams = availableInputs.get(producerId);
          if (streams != null && streams.get(streamId) != null) {
            inputsAvailable++;
          }
        }

        if (inputsAvailable != numberOfInputs) {
          // traverse other bolts first until inputs are available
          continue;
        } else {
          makeProgress = true;
          boltsIterator.remove();
        }

        final Map<GlobalStreamId, DataStream<Tuple>> inputStreams = new HashMap<>(numberOfInputs);

        for (Entry<GlobalStreamId, Grouping> input : unprocessedBoltInputs) {
          final GlobalStreamId streamId = input.getKey();
          final Grouping grouping = input.getValue();

          final String producerId = streamId.get_componentId();

          final Map<String, DataStream<Tuple>> producer = availableInputs.get(producerId);

          inputStreams.put(streamId, processInput(boltId, userBolt, streamId, grouping, producer));
        }

        final SingleOutputStreamOperator<?, ?> outputStream =
            createOutput(boltId, userBolt, inputStreams);

        if (common.is_set_parallelism_hint()) {
          int dop = common.get_parallelism_hint();
          outputStream.setParallelism(dop);
        } else {
          common.set_parallelism_hint(1);
        }
      }
    }
  }
Example No. 28
  @Test
  public void testReduceDriverMutable() {
    try {
      {
        TestTaskContext<
                ReduceFunction<Tuple2<StringValue, IntValue>>, Tuple2<StringValue, IntValue>>
            context =
                new TestTaskContext<
                    ReduceFunction<Tuple2<StringValue, IntValue>>, Tuple2<StringValue, IntValue>>(
                    1024 * 1024);
        context.getTaskConfig().setRelativeMemoryDriver(0.5);

        List<Tuple2<StringValue, IntValue>> data = DriverTestData.createReduceMutableData();
        TupleTypeInfo<Tuple2<StringValue, IntValue>> typeInfo =
            (TupleTypeInfo<Tuple2<StringValue, IntValue>>) TypeExtractor.getForObject(data.get(0));
        MutableObjectIterator<Tuple2<StringValue, IntValue>> input =
            new RegularToMutableObjectIterator<Tuple2<StringValue, IntValue>>(
                data.iterator(), typeInfo.createSerializer(new ExecutionConfig()));
        TypeComparator<Tuple2<StringValue, IntValue>> comparator =
            typeInfo.createComparator(
                new int[] {0}, new boolean[] {true}, 0, new ExecutionConfig());

        GatheringCollector<Tuple2<StringValue, IntValue>> result =
            new GatheringCollector<Tuple2<StringValue, IntValue>>(
                typeInfo.createSerializer(new ExecutionConfig()));

        context.setDriverStrategy(DriverStrategy.SORTED_PARTIAL_REDUCE);
        context.setInput1(input, typeInfo.createSerializer(new ExecutionConfig()));
        context.setComparator1(comparator);
        context.setCollector(result);
        context.setUdf(new ConcatSumFirstMutableReducer());

        ReduceCombineDriver<Tuple2<StringValue, IntValue>> driver =
            new ReduceCombineDriver<Tuple2<StringValue, IntValue>>();
        driver.setup(context);
        driver.prepare();
        driver.run();

        Object[] res = result.getList().toArray();
        Object[] expected = DriverTestData.createReduceMutableDataGroupedResult().toArray();

        DriverTestData.compareTupleArrays(expected, res);
      }
      {
        TestTaskContext<
                ReduceFunction<Tuple2<StringValue, IntValue>>, Tuple2<StringValue, IntValue>>
            context =
                new TestTaskContext<
                    ReduceFunction<Tuple2<StringValue, IntValue>>, Tuple2<StringValue, IntValue>>(
                    1024 * 1024);
        context.getTaskConfig().setRelativeMemoryDriver(0.5);

        List<Tuple2<StringValue, IntValue>> data = DriverTestData.createReduceMutableData();
        TupleTypeInfo<Tuple2<StringValue, IntValue>> typeInfo =
            (TupleTypeInfo<Tuple2<StringValue, IntValue>>) TypeExtractor.getForObject(data.get(0));
        MutableObjectIterator<Tuple2<StringValue, IntValue>> input =
            new RegularToMutableObjectIterator<Tuple2<StringValue, IntValue>>(
                data.iterator(), typeInfo.createSerializer(new ExecutionConfig()));
        TypeComparator<Tuple2<StringValue, IntValue>> comparator =
            typeInfo.createComparator(
                new int[] {0}, new boolean[] {true}, 0, new ExecutionConfig());

        GatheringCollector<Tuple2<StringValue, IntValue>> result =
            new GatheringCollector<Tuple2<StringValue, IntValue>>(
                typeInfo.createSerializer(new ExecutionConfig()));

        context.setDriverStrategy(DriverStrategy.SORTED_PARTIAL_REDUCE);
        context.setInput1(input, typeInfo.createSerializer(new ExecutionConfig()));
        context.setComparator1(comparator);
        context.setCollector(result);
        context.setUdf(new ConcatSumSecondMutableReducer());

        ReduceCombineDriver<Tuple2<StringValue, IntValue>> driver =
            new ReduceCombineDriver<Tuple2<StringValue, IntValue>>();
        driver.setup(context);
        driver.prepare();
        driver.run();

        Object[] res = result.getList().toArray();
        Object[] expected = DriverTestData.createReduceMutableDataGroupedResult().toArray();

        DriverTestData.compareTupleArrays(expected, res);
      }
    } catch (Exception e) {
      System.err.println(e.getMessage());
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }
Example No. 29
public class TumblingGroupedPreReducerTest {

  TypeInformation<Tuple2<Integer, Integer>> type =
      TypeExtractor.getForObject(new Tuple2<Integer, Integer>(1, 1));
  TypeSerializer<Tuple2<Integer, Integer>> serializer = type.createSerializer(null);

  KeySelector<Tuple2<Integer, Integer>, ?> key =
      KeySelectorUtil.getSelectorForKeys(
          new Keys.ExpressionKeys<Tuple2<Integer, Integer>>(new int[] {0}, type), type, null);

  Reducer reducer = new Reducer();

  @SuppressWarnings("unchecked")
  @Test
  public void testEmitWindow() throws Exception {

    List<Tuple2<Integer, Integer>> inputs = new ArrayList<Tuple2<Integer, Integer>>();
    inputs.add(new Tuple2<Integer, Integer>(1, 1));
    inputs.add(new Tuple2<Integer, Integer>(0, 0));
    inputs.add(new Tuple2<Integer, Integer>(1, -1));
    inputs.add(new Tuple2<Integer, Integer>(1, -2));

    TestCollector<StreamWindow<Tuple2<Integer, Integer>>> collector =
        new TestCollector<StreamWindow<Tuple2<Integer, Integer>>>();
    List<StreamWindow<Tuple2<Integer, Integer>>> collected = collector.getCollected();

    WindowBuffer<Tuple2<Integer, Integer>> wb =
        new TumblingGroupedPreReducer<Tuple2<Integer, Integer>>(reducer, key, serializer);

    wb.store(serializer.copy(inputs.get(0)));
    wb.store(serializer.copy(inputs.get(1)));
    wb.emitWindow(collector);
    wb.evict(2);

    assertEquals(1, collected.size());

    assertSetEquals(
        StreamWindow.fromElements(
            new Tuple2<Integer, Integer>(1, 1), new Tuple2<Integer, Integer>(0, 0)),
        collected.get(0));

    wb.store(serializer.copy(inputs.get(0)));
    wb.store(serializer.copy(inputs.get(1)));
    wb.store(serializer.copy(inputs.get(2)));

    wb.store(serializer.copy(inputs.get(3)));

    wb.emitWindow(collector);
    wb.evict(4);

    assertEquals(2, collected.size());

    assertSetEquals(
        StreamWindow.fromElements(
            new Tuple2<Integer, Integer>(3, -2), new Tuple2<Integer, Integer>(0, 0)),
        collected.get(1));

    // Verify that the reduce function did not mutate its inputs
    assertEquals(2, reducer.allInputs.size());
    assertEquals(reducer.allInputs.get(0), inputs.get(2));
    assertEquals(reducer.allInputs.get(1), inputs.get(3));
  }

  @SuppressWarnings("unchecked")
  @Test
  public void testEmitWindow2() throws Exception {

    List<Tuple2<Integer, Integer>> inputs = new ArrayList<Tuple2<Integer, Integer>>();
    inputs.add(new Tuple2<Integer, Integer>(1, 1));
    inputs.add(new Tuple2<Integer, Integer>(0, 0));
    inputs.add(new Tuple2<Integer, Integer>(1, -1));
    inputs.add(new Tuple2<Integer, Integer>(1, -2));

    TestCollector<StreamWindow<Tuple2<Integer, Integer>>> collector =
        new TestCollector<StreamWindow<Tuple2<Integer, Integer>>>();
    List<StreamWindow<Tuple2<Integer, Integer>>> collected = collector.getCollected();

    WindowBuffer<Tuple2<Integer, Integer>> wb =
        new TumblingGroupedPreReducer<Tuple2<Integer, Integer>>(reducer, key, serializer)
            .sequentialID();

    wb.store(serializer.copy(inputs.get(0)));
    wb.store(serializer.copy(inputs.get(1)));
    wb.emitWindow(collector);
    wb.evict(2);

    assertSetEquals(StreamWindow.fromElements(inputs.get(0), inputs.get(1)), collected.get(0));

    wb.store(serializer.copy(inputs.get(0)));
    wb.store(serializer.copy(inputs.get(1)));
    wb.store(serializer.copy(inputs.get(2)));
    wb.emitWindow(collector);
    wb.evict(3);

    assertSetEquals(
        StreamWindow.fromElements(new Tuple2<Integer, Integer>(2, 0), inputs.get(1)),
        collected.get(1));
  }

  private static <T> void assertSetEquals(Collection<T> first, Collection<T> second) {
    assertEquals(new HashSet<T>(first), new HashSet<T>(second));
  }

  @SuppressWarnings("serial")
  private class Reducer implements ReduceFunction<Tuple2<Integer, Integer>> {

    public List<Tuple2<Integer, Integer>> allInputs = new ArrayList<Tuple2<Integer, Integer>>();

    @Override
    public Tuple2<Integer, Integer> reduce(
        Tuple2<Integer, Integer> value1, Tuple2<Integer, Integer> value2) throws Exception {
      allInputs.add(value2);
      value1.f0 = value1.f0 + value2.f0;
      value1.f1 = value1.f1 + value2.f1;
      return value1;
    }
  }
}