@Override
 public Tuple2<String, Integer> reduce(
     Tuple2<String, Integer> value1, Tuple2<String, Integer> value2) {
   value2.f0 = value1.f0 + value2.f0;
   value2.f1 = value1.f1 + value2.f1;
   return value2;
 }
 @Override
 public Tuple2<Integer, Integer> reduce(
     Tuple2<Integer, Integer> value1, Tuple2<Integer, Integer> value2) throws Exception {
   allInputs.add(value2);
   value1.f0 = value1.f0 + value2.f0;
   value1.f1 = value1.f1 + value2.f1;
   return value1;
 }
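Both reduce(...) implementations above follow the same pattern: combine two tuples field by field and return one of the inputs. Below is a minimal sketch of how such a reducer is typically wired into a keyed stream; the class name, input data, and job name are illustrative and not taken from the original examples, and the sketch returns a fresh tuple instead of mutating an input.

import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class KeyedReduceSketch {

  public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    env.fromElements(Tuple2.of("a", 1), Tuple2.of("b", 2), Tuple2.of("a", 3))
        // group records by the String key in field f0
        .keyBy(
            new KeySelector<Tuple2<String, Integer>, String>() {
              @Override
              public String getKey(Tuple2<String, Integer> value) {
                return value.f0;
              }
            })
        // sum the Integer counts in field f1, keeping the key unchanged
        .reduce(
            new ReduceFunction<Tuple2<String, Integer>>() {
              @Override
              public Tuple2<String, Integer> reduce(
                  Tuple2<String, Integer> value1, Tuple2<String, Integer> value2) {
                return Tuple2.of(value1.f0, value1.f1 + value2.f1);
              }
            })
        .print();

    env.execute("keyed reduce sketch");
  }
}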
Example #3
    /** {@inheritDoc} */
    @Override
    public Tuple2<LongWritable, Mutation> map(PED persistentEdgeData) throws Exception {
      // key of the (key, mutation) output pair, derived from the edge id
      LongWritable key = new LongWritable(persistentEdgeData.getId());
      // create an HBase Put with the handler-defined row key and let the handler
      // serialize the edge data into it
      Put put = new Put(edgeDataHandler.getRowKey(persistentEdgeData.getId()));
      put = edgeDataHandler.writeEdgeData(put, persistentEdgeData);

      // reuse a single output tuple to avoid per-record allocations
      reuseTuple.f0 = key;
      reuseTuple.f1 = put;
      return reuseTuple;
    }
 @Override
 public void run(SourceContext<Tuple2<Integer, String>> ctx) throws Exception {
   for (int i = 0; i < numElements && running; i++) {
     if (i % 5 == 0 && i > 0) {
       // update the clock after "five seconds", so we get 20 seconds in total
       // with 5 elements in each time window
       latch1.await();
       latch2.await();
       ModifyableClock.setCurrentTime(i * 1000);
     }
     ctx.collect(Tuple2.of(i, "message #" + i));
   }
 }
Example #5
    @Override
    public void flatMap(Integer value, Collector<Tuple2<Integer, Integer>> out) throws Exception {

      int count = counter.value() + 1;
      counter.update(count);

      int s = sum.value() + value;
      sum.update(s);

      if (count % numberElements == 0) {
        out.collect(Tuple2.of(getRuntimeContext().getIndexOfThisSubtask(), s));
        workCompletedLatch.countDown();
      }
    }
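The counter and sum handles used in this flatMap are ValueState fields that are not declared in the snippet, and numberElements / workCompletedLatch are test fixtures. A minimal, self-contained sketch of how such keyed state is usually declared and initialized in a RichFlatMapFunction follows; the class name and the emit-per-element behavior are assumptions, and the null check stands in for whatever default the original registers.

import org.apache.flink.api.common.functions.RichFlatMapFunction;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.util.Collector;

public class CountAndSum extends RichFlatMapFunction<Integer, Tuple2<Integer, Integer>> {

  // keyed state handles, one value per key, initialized in open()
  private transient ValueState<Integer> counter;
  private transient ValueState<Integer> sum;

  @Override
  public void open(Configuration parameters) {
    counter = getRuntimeContext().getState(new ValueStateDescriptor<>("counter", Integer.class));
    sum = getRuntimeContext().getState(new ValueStateDescriptor<>("sum", Integer.class));
  }

  @Override
  public void flatMap(Integer value, Collector<Tuple2<Integer, Integer>> out) throws Exception {
    // ValueState.value() returns null before the first update for the current key
    int count = counter.value() == null ? 1 : counter.value() + 1;
    counter.update(count);

    int s = sum.value() == null ? value : sum.value() + value;
    sum.update(s);

    // emit (subtask index, running sum); the original only emits every numberElements records
    out.collect(Tuple2.of(getRuntimeContext().getIndexOfThisSubtask(), s));
  }
}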
  @Test
  public void testMap() throws Throwable {

    /*
     * Define the input DataSet:
     * Get a DataSetBuilder with .createTestDataSetWith(record).
     * Add data records to it with .emit(record) and retrieve the DataSet
     * by calling .close().
     */
    DataSet<Tuple2<Integer, String>> testDataSet =
        createTestDataSetWith(Tuple2.of(1, "test"))
            .emit(Tuple2.of(2, "why"))
            .emit(Tuple2.of(3, "not"))
            .emit(Tuple2.of(4, "batch?"))
            .close();

    /*
     * Define the output you expect from the transformation under test.
     * Add the tuples you want to see with .expect(record).
     */
    ExpectedRecords<Tuple2<String, Integer>> output =
        ExpectedRecords.create(Tuple2.of("test", 1))
            .expect(Tuple2.of("why", 2))
            .expect(Tuple2.of("not", 3));
    // refine your expectations by adding requirements
    output.refine().only().inOrder(strict);

    /*
     * Create an OutputMatcher using MatchTuples.
     * MatchTuples builds an OutputMatcher working on Tuples.
     * You assign String identifiers to your Tuple fields
     * and add hamcrest matchers testing their values.
     */
    OutputMatcher<Tuple2<String, Integer>> matcher =
        // name the values in your tuple with keys:
        new MatchTuples<Tuple2<String, Integer>>("name", "value")
            // add an assertion using a value and hamcrest matchers
            .assertThat("name", isA(String.class))
            .assertThat("value", lessThan(5))
            // express how many matchers must return true for your test to pass:
            .anyOfThem()
            // define how many records need to fulfill the assertions (here: each record)
            .onEachRecord();

    /*
     * Use assertDataSet to check the DataSet against an OutputMatcher.
     * ExpectedRecords extends OutputMatcher and can therefore be used here as well.
     * Combine the created matchers with anyOf(), meaning that at least one of
     * the matchers must match.
     */
    assertDataSet(swap(testDataSet), anyOf(output, matcher), FinishAtCount.of(3));
  }
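The swap(...) and createTestDataSetWith(...) helpers come from the surrounding test harness and are not shown. Assuming swap simply flips each (Integer, String) tuple into (String, Integer) so it can be matched against the expectations above, a hedged sketch of such a helper might look like this:

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.tuple.Tuple2;

// hypothetical helper class; the real transformation under test may differ
public class SwapHelper {

  // flips each (Integer, String) record into (String, Integer)
  public static DataSet<Tuple2<String, Integer>> swap(DataSet<Tuple2<Integer, String>> input) {
    return input.map(
        new MapFunction<Tuple2<Integer, String>, Tuple2<String, Integer>>() {
          @Override
          public Tuple2<String, Integer> map(Tuple2<Integer, String> value) {
            return Tuple2.of(value.f1, value.f0);
          }
        });
  }
}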
  /** Tests that a snapshot taken before the fetcher is running returns the restored offsets. */
  @Test
  public void checkRestoredCheckpointWhenFetcherNotReady() throws Exception {
    OperatorStateStore operatorStateStore = mock(OperatorStateStore.class);

    TestingListState<Serializable> expectedState = new TestingListState<>();
    expectedState.add(Tuple2.of(new KafkaTopicPartition("abc", 13), 16768L));
    expectedState.add(Tuple2.of(new KafkaTopicPartition("def", 7), 987654321L));

    TestingListState<Serializable> listState = new TestingListState<>();

    FlinkKafkaConsumerBase<String> consumer = getConsumer(null, new LinkedMap(), true);

    when(operatorStateStore.getSerializableListState(Matchers.any(String.class)))
        .thenReturn(expectedState);
    consumer.initializeState(operatorStateStore);

    when(operatorStateStore.getSerializableListState(Matchers.any(String.class)))
        .thenReturn(listState);

    consumer.prepareSnapshot(17L, 17L);

    Set<Serializable> expected = new HashSet<>();

    for (Serializable serializable : expectedState.get()) {
      expected.add(serializable);
    }

    int counter = 0;

    for (Serializable serializable : listState.get()) {
      assertTrue(expected.contains(serializable));
      counter++;
    }

    assertEquals(expected.size(), counter);
  }
Example #8
  @Test
  public void wrapperStateHandleTest() throws Exception {
    final ClassLoader cl = this.getClass().getClassLoader();

    MockHandle<Serializable> h1 = new MockHandle<Serializable>(1);
    MockHandle<Serializable> h2 = new MockHandle<Serializable>(2);
    StateHandle<Serializable> h3 = new MockHandle<Serializable>(3);

    OperatorStateHandle opH1 = new OperatorStateHandle(h1, true);
    OperatorStateHandle opH2 = new OperatorStateHandle(h2, false);

    Map<String, OperatorStateHandle> opHandles = ImmutableMap.of("h1", opH1, "h2", opH2);

    Tuple2<StateHandle<Serializable>, Map<String, OperatorStateHandle>> fullState =
        Tuple2.of(h3, opHandles);

    List<Tuple2<StateHandle<Serializable>, Map<String, OperatorStateHandle>>> chainedStates =
        ImmutableList.of(fullState);

    WrapperStateHandle wrapperHandle = new WrapperStateHandle(chainedStates);

    WrapperStateHandle dsWrapper = serializeDeserialize(wrapperHandle);

    @SuppressWarnings("unchecked")
    Tuple2<StateHandle<Serializable>, Map<String, OperatorStateHandle>> dsFullState =
        ((List<Tuple2<StateHandle<Serializable>, Map<String, OperatorStateHandle>>>)
                dsWrapper.getState(cl))
            .get(0);

    Map<String, OperatorStateHandle> dsOpHandles = dsFullState.f1;

    assertNull(dsFullState.f0.getState(cl));
    assertFalse(((MockHandle<?>) dsFullState.f0).discarded);
    assertFalse(((MockHandle<?>) dsOpHandles.get("h1").getHandle()).discarded);
    assertNull(dsOpHandles.get("h1").getState(cl));
    assertFalse(((MockHandle<?>) dsOpHandles.get("h2").getHandle()).discarded);
    assertNull(dsOpHandles.get("h2").getState(cl));

    dsWrapper.discardState();

    assertTrue(((MockHandle<?>) dsFullState.f0).discarded);
    assertTrue(((MockHandle<?>) dsOpHandles.get("h1").getHandle()).discarded);
    assertTrue(((MockHandle<?>) dsOpHandles.get("h2").getHandle()).discarded);
  }
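The serializeDeserialize(...) helper is not shown in this example. A minimal sketch that round-trips the handle through plain Java serialization, which is presumably all the helper needs to do, could look like this (the method name is kept for readability; the real helper may differ):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;

// hypothetical test utility; the real helper may differ
public class SerializationTestUtil {

  @SuppressWarnings("unchecked")
  public static <T extends Serializable> T serializeDeserialize(T object) throws Exception {
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    try (ObjectOutputStream oos = new ObjectOutputStream(bos)) {
      // write the object graph, including any wrapped state handles
      oos.writeObject(object);
    }
    try (ObjectInputStream ois =
        new ObjectInputStream(new ByteArrayInputStream(bos.toByteArray()))) {
      return (T) ois.readObject();
    }
  }
}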
Example #9
  @Test
  public void testSimpleKeyedPatternEventTime() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(2);

    // (Event, timestamp)
    DataStream<Event> input =
        env.fromElements(
                Tuple2.of(new Event(1, "start", 1.0), 5L),
                Tuple2.of(new Event(1, "middle", 2.0), 1L),
                Tuple2.of(new Event(2, "middle", 2.0), 4L),
                Tuple2.of(new Event(2, "start", 2.0), 3L),
                Tuple2.of(new Event(1, "end", 3.0), 3L),
                Tuple2.of(new Event(3, "start", 4.1), 5L),
                Tuple2.of(new Event(1, "end", 4.0), 10L),
                Tuple2.of(new Event(2, "end", 2.0), 8L),
                Tuple2.of(new Event(1, "middle", 5.0), 7L),
                Tuple2.of(new Event(3, "middle", 6.0), 9L),
                Tuple2.of(new Event(3, "end", 7.0), 7L),
                // last element for high final watermark
                Tuple2.of(new Event(3, "end", 7.0), 100L))
            .assignTimestampsAndWatermarks(
                new AssignerWithPunctuatedWatermarks<Tuple2<Event, Long>>() {

                  @Override
                  public long extractTimestamp(Tuple2<Event, Long> element, long currentTimestamp) {
                    return element.f1;
                  }

                  @Override
                  public Watermark checkAndGetNextWatermark(
                      Tuple2<Event, Long> lastElement, long extractedTimestamp) {
                    return new Watermark(lastElement.f1 - 5);
                  }
                })
            .map(
                new MapFunction<Tuple2<Event, Long>, Event>() {

                  @Override
                  public Event map(Tuple2<Event, Long> value) throws Exception {
                    return value.f0;
                  }
                })
            .keyBy(
                new KeySelector<Event, Integer>() {

                  @Override
                  public Integer getKey(Event value) throws Exception {
                    return value.getId();
                  }
                });

    Pattern<Event, ?> pattern =
        Pattern.<Event>begin("start")
            .where(
                new FilterFunction<Event>() {

                  @Override
                  public boolean filter(Event value) throws Exception {
                    return value.getName().equals("start");
                  }
                })
            .followedBy("middle")
            .where(
                new FilterFunction<Event>() {

                  @Override
                  public boolean filter(Event value) throws Exception {
                    return value.getName().equals("middle");
                  }
                })
            .followedBy("end")
            .where(
                new FilterFunction<Event>() {

                  @Override
                  public boolean filter(Event value) throws Exception {
                    return value.getName().equals("end");
                  }
                });

    DataStream<String> result =
        CEP.pattern(input, pattern)
            .select(
                new PatternSelectFunction<Event, String>() {

                  @Override
                  public String select(Map<String, Event> pattern) {
                    StringBuilder builder = new StringBuilder();

                    builder
                        .append(pattern.get("start").getId())
                        .append(",")
                        .append(pattern.get("middle").getId())
                        .append(",")
                        .append(pattern.get("end").getId());

                    return builder.toString();
                  }
                });

    result.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);

    // the expected sequences of matching event ids
    expected = "1,1,1\n2,2,2";

    env.execute();
  }
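The Event type used throughout this CEP test is not part of the snippet. Below is a minimal sketch that is consistent with how events are constructed (id, name, numeric value) and accessed (getId(), getName()) above; the field name price for the third constructor argument is an assumption.

// minimal sketch of the Event type assumed by the test; field names are assumptions
public class Event {

  private final int id;
  private final String name;
  private final double price;

  public Event(int id, String name, double price) {
    this.id = id;
    this.name = name;
    this.price = price;
  }

  public int getId() {
    return id;
  }

  public String getName() {
    return name;
  }

  public double getPrice() {
    return price;
  }
}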
 @Override
 public void run(SourceContext<Tuple2<Integer, String>> ctx) throws Exception {
   for (int i = 0; i < numElements && running; i++) {
     ctx.collect(Tuple2.of(i, "message #" + i));
   }
 }
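The running flag and numElements field used by run(...) are not shown. A common way to embed such a loop in a SourceFunction, with a volatile flag that cancel() flips from another thread, is sketched below; the class name is illustrative.

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.functions.source.SourceFunction;

public class MessageSource implements SourceFunction<Tuple2<Integer, String>> {

  private final int numElements;
  // volatile so that cancel(), called from another thread, is seen by run()
  private volatile boolean running = true;

  public MessageSource(int numElements) {
    this.numElements = numElements;
  }

  @Override
  public void run(SourceContext<Tuple2<Integer, String>> ctx) throws Exception {
    for (int i = 0; i < numElements && running; i++) {
      ctx.collect(Tuple2.of(i, "message #" + i));
    }
  }

  @Override
  public void cancel() {
    running = false;
  }
}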
Example #11
  /**
   * Tests that a job with non-partitioned state can be restarted from a savepoint with a different
   * parallelism if the operators with non-partitioned state are not rescaled.
   *
   * @throws Exception
   */
  @Test
  public void testSavepointRescalingWithKeyedAndNonPartitionedState() throws Exception {
    int numberKeys = 42;
    int numberElements = 1000;
    int numberElements2 = 500;
    int parallelism = numSlots / 2;
    int parallelism2 = numSlots;
    int maxParallelism = 13;

    FiniteDuration timeout = new FiniteDuration(3, TimeUnit.MINUTES);
    Deadline deadline = timeout.fromNow();

    ActorGateway jobManager = null;
    JobID jobID = null;

    try {
      jobManager = cluster.getLeaderGateway(deadline.timeLeft());

      JobGraph jobGraph =
          createJobGraphWithKeyedAndNonPartitionedOperatorState(
              parallelism, maxParallelism, parallelism, numberKeys, numberElements, false, 100);

      jobID = jobGraph.getJobID();

      cluster.submitJobDetached(jobGraph);

      // wait until the sources have emitted numberElements for each key and completed a checkpoint
      SubtaskIndexFlatMapper.workCompletedLatch.await(
          deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);

      // verify the current state

      Set<Tuple2<Integer, Integer>> actualResult = CollectionSink.getElementsSet();

      Set<Tuple2<Integer, Integer>> expectedResult = new HashSet<>();

      for (int key = 0; key < numberKeys; key++) {
        int keyGroupIndex = KeyGroupRangeAssignment.assignToKeyGroup(key, maxParallelism);

        expectedResult.add(
            Tuple2.of(
                KeyGroupRangeAssignment.computeOperatorIndexForKeyGroup(
                    maxParallelism, parallelism, keyGroupIndex),
                numberElements * key));
      }

      assertEquals(expectedResult, actualResult);

      // clear the CollectionSink set for the restarted job
      CollectionSink.clearElementsSet();

      Future<Object> savepointPathFuture =
          jobManager.ask(new JobManagerMessages.TriggerSavepoint(jobID), deadline.timeLeft());

      final String savepointPath =
          ((JobManagerMessages.TriggerSavepointSuccess)
                  Await.result(savepointPathFuture, deadline.timeLeft()))
              .savepointPath();

      Future<Object> jobRemovedFuture =
          jobManager.ask(
              new TestingJobManagerMessages.NotifyWhenJobRemoved(jobID), deadline.timeLeft());

      Future<Object> cancellationResponseFuture =
          jobManager.ask(new JobManagerMessages.CancelJob(jobID), deadline.timeLeft());

      Object cancellationResponse = Await.result(cancellationResponseFuture, deadline.timeLeft());

      assertTrue(cancellationResponse instanceof JobManagerMessages.CancellationSuccess);

      Await.ready(jobRemovedFuture, deadline.timeLeft());

      jobID = null;

      JobGraph scaledJobGraph =
          createJobGraphWithKeyedAndNonPartitionedOperatorState(
              parallelism2,
              maxParallelism,
              parallelism,
              numberKeys,
              numberElements + numberElements2,
              true,
              100);

      scaledJobGraph.setSavepointPath(savepointPath);

      jobID = scaledJobGraph.getJobID();

      cluster.submitJobAndWait(scaledJobGraph, false);

      jobID = null;

      Set<Tuple2<Integer, Integer>> actualResult2 = CollectionSink.getElementsSet();

      Set<Tuple2<Integer, Integer>> expectedResult2 = new HashSet<>();

      for (int key = 0; key < numberKeys; key++) {
        int keyGroupIndex = KeyGroupRangeAssignment.assignToKeyGroup(key, maxParallelism);
        expectedResult2.add(
            Tuple2.of(
                KeyGroupRangeAssignment.computeOperatorIndexForKeyGroup(
                    maxParallelism, parallelism2, keyGroupIndex),
                key * (numberElements + numberElements2)));
      }

      assertEquals(expectedResult2, actualResult2);

    } finally {
      // clear the CollectionSink set for the restarted job
      CollectionSink.clearElementsSet();

      // clear any leftovers from a possibly failed job
      if (jobID != null && jobManager != null) {
        Future<Object> jobRemovedFuture =
            jobManager.ask(new TestingJobManagerMessages.NotifyWhenJobRemoved(jobID), timeout);

        try {
          Await.ready(jobRemovedFuture, timeout);
        } catch (TimeoutException | InterruptedException ie) {
          fail("Failed while cleaning up the cluster.");
        }
      }
    }
  }
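The expected results in this test are derived from Flink's key-group assignment. The short sketch below shows, for illustrative values of parallelism and maxParallelism (the test computes its parallelism from numSlots, which is not shown), how a key is mapped first to a key group and then to the operator subtask that the test expects to emit its sum.

import org.apache.flink.runtime.state.KeyGroupRangeAssignment;

public class KeyGroupMappingSketch {

  public static void main(String[] args) {
    int maxParallelism = 13; // as in the test
    int parallelism = 2;     // illustrative; the test derives this from numSlots

    for (int key = 0; key < 5; key++) {
      // key -> key group, based only on the key and maxParallelism
      int keyGroupIndex = KeyGroupRangeAssignment.assignToKeyGroup(key, maxParallelism);
      // key group -> subtask index, based on the current parallelism
      int operatorIndex =
          KeyGroupRangeAssignment.computeOperatorIndexForKeyGroup(
              maxParallelism, parallelism, keyGroupIndex);
      System.out.println(
          "key " + key + " -> key group " + keyGroupIndex + " -> subtask " + operatorIndex);
    }
  }
}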