@Override
public Tuple2<Integer, Integer> reduce(
        Tuple2<Integer, Integer> value1, Tuple2<Integer, Integer> value2) throws Exception {
    state.update(state.value() + 1);
    globalCounts.put(value1.f0, state.value());
    return new Tuple2<>(value1.f0, value1.f1 + value2.f1);
}
@Override
public void apply(
        Integer key, TimeWindow window, Iterable<Integer> values, Collector<Integer> out)
        throws Exception {
    for (Integer i : values) {
        // we need to update this state before emitting elements. Else, the test's main
        // thread will have received all output elements before the state is updated and
        // the checks may fail
        state.update(state.value() + 1);
        globalCounts.put(key, state.value());
        out.collect(i);
    }
}
@Override
public TriggerResult onElement(Object element, long timestamp, W window, TriggerContext ctx)
        throws Exception {
    OperatorState<Boolean> first = ctx.getKeyValueState("first", true);
    if (first.value()) {
        // on the first element, register a timer for the end of the current interval;
        // later elements fall through without registering another timer
        long start = timestamp - (timestamp % interval);
        long nextFireTimestamp = start + interval;
        ctx.registerEventTimeTimer(nextFireTimestamp);
        first.update(false);
        return TriggerResult.CONTINUE;
    }
    return TriggerResult.CONTINUE;
}
@Override
public void run(SourceContext<String> ctx) throws Exception {
    final Object lockingObject = ctx.getCheckpointLock();

    while (isRunning && index.value() < numElements) {
        char first = (char) ((index.value() % 40) + 40);

        stringBuilder.setLength(0);
        stringBuilder.append(first);

        String result = randomString(stringBuilder, rnd);

        // update the index state and emit the element atomically with respect to checkpoints
        synchronized (lockingObject) {
            index.update(index.value() + step);
            ctx.collect(result);
        }
    }
}
@Override
public void run(SourceContext<OUT> ctx) throws Exception {
    if (iteratorToRead == null) {
        throw new IllegalStateException("Kafka iterator not initialized properly.");
    }

    final Object checkpointLock = ctx.getCheckpointLock();

    while (running && iteratorToRead.hasNext()) {
        MessageAndMetadata<byte[], byte[]> message = iteratorToRead.next();
        if (lastOffsets.getState()[message.partition()] >= message.offset()) {
            LOG.info(
                    "Skipping message with offset {} from partition {}",
                    message.offset(),
                    message.partition());
            continue;
        }
        OUT next = deserializationSchema.deserialize(message.message());

        if (deserializationSchema.isEndOfStream(next)) {
            LOG.info("DeserializationSchema signaled end of stream for this source");
            break;
        }

        // make the state update and the element emission atomic
        synchronized (checkpointLock) {
            lastOffsets.getState()[message.partition()] = message.offset();
            ctx.collect(next);
        }

        if (LOG.isTraceEnabled()) {
            LOG.trace(
                    "Processed record with offset {} from partition {}",
                    message.offset(),
                    message.partition());
        }
    }
}
@Override
public void close() throws IOException {
    // store this subtask's final count in the shared counts array
    counts[getRuntimeContext().getIndexOfThisSubtask()] = count.value();
}
@Override
public PrefixCount map(String value) throws IOException {
    count.update(count.value() + 1);
    return new PrefixCount(value.substring(0, 1), value, 1L);
}
@Override
public PrefixCount map(PrefixCount value) throws Exception {
    count.update(count.value() + 1);
    return value;
}
@Override
public void open(Configuration parameters) throws Exception {
    super.open(parameters);
    ConsumerConnector consumer = Consumer.createJavaConsumerConnector(this.consumerConfig);
    // we request only one stream per consumer instance. Kafka will make sure that each
    // consumer group will see each message only once.
    Map<String, Integer> topicCountMap = Collections.singletonMap(topicName, 1);
    Map<String, List<KafkaStream<byte[], byte[]>>> streams =
            consumer.createMessageStreams(topicCountMap);
    if (streams.size() != 1) {
        throw new RuntimeException("Expected only one message stream but got " + streams.size());
    }
    List<KafkaStream<byte[], byte[]>> kafkaStreams = streams.get(topicName);
    if (kafkaStreams == null) {
        throw new RuntimeException(
                "Requested stream not available. Available streams: " + streams.toString());
    }
    if (kafkaStreams.size() != 1) {
        throw new RuntimeException(
                "Requested 1 stream from Kafka, but got " + kafkaStreams.size() + " streams");
    }
    LOG.info(
            "Opening Consumer instance for topic '{}' on group '{}'",
            topicName,
            consumerConfig.groupId());
    this.iteratorToRead = kafkaStreams.get(0).iterator();
    this.consumer = consumer;

    zkClient =
            new ZkClient(
                    consumerConfig.zkConnect(),
                    consumerConfig.zkSessionTimeoutMs(),
                    consumerConfig.zkConnectionTimeoutMs(),
                    new KafkaZKStringSerializer());

    // most likely the number of offsets we're going to store here will be lower than the
    // number of partitions.
    int numPartitions = getNumberOfPartitions();
    LOG.debug("The topic {} has {} partitions", topicName, numPartitions);
    this.lastOffsets =
            getRuntimeContext().getOperatorState("offset", new long[numPartitions], false);
    this.commitedOffsets = new long[numPartitions];

    // check if there are offsets to restore
    if (!Arrays.equals(lastOffsets.getState(), new long[numPartitions])) {
        if (lastOffsets.getState().length != numPartitions) {
            throw new IllegalStateException(
                    "There are "
                            + lastOffsets.getState().length
                            + " offsets to restore for topic "
                            + topicName
                            + " but there are only "
                            + numPartitions
                            + " partitions in the topic");
        }

        LOG.info(
                "Setting restored offsets {} in ZooKeeper",
                Arrays.toString(lastOffsets.getState()));
        setOffsetsInZooKeeper(lastOffsets.getState());
    } else {
        // initialize empty offsets
        Arrays.fill(this.lastOffsets.getState(), -1);
    }
    Arrays.fill(this.commitedOffsets, 0); // just to make it clear

    running = true;
}