/**
 * Consumes messages from the given stream and passes each one to the handler.
 *
 * <p>For every message a party is registered on the phaser and the message offset is recorded as
 * unacknowledged; the callback handed to the handler removes the offset again and deregisters
 * the party. Whenever the unacknowledged count reaches the configured maximum, the partition
 * check reports a change, or the uncommitted-offset check fires, reading pauses until
 * {@code waitForAcks} returns. If it returns {@code false} this method returns immediately;
 * otherwise {@code commitOffsetsIfAllAcknowledged} is called and reading continues.
 *
 * @param stream the Kafka stream to read from
 */
private void read(final KafkaStream<String, String> stream) {
    while (stream.iterator().hasNext()) {
      // One phaser party per in-flight message; the ACK callback below deregisters it.
      final int phase = phaser.register();

      final MessageAndMetadata<String, String> record = stream.iterator().next();
      final long offset = record.offset();
      final long partition = record.partition();

      unacknowledgedOffsets.add(offset);
      // Only seeds the values while they still hold their initial markers (0 and -1).
      lastCommittedOffset.compareAndSet(0, offset);
      currentPartition.compareAndSet(-1, partition);

      final String payload = record.message();
      handler.handle(
          configuration.getVertxAddress(),
          payload,
          () -> {
            unacknowledgedOffsets.remove(offset);
            phaser.arriveAndDeregister();
          });

      // Nothing forces a commit yet: keep reading.
      if (unacknowledgedOffsets.size() < configuration.getMaxUnacknowledged()
          && !partititionChanged(partition)
          && !tooManyUncommittedOffsets(offset)) {
        continue;
      }

      LOG.info(
          "Got {} unacknowledged messages, waiting for ACKs in order to commit",
          unacknowledgedOffsets.size());
      final boolean acknowledged = waitForAcks(phase);
      if (!acknowledged) {
        return;
      }
      LOG.info("Continuing message processing");
      commitOffsetsIfAllAcknowledged(offset);
    }
  }
Beispiel #2
0
 /**
  * Builds a task that collects messages from the given iterator.
  *
  * <p>The task stops as soon as {@code expectedMsg} messages have been collected or the
  * iterator reports that no further message is available, whichever happens first.
  *
  * @param iterator the consumer iterator to read from
  * @param expectedMsg the maximum number of messages to collect
  * @return a callable that yields the collected messages in the order they were read
  */
 private Callable<List<SimplifiedLog>> createConsumerThread(
     ConsumerIterator<String, SimplifiedLog> iterator, int expectedMsg) {
   return () -> {
     List<SimplifiedLog> received = new ArrayList<>();
     while ((received.size() < expectedMsg) && iterator.hasNext()) {
       // Parameterized type: no raw-type warning and no unchecked cast on the payload.
       MessageAndMetadata<String, SimplifiedLog> data = iterator.next();
       // Fetch the payload once and reuse it for both the result list and the log line.
       SimplifiedLog message = data.message();
       received.add(message);
       LOGGER.debug("Received message: {} | From partition: {}", message, data.partition());
     }
     return received;
   };
 }
Beispiel #3
0
  /**
   * Reads messages from the Kafka iterator and emits the deserialized elements to the context.
   *
   * <p>A message whose offset is not greater than the offset stored for its partition is
   * skipped. The loop ends when {@code running} becomes false, the iterator has no further
   * message, or the deserialization schema signals end of stream.
   *
   * @param ctx the source context used to emit elements and to obtain the checkpoint lock
   * @throws IllegalStateException if the Kafka iterator has not been set
   * @throws Exception propagated from deserialization or emission
   */
  @Override
  public void run(SourceContext<OUT> ctx) throws Exception {
    if (iteratorToRead == null) {
      throw new IllegalStateException("Kafka iterator not initialized properly.");
    }

    final Object checkpointLock = ctx.getCheckpointLock();

    while (running && iteratorToRead.hasNext()) {
      final MessageAndMetadata<byte[], byte[]> record = iteratorToRead.next();
      final int partition = record.partition();
      final long offset = record.offset();

      // Already covered by the stored offset for this partition: do not emit it again.
      if (lastOffsets.getState()[partition] >= offset) {
        LOG.info("Skipping message with offset {} from partition {}", offset, partition);
        continue;
      }

      final OUT element = deserializationSchema.deserialize(record.message());
      if (deserializationSchema.isEndOfStream(element)) {
        LOG.info("DeserializationSchema signaled end of stream for this source");
        break;
      }

      // The offset update and the emission happen under the checkpoint lock so they are atomic.
      synchronized (checkpointLock) {
        lastOffsets.getState()[partition] = offset;
        ctx.collect(element);
      }

      if (LOG.isTraceEnabled()) {
        LOG.trace("Processed record with offset {} from partition {}", offset, partition);
      }
    }
  }
Beispiel #4
0
  /**
   * Consumes the configured topic on a single stream and submits the received messages to the
   * {@code BatchPersistManager} in batches.
   *
   * <p>A batch is submitted as soon as it holds {@code maxBatchSize} messages; its offset info
   * carries the topic, partition and offset of the last message added. When the stream ends,
   * any messages still buffered are submitted as a final, smaller batch so they are not dropped.
   */
  @Override
  public void consumeMessages() {
    // dispatcherThr.scheduleAtFixedRate(new DispatchMonitor(), 1l,1l, TimeUnit.SECONDS);

    Map<String, Integer> topicCount = new HashMap<>();
    // Define single thread for topic
    topicCount.put(topic, 1);

    Map<String, List<KafkaStream<byte[], byte[]>>> consumerStreams =
        consumer.createMessageStreams(topicCount);

    List<KafkaStream<byte[], byte[]>> streams = consumerStreams.get(topic);

    MessageBatch dataBatch = new MessageBatch();
    // Position of the most recently buffered message; used to flush a trailing partial batch.
    int lastPartition = -1;
    long lastOffset = -1L;

    for (final KafkaStream<byte[], byte[]> stream : streams) {

      ConsumerIterator<byte[], byte[]> consumerIte = stream.iterator();

      streamHandle = consumerIte;

      while (consumerIte.hasNext()) {
        lastTimeUpdated.set(System.currentTimeMillis());

        MessageAndMetadata<byte[], byte[]> payload = consumerIte.next();

        lastPartition = payload.partition();
        lastOffset = payload.offset();

        dataBatch.getDataBatch().add(payload.message());
        // TODO: work on timed sending of messages when rcvd message is smaller
        if (dataBatch.getDataBatch().size() >= maxBatchSize) {
          OffsetInfo offsetInfo = new OffsetInfo(topic, lastPartition, lastOffset);
          dataBatch.setOffsetInfo(offsetInfo);
          // send it across
          BatchPersistManager.getInstance().submitBatch(dataBatch);

          dataBatch = new MessageBatch();
        }
      } // while

      System.out.println("Ended the while stream...");
    } // for streams

    // The loops ended with messages still buffered: send them as the last batch.
    if (!dataBatch.getDataBatch().isEmpty()) {
      dataBatch.setOffsetInfo(new OffsetInfo(topic, lastPartition, lastOffset));
      BatchPersistManager.getInstance().submitBatch(dataBatch);
    }
  }
  /**
   * Reads up to {@code _bufSize} messages from the configured kafka topic into the in-progress
   * map and queues their ids for emission.
   *
   * @return {@code true} if the buffer holds messages to be emitted after this call,
   *     {@code false} if nothing was read.
   * @throws IllegalStateException if the buffer is not empty or messages are still pending
   *     acknowledgement when this method is called.
   */
  protected boolean fillBuffer() {
    if (!(_inProgress.isEmpty() && _queue.isEmpty())) {
      throw new IllegalStateException(
          "cannot fill buffer when buffer or pending messages are non-empty");
    }

    if (_iterator == null) {
      // First call: open a single stream on the topic and keep its iterator for later calls.
      _iterator =
          _consumer
              .createMessageStreams(Collections.singletonMap(_topic, 1))
              .get(_topic)
              .get(0)
              .iterator();
    }

    // Polling the stream throws ConsumerTimeoutException once the configured timeout passes
    // without a new message, so the read loop is wrapped in a try-clause.
    try {
      for (int read = 0; read < _bufSize && _iterator.hasNext(); read++) {
        final MessageAndMetadata<byte[], byte[]> record = _iterator.next();
        _inProgress.put(
            new KafkaMessageId(record.partition(), record.offset()), record.message());
      }
    } catch (final ConsumerTimeoutException ignored) {
      // Deliberately ignored: storm will call nextTuple again at some point in the near future.
      // A timeout does *not* mean that no messages were read (state is checked below).
    }

    if (_inProgress.size() <= 0) {
      // no messages appended to buffer
      return false;
    }

    // Queue every currently pending kafka message id for emission.
    _queue.addAll(_inProgress.keySet());
    LOG.debug("buffer now has {} messages to be emitted", _queue.size());
    return true;
  }