private void read(final KafkaStream<String, String> stream) {
    while (stream.iterator().hasNext()) {
      final int phase = phaser.register();

      final MessageAndMetadata<String, String> msg = stream.iterator().next();
      final long offset = msg.offset();
      final long partition = msg.partition();
      unacknowledgedOffsets.add(offset);
      lastCommittedOffset.compareAndSet(0, offset);
      currentPartition.compareAndSet(-1, partition);

      final String jsonString = msg.message();

      handler.handle(
          configuration.getVertxAddress(),
          jsonString,
          () -> {
            unacknowledgedOffsets.remove(offset);
            phaser.arriveAndDeregister();
          });

      if (unacknowledgedOffsets.size() >= configuration.getMaxUnacknowledged()
          || partititionChanged(partition)
          || tooManyUncommittedOffsets(offset)) {
        LOG.info(
            "Got {} unacknowledged messages, waiting for ACKs in order to commit",
            unacknowledgedOffsets.size());
        if (!waitForAcks(phase)) {
          return;
        }
        LOG.info("Continuing message processing");
        commitOffsetsIfAllAcknowledged(offset);
      }
    }
  }
 public void run() {
   ConsumerIterator<byte[], byte[]> it = stream.iterator();
   while (it.hasNext()) {
     MessageAndMetadata<byte[], byte[]> data = it.next();
     byte[] mBytes = data.message();
     Message message = JSONSerializer.INSTANCE.fromBytes(mBytes, Message.class);
     handler.onMessage(message);
   }
 }
  /**
   * Compare strings in content to the messages in Kafka topic
   *
   * @param content
   * @throws UnsupportedEncodingException
   */
  protected void validateContent(String[] content) throws UnsupportedEncodingException {

    Set<String> inputSet = new HashSet<String>(Arrays.asList(content));
    Set<String> outputSet = new HashSet<String>();

    for (String str : content) {
      MessageAndMetadata<byte[], byte[]> fetchedMsg = testUtil.getNextMessageFromConsumer(topic);
      outputSet.add(toText(new String(fetchedMsg.message(), "UTF-8")));
    }

    Assert.assertEquals(inputSet, outputSet);
  }
Example #4
0
 private Callable<List<SimplifiedLog>> createConsumerThread(
     ConsumerIterator<String, SimplifiedLog> iterator, int expectedMsg) {
   return () -> {
     List<SimplifiedLog> received = new ArrayList<>();
     while ((received.size() < expectedMsg) && iterator.hasNext()) {
       MessageAndMetadata data = iterator.next();
       received.add((SimplifiedLog) data.message());
       LOGGER.debug("Received message: {} | From partition: {}", data.message(), data.partition());
     }
     return received;
   };
 }
  private void run() {
    Map<String, Integer> topicCountMap = new HashMap<String, Integer>();

    // Hard coding single threaded consumer
    topicCountMap.put(inputTopic, 1);

    Properties props = createConsumerConfig(zookeeper, groupId, url);
    VerifiableProperties vProps = new VerifiableProperties(props);

    // Create decoders for key and value
    KafkaAvroDecoder avroDecoder = new KafkaAvroDecoder(vProps);
    StringDecoder stringDecoder = new StringDecoder(new VerifiableProperties());

    KafkaStream stream =
        consumer
            .createMessageStreams(topicCountMap, stringDecoder, avroDecoder)
            .get(inputTopic)
            .get(0);
    ConsumerIterator it = stream.iterator();
    System.out.println("Ready to start iterating wih properties: " + props.toString());
    System.out.println("Reading topic:" + inputTopic);

    while (it.hasNext()) {
      MessageAndMetadata messageAndMetadata = it.next();
      String ip = (String) messageAndMetadata.key();

      // Once we release a new version of the avro deserializer that can return SpecificData, the
      // deep copy will be unnecessary
      GenericRecord genericEvent = (GenericRecord) messageAndMetadata.message();
      LogLine event = (LogLine) SpecificData.get().deepCopy(LogLine.SCHEMA$, genericEvent);

      SessionState oldState = state.get(ip);
      int sessionId = 0;
      if (oldState == null) {
        state.put(ip, new SessionState(event.getTimestamp(), 0));
      } else {
        sessionId = oldState.getSessionId();

        // if the old timestamp is more than 30 minutes older than new one, we have a new session
        if (oldState.getLastConnection() < event.getTimestamp() - sessionLengthMs)
          sessionId = sessionId + 1;

        SessionState newState = new SessionState(event.getTimestamp(), sessionId);
        state.put(ip, newState);
      }
      event.setSessionid(sessionId);
      System.out.println(event.toString());
      ProducerRecord<String, LogLine> record =
          new ProducerRecord<String, LogLine>(outputTopic, event.getIp().toString(), event);
      producer.send(record);
    }
  }
Example #6
0
 @Override
 public void run() {
   ConsumerIterator<byte[], byte[]> it = m_stream.iterator();
   while (it.hasNext()) {
     MessageAndMetadata<byte[], byte[]> md = it.next();
     byte msg[] = md.message();
     long offset = md.offset();
     String smsg = new String(msg);
     try {
       m_loader.insertRow(new RowWithMetaData(smsg, offset), m_csvParser.parseLine(smsg));
     } catch (Exception ex) {
       m_log.error("Consumer stopped", ex);
       System.exit(1);
     }
   }
 }
Example #7
0
  @Override
  public void consumeMessages() {
    // dispatcherThr.scheduleAtFixedRate(new DispatchMonitor(), 1l,1l, TimeUnit.SECONDS);

    Map<String, Integer> topicCount = new HashMap<>();
    // Define single thread for topic
    topicCount.put(topic, new Integer(1));

    Map<String, List<KafkaStream<byte[], byte[]>>> consumerStreams =
        consumer.createMessageStreams(topicCount);

    List<KafkaStream<byte[], byte[]>> streams = consumerStreams.get(topic);

    MessageBatch dataBatch = new MessageBatch();

    for (final KafkaStream stream : streams) {

      ConsumerIterator<byte[], byte[]> consumerIte = stream.iterator();

      streamHandle = consumerIte;

      while (consumerIte.hasNext()) {
        lastTimeUpdated.set(System.currentTimeMillis());

        MessageAndMetadata<byte[], byte[]> payload = consumerIte.next();

        int partitionKey = payload.partition();
        long offset = payload.offset();

        dataBatch.getDataBatch().add(payload.message());
        // TODO: work on timed sending of messages when rcvd message is smaller
        if (dataBatch.getDataBatch().size() >= maxBatchSize) {
          OffsetInfo offsetInfo = new OffsetInfo(topic, partitionKey, offset);
          dataBatch.setOffsetInfo(offsetInfo);
          // send it across
          BatchPersistManager.getInstance().submitBatch(dataBatch);

          dataBatch = new MessageBatch();
        }
      } // while

      System.out.println("Ended the while stream...");
    } // for streams

    // break in loop , send the last batch

  }
Example #8
0
  /**
   * Refills the buffer with messages from the configured kafka topic if available.
   *
   * @return Whether the buffer contains messages to be emitted after this call.
   * @throws IllegalStateException When current buffer is not empty or messages not acknowledged by
   *     topology.
   */
  protected boolean fillBuffer() {
    if (!_inProgress.isEmpty() || !_queue.isEmpty()) {
      throw new IllegalStateException(
          "cannot fill buffer when buffer or pending messages are non-empty");
    }

    if (_iterator == null) {
      // create a stream of messages from _consumer using the streams as defined on construction
      final Map<String, List<KafkaStream<byte[], byte[]>>> streams =
          _consumer.createMessageStreams(Collections.singletonMap(_topic, 1));
      _iterator = streams.get(_topic).get(0).iterator();
    }

    // We'll iterate the stream in a try-clause; kafka stream will poll its client channel for the
    // next message,
    // throwing a ConsumerTimeoutException when the configured timeout is exceeded.
    try {
      int size = 0;
      while (size < _bufSize && _iterator.hasNext()) {
        final MessageAndMetadata<byte[], byte[]> message = _iterator.next();
        final KafkaMessageId id = new KafkaMessageId(message.partition(), message.offset());
        _inProgress.put(id, message.message());
        size++;
      }
    } catch (final ConsumerTimeoutException e) {
      // ignore, storm will call nextTuple again at some point in the near future
      // timeout does *not* mean that no messages were read (state is checked below)
    }

    if (_inProgress.size() > 0) {
      // set _queue to all currently pending kafka message ids
      _queue.addAll(_inProgress.keySet());
      LOG.debug("buffer now has {} messages to be emitted", _queue.size());
      // message(s) appended to buffer
      return true;
    } else {
      // no messages appended to buffer
      return false;
    }
  }
Example #9
0
  @Override
  public void run(SourceContext<OUT> ctx) throws Exception {
    if (iteratorToRead == null) {
      throw new IllegalStateException("Kafka iterator not initialized properly.");
    }

    final Object checkpointLock = ctx.getCheckpointLock();

    while (running && iteratorToRead.hasNext()) {
      MessageAndMetadata<byte[], byte[]> message = iteratorToRead.next();
      if (lastOffsets.getState()[message.partition()] >= message.offset()) {
        LOG.info(
            "Skipping message with offset {} from partition {}",
            message.offset(),
            message.partition());
        continue;
      }
      OUT next = deserializationSchema.deserialize(message.message());

      if (deserializationSchema.isEndOfStream(next)) {
        LOG.info("DeserializationSchema signaled end of stream for this source");
        break;
      }

      // make the state update and the element emission atomic
      synchronized (checkpointLock) {
        lastOffsets.getState()[message.partition()] = message.offset();
        ctx.collect(next);
      }

      if (LOG.isTraceEnabled()) {
        LOG.trace(
            "Processed record with offset {} from partition {}",
            message.offset(),
            message.partition());
      }
    }
  }