private void read(final KafkaStream<String, String> stream) {
    while (stream.iterator().hasNext()) {
        // Register a party with the phaser for every in-flight message; the handler's
        // completion callback deregisters it once the message has been acknowledged.
        final int phase = phaser.register();
        final MessageAndMetadata<String, String> msg = stream.iterator().next();
        final long offset = msg.offset();
        final long partition = msg.partition();
        unacknowledgedOffsets.add(offset);
        lastCommittedOffset.compareAndSet(0, offset);
        currentPartition.compareAndSet(-1, partition);
        final String jsonString = msg.message();
        handler.handle(configuration.getVertxAddress(), jsonString, () -> {
            unacknowledgedOffsets.remove(offset);
            phaser.arriveAndDeregister();
        });
        // Pause reading when too many messages are unacknowledged, the partition changed,
        // or too many offsets are still uncommitted; then wait for ACKs and commit.
        if (unacknowledgedOffsets.size() >= configuration.getMaxUnacknowledged()
                || partititionChanged(partition)
                || tooManyUncommittedOffsets(offset)) {
            LOG.info("Got {} unacknowledged messages, waiting for ACKs in order to commit",
                    unacknowledgedOffsets.size());
            if (!waitForAcks(phase)) {
                return;
            }
            LOG.info("Continuing message processing");
            commitOffsetsIfAllAcknowledged(offset);
        }
    }
}
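// A minimal, hedged sketch of the two helpers called above, assuming the java.util.concurrent
// Phaser tracks one party per in-flight message. The timeout constant and the `consumer` field
// (a high-level ConsumerConnector used for commits) are hypothetical, not the original code.
private static final long ACK_TIMEOUT_SECONDS = 30L; // illustrative value

private boolean waitForAcks(final int phase) {
    try {
        // Blocks until every registered party has arrived, i.e. every handled message was ACKed.
        phaser.awaitAdvanceInterruptibly(phase, ACK_TIMEOUT_SECONDS, TimeUnit.SECONDS);
        return true;
    } catch (final InterruptedException e) {
        Thread.currentThread().interrupt();
        return false;
    } catch (final TimeoutException e) {
        LOG.warn("Timed out waiting for ACKs of outstanding messages");
        return false;
    }
}

private void commitOffsetsIfAllAcknowledged(final long currentOffset) {
    if (unacknowledgedOffsets.isEmpty()) {
        lastCommittedOffset.set(currentOffset);
        consumer.commitOffsets(); // hypothetical ConsumerConnector handle
    }
}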
@Override
public void run(SourceContext<OUT> ctx) throws Exception {
    if (iteratorToRead == null) {
        throw new IllegalStateException("Kafka iterator not initialized properly.");
    }
    final Object checkpointLock = ctx.getCheckpointLock();
    while (running && iteratorToRead.hasNext()) {
        MessageAndMetadata<byte[], byte[]> message = iteratorToRead.next();
        if (lastOffsets.getState()[message.partition()] >= message.offset()) {
            LOG.info("Skipping message with offset {} from partition {}",
                    message.offset(), message.partition());
            continue;
        }
        OUT next = deserializationSchema.deserialize(message.message());
        if (deserializationSchema.isEndOfStream(next)) {
            LOG.info("DeserializationSchema signaled end of stream for this source");
            break;
        }
        // make the state update and the element emission atomic
        synchronized (checkpointLock) {
            lastOffsets.getState()[message.partition()] = message.offset();
            ctx.collect(next);
        }
        if (LOG.isTraceEnabled()) {
            LOG.trace("Processed record with offset {} from partition {}",
                    message.offset(), message.partition());
        }
    }
}
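// A hedged sketch of a deserialization schema the source above could be parameterized with,
// assuming Flink's DeserializationSchema interface (deserialize / isEndOfStream /
// getProducedType); the "END" sentinel convention is purely illustrative.
public class StringWithSentinelSchema implements DeserializationSchema<String> {

    @Override
    public String deserialize(byte[] message) {
        return new String(message, StandardCharsets.UTF_8);
    }

    @Override
    public boolean isEndOfStream(String nextElement) {
        // Ends the run() loop above via its isEndOfStream() check (illustrative sentinel).
        return "END".equals(nextElement);
    }

    @Override
    public TypeInformation<String> getProducedType() {
        return BasicTypeInfo.STRING_TYPE_INFO;
    }
}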
@Override
public void run() {
    ConsumerIterator<byte[], byte[]> it = m_stream.iterator();
    while (it.hasNext()) {
        MessageAndMetadata<byte[], byte[]> md = it.next();
        byte[] msg = md.message();
        long offset = md.offset();
        String smsg = new String(msg);
        try {
            // Parse the CSV payload and insert it, carrying the Kafka offset as row metadata.
            m_loader.insertRow(new RowWithMetaData(smsg, offset), m_csvParser.parseLine(smsg));
        } catch (Exception ex) {
            // Any parse or insert failure is treated as fatal and stops the whole process.
            m_log.error("Consumer stopped", ex);
            System.exit(1);
        }
    }
}
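// A minimal, hedged sketch of how m_stream could be obtained with the old high-level consumer
// API (kafka.consumer.*); the ZooKeeper address, group id and topic name are placeholder values.
Properties props = new Properties();
props.put("zookeeper.connect", "localhost:2181");
props.put("group.id", "csv-loader-group");
ConsumerConnector connector = Consumer.createJavaConsumerConnector(new ConsumerConfig(props));
Map<String, List<KafkaStream<byte[], byte[]>>> streams =
        connector.createMessageStreams(Collections.singletonMap("loader-topic", 1));
KafkaStream<byte[], byte[]> m_stream = streams.get("loader-topic").get(0);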
@Override
public void consumeMessages() {
    // dispatcherThr.scheduleAtFixedRate(new DispatchMonitor(), 1l, 1l, TimeUnit.SECONDS);
    Map<String, Integer> topicCount = new HashMap<>();
    // Use a single stream (thread) for the topic
    topicCount.put(topic, 1);
    Map<String, List<KafkaStream<byte[], byte[]>>> consumerStreams =
            consumer.createMessageStreams(topicCount);
    List<KafkaStream<byte[], byte[]>> streams = consumerStreams.get(topic);
    MessageBatch dataBatch = new MessageBatch();
    for (final KafkaStream<byte[], byte[]> stream : streams) {
        ConsumerIterator<byte[], byte[]> consumerIte = stream.iterator();
        streamHandle = consumerIte;
        while (consumerIte.hasNext()) {
            lastTimeUpdated.set(System.currentTimeMillis());
            MessageAndMetadata<byte[], byte[]> payload = consumerIte.next();
            int partitionKey = payload.partition();
            long offset = payload.offset();
            dataBatch.getDataBatch().add(payload.message());
            // TODO: also flush on a timer when incoming messages are too few to fill a batch
            if (dataBatch.getDataBatch().size() >= maxBatchSize) {
                // Record the partition/offset the batch ends at, hand it off, and start a new batch
                dataBatch.setOffsetInfo(new OffsetInfo(topic, partitionKey, offset));
                BatchPersistManager.getInstance().submitBatch(dataBatch);
                dataBatch = new MessageBatch();
            }
        }
        System.out.println("Ended the while stream...");
    }
    // When the loop ends, the last (partial) batch still needs to be sent.
}
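// A hedged sketch of flushing that final partial batch, as the closing comment above suggests.
// This helper, its call site, and the lastPartition/lastOffset values (the most recently
// consumed message, tracked outside the inner loop) are assumptions, not the original code.
private void submitRemainder(final MessageBatch dataBatch, final int lastPartition, final long lastOffset) {
    if (!dataBatch.getDataBatch().isEmpty()) {
        dataBatch.setOffsetInfo(new OffsetInfo(topic, lastPartition, lastOffset));
        BatchPersistManager.getInstance().submitBatch(dataBatch);
    }
}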
/**
 * Refills the buffer with messages from the configured kafka topic if available.
 *
 * @return Whether the buffer contains messages to be emitted after this call.
 * @throws IllegalStateException When current buffer is not empty or messages not acknowledged by
 *     topology.
 */
protected boolean fillBuffer() {
    if (!_inProgress.isEmpty() || !_queue.isEmpty()) {
        throw new IllegalStateException(
                "cannot fill buffer when buffer or pending messages are non-empty");
    }
    if (_iterator == null) {
        // create a stream of messages from _consumer using the streams as defined on construction
        final Map<String, List<KafkaStream<byte[], byte[]>>> streams =
                _consumer.createMessageStreams(Collections.singletonMap(_topic, 1));
        _iterator = streams.get(_topic).get(0).iterator();
    }
    // We'll iterate the stream in a try-clause; kafka stream will poll its client channel for the
    // next message, throwing a ConsumerTimeoutException when the configured timeout is exceeded.
    try {
        int size = 0;
        while (size < _bufSize && _iterator.hasNext()) {
            final MessageAndMetadata<byte[], byte[]> message = _iterator.next();
            final KafkaMessageId id = new KafkaMessageId(message.partition(), message.offset());
            _inProgress.put(id, message.message());
            size++;
        }
    } catch (final ConsumerTimeoutException e) {
        // ignore, storm will call nextTuple again at some point in the near future
        // timeout does *not* mean that no messages were read (state is checked below)
    }

    if (!_inProgress.isEmpty()) {
        // set _queue to all currently pending kafka message ids
        _queue.addAll(_inProgress.keySet());
        LOG.debug("buffer now has {} messages to be emitted", _queue.size());
        // message(s) appended to buffer
        return true;
    } else {
        // no messages appended to buffer
        return false;
    }
}
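// A hedged sketch of how a Storm spout built around fillBuffer() might emit and acknowledge
// messages, assuming _queue is a Queue<KafkaMessageId>, _inProgress maps ids to raw payloads,
// and _collector is a SpoutOutputCollector; the requeue-on-fail policy is an assumption.
@Override
public void nextTuple() {
    // only refill when both the emit queue and the in-progress map are empty,
    // otherwise fillBuffer() throws IllegalStateException
    if (_queue.isEmpty() && _inProgress.isEmpty() && !fillBuffer()) {
        return; // nothing to emit right now; storm will call nextTuple again
    }
    final KafkaMessageId id = _queue.poll();
    if (id != null) {
        // anchor the tuple on the kafka message id so ack/fail can correlate it
        _collector.emit(new Values(_inProgress.get(id)), id);
    }
}

@Override
public void ack(final Object msgId) {
    if (msgId instanceof KafkaMessageId) {
        _inProgress.remove(msgId); // fully processed; drop the buffered payload
    }
}

@Override
public void fail(final Object msgId) {
    if (msgId instanceof KafkaMessageId) {
        _queue.add((KafkaMessageId) msgId); // requeue for re-emission from the local buffer
    }
}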