// returns paused topic partitions
private Collection<TopicPartition> pauseTopicPartitions(TopicPartition excludedTp) {
    final Set<TopicPartition> pausedTopicPartitions = new HashSet<>(kafkaConsumer.assignment());
    LOG.debug("Currently assigned topic partitions [{}]", pausedTopicPartitions);
    pausedTopicPartitions.remove(excludedTp);
    kafkaConsumer.pause(pausedTopicPartitions);
    LOG.trace("Paused topic partitions [{}]", pausedTopicPartitions);
    return pausedTopicPartitions;
}
private static void execute() throws InterruptedException {
    KafkaConsumer<String, String> consumer = createConsumer();

    // Subscribe to all partitions of the topic. 'assign' could be used here
    // instead of 'subscribe' to consume from specific partitions only.
    consumer.subscribe(Arrays.asList("normal-topic"));

    processRecords(consumer);
}
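For comparison, here is a minimal sketch of the 'assign' alternative mentioned in the comment above. The method name executeWithAssign and the partition numbers are illustrative; createConsumer() and processRecords() are the same helpers used in this example.

private static void executeWithAssign() throws InterruptedException {
    KafkaConsumer<String, String> consumer = createConsumer();

    // Manually assign specific partitions instead of joining a consumer group.
    // Note: 'assign' bypasses group management and rebalancing entirely.
    consumer.assign(Arrays.asList(
        new TopicPartition("normal-topic", 0),
        new TopicPartition("normal-topic", 1)));

    processRecords(consumer);
}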
private void commitOffsetsForAckedTuples() {
    // Find offsets that are ready to be committed for every topic partition
    final Map<TopicPartition, OffsetAndMetadata> nextCommitOffsets = new HashMap<>();
    for (Map.Entry<TopicPartition, OffsetEntry> tpOffset : acked.entrySet()) {
        final OffsetAndMetadata nextCommitOffset = tpOffset.getValue().findNextCommitOffset();
        if (nextCommitOffset != null) {
            nextCommitOffsets.put(tpOffset.getKey(), nextCommitOffset);
        }
    }

    // Commit offsets that are ready to be committed for every topic partition
    if (!nextCommitOffsets.isEmpty()) {
        kafkaConsumer.commitSync(nextCommitOffsets);
        LOG.debug("Offsets successfully committed to Kafka [{}]", nextCommitOffsets);
        // Instead of iterating again, the state could be committed and updated per
        // TopicPartition in the loop above, but the extra network calls would cost
        // more than iterating twice over a small collection.
        for (Map.Entry<TopicPartition, OffsetEntry> tpOffset : acked.entrySet()) {
            final OffsetEntry offsetEntry = tpOffset.getValue();
            offsetEntry.commit(nextCommitOffsets.get(tpOffset.getKey()));
        }
    } else {
        LOG.trace("No offsets to commit. {}", this);
    }
}
private void doSeekRetriableTopicPartitions() {
    final Set<TopicPartition> retriableTopicPartitions = retryService.retriableTopicPartitions();

    for (TopicPartition rtp : retriableTopicPartitions) {
        final OffsetAndMetadata offsetAndMeta = acked.get(rtp).findNextCommitOffset();
        if (offsetAndMeta != null) {
            // seek to the next offset that is ready to commit in the next commit cycle
            kafkaConsumer.seek(rtp, offsetAndMeta.offset() + 1);
        } else {
            // seek to the offset right after the last committed offset
            kafkaConsumer.seek(rtp, acked.get(rtp).committedOffset + 1);
        }
    }
}
private void shutdown() {
    try {
        if (!consumerAutoCommitMode) {
            commitOffsetsForAckedTuples();
        }
    } finally {
        // release resources
        kafkaConsumer.close();
    }
}
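The shutdown above assumes the poll loop has already stopped. A common companion pattern, sketched here under the assumption of a caller on a different thread, is to break a blocking poll() with KafkaConsumer.wakeup(), which makes the polling thread throw org.apache.kafka.common.errors.WakeupException. The requestShutdown() and pollLoop() names are illustrative.

// Sketch: requesting shutdown from another thread. wakeup() is the only
// KafkaConsumer method that is safe to call concurrently.
public void requestShutdown() {
    kafkaConsumer.wakeup();
}

// In the polling thread:
private void pollLoop() {
    try {
        while (true) {
            ConsumerRecords<K, V> records = kafkaConsumer.poll(100);
            // ... emit / process records ...
        }
    } catch (WakeupException e) {
        // expected when requestShutdown() was called; fall through to cleanup
    } finally {
        shutdown();
    }
}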
@Override
public KafkaTridentSpoutBatchMetadata<K, V> emitPartitionBatch(
        TransactionAttempt tx,
        TridentCollector collector,
        KafkaTridentSpoutTopicPartition partitionTs,
        KafkaTridentSpoutBatchMetadata<K, V> lastBatch) {
    LOG.debug(
        "Emitting batch: [transaction = {}], [partition = {}], [collector = {}], [lastBatchMetadata = {}]",
        tx, partitionTs, collector, lastBatch);

    final TopicPartition topicPartition = partitionTs.getTopicPartition();
    KafkaTridentSpoutBatchMetadata<K, V> currentBatch = lastBatch;
    Collection<TopicPartition> pausedTopicPartitions = Collections.emptySet();

    try {
        // pause other topic partitions to only poll from the current topic partition
        pausedTopicPartitions = pauseTopicPartitions(topicPartition);

        seek(topicPartition, lastBatch);

        // poll
        final ConsumerRecords<K, V> records = kafkaConsumer.poll(pollTimeoutMs);
        LOG.debug("Polled [{}] records from Kafka.", records.count());

        if (!records.isEmpty()) {
            emitTuples(collector, records);
            // build new metadata
            currentBatch = new KafkaTridentSpoutBatchMetadata<>(topicPartition, records, lastBatch);
        }
    } finally {
        kafkaConsumer.resume(pausedTopicPartitions);
        LOG.trace("Resumed topic partitions [{}]", pausedTopicPartitions);
    }
    LOG.debug("Current batch metadata {}", currentBatch);
    return currentBatch;
}
private static void processRecords(KafkaConsumer<String, String> consumer)
        throws InterruptedException {
    while (true) {
        ConsumerRecords<String, String> records = consumer.poll(100);
        long lastOffset = 0;

        for (ConsumerRecord<String, String> record : records) {
            System.out.printf(
                "\n\roffset = %d, key = %s, value = %s",
                record.offset(), record.key(), record.value());
            lastOffset = record.offset();
        }

        System.out.println("lastOffset read: " + lastOffset);
        process();

        // Committing here, only after the business processing has finished,
        // is what gives the at-least-once guarantee.
        consumer.commitSync();
    }
}
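When finer-grained control is needed than the single commitSync() above, the position to resume from can also be committed per partition using the commitSync(Map<TopicPartition, OffsetAndMetadata>) overload. A minimal sketch of that variant of the loop body, assuming the same consumer; note that the committed offset must be the offset of the next record to read, hence the +1.

ConsumerRecords<String, String> records = consumer.poll(100);
for (TopicPartition partition : records.partitions()) {
    List<ConsumerRecord<String, String>> partitionRecords = records.records(partition);
    for (ConsumerRecord<String, String> record : partitionRecords) {
        // ... process the record ...
    }
    // Commit the position just past the last processed record of this partition.
    long lastOffset = partitionRecords.get(partitionRecords.size() - 1).offset();
    consumer.commitSync(Collections.singletonMap(partition, new OffsetAndMetadata(lastOffset + 1)));
}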
public static void main(String[] args) throws UnknownHostException {
    Properties props = new Properties();
    props.put("bootstrap.servers", "kafka01:9092,kafka02:9092,kafka03:9092");
    props.put("group.id", "test");
    props.put("enable.auto.commit", "true");
    props.put("auto.commit.interval.ms", "1000");
    props.put("session.timeout.ms", "30000");
    props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    // values are consumed as raw bytes, matching the KafkaConsumer<String, byte[]> type below
    props.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");

    KafkaConsumer<String, byte[]> consumer = new KafkaConsumer<>(props);
    consumer.subscribe(Arrays.asList("apt-receive1"));
    List<TopicPartition> partitions = new ArrayList<>();
    // partitions.add(new TopicPartition("apt-receive1", 2));
    // partitions.add(new TopicPartition("apt-receive1", 13));
    // consumer.assign(partitions);

    // marks which message keys have been received
    int[] bloom = new int[10_000_000];

    for (int i = 0; i < 10000; i++) {
        ConsumerRecords<String, byte[]> records = consumer.poll(100);
        System.out.println(i + ": " + records.count());
        for (ConsumerRecord<String, byte[]> record : records) {
            // System.out.println(record.key());
            bloom[Integer.parseInt(record.key())] = 1;
        }
        // if (sum == 10000) {
        //   System.out.println("sum=" + sum);
        //   break;
        // }
    }

    // report every key that was never received
    for (int j = 0; j < 10_000_000; j++) {
        if (bloom[j] == 0) {
            System.err.println("" + j);
        }
    }
    consumer.close();
    System.err.println("Finish!");
}
/**
 * Determines the offset of the next fetch. For failed batches lastBatchMeta is not null and
 * contains the fetch offset of the failed batch, so the next fetch resumes from the failed
 * batch. When the previous batch succeeded, lastBatchMeta is null and the next fetch starts
 * either at the offset of the last commit to Kafka or, if no commit has been made yet, at the
 * offset dictated by {@link KafkaSpoutConfig.FirstPollOffsetStrategy}.
 *
 * @return the offset of the next fetch
 */
private long seek(TopicPartition tp, KafkaTridentSpoutBatchMetadata<K, V> lastBatchMeta) {
    if (lastBatchMeta != null) {
        // seek to the offset right after the last offset of the previous batch
        kafkaConsumer.seek(tp, lastBatchMeta.getLastOffset() + 1);
        LOG.debug("Seeking fetch offset to next offset after last offset from previous batch");
    } else {
        LOG.debug("Seeking fetch offset from firstPollOffsetStrategy and last commit to Kafka");
        final OffsetAndMetadata committedOffset = kafkaConsumer.committed(tp);
        if (committedOffset != null) { // an offset was committed for this TopicPartition
            if (firstPollOffsetStrategy.equals(EARLIEST)) {
                kafkaConsumer.seekToBeginning(toArrayList(tp));
            } else if (firstPollOffsetStrategy.equals(LATEST)) {
                kafkaConsumer.seekToEnd(toArrayList(tp));
            } else {
                // By default polling starts at the last committed offset; +1 points the fetch
                // at the first uncommitted offset.
                kafkaConsumer.seek(tp, committedOffset.offset() + 1);
            }
        } else {
            // no commits have ever been made, so start at the beginning or end depending on the strategy
            if (firstPollOffsetStrategy.equals(EARLIEST)
                    || firstPollOffsetStrategy.equals(UNCOMMITTED_EARLIEST)) {
                kafkaConsumer.seekToBeginning(toArrayList(tp));
            } else if (firstPollOffsetStrategy.equals(LATEST)
                    || firstPollOffsetStrategy.equals(UNCOMMITTED_LATEST)) {
                kafkaConsumer.seekToEnd(toArrayList(tp));
            }
        }
    }
    final long fetchOffset = kafkaConsumer.position(tp);
    LOG.debug("Set [fetchOffset = {}]", fetchOffset);
    return fetchOffset;
}
@Override
public void close() {
    kafkaConsumer.close();
    LOG.debug("Closed");
}
/** @see stream.io.AbstractStream#init() */
@Override
public void init() throws Exception {
    super.init();

    if (group == null) {
        group = UUID.randomUUID().toString();
    }

    Properties props = new Properties();
    props.put("zookeeper.connect", zookeeper);
    props.put("metadata.broker.list", broker);
    props.put("bootstrap.servers", broker);
    props.put("group.id", group);
    // keys and values are consumed as raw bytes, matching the KafkaConsumer<byte[], byte[]> type below
    props.put("key.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
    props.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
    props.put("enable.auto.commit", "true");
    props.put("auto.commit.interval.ms", "10000");

    log.debug("Creating kafka consumer...");
    final KafkaConsumer<byte[], byte[]> kc = new KafkaConsumer<byte[], byte[]>(props);
    consumer = kc;

    log.info("Subscribing to topic '{}'", topic);
    consumer.subscribe(Arrays.asList(topic));
    log.info("Using codec {}", valueCodec);

    List<PartitionInfo> partitionInfo = consumer.partitionsFor(topic);
    log.debug("topic '{}' has {} partitions", topic, partitionInfo.size());

    pollThread = new Thread() {
        public void run() {
            try {
                running.set(true);
                while (running.get()) {
                    ConsumerRecords<byte[], byte[]> messages = consumer.poll(100L);
                    log.debug("Polled new messages: {}", messages);
                    // keep polling with a longer timeout until at least one record arrives
                    while (messages == null || messages.count() < 1) {
                        messages = consumer.poll(1000L);
                        log.debug("Polled new messages: {}", messages);
                    }

                    if (messages != null) {
                        Iterator<ConsumerRecord<byte[], byte[]>> it = messages.iterator();
                        while (it.hasNext()) {
                            ConsumerRecord<byte[], byte[]> record = it.next();
                            Object key = record.key();
                            byte[] data = record.value();

                            Data item = null;
                            if (valueCodec != null) {
                                log.debug("de-serializing item using {}", valueCodec);
                                item = valueCodec.decode(data);
                            } else {
                                item = DataFactory.create();
                                if (key != null) {
                                    item.put("@kafka:key", (Serializable) key);
                                }
                                item.put("@kafka:value", data);
                            }

                            item.put("@kafka:topic", record.topic());
                            item.put("@kafka:partition", record.partition());
                            queue.add(item);
                        }
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                log.info("Closing topic-stream reader...");
                queue.add(endOfStream);
            }
        }
    };

    pollThread.setDaemon(true);
    pollThread.start();

    Signals.register(
        new Hook() {
            @Override
            public void signal(int flags) {
                log.info("Received shutdown-signal!");
                running.set(false);
                pollThread.interrupt();
                log.info("Adding EOF to queue...");
                queue.add(endOfStream);
            }
        });
}
@Override
public void drive() {
    // A Consumer is not thread-safe; see
    // http://kafka.apache.org/090/javadoc/org/apache/kafka/clients/consumer/KafkaConsumer.html
    // http://kafka.apache.org/090/javadoc/org/apache/kafka/clients/consumer/KafkaConsumer.html#multithreaded
    try (KafkaConsumer<byte[], byte[]> consumer =
            new KafkaConsumer<>(consumerDefinition.getKafkaConfig())) {
        String topic = consumerDefinition.getTopic();
        log.info("Subscribing to {}", topic);
        if (consumerRebalanceListener == null) {
            consumer.subscribe(Collections.singletonList(topic));
        } else {
            consumer.subscribe(Collections.singletonList(topic), consumerRebalanceListener);
        }

        long messagesToReceive = consumerDefinition.getMessagesToReceive();
        log.info("Expecting {} messages", messagesToReceive);

        StopWatch stopWatch = new StopWatch();
        stopWatch.start();

        do {
            ConsumerRecords<byte[], byte[]> records = consumer.poll(consumerDefinition.getPollTimeout());
            if (records == null) {
                throw new IllegalStateException("null ConsumerRecords polled");
            } else {
                if (records.count() == 0) {
                    try {
                        log.info("No records fetched, pausing");
                        Thread.sleep(1000);
                    } catch (InterruptedException e) {
                        throw new RuntimeException(e);
                    }
                } else {
                    if (log.isTraceEnabled()) {
                        log.trace("Fetched {} records", records.count());
                    }
                    for (ConsumerRecord<byte[], byte[]> record : records) {
                        recordsFetched += 1;
                        applyReceiveDelay();
                        if (recordsFetched % consumerDefinition.getReportReceivedEvery() == 0) {
                            log.info("Received {} messages", recordsFetched);
                        }
                    }
                }
            }

            if (isShutdownRequested()) {
                break;
            }
            stopWatch.split();
        } while ((recordsFetched < messagesToReceive)
                && (stopWatch.getSplitTime() < consumerDefinition.getTestRunTimeout()));

        stopWatch.stop();
        if (isShutdownRequested()) {
            log.info("Shutting down");
        } else {
            long runTime = stopWatch.getTime();
            // report what was actually received, since the loop may also exit on timeout
            log.info("Done. Consumer received {} msgs in {} ms", recordsFetched, runTime);

            double averageThroughput = (1000d / runTime) * recordsFetched;
            log.info("Average throughput: {} msg/s", averageThroughput);
        }
    } finally {
        log.debug("Consumer closed");
        if (completionLatch != null) {
            completionLatch.countDown();
        }
    }
}
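Because a KafkaConsumer must be confined to a single thread, throughput is usually scaled by running one consumer per thread, with all consumers sharing a group.id so the topic's partitions are divided among them. A minimal sketch of that pattern follows; the consumerConfig Properties, topic name, and thread count are illustrative, and the loop body stands in for a driver like the one above.

// Sketch: one KafkaConsumer per thread. Consumers created with the same
// group.id split the topic's partitions between themselves.
ExecutorService pool = Executors.newFixedThreadPool(3);
for (int i = 0; i < 3; i++) {
    pool.submit(() -> {
        try (KafkaConsumer<byte[], byte[]> consumer = new KafkaConsumer<>(consumerConfig)) {
            consumer.subscribe(Collections.singletonList(topic));
            while (!Thread.currentThread().isInterrupted()) {
                ConsumerRecords<byte[], byte[]> records = consumer.poll(500);
                // ... hand the records to the processing pipeline ...
            }
        }
    });
}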