// ======== poll ========= private ConsumerRecords<K, V> pollKafkaBroker() { doSeekRetriableTopicPartitions(); final ConsumerRecords<K, V> consumerRecords = recordsFetcher.fetchRecords(kafkaSpoutConfig.getPollTimeoutMs()); final int numPolledRecords = consumerRecords.count(); LOG.debug( "Polled [{}] records from Kafka. [{}] uncommitted offsets across all topic partitions", numPolledRecords, numUncommittedOffsets); return consumerRecords; }
@Override public KafkaTridentSpoutBatchMetadata<K, V> emitPartitionBatch( TransactionAttempt tx, TridentCollector collector, KafkaTridentSpoutTopicPartition partitionTs, KafkaTridentSpoutBatchMetadata<K, V> lastBatch) { LOG.debug( "Emitting batch: [transaction = {}], [partition = {}], [collector = {}], [lastBatchMetadata = {}]", tx, partitionTs, collector, lastBatch); final TopicPartition topicPartition = partitionTs.getTopicPartition(); KafkaTridentSpoutBatchMetadata<K, V> currentBatch = lastBatch; Collection<TopicPartition> pausedTopicPartitions = Collections.emptySet(); try { // pause other topic partitions to only poll from current topic partition pausedTopicPartitions = pauseTopicPartitions(topicPartition); seek(topicPartition, lastBatch); // poll final ConsumerRecords<K, V> records = kafkaConsumer.poll(pollTimeoutMs); LOG.debug("Polled [{}] records from Kafka.", records.count()); if (!records.isEmpty()) { emitTuples(collector, records); // build new metadata currentBatch = new KafkaTridentSpoutBatchMetadata<>(topicPartition, records, lastBatch); } } finally { kafkaConsumer.resume(pausedTopicPartitions); LOG.trace("Resumed topic partitions [{}]", pausedTopicPartitions); } LOG.debug("Current batch metadata {}", currentBatch); return currentBatch; }
public static void main(String[] args) throws UnknownHostException { Properties props = new Properties(); props.put("bootstrap.servers", "kafka01:9092,kafka02:9092,kafka03:9092"); props.put("group.id", "test"); props.put("enable.auto.commit", "true"); props.put("auto.commit.interval.ms", "1000"); props.put("session.timeout.ms", "30000"); props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); KafkaConsumer<String, byte[]> consumer = new KafkaConsumer<>(props); consumer.subscribe(Arrays.asList("apt-receive1")); List<TopicPartition> partitions = new ArrayList<>(); // partitions.add(new TopicPartition("apt-receive1", 2)); // partitions.add(new TopicPartition("apt-receive1", 13)); // consumer.assign(partitions); for (int i = 0; i < 10000; i++) { ConsumerRecords<String, byte[]> records = consumer.poll(100); System.out.println(i + ": " + records.count()); for (ConsumerRecord<String, byte[]> record : records) { // System.out.println(record.key()); bloom[Integer.parseInt(record.key())] = 1; } // if (sum == 10000) { // System.out.println("sum=" + sum); // break; // } } for (int j = 0; j < 10_000_000; j++) { if (bloom[j] == 0) { System.err.println("" + j); } } consumer.close(); System.err.println("Finish!"); }
@Test public void testOnConsumeChain() { List<ConsumerInterceptor<Integer, Integer>> interceptorList = new ArrayList<>(); // we are testing two different interceptors by configuring the same interceptor differently, // which is not // how it would be done in KafkaConsumer, but ok for testing interceptor callbacks FilterConsumerInterceptor<Integer, Integer> interceptor1 = new FilterConsumerInterceptor<>(filterPartition1); FilterConsumerInterceptor<Integer, Integer> interceptor2 = new FilterConsumerInterceptor<>(filterPartition2); interceptorList.add(interceptor1); interceptorList.add(interceptor2); ConsumerInterceptors<Integer, Integer> interceptors = new ConsumerInterceptors<>(interceptorList); // verify that onConsumer modifies ConsumerRecords Map<TopicPartition, List<ConsumerRecord<Integer, Integer>>> records = new HashMap<>(); List<ConsumerRecord<Integer, Integer>> list1 = new ArrayList<>(); list1.add(consumerRecord); List<ConsumerRecord<Integer, Integer>> list2 = new ArrayList<>(); list2.add( new ConsumerRecord<>( filterTopicPart1.topic(), filterTopicPart1.partition(), 0, 0L, TimestampType.CREATE_TIME, 0L, 0, 0, 1, 1)); List<ConsumerRecord<Integer, Integer>> list3 = new ArrayList<>(); list3.add( new ConsumerRecord<>( filterTopicPart2.topic(), filterTopicPart2.partition(), 0, 0L, TimestampType.CREATE_TIME, 0L, 0, 0, 1, 1)); records.put(tp, list1); records.put(filterTopicPart1, list2); records.put(filterTopicPart2, list3); ConsumerRecords<Integer, Integer> consumerRecords = new ConsumerRecords<>(records); ConsumerRecords<Integer, Integer> interceptedRecords = interceptors.onConsume(consumerRecords); assertEquals(1, interceptedRecords.count()); assertTrue(interceptedRecords.partitions().contains(tp)); assertFalse(interceptedRecords.partitions().contains(filterTopicPart1)); assertFalse(interceptedRecords.partitions().contains(filterTopicPart2)); assertEquals(2, onConsumeCount); // verify that even if one of the intermediate interceptors throws an exception, all // interceptors' onConsume are called interceptor1.injectOnConsumeError(true); ConsumerRecords<Integer, Integer> partInterceptedRecs = interceptors.onConsume(consumerRecords); assertEquals(2, partInterceptedRecs.count()); assertTrue( partInterceptedRecs .partitions() .contains(filterTopicPart1)); // since interceptor1 threw exception assertFalse( partInterceptedRecs .partitions() .contains(filterTopicPart2)); // interceptor2 should still be called assertEquals(4, onConsumeCount); // if all interceptors throw an exception, records should be unmodified interceptor2.injectOnConsumeError(true); ConsumerRecords<Integer, Integer> noneInterceptedRecs = interceptors.onConsume(consumerRecords); assertEquals(noneInterceptedRecs, consumerRecords); assertEquals(3, noneInterceptedRecs.count()); assertEquals(6, onConsumeCount); interceptors.close(); }
@Override public void drive() { // A Consumer is not thread-safe // {@see // http://kafka.apache.org/090/javadoc/org/apache/kafka/clients/consumer/KafkaConsumer.html} // {@see // http://kafka.apache.org/090/javadoc/org/apache/kafka/clients/consumer/KafkaConsumer.html#multithreaded} try (KafkaConsumer<byte[], byte[]> consumer = new KafkaConsumer<>(consumerDefinition.getKafkaConfig())) { String topic = consumerDefinition.getTopic(); log.info("Subscribing to {}", topic); if (consumerRebalanceListener == null) { consumer.subscribe(Collections.singletonList(topic)); } else { consumer.subscribe(Collections.singletonList(topic), consumerRebalanceListener); } long messagesToReceive = consumerDefinition.getMessagesToReceive(); log.info("Expecting {} messages", messagesToReceive); StopWatch stopWatch = new StopWatch(); stopWatch.start(); do { ConsumerRecords<byte[], byte[]> records = consumer.poll(consumerDefinition.getPollTimeout()); if (records == null) { throw new IllegalStateException("null ConsumerRecords polled"); } else { if (records.count() == 0) { try { log.info("No records fetched, pausing"); Thread.sleep(1000); } catch (InterruptedException e) { throw new RuntimeException(e); } } else { if (log.isTraceEnabled()) { log.trace("Fetched {} records", records.count()); } for (ConsumerRecord<byte[], byte[]> record : records) { recordsFetched += 1; applyReceiveDelay(); if (recordsFetched % consumerDefinition.getReportReceivedEvery() == 0) { log.info("Received {} messages", recordsFetched); } } } } if (isShutdownRequested()) { break; } stopWatch.split(); } while ((recordsFetched < messagesToReceive) && (stopWatch.getSplitTime() < consumerDefinition.getTestRunTimeout())); stopWatch.stop(); if (isShutdownRequested()) { log.info("Shutting down"); } else { long runTime = stopWatch.getTime(); log.info("Done. Consumer received {} msgs in {} ms", messagesToReceive, runTime); double averageThroughput = (1000d / runTime) * messagesToReceive; log.info("Average throughput: {} msg/s", averageThroughput); } } finally { log.debug("Consumer closed"); if (completionLatch != null) { completionLatch.countDown(); } } }