/**
 * Replaces the pending-emit iterator with one over every record of the given poll result.
 *
 * <p>Records are collected partition by partition, in the order in which
 * {@code consumerRecords.partitions()} yields the partitions, into a fresh linked list;
 * {@code waitingToEmit} is then pointed at that list's iterator.
 *
 * @param consumerRecords the poll result whose records should be queued for emission
 */
public void setWaitingToEmit(ConsumerRecords<K, V> consumerRecords) {
    final List<ConsumerRecord<K, V>> pending = new LinkedList<>();
    for (TopicPartition partition : consumerRecords.partitions()) {
      // Append this partition's records one by one, preserving their order.
      for (ConsumerRecord<K, V> record : consumerRecords.records(partition)) {
        pending.add(record);
      }
    }
    waitingToEmit = pending.iterator();
  }
  // ======== poll =========
  /**
   * Fetches the next batch of records.
   *
   * <p>Calls {@code doSeekRetriableTopicPartitions()} first, then fetches through
   * {@code recordsFetcher} using the poll timeout from {@code kafkaSpoutConfig}, and logs the
   * number of records fetched together with the current {@code numUncommittedOffsets}.
   *
   * @return the records returned by the fetcher
   */
  private ConsumerRecords<K, V> pollKafkaBroker() {
    doSeekRetriableTopicPartitions();

    final ConsumerRecords<K, V> polled =
        recordsFetcher.fetchRecords(kafkaSpoutConfig.getPollTimeoutMs());
    LOG.debug(
        "Polled [{}] records from Kafka. [{}] uncommitted offsets across all topic partitions",
        polled.count(),
        numUncommittedOffsets);
    return polled;
  }
    /**
     * Counts the invocation, optionally fails on purpose, and otherwise returns a copy of
     * {@code records} without the partitions whose number equals {@code filterPartition}.
     *
     * @param records the records handed to this interceptor
     * @return a new {@code ConsumerRecords} holding only the partitions that were not filtered
     * @throws KafkaException if {@code throwExceptionOnConsume} is set
     */
    @Override
    public ConsumerRecords<K, V> onConsume(ConsumerRecords<K, V> records) {
      // The counter is bumped before the injected failure, so failing calls are counted too.
      onConsumeCount++;
      if (throwExceptionOnConsume) {
        throw new KafkaException("Injected exception in FilterConsumerInterceptor.onConsume.");
      }

      final Map<TopicPartition, List<ConsumerRecord<K, V>>> kept = new HashMap<>();
      for (TopicPartition partition : records.partitions()) {
        if (partition.partition() == filterPartition) {
          // Drop every topic/partition whose partition number is the filtered one.
          continue;
        }
        kept.put(partition, records.records(partition));
      }
      return new ConsumerRecords<>(kept);
    }
  /**
   * Polls one topic partition, emits whatever was polled, and returns the batch metadata.
   *
   * <p>The partitions returned by {@code pauseTopicPartitions(...)} are resumed in a
   * {@code finally} block, so they are resumed even when seeking, polling or emitting throws.
   *
   * @param tx the transaction attempt (only logged here)
   * @param collector the collector passed on to {@code emitTuples}
   * @param partitionTs wrapper providing the topic partition to poll
   * @param lastBatch metadata of the previous batch; used for seeking and returned unchanged when
   *     the poll yields no records
   * @return new metadata built from the polled records, or {@code lastBatch} if nothing was polled
   */
  @Override
  public KafkaTridentSpoutBatchMetadata<K, V> emitPartitionBatch(
      TransactionAttempt tx,
      TridentCollector collector,
      KafkaTridentSpoutTopicPartition partitionTs,
      KafkaTridentSpoutBatchMetadata<K, V> lastBatch) {
    LOG.debug(
        "Emitting batch: [transaction = {}], [partition = {}], [collector = {}], [lastBatchMetadata = {}]",
        tx,
        partitionTs,
        collector,
        lastBatch);

    final TopicPartition target = partitionTs.getTopicPartition();
    // Starts out as the previous metadata; only replaced when records are actually polled.
    KafkaTridentSpoutBatchMetadata<K, V> batchMetadata = lastBatch;
    Collection<TopicPartition> paused = Collections.emptySet();

    try {
      // Pause the other topic partitions so the poll below only reads from the target one.
      paused = pauseTopicPartitions(target);

      seek(target, lastBatch);

      final ConsumerRecords<K, V> polled = kafkaConsumer.poll(pollTimeoutMs);
      LOG.debug("Polled [{}] records from Kafka.", polled.count());

      final boolean gotRecords = !polled.isEmpty();
      if (gotRecords) {
        emitTuples(collector, polled);
        batchMetadata = new KafkaTridentSpoutBatchMetadata<>(target, polled, lastBatch);
      }
    } finally {
      kafkaConsumer.resume(paused);
      LOG.trace("Resumed topic partitions [{}]", paused);
    }
    LOG.debug("Current batch metadata {}", batchMetadata);
    return batchMetadata;
  }
Beispiel #5
0
  /**
   * Consumes from topic {@code apt-receive1} for 10000 poll rounds, marks every received key in
   * the {@code bloom} array, then prints to stderr every index below 10,000,000 that was never
   * marked.
   *
   * <p>Record keys are parsed with {@link Integer#parseInt(String)} and used as indices into
   * {@code bloom}; a key that is not a valid index aborts the run with an exception. The consumer
   * is closed in all cases.
   *
   * @param args unused
   * @throws UnknownHostException declared for compatibility with existing callers
   */
  public static void main(String[] args) throws UnknownHostException {
    Properties props = new Properties();
    props.put("bootstrap.servers", "kafka01:9092,kafka02:9092,kafka03:9092");
    props.put("group.id", "test");
    props.put("enable.auto.commit", "true");
    props.put("auto.commit.interval.ms", "1000");
    props.put("session.timeout.ms", "30000");
    props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    // The consumer below is typed <String, byte[]>, so values must be deserialized as raw bytes;
    // a String deserializer here would put String values behind a byte[] type.
    props.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");

    // try-with-resources guarantees the consumer is closed even if the loop below throws.
    try (KafkaConsumer<String, byte[]> consumer = new KafkaConsumer<>(props)) {
      consumer.subscribe(Arrays.asList("apt-receive1"));
      for (int i = 0; i < 10000; i++) {
        ConsumerRecords<String, byte[]> records = consumer.poll(100);
        System.out.println(i + ": " + records.count());
        for (ConsumerRecord<String, byte[]> record : records) {
          // Mark the key as seen.
          bloom[Integer.parseInt(record.key())] = 1;
        }
      }
      // Report every index that was never marked.
      for (int j = 0; j < 10_000_000; j++) {
        if (bloom[j] == 0) {
          System.err.println("" + j);
        }
      }
    }
    System.err.println("Finish!");
  }
  /**
   * Verifies how a chain of two {@code FilterConsumerInterceptor}s behaves in
   * {@code ConsumerInterceptors.onConsume}:
   *
   * <ol>
   *   <li>with both interceptors working, only the partition {@code tp} survives;
   *   <li>with the first interceptor throwing, the second one is still invoked and only its own
   *       partition is removed;
   *   <li>with both throwing, the records come back unmodified.
   * </ol>
   *
   * <p>{@code onConsumeCount} is expected to grow by two on every round, i.e. every interceptor is
   * invoked each time regardless of failures.
   */
  @Test
  public void testOnConsumeChain() {
    List<ConsumerInterceptor<Integer, Integer>> interceptorList = new ArrayList<>();
    // We are testing two different interceptors by configuring the same interceptor class
    // differently, which is not how it would be done in KafkaConsumer, but is fine for testing
    // interceptor callbacks.
    FilterConsumerInterceptor<Integer, Integer> interceptor1 =
        new FilterConsumerInterceptor<>(filterPartition1);
    FilterConsumerInterceptor<Integer, Integer> interceptor2 =
        new FilterConsumerInterceptor<>(filterPartition2);
    interceptorList.add(interceptor1);
    interceptorList.add(interceptor2);
    ConsumerInterceptors<Integer, Integer> interceptors =
        new ConsumerInterceptors<>(interceptorList);

    // Build one record for each of the three partitions: tp, filterTopicPart1, filterTopicPart2.
    Map<TopicPartition, List<ConsumerRecord<Integer, Integer>>> records = new HashMap<>();
    List<ConsumerRecord<Integer, Integer>> list1 = new ArrayList<>();
    list1.add(consumerRecord);
    List<ConsumerRecord<Integer, Integer>> list2 = new ArrayList<>();
    list2.add(
        new ConsumerRecord<>(
            filterTopicPart1.topic(),
            filterTopicPart1.partition(),
            0,
            0L,
            TimestampType.CREATE_TIME,
            0L,
            0,
            0,
            1,
            1));
    List<ConsumerRecord<Integer, Integer>> list3 = new ArrayList<>();
    list3.add(
        new ConsumerRecord<>(
            filterTopicPart2.topic(),
            filterTopicPart2.partition(),
            0,
            0L,
            TimestampType.CREATE_TIME,
            0L,
            0,
            0,
            1,
            1));
    records.put(tp, list1);
    records.put(filterTopicPart1, list2);
    records.put(filterTopicPart2, list3);
    ConsumerRecords<Integer, Integer> consumerRecords = new ConsumerRecords<>(records);

    // verify that onConsume modifies ConsumerRecords
    ConsumerRecords<Integer, Integer> interceptedRecords = interceptors.onConsume(consumerRecords);
    assertEquals(1, interceptedRecords.count());
    assertTrue(interceptedRecords.partitions().contains(tp));
    assertFalse(interceptedRecords.partitions().contains(filterTopicPart1));
    assertFalse(interceptedRecords.partitions().contains(filterTopicPart2));
    assertEquals(2, onConsumeCount);

    // verify that even if one of the intermediate interceptors throws an exception, all
    // interceptors' onConsume are called
    interceptor1.injectOnConsumeError(true);
    ConsumerRecords<Integer, Integer> partInterceptedRecs = interceptors.onConsume(consumerRecords);
    assertEquals(2, partInterceptedRecs.count());
    assertTrue(
        partInterceptedRecs
            .partitions()
            .contains(filterTopicPart1)); // since interceptor1 threw exception
    assertFalse(
        partInterceptedRecs
            .partitions()
            .contains(filterTopicPart2)); // interceptor2 should still be called
    assertEquals(4, onConsumeCount);

    // if all interceptors throw an exception, records should be unmodified
    interceptor2.injectOnConsumeError(true);
    ConsumerRecords<Integer, Integer> noneInterceptedRecs = interceptors.onConsume(consumerRecords);
    // Expected value first, actual value second, so a failure message reads correctly.
    assertEquals(consumerRecords, noneInterceptedRecs);
    assertEquals(3, noneInterceptedRecs.count());
    assertEquals(6, onConsumeCount);

    interceptors.close();
  }
  /**
   * Subscribes a fresh consumer to the configured topic and polls until one of the following
   * happens: the expected number of messages has been fetched, the test run timeout has elapsed,
   * or shutdown was requested.
   *
   * <p>When the loop ends without a shutdown request, the number of messages actually fetched,
   * the elapsed time and the resulting average throughput are logged. The consumer is closed by
   * the try-with-resources statement, and {@code completionLatch} (if set) is counted down in
   * all cases.
   *
   * @throws IllegalStateException if a poll returns {@code null}
   * @throws RuntimeException if the thread is interrupted while pausing after an empty poll; the
   *     thread's interrupt flag is restored before the exception is thrown
   */
  @Override
  public void drive() {
    // A Consumer is not thread-safe
    // {@see
    // http://kafka.apache.org/090/javadoc/org/apache/kafka/clients/consumer/KafkaConsumer.html}
    // {@see
    // http://kafka.apache.org/090/javadoc/org/apache/kafka/clients/consumer/KafkaConsumer.html#multithreaded}
    try (KafkaConsumer<byte[], byte[]> consumer =
        new KafkaConsumer<>(consumerDefinition.getKafkaConfig())) {

      String topic = consumerDefinition.getTopic();
      log.info("Subscribing to {}", topic);
      if (consumerRebalanceListener == null) {
        consumer.subscribe(Collections.singletonList(topic));
      } else {
        consumer.subscribe(Collections.singletonList(topic), consumerRebalanceListener);
      }

      long messagesToReceive = consumerDefinition.getMessagesToReceive();
      log.info("Expecting {} messages", messagesToReceive);

      StopWatch stopWatch = new StopWatch();
      stopWatch.start();

      do {
        ConsumerRecords<byte[], byte[]> records =
            consumer.poll(consumerDefinition.getPollTimeout());
        if (records == null) {
          throw new IllegalStateException("null ConsumerRecords polled");
        }

        if (records.count() == 0) {
          try {
            log.info("No records fetched, pausing");
            Thread.sleep(1000);
          } catch (InterruptedException e) {
            // Restore the interrupt flag so code further up the stack can still observe it.
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
          }
        } else {
          if (log.isTraceEnabled()) {
            log.trace("Fetched {} records", records.count());
          }
          for (ConsumerRecord<byte[], byte[]> record : records) {
            recordsFetched += 1;
            applyReceiveDelay();
            if (recordsFetched % consumerDefinition.getReportReceivedEvery() == 0) {
              log.info("Received {} messages", recordsFetched);
            }
          }
        }

        if (isShutdownRequested()) {
          break;
        }
        // Take a split so the loop condition can compare the elapsed time with the timeout.
        stopWatch.split();
      } while ((recordsFetched < messagesToReceive)
          && (stopWatch.getSplitTime() < consumerDefinition.getTestRunTimeout()));

      stopWatch.stop();
      if (isShutdownRequested()) {
        log.info("Shutting down");
      } else {
        long runTime = stopWatch.getTime();
        // Report what was actually fetched: the loop may have ended on the timeout (fewer
        // records than expected) or overshot the target within the last polled batch.
        log.info("Done. Consumer received {} msgs in {} ms", recordsFetched, runTime);

        double averageThroughput = (1000d / runTime) * recordsFetched;
        log.info("Average throughput: {} msg/s", averageThroughput);
      }

    } finally {
      // Runs after try-with-resources has already closed the consumer.
      log.debug("Consumer closed");
      if (completionLatch != null) {
        completionLatch.countDown();
      }
    }
  }