예제 #1
0
 /**
  * Pauses every topic partition currently assigned to the consumer except {@code excludedTp}.
  *
  * @param excludedTp the topic partition that must not be paused
  * @return the topic partitions that were handed to {@code kafkaConsumer.pause}
  */
 private Collection<TopicPartition> pauseTopicPartitions(TopicPartition excludedTp) {
   // Work on a copy of the assignment so that remove() below only affects this local set.
   final Set<TopicPartition> toPause = new HashSet<>(kafkaConsumer.assignment());
   LOG.debug("Currently assigned topic partitions [{}]", toPause);

   toPause.remove(excludedTp);
   kafkaConsumer.pause(toPause);
   LOG.trace("Paused topic partitions [{}]", toPause);

   return toPause;
 }
  /**
   * Creates a consumer, subscribes it to {@code normal-topic} and hands it to
   * {@code processRecords}.
   *
   * <p>The consumer is managed by try-with-resources so that it is closed whenever
   * {@code processRecords} stops, including when it exits by throwing.
   *
   * @throws InterruptedException propagated from {@code processRecords}
   */
  private static void execute() throws InterruptedException {

    try (KafkaConsumer<String, String> consumer = createConsumer()) {

      // Subscribe to all partitions in that topic. 'assign' could be used here
      // instead of 'subscribe' to subscribe to specific partitions.
      consumer.subscribe(Arrays.asList("normal-topic"));

      processRecords(consumer);
    }
  }
예제 #3
0
  /**
   * Commits to Kafka, in one synchronous call, the next offset that is ready to be committed for
   * each topic partition tracked in {@code acked}, then hands the committed offset to each
   * partition's {@code OffsetEntry}. Only logs at trace level when nothing is ready.
   */
  private void commitOffsetsForAckedTuples() {
    // First pass: collect, per topic partition, the offset that is ready to be committed (if any).
    final Map<TopicPartition, OffsetAndMetadata> readyOffsets = new HashMap<>();
    for (Map.Entry<TopicPartition, OffsetEntry> ackedEntry : acked.entrySet()) {
      final OffsetAndMetadata candidate = ackedEntry.getValue().findNextCommitOffset();
      if (candidate == null) {
        continue;
      }
      readyOffsets.put(ackedEntry.getKey(), candidate);
    }

    if (readyOffsets.isEmpty()) {
      LOG.trace("No offsets to commit. {}", this);
      return;
    }

    // A single commit call for all partitions: one network round trip instead of one per
    // partition, at the price of iterating the (small) acked map a second time below.
    kafkaConsumer.commitSync(readyOffsets);
    LOG.debug("Offsets successfully committed to Kafka [{}]", readyOffsets);

    // Second pass: every entry is visited, so partitions that had nothing ready receive null.
    for (Map.Entry<TopicPartition, OffsetEntry> ackedEntry : acked.entrySet()) {
      final OffsetAndMetadata committed = readyOffsets.get(ackedEntry.getKey());
      ackedEntry.getValue().commit(committed);
    }
  }
예제 #4
0
  /**
   * Repositions the consumer on every topic partition that the retry service reports as
   * retriable.
   *
   * <p>The target is one past the next offset that is ready to commit when there is one, otherwise
   * one past the partition's recorded committed offset.
   */
  private void doSeekRetriableTopicPartitions() {
    for (TopicPartition retriableTp : retryService.retriableTopicPartitions()) {
      final OffsetAndMetadata readyOffset = acked.get(retriableTp).findNextCommitOffset();

      final long target =
          (readyOffset != null)
              // next offset that is ready to commit in the next commit cycle
              ? readyOffset.offset() + 1
              // nothing ready: fall back to the last committed offset
              : acked.get(retriableTp).committedOffset + 1;

      kafkaConsumer.seek(retriableTp, target);
    }
  }
예제 #5
0
 /**
  * Commits the offsets of acked tuples, unless the consumer is in auto-commit mode, and then
  * closes the Kafka consumer.
  */
 private void shutdown() {
   try {
     // With auto-commit enabled no explicit commit is issued here.
     if (!consumerAutoCommitMode) {
       commitOffsetsForAckedTuples();
     }
   } finally {
     // Release resources. Placed in finally so the consumer is closed even if the commit throws.
     kafkaConsumer.close();
   }
 }
예제 #6
0
  /**
   * Emits one batch for a single topic partition.
   *
   * <p>All other assigned partitions are paused for the duration of the poll so that only records
   * of {@code partitionTs} are fetched; they are resumed again in all cases.
   *
   * @param tx the transaction attempt (only logged here)
   * @param collector the collector handed to {@code emitTuples}
   * @param partitionTs the partition to poll from
   * @param lastBatch metadata of the previous batch, passed to {@code seek} and to the new metadata
   * @return new metadata built from the polled records, or {@code lastBatch} if nothing was polled
   */
  @Override
  public KafkaTridentSpoutBatchMetadata<K, V> emitPartitionBatch(
      TransactionAttempt tx,
      TridentCollector collector,
      KafkaTridentSpoutTopicPartition partitionTs,
      KafkaTridentSpoutBatchMetadata<K, V> lastBatch) {
    LOG.debug(
        "Emitting batch: [transaction = {}], [partition = {}], [collector = {}], [lastBatchMetadata = {}]",
        tx,
        partitionTs,
        collector,
        lastBatch);

    final TopicPartition currentTp = partitionTs.getTopicPartition();
    // Stays equal to lastBatch unless the poll below returns at least one record.
    KafkaTridentSpoutBatchMetadata<K, V> resultMeta = lastBatch;
    // Starts empty so the finally block has something valid to resume if pausing itself fails.
    Collection<TopicPartition> paused = Collections.emptySet();

    try {
      // Restrict the upcoming poll to the current topic partition.
      paused = pauseTopicPartitions(currentTp);
      seek(currentTp, lastBatch);

      final ConsumerRecords<K, V> polled = kafkaConsumer.poll(pollTimeoutMs);
      LOG.debug("Polled [{}] records from Kafka.", polled.count());

      final boolean gotRecords = !polled.isEmpty();
      if (gotRecords) {
        emitTuples(collector, polled);
        resultMeta = new KafkaTridentSpoutBatchMetadata<>(currentTp, polled, lastBatch);
      }
    } finally {
      kafkaConsumer.resume(paused);
      LOG.trace("Resumed topic partitions [{}]", paused);
    }

    LOG.debug("Current batch metadata {}", resultMeta);
    return resultMeta;
  }
  /**
   * Polls the consumer in an endless loop. Each iteration prints every fetched record, prints the
   * offset of the last record read, runs {@code process()} and then commits synchronously.
   *
   * @param consumer the consumer to poll and commit on
   * @throws InterruptedException declared for the calls made inside the loop
   */
  private static void processRecords(KafkaConsumer<String, String> consumer)
      throws InterruptedException {

    for (; ; ) {

      final ConsumerRecords<String, String> batch = consumer.poll(100);

      // Offset of the last record of this batch; remains 0 when the batch is empty.
      long lastOffset = 0;
      for (ConsumerRecord<String, String> rec : batch) {
        final long offset = rec.offset();
        System.out.printf(
            "\n\roffset = %d, key = %s, value = %s", offset, rec.key(), rec.value());
        lastOffset = offset;
      }

      System.out.println("lastOffset read: " + lastOffset);

      process();

      // Commit only after the business processing has finished: committing at this point is
      // what gives the at-least-once guarantee.
      consumer.commitSync();
    }
  }
예제 #8
0
  /**
   * Polls topic {@code apt-receive1} 10000 times and sets {@code bloom[key]} to 1 for every record
   * key (parsed as an integer). It then prints to stderr every index below 10,000,000 whose slot
   * is still 0, i.e. every key that was never seen.
   *
   * <p>The consumer is managed by try-with-resources so it is closed even when the loop aborts,
   * for example on a key that is not a valid integer.
   *
   * @param args unused
   * @throws UnknownHostException kept from the original signature
   */
  public static void main(String[] args) throws UnknownHostException {
    Properties props = new Properties();
    props.put("bootstrap.servers", "kafka01:9092,kafka02:9092,kafka03:9092");
    props.put("group.id", "test");
    props.put("enable.auto.commit", "true");
    props.put("auto.commit.interval.ms", "1000");
    props.put("session.timeout.ms", "30000");
    props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

    // The value type is String to match the configured StringDeserializer. Declaring byte[] here
    // would throw ClassCastException as soon as a record value was read.
    try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
      consumer.subscribe(Arrays.asList("apt-receive1"));

      for (int i = 0; i < 10000; i++) {
        ConsumerRecords<String, String> records = consumer.poll(100);
        System.out.println(i + ": " + records.count());
        for (ConsumerRecord<String, String> record : records) {
          // NOTE(review): assumes every key is a non-null decimal integer within the bounds of
          // bloom; parseInt or the array access throws otherwise.
          bloom[Integer.parseInt(record.key())] = 1;
        }
      }

      // NOTE(review): assumes bloom has at least 10,000,000 slots; confirm at its declaration.
      for (int j = 0; j < 10_000_000; j++) {
        if (bloom[j] == 0) {
          System.err.println("" + j);
        }
      }
    }
    System.err.println("Finish!");
  }
예제 #9
0
  /**
   * Positions the consumer on {@code tp} for the next fetch and reports the resulting position.
   *
   * <ul>
   *   <li>With batch metadata available, the consumer seeks to the offset right after the last
   *       offset of that batch.
   *   <li>Without batch metadata, the last offset committed to Kafka and the configured {@link
   *       KafkaSpoutConfig.FirstPollOffsetStrategy} decide: EARLIEST and LATEST always go to the
   *       beginning or end; otherwise the consumer seeks to the offset after the committed one. If
   *       nothing was ever committed, the UNCOMMITTED_* strategies behave like their plain
   *       counterparts.
   * </ul>
   *
   * @param tp the topic partition to position
   * @param lastBatchMeta metadata of the previous batch, or {@code null}
   * @return the offset of the next fetch
   */
  private long seek(TopicPartition tp, KafkaTridentSpoutBatchMetadata<K, V> lastBatchMeta) {
    if (lastBatchMeta == null) {
      LOG.debug("Seeking fetch offset from firstPollOffsetStrategy and last commit to Kafka");
      final OffsetAndMetadata lastCommit = kafkaConsumer.committed(tp);

      if (lastCommit == null) {
        // No commit has ever been made: start at the beginning or the end, as the strategy says.
        if (firstPollOffsetStrategy.equals(EARLIEST)
            || firstPollOffsetStrategy.equals(UNCOMMITTED_EARLIEST)) {
          kafkaConsumer.seekToBeginning(toArrayList(tp));
        } else if (firstPollOffsetStrategy.equals(LATEST)
            || firstPollOffsetStrategy.equals(UNCOMMITTED_LATEST)) {
          kafkaConsumer.seekToEnd(toArrayList(tp));
        }
      } else if (firstPollOffsetStrategy.equals(EARLIEST)) {
        kafkaConsumer.seekToBeginning(toArrayList(tp));
      } else if (firstPollOffsetStrategy.equals(LATEST)) {
        kafkaConsumer.seekToEnd(toArrayList(tp));
      } else {
        // Default: resume from the last commit; +1 points at the first uncommitted offset.
        kafkaConsumer.seek(tp, lastCommit.offset() + 1);
      }
    } else {
      // Continue right after the last offset of the previous batch.
      kafkaConsumer.seek(tp, lastBatchMeta.getLastOffset() + 1);
      LOG.debug("Seeking fetch offset to next offset after last offset from previous batch");
    }

    final long position = kafkaConsumer.position(tp);
    LOG.debug("Set [fetchOffset = {}]", position);
    return position;
  }
예제 #10
0
 /** Closes the underlying Kafka consumer and logs that it has been closed. */
 @Override
 public void close() {
   kafkaConsumer.close();
   LOG.debug("Closed");
 }
  /**
   * Creates the Kafka consumer, subscribes it to {@code topic} and starts a daemon thread that
   * polls records and appends them to {@code queue} as {@code Data} items.
   *
   * <p>Each record is decoded with {@code valueCodec} when one is set; otherwise a fresh item is
   * created carrying the raw key and value under {@code @kafka:key} and {@code @kafka:value}. Every
   * item is tagged with {@code @kafka:topic} and {@code @kafka:partition}.
   *
   * <p>A signal hook is registered that clears {@code running}, interrupts the poll thread and
   * appends {@code endOfStream} to the queue. The poll thread also appends {@code endOfStream}
   * when it terminates.
   *
   * @throws Exception if the super-class initialization or the consumer setup fails
   * @see stream.io.AbstractStream#init()
   */
  @Override
  public void init() throws Exception {
    super.init();

    if (group == null) {
      group = UUID.randomUUID().toString();
    }

    Properties props = new Properties();
    props.put("zookeeper.connect", zookeeper);
    props.put("metadata.broker.list", broker);
    props.put("bootstrap.servers", broker);
    props.put("group.id", group);
    props.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
    // NOTE(review): keys are deserialized as String although the consumer is declared with
    // byte[] keys. The loop below therefore reads the key as Object; narrowing it to byte[] would
    // fail at runtime. Align the field type or this setting when the declaration can be changed.
    props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    props.put("enable.auto.commit", "true");
    props.put("auto.commit.interval.ms", "10000");

    log.debug("Creating kafka consumer...");
    final KafkaConsumer<byte[], byte[]> kc = new KafkaConsumer<byte[], byte[]>(props);
    consumer = kc;

    log.info("Subscribing to topic '{}'", topic);
    consumer.subscribe(Arrays.asList(topic));
    log.info("Using codec {}", valueCodec);

    // Null-guarded so that a missing result cannot make a mere debug statement abort init().
    List<PartitionInfo> partitionInfo = consumer.partitionsFor(topic);
    log.debug(
        "topic '{}' has {} partitions", topic, partitionInfo == null ? 0 : partitionInfo.size());

    pollThread =
        new Thread() {
          @Override
          public void run() {
            try {
              running.set(true);
              while (running.get()) {
                ConsumerRecords<byte[], byte[]> messages = consumer.poll(100L);
                log.debug("Polled new messages: {}", messages);
                // Keep polling with a longer timeout until something arrives, but give up
                // waiting as soon as shutdown has been requested.
                while (running.get() && (messages == null || messages.count() < 1)) {
                  messages = consumer.poll(1000L);
                  log.debug("Polled new messages: {}", messages);
                }

                if (messages == null) {
                  continue;
                }

                for (ConsumerRecord<byte[], byte[]> record : messages) {
                  Object key = record.key();
                  byte[] data = record.value();

                  Data item = null;
                  if (valueCodec != null) {
                    log.debug("de-serializing item using {}", valueCodec);
                    item = valueCodec.decode(data);
                  } else {
                    item = DataFactory.create();
                    if (key != null) {
                      item.put("@kafka:key", (Serializable) key);
                    }
                    item.put("@kafka:value", data);
                  }
                  item.put("@kafka:topic", record.topic());
                  item.put("@kafka:partition", record.partition());
                  queue.add(item);
                }
              }
            } catch (Exception e) {
              // An exception after shutdown was requested is the expected result of the
              // interrupt sent by the signal hook; anything else is a real failure.
              if (running.get()) {
                log.error("Polling topic '{}' failed", topic, e);
              } else {
                log.debug("Poll thread stopped during shutdown", e);
              }
            } finally {
              log.info("Closing topic-stream reader...");
              queue.add(endOfStream);
            }
          }
        };

    pollThread.setDaemon(true);
    pollThread.start();

    Signals.register(
        new Hook() {
          @Override
          public void signal(int flags) {
            log.info("Received shutdown-signal!");
            running.set(false);
            pollThread.interrupt();
            log.info("Adding EOF to queue...");
            queue.add(endOfStream);
          }
        });
  }
예제 #12
0
  /**
   * Runs the consumer loop: subscribes to the configured topic and polls until the expected number
   * of messages has been received, the test-run timeout has elapsed, or shutdown is requested.
   *
   * <p>On normal completion the number of messages actually received, the run time and the
   * resulting average throughput are logged. {@code completionLatch}, when set, is counted down
   * after the consumer has been closed, whether the loop ended normally or with an exception.
   *
   * @throws IllegalStateException if a poll returns {@code null}
   * @throws RuntimeException if the thread is interrupted while pausing after an empty poll; the
   *     thread's interrupt flag is restored before the exception is thrown
   */
  @Override
  public void drive() {
    // A Consumer is not thread-safe
    // {@see
    // http://kafka.apache.org/090/javadoc/org/apache/kafka/clients/consumer/KafkaConsumer.html}
    // {@see
    // http://kafka.apache.org/090/javadoc/org/apache/kafka/clients/consumer/KafkaConsumer.html#multithreaded}
    try (KafkaConsumer<byte[], byte[]> consumer =
        new KafkaConsumer<>(consumerDefinition.getKafkaConfig())) {

      String topic = consumerDefinition.getTopic();
      log.info("Subscribing to {}", topic);
      if (consumerRebalanceListener == null) {
        consumer.subscribe(Collections.singletonList(topic));
      } else {
        consumer.subscribe(Collections.singletonList(topic), consumerRebalanceListener);
      }

      long messagesToReceive = consumerDefinition.getMessagesToReceive();
      log.info("Expecting {} messages", messagesToReceive);

      StopWatch stopWatch = new StopWatch();
      stopWatch.start();

      do {
        ConsumerRecords<byte[], byte[]> records =
            consumer.poll(consumerDefinition.getPollTimeout());
        if (records == null) {
          throw new IllegalStateException("null ConsumerRecords polled");
        } else {
          if (records.count() == 0) {
            try {
              log.info("No records fetched, pausing");
              Thread.sleep(1000);
            } catch (InterruptedException e) {
              // Restore the interrupt flag so code further up the stack can still see it.
              Thread.currentThread().interrupt();
              throw new RuntimeException(e);
            }
          } else {
            if (log.isTraceEnabled()) {
              log.trace("Fetched {} records", records.count());
            }
            for (ConsumerRecord<byte[], byte[]> record : records) {
              recordsFetched += 1;
              applyReceiveDelay();
              if (recordsFetched % consumerDefinition.getReportReceivedEvery() == 0) {
                log.info("Received {} messages", recordsFetched);
              }
            }
          }
        }

        if (isShutdownRequested()) {
          break;
        }
        stopWatch.split();
      } while ((recordsFetched < messagesToReceive)
          && (stopWatch.getSplitTime() < consumerDefinition.getTestRunTimeout()));

      stopWatch.stop();
      if (isShutdownRequested()) {
        log.info("Shutting down");
      } else {
        long runTime = stopWatch.getTime();
        // Report what was actually received: the loop may have ended on the timeout before the
        // expected number of messages arrived.
        log.info("Done. Consumer received {} msgs in {} ms", recordsFetched, runTime);

        double averageThroughput = (1000d / runTime) * recordsFetched;
        log.info("Average throughput: {} msg/s", averageThroughput);
      }

    } finally {
      log.debug("Consumer closed");
      if (completionLatch != null) {
        completionLatch.countDown();
      }
    }
  }