/**
 * Marks an offset as committed. This method has side effects - it sets the internal state in
 * such a way that future calls to {@link #findNextCommitOffset()} will return offsets greater
 * than the offset specified, if any.
 *
 * @param committedOffset offset to be marked as committed
 */
public void commit(OffsetAndMetadata committedOffset) {
    long numCommittedOffsets = 0;
    if (committedOffset != null) {
        final long oldCommittedOffset = this.committedOffset;
        numCommittedOffsets = committedOffset.offset() - this.committedOffset;
        this.committedOffset = committedOffset.offset();
        // Drop acked messages at or below the new committed offset. ackedMsgs is sorted
        // by offset, so the scan can stop at the first offset past the committed one.
        for (Iterator<KafkaSpoutMessageId> iterator = ackedMsgs.iterator(); iterator.hasNext(); ) {
            if (iterator.next().offset() <= committedOffset.offset()) {
                iterator.remove();
            } else {
                break;
            }
        }
        numUncommittedOffsets -= numCommittedOffsets;
        LOG.debug("Committed offsets [{}-{} = {}] for topic-partition [{}]. [{}] uncommitted offsets across all topic partitions",
            oldCommittedOffset + 1, this.committedOffset, numCommittedOffsets, tp, numUncommittedOffsets);
    } else {
        LOG.debug("Committed [{}] offsets for topic-partition [{}]. [{}] uncommitted offsets across all topic partitions",
            numCommittedOffsets, tp, numUncommittedOffsets);
    }
    LOG.trace("{}", this);
}
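For context, a minimal usage sketch of the intended call order (the offsetManager variable is a hypothetical instance of the class containing these methods; not from the source): commit to Kafka first, then mirror the result into the manager's internal state.

// Hypothetical driver code, assuming java.util.Collections is imported.
OffsetAndMetadata nextCommit = offsetManager.findNextCommitOffset();
if (nextCommit != null) {
    // Persist the commit in Kafka before updating local bookkeeping.
    kafkaConsumer.commitSync(Collections.singletonMap(tp, nextCommit));
    offsetManager.commit(nextCommit);
}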
/**
 * Sets the cursor to the location dictated by the first poll offset strategy and returns the
 * fetch offset.
 */
private long doSeek(TopicPartition tp, OffsetAndMetadata committedOffset) {
    long fetchOffset;
    if (committedOffset != null) {      // offset was committed for this TopicPartition
        if (firstPollOffsetStrategy.equals(EARLIEST)) {
            kafkaConsumer.seekToBeginning(toArrayList(tp));
            fetchOffset = kafkaConsumer.position(tp);
        } else if (firstPollOffsetStrategy.equals(LATEST)) {
            kafkaConsumer.seekToEnd(toArrayList(tp));
            fetchOffset = kafkaConsumer.position(tp);
        } else {
            // By default polling starts at the last committed offset. +1 points the fetch
            // at the first uncommitted offset.
            fetchOffset = committedOffset.offset() + 1;
            kafkaConsumer.seek(tp, fetchOffset);
        }
    } else {    // no commits have ever been done, so start at the beginning or end depending on the strategy
        if (firstPollOffsetStrategy.equals(EARLIEST) || firstPollOffsetStrategy.equals(UNCOMMITTED_EARLIEST)) {
            kafkaConsumer.seekToBeginning(toArrayList(tp));
        } else if (firstPollOffsetStrategy.equals(LATEST) || firstPollOffsetStrategy.equals(UNCOMMITTED_LATEST)) {
            kafkaConsumer.seekToEnd(toArrayList(tp));
        }
        fetchOffset = kafkaConsumer.position(tp);
    }
    return fetchOffset;
}
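The toArrayList helper is not shown in this excerpt. Presumably it just wraps the single partition in a collection, since Kafka's seekToBeginning/seekToEnd take a Collection of partitions; a plausible reconstruction:

// Assumed shape of the helper (hypothetical; the real implementation may differ).
private Collection<TopicPartition> toArrayList(final TopicPartition tp) {
    return new ArrayList<>(Collections.singletonList(tp));
}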
/** @return the next OffsetAndMetadata to commit, or null if no offset is ready to commit. */
public OffsetAndMetadata findNextCommitOffset() {
    boolean found = false;
    long currOffset;
    long nextCommitOffset = committedOffset;
    KafkaSpoutMessageId nextCommitMsg = null;   // convenience variable that makes it faster to create OffsetAndMetadata

    for (KafkaSpoutMessageId currAckedMsg : ackedMsgs) {    // complexity is that of a linear scan on a TreeMap
        if ((currOffset = currAckedMsg.offset()) == nextCommitOffset + 1) {     // found the next offset to commit
            found = true;
            nextCommitMsg = currAckedMsg;
            nextCommitOffset = currOffset;
        } else if (currAckedMsg.offset() > nextCommitOffset + 1) {
            // this offset is not contiguous with the offsets scheduled for the next commit, so stop the search
            LOG.debug("topic-partition [{}] has non-continuous offset [{}]. It will be processed in a subsequent batch.",
                tp, currOffset);
            break;
        } else {
            // Received a redundant ack. Ignore and continue processing.
            LOG.warn("topic-partition [{}] has unexpected offset [{}]. Current committed offset [{}]",
                tp, currOffset, committedOffset);
        }
    }

    OffsetAndMetadata nextCommitOffsetAndMetadata = null;
    if (found) {
        nextCommitOffsetAndMetadata = new OffsetAndMetadata(nextCommitOffset, nextCommitMsg.getMetadata(Thread.currentThread()));
        LOG.debug("topic-partition [{}] has offsets [{}-{}] ready to be committed",
            tp, committedOffset + 1, nextCommitOffsetAndMetadata.offset());
    } else {
        LOG.debug("topic-partition [{}] has NO offsets ready to be committed", tp);
    }
    LOG.trace("{}", this);
    return nextCommitOffsetAndMetadata;
}
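A worked example may help (hypothetical values and helpers, not from the source; addToAckMsgs and messageIdFor are assumed names for registering acked messages): with committedOffset = 4 and acked offsets {5, 6, 7, 9}, the scan accepts 5, 6 and 7 in turn, then stops at 9 because of the gap at 8.

// Hypothetical test sketch of the contiguity rule above.
for (long offset : new long[] {5, 6, 7, 9}) {
    offsetManager.addToAckMsgs(messageIdFor(tp, offset));
}
OffsetAndMetadata next = offsetManager.findNextCommitOffset();
assert next.offset() == 7;  // 9 becomes committable only after 8 is acked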
private void doSeekRetriableTopicPartitions() {
    final Set<TopicPartition> retriableTopicPartitions = retryService.retriableTopicPartitions();

    for (TopicPartition rtp : retriableTopicPartitions) {
        final OffsetAndMetadata offsetAndMeta = acked.get(rtp).findNextCommitOffset();
        if (offsetAndMeta != null) {
            kafkaConsumer.seek(rtp, offsetAndMeta.offset() + 1);    // seek to the next offset that is ready to commit in the next commit cycle
        } else {
            kafkaConsumer.seek(rtp, acked.get(rtp).committedOffset + 1);    // seek to the offset right after the last committed offset
        }
    }
}
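For orientation, a hedged sketch of where this method is assumed to sit in the spout's poll cycle (the wrapper method and pollTimeoutMs are assumptions, not confirmed by this excerpt): rewinding partitions with tuples due for retry makes the next poll re-fetch them.

// Hypothetical poll step: rewind first, then fetch.
private ConsumerRecords<K, V> pollKafkaBroker() {
    doSeekRetriableTopicPartitions();           // rewind partitions with tuples due for retry
    return kafkaConsumer.poll(pollTimeoutMs);   // re-fetches the rewound offsets
}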
/**
 * Determines the offset of the next fetch. For failed batches lastBatchMeta is not null and
 * contains the fetch offset of the failed batch; in this scenario the next fetch will take
 * place at the offset of the failed batch. When the previous batch is successful, lastBatchMeta
 * is null, and the offset of the next fetch is either the offset of the last commit to Kafka
 * or, if no commit has been made yet, the offset dictated by
 * {@link KafkaSpoutConfig.FirstPollOffsetStrategy}.
 *
 * @return the offset of the next fetch
 */
private long seek(TopicPartition tp, KafkaTridentSpoutBatchMetadata<K, V> lastBatchMeta) {
    if (lastBatchMeta != null) {
        kafkaConsumer.seek(tp, lastBatchMeta.getLastOffset() + 1);  // seek to the offset right after the last offset of the previous batch
        LOG.debug("Seeking fetch offset to next offset after last offset from previous batch");
    } else {
        LOG.debug("Seeking fetch offset from firstPollOffsetStrategy and last commit to Kafka");
        final OffsetAndMetadata committedOffset = kafkaConsumer.committed(tp);
        if (committedOffset != null) {      // offset was committed for this TopicPartition
            if (firstPollOffsetStrategy.equals(EARLIEST)) {
                kafkaConsumer.seekToBeginning(toArrayList(tp));
            } else if (firstPollOffsetStrategy.equals(LATEST)) {
                kafkaConsumer.seekToEnd(toArrayList(tp));
            } else {
                // By default polling starts at the last committed offset. +1 points the fetch
                // at the first uncommitted offset.
                kafkaConsumer.seek(tp, committedOffset.offset() + 1);
            }
        } else {    // no commits have ever been done, so start at the beginning or end depending on the strategy
            if (firstPollOffsetStrategy.equals(EARLIEST) || firstPollOffsetStrategy.equals(UNCOMMITTED_EARLIEST)) {
                kafkaConsumer.seekToBeginning(toArrayList(tp));
            } else if (firstPollOffsetStrategy.equals(LATEST) || firstPollOffsetStrategy.equals(UNCOMMITTED_LATEST)) {
                kafkaConsumer.seekToEnd(toArrayList(tp));
            }
        }
    }
    final long fetchOffset = kafkaConsumer.position(tp);
    LOG.debug("Set [fetchOffset = {}]", fetchOffset);
    return fetchOffset;
}
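To show the intended call pattern, a hedged sketch of an emit step in the Trident emitter (the surrounding method and pollTimeoutMs are assumptions based on this excerpt): seek first, then poll, so the returned records start exactly at the computed fetch offset.

// Hypothetical emit step: position the consumer, then fetch the batch.
final long fetchOffset = seek(tp, lastBatchMeta);
final ConsumerRecords<K, V> records = kafkaConsumer.poll(pollTimeoutMs);
// The first and last offsets of 'records' would then be stored as batch
// metadata so a failed batch can be replayed from the same position.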