Example #1
 /**
  * Marks an offset as committed. This method has a side effect: it sets the internal state in
  * such a way that future calls to {@link #findNextCommitOffset()} will return offsets greater
  * than the offset specified, if any.
  *
  * @param committedOffset offset to be marked as committed
  */
 public void commit(OffsetAndMetadata committedOffset) {
   long numCommittedOffsets = 0;
   if (committedOffset != null) {
     final long oldCommittedOffset = this.committedOffset;
     numCommittedOffsets = committedOffset.offset() - this.committedOffset;
     this.committedOffset = committedOffset.offset();
     for (Iterator<KafkaSpoutMessageId> iterator = ackedMsgs.iterator(); iterator.hasNext(); ) {
       if (iterator.next().offset() <= committedOffset.offset()) {
         iterator.remove();
       } else {
         break;
       }
     }
     numUncommittedOffsets -= numCommittedOffsets;
     LOG.debug(
         "Committed offsets [{}-{} = {}] for topic-partition [{}]. [{}] uncommitted offsets across all topic partitions",
         oldCommittedOffset + 1,
         this.committedOffset,
         numCommittedOffsets,
         tp,
         numUncommittedOffsets);
   } else {
     LOG.debug(
         "Committed [{}] offsets for topic-partition [{}]. [{}] uncommitted offsets across all topic partitions",
         numCommittedOffsets,
         tp,
         numUncommittedOffsets);
   }
   LOG.trace("{}", this);
 }
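
For illustration, here is a minimal, self-contained sketch of the same trimming idea, assuming acked offsets live in a sorted set. SimpleOffsetTracker and its fields are hypothetical and are not part of the Storm API.

 import java.util.TreeSet;

 // Hypothetical, simplified tracker illustrating the trimming done by commit() above.
 class SimpleOffsetTracker {
   private final TreeSet<Long> ackedOffsets = new TreeSet<>();
   private long committedOffset = -1;

   void ack(long offset) {
     ackedOffsets.add(offset);
   }

   // Record the new committed offset and drop every acked offset at or below it,
   // mirroring the iterator-based removal in the example.
   void commit(long newCommittedOffset) {
     committedOffset = newCommittedOffset;
     ackedOffsets.headSet(newCommittedOffset, true).clear();
   }

   long committedOffset() {
     return committedOffset;
   }
 }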
Example #2
    /** @return the next OffsetAndMetadata to commit, or null if no offset is ready to commit. */
    public OffsetAndMetadata findNextCommitOffset() {
      boolean found = false;
      long currOffset;
      long nextCommitOffset = committedOffset;
      KafkaSpoutMessageId nextCommitMsg =
          null; // this is a convenience variable to make it faster to create OffsetAndMetadata

      for (KafkaSpoutMessageId currAckedMsg :
          ackedMsgs) { // complexity is that of a linear scan on a TreeMap
        if ((currOffset = currAckedMsg.offset())
            == nextCommitOffset + 1) { // found the next offset to commit
          found = true;
          nextCommitMsg = currAckedMsg;
          nextCommitOffset = currOffset;
        } else if (currAckedMsg.offset()
            > nextCommitOffset
                + 1) { // offset found is not contiguous with the offsets lined up for the next
                       // commit, so stop the search
          LOG.debug(
              "topic-partition [{}] has non-continuous offset [{}]. It will be processed in a subsequent batch.",
              tp,
              currOffset);
          break;
        } else {
          // Received a redundant ack. Ignore and continue processing.
          LOG.warn(
              "topic-partition [{}] has unexpected offset [{}]. Current committed Offset [{}]",
              tp,
              currOffset,
              committedOffset);
        }
      }

      OffsetAndMetadata nextCommitOffsetAndMetadata = null;
      if (found) {
        nextCommitOffsetAndMetadata =
            new OffsetAndMetadata(
                nextCommitOffset, nextCommitMsg.getMetadata(Thread.currentThread()));
        LOG.debug(
            "topic-partition [{}] has offsets [{}-{}] ready to be committed",
            tp,
            committedOffset + 1,
            nextCommitOffsetAndMetadata.offset());
      } else {
        LOG.debug("topic-partition [{}] has NO offsets ready to be committed", tp);
      }
      LOG.trace("{}", this);
      return nextCommitOffsetAndMetadata;
    }
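
As a hedged, standalone illustration of the contiguity scan (not the Storm class itself), the sketch below walks a sorted set of acked offsets and stops at the first gap: with committedOffset = 5 and acked offsets {6, 7, 9}, it returns 7 and leaves 9 for a later commit.

     import java.util.Arrays;
     import java.util.TreeSet;

     // Hypothetical helper mirroring findNextCommitOffset(): advance along the sorted acked
     // offsets as long as they are contiguous with the committed offset.
     final class CommitScan {
       static long findNextCommitOffset(long committedOffset, TreeSet<Long> ackedOffsets) {
         long nextCommitOffset = committedOffset;
         for (long offset : ackedOffsets) {
           if (offset == nextCommitOffset + 1) {
             nextCommitOffset = offset; // contiguous: include it in the next commit
           } else if (offset > nextCommitOffset + 1) {
             break;                     // gap: stop the scan here
           }
           // offsets <= nextCommitOffset are redundant acks and are simply skipped
         }
         return nextCommitOffset;       // equal to committedOffset when nothing is ready
       }

       public static void main(String[] args) {
         TreeSet<Long> acked = new TreeSet<>(Arrays.asList(6L, 7L, 9L));
         System.out.println(findNextCommitOffset(5, acked)); // prints 7
       }
     }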
Example #3
 /**
  * Sets the consumer's position to the location dictated by the first poll offset strategy and
  * returns the fetch offset.
  */
 private long doSeek(TopicPartition tp, OffsetAndMetadata committedOffset) {
   long fetchOffset;
   if (committedOffset != null) { // offset was committed for this TopicPartition
     if (firstPollOffsetStrategy.equals(EARLIEST)) {
       kafkaConsumer.seekToBeginning(toArrayList(tp));
       fetchOffset = kafkaConsumer.position(tp);
     } else if (firstPollOffsetStrategy.equals(LATEST)) {
       kafkaConsumer.seekToEnd(toArrayList(tp));
       fetchOffset = kafkaConsumer.position(tp);
     } else {
       // By default polling starts at the last committed offset. +1 to point fetch to the first
       // uncommitted offset.
       fetchOffset = committedOffset.offset() + 1;
       kafkaConsumer.seek(tp, fetchOffset);
     }
   } else { // no commits have ever been done, so start at the beginning or end depending on the
            // strategy
     if (firstPollOffsetStrategy.equals(EARLIEST)
         || firstPollOffsetStrategy.equals(UNCOMMITTED_EARLIEST)) {
       kafkaConsumer.seekToBeginning(toArrayList(tp));
     } else if (firstPollOffsetStrategy.equals(LATEST)
         || firstPollOffsetStrategy.equals(UNCOMMITTED_LATEST)) {
       kafkaConsumer.seekToEnd(toArrayList(tp));
     }
     fetchOffset = kafkaConsumer.position(tp);
   }
   return fetchOffset;
 }
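
A hedged sketch of the same decision against the plain kafka-clients Consumer API: Strategy is a local stand-in for KafkaSpoutConfig.FirstPollOffsetStrategy, and Collections.singletonList replaces the toArrayList helper.

 import java.util.Collections;
 import org.apache.kafka.clients.consumer.Consumer;
 import org.apache.kafka.clients.consumer.OffsetAndMetadata;
 import org.apache.kafka.common.TopicPartition;

 // Sketch only: a stand-in for the spout's first poll offset strategy.
 enum Strategy { EARLIEST, LATEST, UNCOMMITTED_EARLIEST, UNCOMMITTED_LATEST }

 final class SeekSketch {
   static long seek(Consumer<?, ?> consumer, TopicPartition tp,
                    OffsetAndMetadata committed, Strategy strategy) {
     if (committed != null) { // a commit exists for this partition
       if (strategy == Strategy.EARLIEST) {
         consumer.seekToBeginning(Collections.singletonList(tp));
       } else if (strategy == Strategy.LATEST) {
         consumer.seekToEnd(Collections.singletonList(tp));
       } else {
         consumer.seek(tp, committed.offset() + 1); // resume at the first uncommitted offset
       }
     } else if (strategy == Strategy.EARLIEST || strategy == Strategy.UNCOMMITTED_EARLIEST) {
       consumer.seekToBeginning(Collections.singletonList(tp));
     } else {
       consumer.seekToEnd(Collections.singletonList(tp));
     }
     return consumer.position(tp); // the offset the next poll will fetch from
   }
 }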
Example #4
  private void doSeekRetriableTopicPartitions() {
    final Set<TopicPartition> retriableTopicPartitions = retryService.retriableTopicPartitions();

    for (TopicPartition rtp : retriableTopicPartitions) {
      final OffsetAndMetadata offsetAndMeta = acked.get(rtp).findNextCommitOffset();
      if (offsetAndMeta != null) {
        kafkaConsumer.seek(
            rtp,
            offsetAndMeta.offset()
                + 1); // seek to the next offset that is ready to commit in next commit cycle
      } else {
        kafkaConsumer.seek(
            rtp, acked.get(rtp).committedOffset + 1); // Seek to last committed offset
      }
    }
  }
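
The same fallback rule in isolation, as a hedged sketch: for each retriable partition, seek just past the next committable offset if one exists, otherwise just past the committed offset. The readyToCommit and committed maps are hypothetical inputs standing in for the spout's per-partition state.

   import java.util.Map;
   import org.apache.kafka.clients.consumer.Consumer;
   import org.apache.kafka.common.TopicPartition;

   final class RetrySeekSketch {
     static void seekRetriable(Consumer<?, ?> consumer,
                               Iterable<TopicPartition> retriable,
                               Map<TopicPartition, Long> readyToCommit,
                               Map<TopicPartition, Long> committed) {
       for (TopicPartition tp : retriable) {
         Long next = readyToCommit.get(tp);
         long seekTo = (next != null) ? next + 1 : committed.get(tp) + 1;
         consumer.seek(tp, seekTo); // replay from the first offset that is not yet ready to commit
       }
     }
   }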
Example #5
  /**
   * Determines the offset of the next fetch. For failed batches lastBatchMeta is not null and
   * contains the fetch offset of the failed batch. In this scenario the next fetch takes place at
   * the offset of the failed batch. When the previous batch is successful, lastBatchMeta is null,
   * and the offset of the next fetch is either the offset of the last commit to Kafka or, if no
   * commit has been made yet, the offset dictated by
   * {@link KafkaSpoutConfig.FirstPollOffsetStrategy}.
   *
   * @return the offset of the next fetch
   */
  private long seek(TopicPartition tp, KafkaTridentSpoutBatchMetadata<K, V> lastBatchMeta) {
    if (lastBatchMeta != null) {
      kafkaConsumer.seek(
          tp,
          lastBatchMeta.getLastOffset()
              + 1); // seek next offset after last offset from previous batch
      LOG.debug("Seeking fetch offset to next offset after last offset from previous batch");

    } else {
      LOG.debug("Seeking fetch offset from firstPollOffsetStrategy and last commit to Kafka");
      final OffsetAndMetadata committedOffset = kafkaConsumer.committed(tp);
      if (committedOffset != null) { // offset was committed for this TopicPartition
        if (firstPollOffsetStrategy.equals(EARLIEST)) {
          kafkaConsumer.seekToBeginning(toArrayList(tp));
        } else if (firstPollOffsetStrategy.equals(LATEST)) {
          kafkaConsumer.seekToEnd(toArrayList(tp));
        } else {
          // By default polling starts at the last committed offset. +1 to point fetch to the first
          // uncommitted offset.
          kafkaConsumer.seek(tp, committedOffset.offset() + 1);
        }
      } else { // no commits have ever been done, so start at the beginning or end depending on the
               // strategy
        if (firstPollOffsetStrategy.equals(EARLIEST)
            || firstPollOffsetStrategy.equals(UNCOMMITTED_EARLIEST)) {
          kafkaConsumer.seekToBeginning(toArrayList(tp));
        } else if (firstPollOffsetStrategy.equals(LATEST)
            || firstPollOffsetStrategy.equals(UNCOMMITTED_LATEST)) {
          kafkaConsumer.seekToEnd(toArrayList(tp));
        }
      }
    }
    final long fetchOffset = kafkaConsumer.position(tp);
    LOG.debug("Set [fetchOffset = {}]", fetchOffset);
    return fetchOffset;
  }
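
A condensed, hedged view of the branch above: when the previous batch's metadata is present, the fetch resumes right after its last offset; otherwise it falls back to the last commit or the configured strategy. This sketch elides the EARLIEST/LATEST override for brevity and uses a nullable Long in place of the metadata object.

  import java.util.Collections;
  import org.apache.kafka.clients.consumer.Consumer;
  import org.apache.kafka.clients.consumer.OffsetAndMetadata;
  import org.apache.kafka.common.TopicPartition;

  final class TridentSeekSketch {
    // 'lastBatchLastOffset' is null when the previous batch succeeded; otherwise it stands in
    // for lastBatchMeta.getLastOffset().
    static long nextFetchOffset(Consumer<?, ?> consumer, TopicPartition tp, Long lastBatchLastOffset) {
      if (lastBatchLastOffset != null) {
        consumer.seek(tp, lastBatchLastOffset + 1);  // resume right after the previous batch
      } else {
        OffsetAndMetadata committed = consumer.committed(tp);
        if (committed != null) {
          consumer.seek(tp, committed.offset() + 1); // resume right after the last commit
        } else {
          consumer.seekToBeginning(Collections.singletonList(tp)); // or seekToEnd, per the strategy
        }
      }
      return consumer.position(tp);
    }
  }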