Example #1
  @Override
  public void run(SourceContext<OUT> ctx) throws Exception {
    if (iteratorToRead == null) {
      throw new IllegalStateException("Kafka iterator not initialized properly.");
    }

    final Object checkpointLock = ctx.getCheckpointLock();

    while (running && iteratorToRead.hasNext()) {
      MessageAndMetadata<byte[], byte[]> message = iteratorToRead.next();
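      // messages at or below the restored offset were already emitted before the failure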
      if (lastOffsets.getState()[message.partition()] >= message.offset()) {
        LOG.info(
            "Skipping message with offset {} from partition {}",
            message.offset(),
            message.partition());
        continue;
      }
      OUT next = deserializationSchema.deserialize(message.message());

      if (deserializationSchema.isEndOfStream(next)) {
        LOG.info("DeserializationSchema signaled end of stream for this source");
        break;
      }

      // make the state update and the element emission atomic
      synchronized (checkpointLock) {
        lastOffsets.getState()[message.partition()] = message.offset();
        ctx.collect(next);
      }

      if (LOG.isTraceEnabled()) {
        LOG.trace(
            "Processed record with offset {} from partition {}",
            message.offset(),
            message.partition());
      }
    }
  }
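
The synchronized block on the checkpoint lock is the key pattern here: offset bookkeeping and element emission must be atomic with respect to checkpoints. Below is a minimal sketch of the same pattern, assuming Flink's legacy SourceFunction API; the class name and counter state are illustrative, not taken from the example above.

import org.apache.flink.streaming.api.functions.source.SourceFunction;

public class CheckpointedCounterSource implements SourceFunction<Long> {

  private volatile boolean running = true;
  private long offset = 0L; // illustrative state; a real source would snapshot this

  @Override
  public void run(SourceContext<Long> ctx) {
    final Object checkpointLock = ctx.getCheckpointLock();
    while (running) {
      // Update the state and emit the element under the same lock, so a
      // checkpoint can never capture an offset without the element that
      // produced it (or vice versa).
      synchronized (checkpointLock) {
        ctx.collect(offset);
        offset++;
      }
    }
  }

  @Override
  public void cancel() {
    running = false;
  }
}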
Example #2
  @Override
  public void open(Configuration parameters) throws Exception {
    super.open(parameters);
    ConsumerConnector consumer = Consumer.createJavaConsumerConnector(this.consumerConfig);
    // We request only one stream per consumer instance. Kafka makes sure that
    // each consumer group sees each message only once.
    Map<String, Integer> topicCountMap = Collections.singletonMap(topicName, 1);
    Map<String, List<KafkaStream<byte[], byte[]>>> streams =
        consumer.createMessageStreams(topicCountMap);
    if (streams.size() != 1) {
      throw new RuntimeException("Expected only one message stream but got " + streams.size());
    }
    List<KafkaStream<byte[], byte[]>> kafkaStreams = streams.get(topicName);
    if (kafkaStreams == null) {
      throw new RuntimeException(
          "Requested stream not available. Available streams: " + streams.toString());
    }
    if (kafkaStreams.size() != 1) {
      throw new RuntimeException(
          "Requested 1 stream from Kafka, bot got " + kafkaStreams.size() + " streams");
    }
    LOG.info(
        "Opening Consumer instance for topic '{}' on group '{}'",
        topicName,
        consumerConfig.groupId());
    this.iteratorToRead = kafkaStreams.get(0).iterator();
    this.consumer = consumer;

    zkClient =
        new ZkClient(
            consumerConfig.zkConnect(),
            consumerConfig.zkSessionTimeoutMs(),
            consumerConfig.zkConnectionTimeoutMs(),
            new KafkaZKStringSerializer());

    // The state array holds one offset slot per partition, even though this
    // parallel source instance will typically read from only a subset of them.
    int numPartitions = getNumberOfPartitions();
    LOG.debug("The topic {} has {} partitions", topicName, numPartitions);
    this.lastOffsets =
        getRuntimeContext().getOperatorState("offset", new long[numPartitions], false);
    this.commitedOffsets = new long[numPartitions];
    // check if there are offsets to restore
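    // a freshly allocated long[] is all zeros, so any non-zero entry means restored state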
    if (!Arrays.equals(lastOffsets.getState(), new long[numPartitions])) {
      if (lastOffsets.getState().length != numPartitions) {
        throw new IllegalStateException(
            "There are "
                + lastOffsets.getState().length
                + " offsets to restore for topic "
                + topicName
                + " but the topic has only "
                + numPartitions
                + " partitions");
      }

      LOG.info("Setting restored offsets {} in ZooKeeper", Arrays.toString(lastOffsets.getState()));
      setOffsetsInZooKeeper(lastOffsets.getState());
    } else {
      // initialize empty offsets
      Arrays.fill(this.lastOffsets.getState(), -1);
    }
    Arrays.fill(this.commitedOffsets, 0); // a new long[] is already zeroed; this just makes the intent explicit

    running = true;
  }
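
The open() method expects this.consumerConfig to be built beforehand. Below is a minimal sketch of constructing such a config, assuming the legacy Kafka 0.8 high-level consumer API that the example uses; the ZooKeeper address and group id are placeholders.

import java.util.Properties;

import kafka.consumer.ConsumerConfig;

public class ConsumerConfigExample {

  public static ConsumerConfig buildConsumerConfig() {
    Properties props = new Properties();
    props.put("zookeeper.connect", "localhost:2181"); // placeholder ZooKeeper address
    props.put("group.id", "flink-kafka-example");     // placeholder consumer group
    props.put("auto.commit.enable", "false");         // the source manages offsets itself
    return new ConsumerConfig(props);
  }
}

Disabling auto-commit matters here: open() writes restored offsets to ZooKeeper itself via setOffsetsInZooKeeper(), so letting the consumer commit on its own would race with that bookkeeping.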