protected void setOffset(int partition, long offset) {
  if (commitedOffsets[partition] < offset) {
    setOffset(zkClient, consumerConfig.groupId(), topicName, partition, offset);
    commitedOffsets[partition] = offset;
  } else {
    LOG.debug(
        "Ignoring offset {} for partition {} because it is already committed", offset, partition);
  }
}
@Test
public void javaHandleKafkaStorage() {
  final ConsumerProperties consumerProperties =
      new PropertiesBuilder.Consumer(brokerList, zooKeepHost, topic, groupId, new StringDecoder(null))
          .build()
          .readFromEndOfStream()
          .consumerTimeoutMs(1234)
          .kafkaOffsetsStorage(true);
  final ConsumerConfig consumerConfig = consumerProperties.toConsumerConfig();

  assertEquals(consumerProperties.zookeeperConnect(), zooKeepHost);
  assertEquals(consumerProperties.topic(), topic);
  assertEquals(consumerProperties.groupId(), groupId);
  assertEquals(
      consumerProperties.decoder().getClass().getSimpleName(),
      StringDecoder.class.getSimpleName());
  assertEquals(consumerConfig.clientId(), groupId);
  assertEquals(consumerConfig.autoOffsetReset(), "largest");
  assertEquals(consumerConfig.offsetsStorage(), "kafka");
  assertEquals(consumerConfig.consumerTimeoutMs(), 1234);
  assertEquals(consumerConfig.dualCommitEnabled(), true);
}
@Test
public void javaHandleBaseCase() {
  final PropertiesBuilder.Consumer propsBuilder =
      new PropertiesBuilder.Consumer(brokerList, zooKeepHost, topic, groupId, new StringDecoder(null));
  assertEquals(propsBuilder.getBrokerList(), brokerList);
  assertEquals(propsBuilder.getZooKeeperHost(), zooKeepHost);

  final ConsumerProperties consumerProperties = propsBuilder.build();
  final ConsumerConfig consumerConfig = consumerProperties.toConsumerConfig();

  assertEquals(consumerProperties.zookeeperConnect(), zooKeepHost);
  assertEquals(consumerProperties.topic(), topic);
  assertEquals(consumerProperties.groupId(), groupId);
  assertEquals(
      consumerProperties.decoder().getClass().getSimpleName(),
      StringDecoder.class.getSimpleName());
  assertEquals(consumerConfig.clientId(), groupId);
  assertEquals(consumerConfig.autoOffsetReset(), "smallest");
  assertEquals(consumerConfig.offsetsStorage(), "zookeeper");
  assertEquals(consumerConfig.consumerTimeoutMs(), 1500);
  assertEquals(consumerConfig.dualCommitEnabled(), false);
}
/**
 * Creates a PersistentKafkaSource.
 *
 * <p>The given {@code consumerConfig} must specify at least the "group.id" and
 * "zookeeper.connect" properties. The config is passed on to the Kafka high-level consumer.
 * For a full list of possible values, see
 * https://kafka.apache.org/documentation.html#consumerconfigs
 */
public PersistentKafkaSource(
    String topicName,
    DeserializationSchema<OUT> deserializationSchema,
    ConsumerConfig consumerConfig) {
  Preconditions.checkNotNull(topicName);
  Preconditions.checkNotNull(deserializationSchema);
  Preconditions.checkNotNull(consumerConfig);

  this.topicName = topicName;
  this.deserializationSchema = deserializationSchema;
  this.consumerConfig = consumerConfig;
  if (consumerConfig.autoCommitEnable()) {
    throw new IllegalArgumentException(
        "'auto.commit.enable' is set to 'true'. "
            + "This source can only be used with auto commit disabled because the "
            + "source is committing to ZooKeeper by itself (not using the KafkaConsumer).");
  }
  if (!consumerConfig.offsetsStorage().equals("zookeeper")) {
    // We can currently only commit to ZooKeeper.
    throw new IllegalArgumentException(
        "'offsets.storage' has to be set to 'zookeeper' for this source to work reliably");
  }
}
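// Usage sketch: a ConsumerConfig that satisfies the constructor checks above. The host
// names, group id, topic, and SimpleStringSchema are illustrative assumptions, not
// mandated by this class.
Properties props = new Properties();
props.setProperty("group.id", "my-flink-group");           // required
props.setProperty("zookeeper.connect", "localhost:2181");  // required
props.setProperty("auto.commit.enable", "false");          // the source commits on its own
props.setProperty("offsets.storage", "zookeeper");         // the source can only commit to ZK

PersistentKafkaSource<String> source =
    new PersistentKafkaSource<>(
        "my-topic", new SimpleStringSchema(), new ConsumerConfig(props));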
// ConsumerConfig itself is not serializable, so serialize the underlying Properties instead.
private void writeObject(ObjectOutputStream out) throws IOException {
  out.defaultWriteObject();
  out.writeObject(consumerConfig.props().props());
}
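// The matching readObject is not shown in this section; a plausible sketch, assuming the
// non-serializable ConsumerConfig is rebuilt from the Properties written above:
private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
  in.defaultReadObject();
  Properties props = (Properties) in.readObject();
  consumerConfig = new ConsumerConfig(props);
}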
@Override
public void open(Configuration parameters) throws Exception {
  super.open(parameters);
  ConsumerConnector consumer = Consumer.createJavaConsumerConnector(this.consumerConfig);
  // We request only one stream per consumer instance. Kafka will make sure that each
  // consumer group sees each message only once.
  Map<String, Integer> topicCountMap = Collections.singletonMap(topicName, 1);
  Map<String, List<KafkaStream<byte[], byte[]>>> streams =
      consumer.createMessageStreams(topicCountMap);
  if (streams.size() != 1) {
    throw new RuntimeException("Expected only one message stream but got " + streams.size());
  }
  List<KafkaStream<byte[], byte[]>> kafkaStreams = streams.get(topicName);
  if (kafkaStreams == null) {
    throw new RuntimeException(
        "Requested stream not available. Available streams: " + streams.toString());
  }
  if (kafkaStreams.size() != 1) {
    throw new RuntimeException(
        "Requested 1 stream from Kafka, but got " + kafkaStreams.size() + " streams");
  }
  LOG.info(
      "Opening Consumer instance for topic '{}' on group '{}'",
      topicName,
      consumerConfig.groupId());
  this.iteratorToRead = kafkaStreams.get(0).iterator();
  this.consumer = consumer;

  zkClient =
      new ZkClient(
          consumerConfig.zkConnect(),
          consumerConfig.zkSessionTimeoutMs(),
          consumerConfig.zkConnectionTimeoutMs(),
          new KafkaZKStringSerializer());

  // Most likely the number of offsets we're going to store here will be lower than the
  // number of partitions.
  int numPartitions = getNumberOfPartitions();
  LOG.debug("The topic {} has {} partitions", topicName, numPartitions);
  this.lastOffsets =
      getRuntimeContext().getOperatorState("offset", new long[numPartitions], false);
  this.commitedOffsets = new long[numPartitions];
  // Check if there are offsets to restore.
  if (!Arrays.equals(lastOffsets.getState(), new long[numPartitions])) {
    if (lastOffsets.getState().length != numPartitions) {
      throw new IllegalStateException(
          "There are "
              + lastOffsets.getState().length
              + " offsets to restore for topic "
              + topicName
              + " but there are only "
              + numPartitions
              + " partitions in the topic");
    }

    LOG.info(
        "Setting restored offsets {} in ZooKeeper", Arrays.toString(lastOffsets.getState()));
    setOffsetsInZooKeeper(lastOffsets.getState());
  } else {
    // Initialize with empty offsets.
    Arrays.fill(this.lastOffsets.getState(), -1);
  }
  Arrays.fill(this.commitedOffsets, 0); // just to make the initial state explicit
  running = true;
}
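// setOffsetsInZooKeeper(long[]) is used above but not shown in this section. A plausible
// sketch, assuming it simply delegates to setOffset(int, long) once per partition:
private void setOffsetsInZooKeeper(long[] offsets) {
  for (int partition = 0; partition < offsets.length; partition++) {
    setOffset(partition, offsets[partition]);
  }
}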
public void setConsumerProperty(String key, Object value) {
  consumerConfig.props().props().setProperty(checkNotNull(key), checkNotNull(value).toString());
}
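// Usage sketch (key and value are illustrative): overriding a consumer setting on the
// source built in the construction sketch above. This mutates the Properties backing the
// ConsumerConfig, so it should happen before the source is opened.
source.setConsumerProperty("consumer.timeout.ms", 5000);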