// Emits one tuple per record
// @return true if tuple was emitted
private boolean emitTupleIfNotEmitted(ConsumerRecord<K, V> record) {
    final TopicPartition tp = new TopicPartition(record.topic(), record.partition());
    final KafkaSpoutMessageId msgId = new KafkaSpoutMessageId(record);
    if (acked.containsKey(tp) && acked.get(tp).contains(msgId)) {
        // has been acked
        LOG.trace("Tuple for record [{}] has already been acked. Skipping", record);
    } else if (emitted.contains(msgId)) {
        // has been emitted and it's pending ack or fail
        LOG.trace("Tuple for record [{}] has already been emitted. Skipping", record);
    } else {
        boolean isScheduled = retryService.isScheduled(msgId);
        // not scheduled <=> never failed (i.e. never emitted), or ready to be retried
        if (!isScheduled || retryService.isReady(msgId)) {
            final List<Object> tuple = tuplesBuilder.buildTuple(record);
            kafkaSpoutStreams.emit(collector, tuple, msgId);
            emitted.add(msgId);
            numUncommittedOffsets++;
            if (isScheduled) {
                // Was scheduled for retry, now being re-emitted. Remove from schedule.
                retryService.remove(msgId);
            }
            LOG.trace("Emitted tuple [{}] for record [{}]", tuple, record);
            return true;
        }
    }
    return false;
}
@Override
protected boolean matches(Object item, Description mismatchDescription) {
    @SuppressWarnings("unchecked")
    ConsumerRecord<Object, Object> record = (ConsumerRecord<Object, Object>) item;
    boolean matches = record != null && record.partition() == this.partition;
    if (!matches) {
        mismatchDescription.appendText("is ").appendValue(record);
    }
    return matches;
}
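For context, a hedged sketch of how an override like the one above can sit inside a complete Hamcrest matcher. The base class (DiagnosingMatcher) and the class and field names are assumptions here, since the snippet only shows the matches(...) method.

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.hamcrest.Description;
import org.hamcrest.DiagnosingMatcher;

// Hypothetical class name; the partition field mirrors `this.partition` above.
public class HasPartition extends DiagnosingMatcher<ConsumerRecord<?, ?>> {

    private final int partition;

    public HasPartition(int partition) {
        this.partition = partition;
    }

    @Override
    public void describeTo(Description description) {
        description.appendText("a ConsumerRecord from partition ").appendValue(partition);
    }

    @Override
    protected boolean matches(Object item, Description mismatchDescription) {
        ConsumerRecord<?, ?> record = (ConsumerRecord<?, ?>) item;
        boolean matches = record != null && record.partition() == this.partition;
        if (!matches) {
            mismatchDescription.appendText("is ").appendValue(record);
        }
        return matches;
    }
}

// Usage in a test, e.g.: assertThat(record, new HasPartition(0));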
@Override
protected boolean matches(Object item, Description mismatchDescription) {
    @SuppressWarnings("unchecked")
    ConsumerRecord<K, Object> record = (ConsumerRecord<K, Object>) item;
    // Compare keys null-safely: a null record key only matches a null expected key.
    boolean matches = record != null
            && (record.key() == null ? this.key == null : record.key().equals(this.key));
    if (!matches) {
        mismatchDescription.appendText("is ").appendValue(record);
    }
    return matches;
}
private static void processRecords(KafkaConsumer<String, String> consumer) throws InterruptedException {
    while (true) {
        ConsumerRecords<String, String> records = consumer.poll(100);
        long lastOffset = 0;
        for (ConsumerRecord<String, String> record : records) {
            System.out.printf("\n\roffset = %d, key = %s, value = %s",
                    record.offset(), record.key(), record.value());
            lastOffset = record.offset();
        }
        System.out.println("lastOffset read: " + lastOffset);
        process();
        // Commit only after the business processing has finished; committing here
        // gives the at-least-once guarantee (a per-partition variant is sketched below).
        consumer.commitSync();
    }
}
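The commit-after-processing pattern above can also be applied per partition. A minimal sketch, assuming a hypothetical process(record) helper and the usual org.apache.kafka.clients.consumer, org.apache.kafka.common and java.util imports:

// Hypothetical per-partition variant of processRecords above: commit the exact offset
// reached in each partition rather than everything returned by poll(), still only
// after the business processing is done.
private static void processRecordsPerPartition(KafkaConsumer<String, String> consumer) {
    while (true) {
        ConsumerRecords<String, String> records = consumer.poll(100);
        for (TopicPartition tp : records.partitions()) {
            List<ConsumerRecord<String, String>> partitionRecords = records.records(tp);
            for (ConsumerRecord<String, String> record : partitionRecords) {
                process(record); // business processing first, commit afterwards
            }
            long lastOffset = partitionRecords.get(partitionRecords.size() - 1).offset();
            // Commit the offset of the next record to be read from this partition.
            consumer.commitSync(Collections.singletonMap(tp, new OffsetAndMetadata(lastOffset + 1)));
        }
    }
}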
public static void main(String[] args) throws UnknownHostException {
    Properties props = new Properties();
    props.put("bootstrap.servers", "kafka01:9092,kafka02:9092,kafka03:9092");
    props.put("group.id", "test");
    props.put("enable.auto.commit", "true");
    props.put("auto.commit.interval.ms", "1000");
    props.put("session.timeout.ms", "30000");
    props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    // Values are consumed as byte[], so the value deserializer must match that type.
    props.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");

    KafkaConsumer<String, byte[]> consumer = new KafkaConsumer<>(props);
    consumer.subscribe(Arrays.asList("apt-receive1"));
    List<TopicPartition> partitions = new ArrayList<>();
    // partitions.add(new TopicPartition("apt-receive1", 2));
    // partitions.add(new TopicPartition("apt-receive1", 13));
    // consumer.assign(partitions);  (see the manual-assignment sketch below)

    // Presence bitmap for the expected keys; presumably a field in the original class.
    int[] bloom = new int[10_000_000];

    for (int i = 0; i < 10000; i++) {
        ConsumerRecords<String, byte[]> records = consumer.poll(100);
        System.out.println(i + ": " + records.count());
        for (ConsumerRecord<String, byte[]> record : records) {
            // System.out.println(record.key());
            bloom[Integer.parseInt(record.key())] = 1;
        }
        // if (sum == 10000) {
        //     System.out.println("sum=" + sum);
        //     break;
        // }
    }
    for (int j = 0; j < 10_000_000; j++) {
        if (bloom[j] == 0) {
            System.err.println("" + j);
        }
    }
    consumer.close();
    System.err.println("Finish!");
}
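A minimal sketch of the manual-assignment path hinted at by the commented-out lines above: with assign() the consumer reads only the listed partitions and takes no part in group rebalancing. Partition numbers are illustrative.

List<TopicPartition> partitions = Arrays.asList(
        new TopicPartition("apt-receive1", 2),
        new TopicPartition("apt-receive1", 13));
consumer.assign(partitions);            // replaces consumer.subscribe(...)
consumer.seekToBeginning(partitions);   // optional: start from the earliest available offset
ConsumerRecords<String, byte[]> records = consumer.poll(100);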
@Override
public void doWork() {
    try {
        ConsumerRecords<byte[], byte[]> records = consumer.poll(Long.MAX_VALUE);
        for (ConsumerRecord<byte[], byte[]> record : records) {
            K messageKey = null;
            try {
                messageKey = this.serializer.deserializeKey(record.key());
            } catch (SerializationException e) {
                log.error("Failed to deserialize the schema or config key", e);
                continue;
            }

            if (messageKey.equals(noopKey)) {
                // If it's a noop, update the local offset counter and do nothing else
                offsetUpdateLock.lock();
                try {
                    offsetInSchemasTopic = record.offset();
                    offsetReachedThreshold.signalAll();
                } finally {
                    offsetUpdateLock.unlock();
                }
            } else {
                V message = null;
                try {
                    message = record.value() == null
                            ? null
                            : serializer.deserializeValue(messageKey, record.value());
                } catch (SerializationException e) {
                    log.error("Failed to deserialize a schema or config update", e);
                    continue;
                }
                try {
                    log.trace("Applying update (" + messageKey + "," + message + ") to the local store");
                    if (message == null) {
                        localStore.delete(messageKey);
                    } else {
                        localStore.put(messageKey, message);
                    }
                    this.storeUpdateHandler.handleUpdate(messageKey, message);
                    offsetUpdateLock.lock();
                    try {
                        offsetInSchemasTopic = record.offset();
                        offsetReachedThreshold.signalAll();
                    } finally {
                        offsetUpdateLock.unlock();
                    }
                } catch (StoreException se) {
                    log.error("Failed to add record from the Kafka topic " + topic + " to the local store", se);
                }
            }
        }
    } catch (WakeupException we) {
        // do nothing because the thread is closing -- see shutdown()
    } catch (RecordTooLargeException rtle) {
        throw new IllegalStateException(
                "Consumer threw RecordTooLargeException. A schema has been written that "
                        + "exceeds the default maximum fetch size.", rtle);
    } catch (RuntimeException e) {
        log.error("KafkaStoreReader thread has died for an unknown reason.");
        throw new RuntimeException(e);
    }
}