Example #1
  @Override
  public Data process(Data data) {

    Data item = DataFactory.create();
    item.put("time", LocalDateTime.now().toString());

    // carry over selected meta keys (e.g. "@stream") from the incoming item
    String[] evKeys = {"@stream"};
    for (String key : evKeys) {
      if (data.containsKey(key)) {
        item.put(key, data.get(key));
      }
    }

    // copy the values of the configured keys into the output item
    for (String key : keys) {
      item.put(key, data.get(key));
    }

    try {
      // when writing a JSON array block, separate consecutive records with commas
      if (writeBlock && !firstLine) {
        bw.write(",");
      }
      bw.write(gson.toJson(item));

      bw.newLine();
      bw.flush();
      firstLine = false;
    } catch (IOException ioex) {
      ioex.printStackTrace();
    }
    return data;
  }
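
The process(...) method above is only a fragment: it refers to fields such as keys, writeBlock, firstLine, bw and gson that belong to its enclosing processor. Below is a minimal sketch of what that enclosing class could look like, assuming the streams Processor interface and Gson; the class name, output path and default values are assumptions, only the field names come from the fragment.

import java.io.BufferedWriter;
import java.io.FileWriter;

import com.google.gson.Gson;

import stream.Data;
import stream.Processor;

// Hypothetical host class for the process(...) fragment shown above.
public class JsonLineWriter implements Processor {

  // keys whose values are copied from the incoming item into the written record
  String[] keys = new String[0];

  // when true, records are separated by commas (e.g. when wrapped in a JSON array block)
  boolean writeBlock = false;
  boolean firstLine = true;

  BufferedWriter bw;
  Gson gson = new Gson();

  // open the output writer before processing starts (the path is a placeholder)
  public void open() throws Exception {
    bw = new BufferedWriter(new FileWriter("items.json"));
    firstLine = true;
  }

  @Override
  public Data process(Data data) {
    // body as shown in the fragment above
    return data;
  }
}
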
/**
 * This class implements a single-threaded consumer for a Kafka topic. It will subscribe to a given
 * topic and consume messages from all partitions.
 *
 * @author Christian Bockermann
 */
public class SingleTopicStream extends AbstractStream {

  static Logger log = LoggerFactory.getLogger(SingleTopicStream.class);

  // sentinel item used to signal end-of-stream to readNext()
  final Data endOfStream = DataFactory.create();

  @Parameter(required = true, description = "Zookeeper servers to connect to.")
  String zookeeper;

  @Parameter(required = true, description = "Brokers to connect to.")
  String broker;

  @Parameter(required = true, description = "The topic to which this stream subscribes.")
  String topic;

  @Parameter(
      required = false,
      description =
          "The group id of the consumer - if not provided, a random identifier will be generated.")
  String group;

  KafkaConsumer<byte[], byte[]> consumer;
  StringObjectDecoder keyCodec = new StringObjectDecoder();

  Codec<Data> valueCodec;

  // hand-over queue between the background poll thread and readNext()
  final LinkedBlockingQueue<Data> queue = new LinkedBlockingQueue<Data>();

  Thread pollThread = null;
  final AtomicBoolean running = new AtomicBoolean(true);
  boolean closed = false;

  /** @see stream.io.AbstractStream#init() */
  @Override
  public void init() throws Exception {
    super.init();

    if (group == null) {
      group = UUID.randomUUID().toString();
    }

    Properties props = new Properties();
    // legacy settings from the old consumer API, kept for compatibility;
    // the new KafkaConsumer only requires bootstrap.servers
    props.put("zookeeper.connect", zookeeper);
    props.put("metadata.broker.list", broker);
    props.put("bootstrap.servers", broker);
    props.put("group.id", group);
    props.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
    // keys are deserialized as Strings and stored under '@kafka:key'; values stay raw bytes
    props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    // commit offsets automatically every 10 seconds
    props.put("enable.auto.commit", "true");
    props.put("auto.commit.interval.ms", "10000");

    log.debug("Creating kafka consumer...");
    final KafkaConsumer<byte[], byte[]> kc = new KafkaConsumer<byte[], byte[]>(props);
    consumer = kc;

    log.info("Subscribing to topic '{}'", topic);
    consumer.subscribe(Arrays.asList(topic));
    log.info("Using codec {}", valueCodec);

    List<PartitionInfo> partitionInfo = consumer.partitionsFor(topic);
    log.debug("topic '{}' has {} partitions", topic, partitionInfo.size());

    // background thread that polls Kafka and feeds decoded items into the queue for readNext()
    pollThread =
        new Thread() {
          public void run() {
            try {
              running.set(true);
              while (running.get()) {
                ConsumerRecords<byte[], byte[]> messages = consumer.poll(100L);
                log.debug("Polled new messages: {}", messages);
                // keep polling until records arrive or the stream has been shut down
                while (running.get() && (messages == null || messages.count() < 1)) {
                  messages = consumer.poll(1000L);
                  log.debug("Polled new messages: {}", messages);
                }

                if (messages != null) {
                  Iterator<ConsumerRecord<byte[], byte[]>> it = messages.iterator();
                  while (it.hasNext()) {
                    ConsumerRecord<byte[], byte[]> record = it.next();

                    Object key = record.key();
                    byte[] data = record.value();

                    Data item = null;
                    if (valueCodec != null) {
                      log.debug("de-serializing item using {}", valueCodec);
                      item = valueCodec.decode(data);
                    } else {
                      item = DataFactory.create();
                      if (key != null) {
                        item.put("@kafka:key", (Serializable) key);
                      }
                      item.put("@kafka:value", data);
                    }
                    item.put("@kafka:topic", record.topic());
                    item.put("@kafka:partition", record.partition());
                    queue.add(item);
                  }
                }
              }
            } catch (Exception e) {
              log.error("Error while consuming from topic " + topic, e);
            } finally {
              log.info("Closing topic-stream reader...");
              queue.add(endOfStream);
            }
          }
        };

    pollThread.setDaemon(true);
    pollThread.start();

    // register a shutdown hook: stop the poll loop and push the end-of-stream
    // marker so readNext() can return
    Signals.register(
        new Hook() {
          @Override
          public void signal(int flags) {
            log.info("Received shutdown-signal!");
            running.set(false);
            pollThread.interrupt();
            log.info("Adding EOF to queue...");
            queue.add(endOfStream);
          }
        });
  }

  /** @see stream.io.AbstractStream#readNext() */
  @Override
  public synchronized Data readNext() throws Exception {
    if (pollThread == null) {
      init();
    }

    if (closed) {
      return null;
    }

    Data item = queue.take();
    if (item == endOfStream) {
      closed = true;
      return null;
    }
    return item;
  }

  @Parameter(
      required = false,
      description =
          "Codec used to de-serialize Data items from the message bytes; if not set, the raw message bytes are stored in a new item under the key '@kafka:value'.")
  public void setCodec(String codec) {
    if (codec != null) {
      try {
        this.valueCodec = CodecUtils.create(codec);
      } catch (Exception e) {
        throw new RuntimeException(
            "Failed to create codec from '" + codec + "': " + e.getMessage(), e);
      }
    }
  }
}
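
For reference, a minimal sketch of how the stream could be driven programmatically. In a streams container the @Parameter fields are normally injected from the XML configuration; the host names and topic below are placeholders, and the demo is assumed to live in the same package so the package-private parameter fields can be set directly.

import stream.Data;

public class SingleTopicStreamDemo {

  public static void main(String[] args) throws Exception {
    SingleTopicStream stream = new SingleTopicStream();

    // placeholder connection settings -- adjust to the local environment
    stream.zookeeper = "localhost:2181";
    stream.broker = "localhost:9092";
    stream.topic = "test-topic";

    stream.init();

    // readNext() blocks until a message arrives and returns null once the
    // end-of-stream marker has been queued (e.g. after a shutdown signal)
    Data item;
    while ((item = stream.readNext()) != null) {
      System.out.println(item);
    }
  }
}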