@Test
 public void testKafkaInputOperator_Simple() throws Exception {
   int totalCount = 10000;
   KafkaConsumer k = new SimpleKafkaConsumer();
   k.setInitialOffset("earliest");
   testKafkaInputOperator(1000, totalCount, k, true);
 }
  public static void main(String[] args) {
    OptionFactory optFactory = OptionFactory.getInstance();
    Options options = KafkaConsumer.getOptions();
    options.addOption(optFactory.create("tbl", "table", true, "HBase table name.").required());
    options.addOption(optFactory.create("f", "family", true, "Column family."));
    options.addOption(optFactory.create("q", "qualifier", true, "Column qualifier."));
    options.addOption(
        optFactory.create(
            "b", "batchsize", true, "Batch size (number of messages per HBase flush)."));
    options.addOption(optFactory.create("pd", "prefixdate", false, "Prefix key with salted date."));

    CommandLineParser parser = new GnuParser();
    ShutdownHook sh = ShutdownHook.getInstance();
    try {
      // Parse command line options
      CommandLine cmd = parser.parse(options, args);

      final KafkaConsumer consumer = KafkaConsumer.fromOptions(cmd);
      sh.addFirst(consumer);

      // Create a sink for storing data
      SinkConfiguration sinkConfig = new SinkConfiguration();
      if (cmd.hasOption("numthreads")) {
        sinkConfig.setInt(
            "hbasesink.hbase.numthreads", Integer.parseInt(cmd.getOptionValue("numthreads")));
      }
      if (cmd.hasOption("batchsize")) {
        sinkConfig.setInt(
            "hbasesink.hbase.batchsize", Integer.parseInt(cmd.getOptionValue("batchsize")));
      }
      sinkConfig.setString("hbasesink.hbase.tablename", cmd.getOptionValue("table"));
      sinkConfig.setString("hbasesink.hbase.column.family", cmd.getOptionValue("family", "data"));
      sinkConfig.setString(
          "hbasesink.hbase.column.qualifier", cmd.getOptionValue("qualifier", "json"));
      sinkConfig.setBoolean("hbasesink.hbase.rowkey.prefixdate", cmd.hasOption("prefixdate"));
      KeyValueSinkFactory sinkFactory =
          KeyValueSinkFactory.getInstance(HBaseSink.class, sinkConfig);
      sh.addLast(sinkFactory);

      // Set the sink factory for consumer storage
      consumer.setSinkFactory(sinkFactory);

      prepareHealthChecks();

      // Initialize metrics collection, reporting, etc.
      final MetricsManager manager = MetricsManager.getDefaultMetricsManager();

      // Begin polling
      consumer.poll();
    } catch (ParseException e) {
      LOG.error("Error parsing command line options", e);
      HelpFormatter formatter = new HelpFormatter();
      formatter.printHelp(KafkaHBaseConsumer.class.getName(), options);
    }
  }
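
  // Example invocation, hedged: the classpath and any Kafka-side options contributed by
  // KafkaConsumer.getOptions() are assumptions; the HBase-side flags are the ones registered
  // above, with --table being the only required one.
  //
  //   java -cp <jars> KafkaHBaseConsumer --table metrics --family data --qualifier json \
  //       --batchsize 100 --prefixdate
  //
  // Note that "numthreads" is read from the parsed command line but is not registered in this
  // method, so it presumably comes from the options returned by KafkaConsumer.getOptions().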
  /**
   * Test AbstractKafkaSinglePortInputOperator (i.e. an input adapter for Kafka, aka consumer).
   * This module receives data from an outside test generator through the Kafka message bus and
   * feeds that data into the Malhar streaming platform.
   *
   * <p>[Generate message and send it to the Kafka message bus] ==> [Receive that message through
   * the Kafka input adapter (i.e. consumer) and emit it via the emitTuples() interface on the
   * output port during the onMessage call]
   *
   * @throws Exception
   */
  public void testKafkaInputOperator(
      int sleepTime, final int totalCount, KafkaConsumer consumer, boolean isValid)
      throws Exception {
    // initialize the latch for this test
    latch = new CountDownLatch(1);

    // Start producer
    KafkaTestProducer p = new KafkaTestProducer(TEST_TOPIC);
    p.setSendCount(totalCount);
    new Thread(p).start();

    // Create DAG for testing.
    LocalMode lma = LocalMode.newInstance();
    DAG dag = lma.getDAG();

    // Create KafkaSinglePortStringInputOperator
    KafkaSinglePortStringInputOperator node =
        dag.addOperator("Kafka message consumer", KafkaSinglePortStringInputOperator.class);
    consumer.setTopic(TEST_TOPIC);
    if (isValid) {
      Set<String> brokerSet = new HashSet<String>();
      brokerSet.add("localhost:9092");
      consumer.setBrokerSet(brokerSet);
    }
    node.setConsumer(consumer);

    // Create Test tuple collector
    CollectorModule<String> collector =
        dag.addOperator("TestMessageCollector", new CollectorModule<String>());

    // Connect ports
    dag.addStream("Kafka message", node.outputPort, collector.inputPort)
        .setLocality(Locality.CONTAINER_LOCAL);

    // Create local cluster
    final LocalMode.Controller lc = lma.getController();
    lc.setHeartbeatMonitoringEnabled(false);

    lc.runAsync();

    // Wait up to 30s for the consumer to finish consuming all the messages
    Assert.assertTrue("TIMEOUT: 30s", latch.await(30000, TimeUnit.MILLISECONDS));

    // Check results
    Assert.assertEquals("Collections size", 1, collections.size());
    Assert.assertEquals("Tuple count", totalCount, collections.get(collector.inputPort.id).size());
    logger.debug(
        String.format(
            "Number of emitted tuples: %d", collections.get(collector.inputPort.id).size()));

    p.close();
    lc.shutdown();
  }
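
  // The collector operator wired into the DAG above, and the static "collections" map and
  // "latch" it writes to, are defined elsewhere in this test class. Below is only a minimal
  // sketch of the assumed shape (names other than CollectorModule are assumptions), using
  // Malhar's BaseOperator/DefaultInputPort API: every tuple is appended to a list keyed by the
  // input port's id, and the latch is released once the expected number of tuples has arrived,
  // which unblocks the latch.await() call in testKafkaInputOperator().
  public static class CollectorModule<T> extends BaseOperator {
    public final transient CollectorInputPort<T> inputPort = new CollectorInputPort<T>("collector");
  }

  public static class CollectorInputPort<T> extends DefaultInputPort<T> {
    public final String id;

    public CollectorInputPort(String id) {
      this.id = id;
    }

    @Override
    public void process(T tuple) {
      // Assumes "collections" is a static Map<String, List<Object>> and "expectedTupleCount"
      // a static field set by the test to totalCount.
      collections.get(id).add(tuple);
      if (collections.get(id).size() >= expectedTupleCount) {
        latch.countDown();
      }
    }

    @Override
    public void setConnected(boolean connected) {
      if (connected) {
        collections.put(id, new ArrayList<Object>());
      }
    }
  }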
 @Test
 public void testKafkaInputOperator_Highlevel() throws Exception {
   int totalCount = 10000;
   Properties props = new Properties();
   props.put("zookeeper.connect", "localhost:" + KafkaOperatorTestBase.TEST_ZOOKEEPER_PORT);
   props.put("group.id", "group1");
   props.put("consumer.id", "default_consumer");
   // This property cost two days of debugging. It is new in Kafka 0.8: "smallest" resets the
   // consumer to the earliest offset that has not been consumed yet; without it the consumer
   // will not receive any messages produced before it started.
   KafkaConsumer k = new HighlevelKafkaConsumer(props);
   k.setInitialOffset("earliest");
   testKafkaInputOperator(1000, totalCount, k, true);
 }
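
 // The comment in the test above refers to the consumer's offset-reset behavior (in the Kafka
 // 0.8 high-level consumer this is the "auto.offset.reset" property). A hedged sketch of the
 // relevant properties, for illustration only; the helper name is an assumption.
 private static Properties highLevelConsumerProps() {
   Properties props = new Properties();
   props.put("zookeeper.connect", "localhost:" + KafkaOperatorTestBase.TEST_ZOOKEEPER_PORT);
   props.put("group.id", "group1");
   // "smallest" starts a new consumer group from the earliest available offset instead of
   // only seeing messages produced after the consumer starts.
   props.put("auto.offset.reset", "smallest");
   return props;
 }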
Example #5
  /**
   * Test KafkaOutputOperator (i.e. an output adapter for Kafka, aka producer). This module sends
   * data into a Kafka message bus.
   *
   * <p>[Generate tuple] ==> [send tuple through Kafka output adapter (i.e. producer) into the
   * Kafka message bus] ==> [receive data in an outside Kafka listener (i.e. consumer)]
   *
   * @throws Exception
   */
  @Test
  @SuppressWarnings({"SleepWhileInLoop", "empty-statement"})
  public void testKafkaOutputOperator() throws Exception {
    // Setup a message listener to receive the message
    KafkaConsumer listener = new KafkaConsumer("topic1");
    new Thread(listener).start();

    // Malhar module to send message
    // Create DAG for testing.
    LocalMode lma = LocalMode.newInstance();
    DAG dag = lma.getDAG();

    // Create KafkaStringSinglePortOutputOperator
    StringGeneratorInputOperator generator =
        dag.addOperator("TestStringGenerator", StringGeneratorInputOperator.class);
    KafkaStringSinglePortOutputOperator node =
        dag.addOperator("Kafka message producer", KafkaStringSinglePortOutputOperator.class);
    // Set configuration parameters for Kafka
    node.setTopic("topic1");

    // Connect ports
    dag.addStream("Kafka message", generator.outputPort, node.inputPort)
        .setLocality(Locality.CONTAINER_LOCAL);

    // Create local cluster
    final LocalMode.Controller lc = lma.getController();
    lc.runAsync();

    Thread.sleep(2000);
    lc.shutdown();

    // Check values sent vs. received
    Assert.assertEquals("Number of emitted tuples", tupleCount, listener.holdingBuffer.size());
    logger.debug(String.format("Number of emitted tuples: %d", listener.holdingBuffer.size()));
    Assert.assertEquals(
        "First tuple", "testString 1", listener.getMessage(listener.holdingBuffer.peek()));

    listener.close();
  }
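
  // "tupleCount" and the "testString 1" payload asserted above come from the string generator
  // operator, which is not shown on this page. A minimal sketch of an assumed implementation
  // (the static tupleCount field and the emission scheme are assumptions), using Malhar's
  // InputOperator/DefaultOutputPort API: each call to emitTuples() emits one numbered string.
  public static class StringGeneratorInputOperator implements InputOperator {
    public final transient DefaultOutputPort<String> outputPort = new DefaultOutputPort<String>();
    private int counter = 0;

    @Override
    public void emitTuples() {
      counter++;
      outputPort.emit("testString " + counter);
      tupleCount = counter; // assumed static counter read by the assertion in the test above
    }

    @Override
    public void beginWindow(long windowId) {}

    @Override
    public void endWindow() {}

    @Override
    public void setup(Context.OperatorContext context) {}

    @Override
    public void teardown() {}
  }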
Example #6
  @Override
  protected List<ConfigIssue> init() {
    List<ConfigIssue> issues = new ArrayList<ConfigIssue>();
    if (topic == null || topic.isEmpty()) {
      issues.add(
          getContext().createConfigIssue(Groups.KAFKA.name(), "topic", KafkaErrors.KAFKA_05));
    }
    // maxWaitTime
    if (maxWaitTime < 1) {
      issues.add(
          getContext().createConfigIssue(Groups.KAFKA.name(), "maxWaitTime", KafkaErrors.KAFKA_35));
    }

    switch (dataFormat) {
      case JSON:
        if (jsonMaxObjectLen < 1) {
          issues.add(
              getContext()
                  .createConfigIssue(Groups.JSON.name(), "maxJsonObjectLen", KafkaErrors.KAFKA_38));
        }
        break;
      case TEXT:
        if (textMaxLineLen < 1) {
          issues.add(
              getContext()
                  .createConfigIssue(Groups.TEXT.name(), "maxLogLineLength", KafkaErrors.KAFKA_38));
        }
        break;
      case DELIMITED:
        if (csvMaxObjectLen < 1) {
          issues.add(
              getContext()
                  .createConfigIssue(
                      Groups.DELIMITED.name(), "csvMaxObjectLen", KafkaErrors.KAFKA_38));
        }
        break;
      case XML:
        if (produceSingleRecordPerMessage) {
          issues.add(
              getContext()
                  .createConfigIssue(
                      Groups.KAFKA.name(), "produceSingleRecordPerMessage", KafkaErrors.KAFKA_40));
        }
        if (xmlMaxObjectLen < 1) {
          issues.add(
              getContext()
                  .createConfigIssue(Groups.XML.name(), "maxXmlObjectLen", KafkaErrors.KAFKA_38));
        }
        if (xmlRecordElement != null
            && !xmlRecordElement.isEmpty()
            && !XMLChar.isValidName(xmlRecordElement)) {
          issues.add(
              getContext()
                  .createConfigIssue(
                      Groups.XML.name(),
                      "xmlRecordElement",
                      KafkaErrors.KAFKA_36,
                      xmlRecordElement));
        }
        break;
      case SDC_JSON:
      case BINARY:
        break;
      case LOG:
        logDataFormatValidator =
            new LogDataFormatValidator(
                logMode,
                logMaxObjectLen,
                logRetainOriginalLine,
                customLogFormat,
                regex,
                grokPatternDefinition,
                grokPattern,
                enableLog4jCustomLogFormat,
                log4jCustomLogFormat,
                onParseError,
                maxStackTraceLines,
                Groups.LOG.name(),
                getFieldPathToGroupMap(fieldPathsToGroupName));
        logDataFormatValidator.validateLogFormatConfig(issues, getContext());
        break;
      case AVRO:
        if (!messageHasSchema && (avroSchema == null || avroSchema.isEmpty())) {
          issues.add(
              getContext()
                  .createConfigIssue(
                      Groups.AVRO.name(), "avroSchema", KafkaErrors.KAFKA_43, avroSchema));
        }
        break;
      default:
        issues.add(
            getContext()
                .createConfigIssue(
                    Groups.KAFKA.name(), "dataFormat", KafkaErrors.KAFKA_39, dataFormat));
    }

    validateParserFactoryConfigs(issues);

    // Validate broker config
    try {
      int partitionCount = KafkaUtil.getPartitionCount(metadataBrokerList, topic, 3, 1000);
      if (partitionCount < 1) {
        issues.add(
            getContext()
                .createConfigIssue(Groups.KAFKA.name(), "topic", KafkaErrors.KAFKA_42, topic));
      } else {
        // cache the partition count as parallelism for future use
        originParallelism = partitionCount;
      }
    } catch (IOException e) {
      issues.add(
          getContext()
              .createConfigIssue(
                  Groups.KAFKA.name(), "topic", KafkaErrors.KAFKA_41, topic, e.toString(), e));
    }

    // Validate zookeeper config
    List<KafkaBroker> kafkaBrokers =
        KafkaUtil.validateZkConnectionString(
            issues, zookeeperConnect, Groups.KAFKA.name(), "zookeeperConnect", getContext());

    // validate connecting to kafka
    if (kafkaBrokers != null && !kafkaBrokers.isEmpty() && topic != null && !topic.isEmpty()) {
      kafkaConsumer =
          new KafkaConsumer(
              zookeeperConnect,
              topic,
              consumerGroup,
              maxBatchSize,
              maxWaitTime,
              kafkaConsumerConfigs,
              getContext());
      kafkaConsumer.validate(issues, getContext());
    }

    // consumerGroup
    if (consumerGroup == null || consumerGroup.isEmpty()) {
      issues.add(
          getContext()
              .createConfigIssue(Groups.KAFKA.name(), "consumerGroup", KafkaErrors.KAFKA_33));
    }
    return issues;
  }
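
  // "kafkaConsumerConfigs" passed to the KafkaConsumer constructor above is a free-form map of
  // additional consumer properties. A hedged illustration only: the helper name and the choice
  // of properties are assumptions, while the property names themselves are standard Kafka 0.8
  // high-level consumer settings.
  private static Map<String, String> exampleKafkaConsumerConfigs() {
    Map<String, String> configs = new HashMap<String, String>();
    configs.put("auto.offset.reset", "smallest");     // start from the earliest unconsumed offset
    configs.put("socket.timeout.ms", "30000");        // broker socket timeout
    configs.put("auto.commit.interval.ms", "1000");   // how often consumed offsets are committed
    return configs;
  }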