@Test
public void testKafkaInputOperator_Simple() throws Exception {
  int totalCount = 10000;
  KafkaConsumer k = new SimpleKafkaConsumer();
  k.setInitialOffset("earliest");
  testKafkaInputOperator(1000, totalCount, k, true);
}
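// The test methods in this section share a small amount of static state referenced by the
// assertions below (TEST_TOPIC, latch, collections, tupleCount, logger). A minimal sketch of
// those members, assuming java.util, java.util.concurrent.CountDownLatch and slf4j imports;
// the concrete names and values in the actual test class may differ.
private static final String TEST_TOPIC = "testtopic";
private static final Logger logger = LoggerFactory.getLogger("KafkaOperatorTest");
// Counted down by the collector once the expected messages (or an end-of-stream marker) arrive.
private static CountDownLatch latch;
// Collected tuples, keyed by the collector input port's id.
private static final Map<String, List<Object>> collections = new HashMap<String, List<Object>>();
// Number of tuples emitted by the string generator in the output-operator test.
private static int tupleCount = 0;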
public static void main(String[] args) {
  OptionFactory optFactory = OptionFactory.getInstance();
  Options options = KafkaConsumer.getOptions();
  options.addOption(optFactory.create("tbl", "table", true, "HBase table name.").required());
  options.addOption(optFactory.create("f", "family", true, "Column family."));
  options.addOption(optFactory.create("q", "qualifier", true, "Column qualifier."));
  options.addOption(
      optFactory.create("b", "batchsize", true, "Batch size (number of messages per HBase flush)."));
  options.addOption(optFactory.create("pd", "prefixdate", false, "Prefix key with salted date."));

  CommandLineParser parser = new GnuParser();
  ShutdownHook sh = ShutdownHook.getInstance();
  try {
    // Parse command line options
    CommandLine cmd = parser.parse(options, args);

    final KafkaConsumer consumer = KafkaConsumer.fromOptions(cmd);
    sh.addFirst(consumer);

    // Create a sink for storing data
    SinkConfiguration sinkConfig = new SinkConfiguration();
    if (cmd.hasOption("numthreads")) {
      sinkConfig.setInt(
          "hbasesink.hbase.numthreads", Integer.parseInt(cmd.getOptionValue("numthreads")));
    }
    if (cmd.hasOption("batchsize")) {
      sinkConfig.setInt(
          "hbasesink.hbase.batchsize", Integer.parseInt(cmd.getOptionValue("batchsize")));
    }
    sinkConfig.setString("hbasesink.hbase.tablename", cmd.getOptionValue("table"));
    sinkConfig.setString("hbasesink.hbase.column.family", cmd.getOptionValue("family", "data"));
    sinkConfig.setString(
        "hbasesink.hbase.column.qualifier", cmd.getOptionValue("qualifier", "json"));
    sinkConfig.setBoolean("hbasesink.hbase.rowkey.prefixdate", cmd.hasOption("prefixdate"));
    KeyValueSinkFactory sinkFactory = KeyValueSinkFactory.getInstance(HBaseSink.class, sinkConfig);
    sh.addLast(sinkFactory);

    // Set the sink factory for consumer storage
    consumer.setSinkFactory(sinkFactory);

    prepareHealthChecks();

    // Initialize metrics collection, reporting, etc.
    final MetricsManager manager = MetricsManager.getDefaultMetricsManager();

    // Begin polling
    consumer.poll();
  } catch (ParseException e) {
    LOG.error("Error parsing command line options", e);
    HelpFormatter formatter = new HelpFormatter();
    formatter.printHelp(KafkaHBaseConsumer.class.getName(), options);
  }
}
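// Note: main() above reads a "numthreads" option, but only table/family/qualifier/batchsize/
// prefixdate are registered locally, so presumably KafkaConsumer.getOptions() already declares
// it. If it does not, a registration like the following (sketch; the "nt" short flag and the
// description are assumptions) would belong next to the other addOption calls:
options.addOption(optFactory.create("nt", "numthreads", true, "Number of HBase sink threads."));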
/**
 * Test AbstractKafkaSinglePortInputOperator (i.e. an input adapter for Kafka, aka consumer). This
 * module receives data from an outside test generator through the Kafka message bus and feeds it
 * into the Malhar streaming platform.
 *
 * <p>[Generate message and send it to the Kafka message bus] ==> [Receive that message through the
 * Kafka input adapter (i.e. consumer) and send it via the emitTuples() interface on the output
 * port during the onMessage call]
 *
 * @throws Exception
 */
public void testKafkaInputOperator(
    int sleepTime, final int totalCount, KafkaConsumer consumer, boolean isValid) throws Exception {
  // Initialize the latch for this test
  latch = new CountDownLatch(1);

  // Start the producer
  KafkaTestProducer p = new KafkaTestProducer(TEST_TOPIC);
  p.setSendCount(totalCount);
  new Thread(p).start();

  // Create DAG for testing.
  LocalMode lma = LocalMode.newInstance();
  DAG dag = lma.getDAG();

  // Create KafkaSinglePortStringInputOperator
  KafkaSinglePortStringInputOperator node =
      dag.addOperator("Kafka message consumer", KafkaSinglePortStringInputOperator.class);
  consumer.setTopic(TEST_TOPIC);
  if (isValid) {
    Set<String> brokerSet = new HashSet<String>();
    brokerSet.add("localhost:9092");
    consumer.setBrokerSet(brokerSet);
  }
  node.setConsumer(consumer);

  // Create the test tuple collector
  CollectorModule<String> collector =
      dag.addOperator("TestMessageCollector", new CollectorModule<String>());

  // Connect ports
  dag.addStream("Kafka message", node.outputPort, collector.inputPort)
      .setLocality(Locality.CONTAINER_LOCAL);

  // Create local cluster
  final LocalMode.Controller lc = lma.getController();
  lc.setHeartbeatMonitoringEnabled(false);
  lc.runAsync();

  // Wait up to 30s for the consumer to finish consuming all the messages
  Assert.assertTrue("TIMEOUT: 30s ", latch.await(30000, TimeUnit.MILLISECONDS));

  // Check results
  Assert.assertEquals("Collections size", 1, collections.size());
  Assert.assertEquals("Tuple count", totalCount, collections.get(collector.inputPort.id).size());
  logger.debug(
      String.format(
          "Number of emitted tuples: %d", collections.get(collector.inputPort.id).size()));

  p.close();
  lc.shutdown();
}
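// A minimal sketch of the CollectorModule wired into the DAG above, assuming Apex's
// BaseOperator and DefaultInputPort API plus the shared latch/collections fields sketched
// earlier. The "END_TUPLE" end-of-stream marker and the port id are assumptions; the real
// test helper may signal completion differently.
public static class CollectorInputPort<T> extends DefaultInputPort<T> {
  public String id; // key into the shared collections map, used by the assertions above

  @Override
  public void process(T tuple) {
    if ("END_TUPLE".equals(tuple)) {
      // Hypothetical marker sent by KafkaTestProducer after the last message; release the test.
      latch.countDown();
    } else {
      collections.get(id).add(tuple);
    }
  }
}

public static class CollectorModule<T> extends BaseOperator {
  public final transient CollectorInputPort<T> inputPort = new CollectorInputPort<T>();

  @Override
  public void setup(Context.OperatorContext context) {
    // Register this port's bucket so collections.get(inputPort.id) in the test is non-null.
    inputPort.id = "collector";
    collections.put(inputPort.id, new ArrayList<Object>());
  }
}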
@Test
public void testKafkaInputOperator_Highlevel() throws Exception {
  int totalCount = 10000;
  Properties props = new Properties();
  props.put("zookeeper.connect", "localhost:" + KafkaOperatorTestBase.TEST_ZOOKEEPER_PORT);
  props.put("group.id", "group1");
  props.put("consumer.id", "default_consumer");
  // The consumer must start from the earliest unconsumed offset (the Kafka 0.8
  // auto.offset.reset="smallest" behavior); otherwise it will not see any messages
  // produced before it started.
  KafkaConsumer k = new HighlevelKafkaConsumer(props);
  k.setInitialOffset("earliest");
  testKafkaInputOperator(1000, totalCount, k, true);
}
/**
 * Test KafkaOutputOperator (i.e. an output adapter for Kafka, aka producer). This module sends
 * data into the Kafka message bus.
 *
 * <p>[Generate tuple] ==> [send tuple through the Kafka output adapter (i.e. producer) into the
 * Kafka message bus] ==> [receive data in an outside Kafka listener (i.e. consumer)]
 *
 * @throws Exception
 */
@Test
@SuppressWarnings({"SleepWhileInLoop", "empty-statement"})
public void testKafkaOutputOperator() throws Exception {
  // Set up a message listener to receive the message
  KafkaConsumer listener = new KafkaConsumer("topic1");
  new Thread(listener).start();

  // Malhar module to send the message
  // Create DAG for testing.
  LocalMode lma = LocalMode.newInstance();
  DAG dag = lma.getDAG();

  // Create the string generator and the KafkaStringSinglePortOutputOperator
  StringGeneratorInputOperator generator =
      dag.addOperator("TestStringGenerator", StringGeneratorInputOperator.class);
  KafkaStringSinglePortOutputOperator node =
      dag.addOperator("Kafka message producer", KafkaStringSinglePortOutputOperator.class);

  // Set configuration parameters for Kafka
  node.setTopic("topic1");

  // Connect ports
  dag.addStream("Kafka message", generator.outputPort, node.inputPort)
      .setLocality(Locality.CONTAINER_LOCAL);

  // Create local cluster
  final LocalMode.Controller lc = lma.getController();
  lc.runAsync();
  Thread.sleep(2000);
  lc.shutdown();

  // Check values sent vs received
  Assert.assertEquals("Number of emitted tuples", tupleCount, listener.holdingBuffer.size());
  logger.debug(String.format("Number of emitted tuples: %d", listener.holdingBuffer.size()));
  Assert.assertEquals(
      "First tuple", "testString 1", listener.getMessage(listener.holdingBuffer.peek()));

  listener.close();
}
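// A minimal sketch of the StringGeneratorInputOperator used above, assuming Apex's
// InputOperator interface; maxTuple and the shared static tupleCount checked by the assertion
// are assumptions about the surrounding test class.
public static class StringGeneratorInputOperator implements InputOperator {
  public final transient DefaultOutputPort<String> outputPort = new DefaultOutputPort<String>();
  private final int maxTuple = 20;
  private int counter = 0;

  @Override
  public void emitTuples() {
    // Emit a bounded, numbered sequence so the test can assert on both count and content.
    if (counter < maxTuple) {
      counter++;
      outputPort.emit("testString " + counter);
      tupleCount++;
    }
  }

  @Override
  public void beginWindow(long windowId) {}

  @Override
  public void endWindow() {}

  @Override
  public void setup(Context.OperatorContext context) {}

  @Override
  public void teardown() {}
}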
@Override
protected List<ConfigIssue> init() {
  List<ConfigIssue> issues = new ArrayList<ConfigIssue>();

  if (topic == null || topic.isEmpty()) {
    issues.add(
        getContext().createConfigIssue(Groups.KAFKA.name(), "topic", KafkaErrors.KAFKA_05));
  }
  // maxWaitTime
  if (maxWaitTime < 1) {
    issues.add(
        getContext().createConfigIssue(Groups.KAFKA.name(), "maxWaitTime", KafkaErrors.KAFKA_35));
  }

  switch (dataFormat) {
    case JSON:
      if (jsonMaxObjectLen < 1) {
        issues.add(
            getContext()
                .createConfigIssue(Groups.JSON.name(), "maxJsonObjectLen", KafkaErrors.KAFKA_38));
      }
      break;
    case TEXT:
      if (textMaxLineLen < 1) {
        issues.add(
            getContext()
                .createConfigIssue(Groups.TEXT.name(), "maxLogLineLength", KafkaErrors.KAFKA_38));
      }
      break;
    case DELIMITED:
      if (csvMaxObjectLen < 1) {
        issues.add(
            getContext()
                .createConfigIssue(
                    Groups.DELIMITED.name(), "csvMaxObjectLen", KafkaErrors.KAFKA_38));
      }
      break;
    case XML:
      if (produceSingleRecordPerMessage) {
        issues.add(
            getContext()
                .createConfigIssue(
                    Groups.KAFKA.name(), "produceSingleRecordPerMessage", KafkaErrors.KAFKA_40));
      }
      if (xmlMaxObjectLen < 1) {
        issues.add(
            getContext()
                .createConfigIssue(Groups.XML.name(), "maxXmlObjectLen", KafkaErrors.KAFKA_38));
      }
      if (xmlRecordElement != null
          && !xmlRecordElement.isEmpty()
          && !XMLChar.isValidName(xmlRecordElement)) {
        issues.add(
            getContext()
                .createConfigIssue(
                    Groups.XML.name(), "xmlRecordElement", KafkaErrors.KAFKA_36, xmlRecordElement));
      }
      break;
    case SDC_JSON:
    case BINARY:
      break;
    case LOG:
      logDataFormatValidator =
          new LogDataFormatValidator(
              logMode,
              logMaxObjectLen,
              logRetainOriginalLine,
              customLogFormat,
              regex,
              grokPatternDefinition,
              grokPattern,
              enableLog4jCustomLogFormat,
              log4jCustomLogFormat,
              onParseError,
              maxStackTraceLines,
              Groups.LOG.name(),
              getFieldPathToGroupMap(fieldPathsToGroupName));
      logDataFormatValidator.validateLogFormatConfig(issues, getContext());
      break;
    case AVRO:
      if (!messageHasSchema && (avroSchema == null || avroSchema.isEmpty())) {
        issues.add(
            getContext()
                .createConfigIssue(
                    Groups.AVRO.name(), "avroSchema", KafkaErrors.KAFKA_43, avroSchema));
      }
      break;
    default:
      issues.add(
          getContext()
              .createConfigIssue(
                  Groups.KAFKA.name(), "dataFormat", KafkaErrors.KAFKA_39, dataFormat));
  }

  validateParserFactoryConfigs(issues);

  // Validate broker config
  try {
    int partitionCount = KafkaUtil.getPartitionCount(metadataBrokerList, topic, 3, 1000);
    if (partitionCount < 1) {
      issues.add(
          getContext()
              .createConfigIssue(Groups.KAFKA.name(), "topic", KafkaErrors.KAFKA_42, topic));
    } else {
      // cache the partition count as parallelism for future use
      originParallelism = partitionCount;
    }
  } catch (IOException e) {
    issues.add(
        getContext()
            .createConfigIssue(
                Groups.KAFKA.name(), "topic", KafkaErrors.KAFKA_41, topic, e.toString(), e));
  }

  // Validate zookeeper config
  List<KafkaBroker> kafkaBrokers =
      KafkaUtil.validateZkConnectionString(
          issues, zookeeperConnect, Groups.KAFKA.name(), "zookeeperConnect", getContext());

  // validate connecting to kafka
  if (kafkaBrokers != null && !kafkaBrokers.isEmpty() && topic != null && !topic.isEmpty()) {
    kafkaConsumer =
        new KafkaConsumer(
            zookeeperConnect,
            topic,
            consumerGroup,
            maxBatchSize,
            maxWaitTime,
            kafkaConsumerConfigs,
            getContext());
    kafkaConsumer.validate(issues, getContext());
  }

  // consumerGroup
  if (consumerGroup == null || consumerGroup.isEmpty()) {
    issues.add(
        getContext()
            .createConfigIssue(Groups.KAFKA.name(), "consumerGroup", KafkaErrors.KAFKA_33));
  }

  return issues;
}
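// The LOG branch above passes getFieldPathToGroupMap(fieldPathsToGroupName) to the
// LogDataFormatValidator. A minimal sketch of such a helper, assuming each entry in
// fieldPathsToGroupName is a config bean exposing a target field path and a regex
// capture-group index (the RegExConfig name and its fields are assumptions):
private Map<String, Integer> getFieldPathToGroupMap(List<RegExConfig> fieldPathsToGroupName) {
  if (fieldPathsToGroupName == null) {
    return Collections.emptyMap();
  }
  Map<String, Integer> fieldPathToGroup = new HashMap<>();
  for (RegExConfig regExConfig : fieldPathsToGroupName) {
    // Map the record field path to the regex group whose match should populate it.
    fieldPathToGroup.put(regExConfig.fieldPath, regExConfig.group);
  }
  return fieldPathToGroup;
}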