@Override protected List<ConfigIssue> init() { List<ConfigIssue> issues = new ArrayList<ConfigIssue>(); if (topic == null || topic.isEmpty()) { issues.add( getContext().createConfigIssue(Groups.KAFKA.name(), "topic", KafkaErrors.KAFKA_05)); } // maxWaitTime if (maxWaitTime < 1) { issues.add( getContext().createConfigIssue(Groups.KAFKA.name(), "maxWaitTime", KafkaErrors.KAFKA_35)); } switch (dataFormat) { case JSON: if (jsonMaxObjectLen < 1) { issues.add( getContext() .createConfigIssue(Groups.JSON.name(), "maxJsonObjectLen", KafkaErrors.KAFKA_38)); } break; case TEXT: if (textMaxLineLen < 1) { issues.add( getContext() .createConfigIssue(Groups.TEXT.name(), "maxLogLineLength", KafkaErrors.KAFKA_38)); } break; case DELIMITED: if (csvMaxObjectLen < 1) { issues.add( getContext() .createConfigIssue( Groups.DELIMITED.name(), "csvMaxObjectLen", KafkaErrors.KAFKA_38)); } break; case XML: if (produceSingleRecordPerMessage) { issues.add( getContext() .createConfigIssue( Groups.KAFKA.name(), "produceSingleRecordPerMessage", KafkaErrors.KAFKA_40)); } if (xmlMaxObjectLen < 1) { issues.add( getContext() .createConfigIssue(Groups.XML.name(), "maxXmlObjectLen", KafkaErrors.KAFKA_38)); } if (xmlRecordElement != null && !xmlRecordElement.isEmpty() && !XMLChar.isValidName(xmlRecordElement)) { issues.add( getContext() .createConfigIssue( Groups.XML.name(), "xmlRecordElement", KafkaErrors.KAFKA_36, xmlRecordElement)); } break; case SDC_JSON: case BINARY: break; case LOG: logDataFormatValidator = new LogDataFormatValidator( logMode, logMaxObjectLen, logRetainOriginalLine, customLogFormat, regex, grokPatternDefinition, grokPattern, enableLog4jCustomLogFormat, log4jCustomLogFormat, onParseError, maxStackTraceLines, Groups.LOG.name(), getFieldPathToGroupMap(fieldPathsToGroupName)); logDataFormatValidator.validateLogFormatConfig(issues, getContext()); break; case AVRO: if (!messageHasSchema && (avroSchema == null || avroSchema.isEmpty())) { issues.add( getContext() .createConfigIssue( Groups.AVRO.name(), "avroSchema", KafkaErrors.KAFKA_43, avroSchema)); } break; default: issues.add( getContext() .createConfigIssue( Groups.KAFKA.name(), "dataFormat", KafkaErrors.KAFKA_39, dataFormat)); } validateParserFactoryConfigs(issues); // Validate broker config try { int partitionCount = KafkaUtil.getPartitionCount(metadataBrokerList, topic, 3, 1000); if (partitionCount < 1) { issues.add( getContext() .createConfigIssue(Groups.KAFKA.name(), "topic", KafkaErrors.KAFKA_42, topic)); } else { // cache the partition count as parallelism for future use originParallelism = partitionCount; } } catch (IOException e) { issues.add( getContext() .createConfigIssue( Groups.KAFKA.name(), "topic", KafkaErrors.KAFKA_41, topic, e.toString(), e)); } // Validate zookeeper config List<KafkaBroker> kafkaBrokers = KafkaUtil.validateZkConnectionString( issues, zookeeperConnect, Groups.KAFKA.name(), "zookeeperConnect", getContext()); // validate connecting to kafka if (kafkaBrokers != null && !kafkaBrokers.isEmpty() && topic != null && !topic.isEmpty()) { kafkaConsumer = new KafkaConsumer( zookeeperConnect, topic, consumerGroup, maxBatchSize, maxWaitTime, kafkaConsumerConfigs, getContext()); kafkaConsumer.validate(issues, getContext()); } // consumerGroup if (consumerGroup == null || consumerGroup.isEmpty()) { issues.add( getContext() .createConfigIssue(Groups.KAFKA.name(), "consumerGroup", KafkaErrors.KAFKA_33)); } return issues; }
public List<Stage.ConfigIssue> init(Source.Context context) { List<Stage.ConfigIssue> issues = new ArrayList<>(); switch (dataFormat) { case JSON: if (dataFormatConfig.jsonMaxObjectLen < 1) { issues.add( context.createConfigIssue( DataFormat.JSON.name(), "dataFormatConfig.maxJsonObjectLen", ParserErrors.PARSER_04)); } break; case TEXT: if (dataFormatConfig.textMaxLineLen < 1) { issues.add( context.createConfigIssue( DataFormat.TEXT.name(), "dataFormatConfig.maxLogLineLength", ParserErrors.PARSER_04)); } break; case DELIMITED: if (dataFormatConfig.csvMaxObjectLen < 1) { issues.add( context.createConfigIssue( DataFormat.DELIMITED.name(), "dataFormatConfig.csvMaxObjectLen", ParserErrors.PARSER_04)); } break; case XML: if (messageConfig != null && messageConfig.produceSingleRecordPerMessage) { issues.add( context.createConfigIssue( parentName, "messageConfig.produceSingleRecordPerMessage", ParserErrors.PARSER_06)); } if (dataFormatConfig.xmlMaxObjectLen < 1) { issues.add( context.createConfigIssue( DataFormat.XML.name(), "dataFormatConfig.maxXmlObjectLen", ParserErrors.PARSER_04)); } if (dataFormatConfig.xmlRecordElement != null && !dataFormatConfig.xmlRecordElement.isEmpty() && !XMLChar.isValidName(dataFormatConfig.xmlRecordElement)) { issues.add( context.createConfigIssue( DataFormat.XML.name(), "dataFormatConfig.xmlRecordElement", ParserErrors.PARSER_02, dataFormatConfig.xmlRecordElement)); } break; case SDC_JSON: break; case LOG: logDataFormatValidator = new LogDataFormatValidator( dataFormatConfig.logMode, dataFormatConfig.logMaxObjectLen, dataFormatConfig.retainOriginalLine, dataFormatConfig.customLogFormat, dataFormatConfig.regex, dataFormatConfig.grokPatternDefinition, dataFormatConfig.grokPattern, dataFormatConfig.enableLog4jCustomLogFormat, dataFormatConfig.log4jCustomLogFormat, dataFormatConfig.onParseError, dataFormatConfig.maxStackTraceLines, DataFormat.LOG.name(), getFieldPathToGroupMap(dataFormatConfig.fieldPathsToGroupName)); logDataFormatValidator.validateLogFormatConfig(issues, context); break; case AVRO: if (!dataFormatConfig.schemaInMessage && (dataFormatConfig.avroSchema == null || dataFormatConfig.avroSchema.isEmpty())) { issues.add( context.createConfigIssue( DataFormat.AVRO.name(), "dataFormatConfig.avroSchema", ParserErrors.PARSER_07, dataFormatConfig.avroSchema)); } break; default: issues.add( context.createConfigIssue( parentName, "dataFormat", ParserErrors.PARSER_05, dataFormat)); } DataParserFactoryBuilder builder = new DataParserFactoryBuilder(context, dataFormat.getParserFormat()) .setCharset(Charset.defaultCharset()); if (dataFormatConfig.charset == null) { messageCharset = StandardCharsets.UTF_8; } else { try { messageCharset = Charset.forName(dataFormatConfig.charset); } catch (UnsupportedCharsetException ex) { // setting it to a valid one so the parser factory can be configured and tested for more // errors messageCharset = StandardCharsets.UTF_8; issues.add( context.createConfigIssue( parentName, "charset", ParserErrors.PARSER_01, dataFormatConfig.charset)); } } builder.setCharset(messageCharset).setRemoveCtrlChars(dataFormatConfig.removeCtrlChars); switch (dataFormat) { case TEXT: builder.setMaxDataLen(dataFormatConfig.textMaxLineLen); break; case JSON: builder.setMode(dataFormatConfig.jsonContent); builder.setMaxDataLen(dataFormatConfig.jsonMaxObjectLen); break; case DELIMITED: builder .setMaxDataLen(dataFormatConfig.csvMaxObjectLen) .setMode(dataFormatConfig.csvFileFormat) .setMode(dataFormatConfig.csvHeader) .setMode(dataFormatConfig.csvRecordType) .setConfig( DelimitedDataParserFactory.DELIMITER_CONFIG, dataFormatConfig.csvCustomDelimiter) .setConfig(DelimitedDataParserFactory.ESCAPE_CONFIG, dataFormatConfig.csvCustomEscape) .setConfig(DelimitedDataParserFactory.QUOTE_CONFIG, dataFormatConfig.csvCustomQuote); break; case XML: builder.setMaxDataLen(dataFormatConfig.xmlMaxObjectLen); builder.setConfig( XmlDataParserFactory.RECORD_ELEMENT_KEY, dataFormatConfig.xmlRecordElement); break; case SDC_JSON: builder.setMaxDataLen(-1); break; case LOG: logDataFormatValidator.populateBuilder(builder); break; case AVRO: builder .setMaxDataLen(Integer.MAX_VALUE) .setConfig(AvroDataParserFactory.SCHEMA_KEY, dataFormatConfig.avroSchema) .setConfig( AvroDataParserFactory.SCHEMA_IN_MESSAGE_KEY, dataFormatConfig.schemaInMessage); break; default: throw new IllegalStateException("Unknown data format: " + dataFormat); } parserFactory = builder.build(); return issues; }