@Override
public SparkWorkloadOperator<WithTime<String>> stringStreamFromKafkaWithTime(
    String zkConStr,
    String kafkaServers,
    String group,
    String topics,
    String offset,
    String componentId,
    int parallelism) {
  HashSet<String> topicsSet = new HashSet<>(Arrays.asList(topics.split(",")));
  HashMap<String, String> kafkaParams = new HashMap<>();
  kafkaParams.put("metadata.broker.list", kafkaServers);
  kafkaParams.put("auto.offset.reset", offset);
  kafkaParams.put("zookeeper.connect", zkConStr);
  kafkaParams.put("group.id", group);

  // Create direct kafka stream with brokers and topics
  JavaPairInputDStream<String, String> messages =
      KafkaUtils.createDirectStream(
          jssc,
          String.class,
          String.class,
          StringDecoder.class,
          StringDecoder.class,
          kafkaParams,
          topicsSet);

  JavaDStream<WithTime<String>> lines = messages.map(mapFunctionWithTime);
  return new SparkWorkloadOperator<>(lines, parallelism);
}
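The operator above maps every Kafka record through mapFunctionWithTime into a WithTime<String>, but neither is shown in this snippet. Below is a minimal sketch of what they might look like, assuming WithTime simply pairs a payload with the time it was received; the class shape, field names, and timestamping policy are assumptions, not the project's actual definitions.

import java.io.Serializable;
import org.apache.spark.api.java.function.Function;
import scala.Tuple2;

// Hypothetical value-plus-timestamp wrapper; assumed shape of the project's WithTime<T>.
class WithTime<T> implements Serializable {
  final T value;
  final long time;

  WithTime(T value, long time) {
    this.value = value;
    this.time = time;
  }
}

// Assumed holder for the mapping function used above; in the real project this would
// likely be a field of the operator-creator class.
class MapFunctions {
  static final Function<Tuple2<String, String>, WithTime<String>> mapFunctionWithTime =
      new Function<Tuple2<String, String>, WithTime<String>>() {
        @Override
        public WithTime<String> call(Tuple2<String, String> kafkaMessage) {
          // kafkaMessage._2() is the message payload; stamp it with the arrival time
          return new WithTime<>(kafkaMessage._2(), System.currentTimeMillis());
        }
      };
}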
public static void main(String[] args) { SparkConf conf = new SparkConf().setAppName("kafka-sandbox").setMaster("local[*]"); JavaSparkContext sc = new JavaSparkContext(conf); JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(2000)); Set<String> topics = Collections.singleton("mytopic"); Map<String, String> kafkaParams = new HashMap<>(); kafkaParams.put("metadata.broker.list", "sandbox.hortonworks.com:6667"); JavaPairInputDStream<String, byte[]> directKafkaStream = KafkaUtils.createDirectStream( ssc, String.class, byte[].class, StringDecoder.class, DefaultDecoder.class, kafkaParams, topics); directKafkaStream.foreachRDD( rdd -> { rdd.foreach( avroRecord -> { Schema.Parser parser = new Schema.Parser(); Schema schema = parser.parse(AvroVulabProducer.USER_SCHEMA); Injection<GenericRecord, byte[]> recordInjection = GenericAvroCodecs.toBinary(schema); GenericRecord record = recordInjection.invert(avroRecord._2).get(); System.out.println( "str1= " + record.get("str1") + ", str2= " + record.get("str2") + ", int1=" + record.get("int1")); }); }); ssc.start(); ssc.awaitTermination(); }
public void run() {
  System.setProperty("spark.hadoop.dfs.replication", "2");
  Logger.getLogger("org").setLevel(Level.OFF);
  Logger.getLogger("akka").setLevel(Level.OFF);

  SparkConf conf = new SparkConf().setAppName("WindowingKafkaWordCountWithFaultTolerance");
  conf.set("spark.master", PropertiesStack.getProperty("spark.master"));
  conf.set("spark.executor.memory", PropertiesStack.getProperty("spark.executor.memory"));
  conf.set("spark.driver.memory", PropertiesStack.getProperty("spark.driver.memory"));
  conf.set("spark.driver.maxResultSize", PropertiesStack.getProperty("spark.driver.maxResultSize"));

  JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(10));

  HashSet<String> topicsSet = new HashSet<>(Arrays.asList(PropertiesStack.getKafkaTopic()));
  HashMap<String, String> kafkaParams = new HashMap<>();
  kafkaParams.put("metadata.broker.list", PropertiesStack.getKafkaBootstrapServers());
  kafkaParams.put("zookeeper.connect", PropertiesStack.getZookeeperConnect());
  kafkaParams.put("auto.offset.reset", "smallest");
  kafkaParams.put("group.id", PropertiesStack.getKafkaGroupId());
  kafkaParams.put("auto.commit.enable", "false");

  // Only needed for the receiver-based KafkaUtils.createStream variant noted below
  Map<String, Integer> topicMap = new HashMap<>();
  topicMap.put(PropertiesStack.getKafkaTopic(), 1);

  // Alternative: a direct stream that starts from explicit per-partition offsets, e.g.
  // Map<TopicAndPartition, Long> fromOffsets = new HashMap<>();
  // fromOffsets.put(new TopicAndPartition(PropertiesStack.getKafkaTopic(), 1), 1000L);
  // JavaInputDStream<String> messages = KafkaUtils.createDirectStream(
  //     jssc, String.class, String.class, StringDecoder.class, StringDecoder.class,
  //     String.class, kafkaParams, fromOffsets,
  //     (MessageAndMetadata<String, String> mmd) -> mmd.message());

  // Create direct kafka stream with brokers and topics
  JavaPairInputDStream<String, String> messages =
      KafkaUtils.createDirectStream(
          jssc,
          String.class,
          String.class,
          StringDecoder.class,
          StringDecoder.class,
          kafkaParams,
          topicsSet);

  messages.count().print();

  // Receiver-based alternative:
  // KafkaUtils.createStream(jssc, PropertiesStack.getZookeeperConnect(),
  //     PropertiesStack.getKafkaGroupId(), topicMap);

  // Start the computation
  jssc.start();
  jssc.awaitTermination();
}
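run() pulls all of its settings from PropertiesStack, which is project-specific and not shown. Below is a minimal sketch of such a holder, assuming it just wraps a properties file on the classpath; the class name, file name, and property keys are placeholders chosen to match the accessors used above, not the project's real implementation.

import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

// Hypothetical stand-in for the project's PropertiesStack: a thin wrapper around a
// classpath properties file. File name and keys below are assumptions.
final class PropertiesStackSketch {
  private static final Properties PROPS = new Properties();

  static {
    try (InputStream in =
        PropertiesStackSketch.class.getClassLoader().getResourceAsStream("application.properties")) {
      if (in == null) {
        throw new IllegalStateException("application.properties not found on the classpath");
      }
      PROPS.load(in);
    } catch (IOException e) {
      throw new ExceptionInInitializerError(e);
    }
  }

  static String getProperty(String key) {
    return PROPS.getProperty(key);
  }

  static String getKafkaTopic() {
    return PROPS.getProperty("kafka.topic");
  }

  static String getKafkaBootstrapServers() {
    return PROPS.getProperty("kafka.bootstrap.servers");
  }

  static String getZookeeperConnect() {
    return PROPS.getProperty("zookeeper.connect");
  }

  static String getKafkaGroupId() {
    return PROPS.getProperty("kafka.group.id");
  }
}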
public static void main(String[] args) throws Exception {
  if (args.length < 2) {
    System.err.println(
        "Usage: JavaDirectKafkaWordCount <brokers> <topics>\n"
            + "  <brokers> is a list of one or more Kafka brokers\n"
            + "  <topics> is a list of one or more Kafka topics to consume from\n\n");
    System.exit(1);
  }

  String brokers = args[0];
  String topics = args[1];

  // Create context with a 2 second batch interval
  SparkConf sparkConf = new SparkConf().setAppName("JavaDirectKafkaWordCount");
  JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(2));

  Set<String> topicsSet = new HashSet<>(Arrays.asList(topics.split(",")));
  Map<String, String> kafkaParams = new HashMap<>();
  kafkaParams.put("metadata.broker.list", brokers);

  // Create direct kafka stream with brokers and topics
  JavaPairInputDStream<String, String> messages =
      KafkaUtils.createDirectStream(
          jssc,
          String.class,
          String.class,
          StringDecoder.class,
          StringDecoder.class,
          kafkaParams,
          topicsSet);

  // Get the lines, split them into words, count the words and print
  JavaDStream<String> lines =
      messages.map(
          new Function<Tuple2<String, String>, String>() {
            @Override
            public String call(Tuple2<String, String> tuple2) {
              return tuple2._2();
            }
          });

  // SPACE is a whitespace Pattern defined on the enclosing class,
  // typically Pattern.compile(" ")
  JavaDStream<String> words =
      lines.flatMap(
          new FlatMapFunction<String, String>() {
            @Override
            public Iterable<String> call(String x) {
              return Arrays.asList(SPACE.split(x));
            }
          });

  JavaPairDStream<String, Integer> wordCounts =
      words
          .mapToPair(
              new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String s) {
                  return new Tuple2<>(s, 1);
                }
              })
          .reduceByKey(
              new Function2<Integer, Integer, Integer>() {
                @Override
                public Integer call(Integer i1, Integer i2) {
                  return i1 + i2;
                }
              });
  wordCounts.print();

  // Start the computation
  jssc.start();
  jssc.awaitTermination();
}
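To exercise the word count end to end, something has to write lines into the topic the job consumes. A small test producer using the standard kafka-clients API is sketched below; the broker address, topic name, and message text are placeholders, not values taken from the example above.

import java.util.Properties;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

// Hypothetical test producer for feeding JavaDirectKafkaWordCount
public class WordCountTestProducer {
  public static void main(String[] args) {
    Properties props = new Properties();
    props.put("bootstrap.servers", "localhost:9092");
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

    try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
      for (int i = 0; i < 100; i++) {
        // Each message is a short sentence; the streaming job splits it on spaces
        producer.send(new ProducerRecord<>("wordcount-topic", "the quick brown fox " + i));
      }
      // close() (via try-with-resources) flushes any buffered records
    }
  }
}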