public static void main(String[] args) {
  if (args.length < 4) {
    System.err.println("Usage: PDCKafkaConsumer <zkQuorum> <group> <topics> <numThreads>");
    System.exit(1);
  }

  String zkQuorum = args[0];
  String kfGrp = args[1];
  String[] topics = args[2].split(",");
  int numThreads = Integer.valueOf(args[3]);

  Map<String, Integer> topicMap = new HashMap<String, Integer>();
  for (String topic : topics) {
    topicMap.put(topic, numThreads);
  }

  SparkConf conf = new SparkConf().setAppName("PDCKafkaConsumer");
  conf.set("spark.ui.port", "4040");

  JavaStreamingContext ctx = new JavaStreamingContext(conf, new Duration(10000));
  JavaPairReceiverInputDStream<String, String> kfStream =
      KafkaUtils.createStream(ctx, zkQuorum, kfGrp, topicMap);

  kfStream.saveAsHadoopFiles(
      "/phasor/pmu/pdc", "in", Text.class, Text.class, TextOutputFormat.class);

  ctx.start();
  ctx.awaitTermination();
}
public static void main(String[] args) {
  // Create a Spark context.
  SparkConf conf = new SparkConf().setAppName("Activity").set("spark.eventLog.enabled", "true");
  JavaSparkContext sc = new JavaSparkContext(conf);
  JavaStreamingContext jssc = new JavaStreamingContext(sc, STREAM_INTERVAL);

  String TOPIC = "activityevent";
  String zkQuorum = "localhost:2181";
  String group = "1";

  Map<String, Integer> topicMap = new HashMap<String, Integer>();
  topicMap.put(TOPIC, 1);

  JavaPairReceiverInputDStream<String, String> messages =
      KafkaUtils.createStream(jssc, zkQuorum, group, topicMap);
  // messages.print();

  // Keep only the message payload; the Kafka key is ignored.
  JavaDStream<String> activitydatastream =
      messages.map(
          new Function<Tuple2<String, String>, String>() {
            @Override
            public String call(Tuple2<String, String> tuple2) {
              return tuple2._2();
            }
          });

  final Long teamWindowDurationMs = Durations.minutes(1).milliseconds();

  JavaDStream<Activity> ActivityEntryDStream = activitydatastream.map(Activity::parseFromLine);

  JavaPairDStream<WithTimestamp<String>, Double> ActivityWindowDStream =
      ActivityEntryDStream
          .mapToPair(
              windows ->
                  new Tuple2<>(
                      WithTimestamp.create(
                          windows.getActivity(),
                          // Apply a fixed window by rounding the timestamp down to the
                          // nearest multiple of the window size.
                          (convertMillsecs(windows.getTimestamp()) / teamWindowDurationMs)
                              * teamWindowDurationMs),
                      windows.getXaxis()))
          .reduceByKey(SUM_REDUCER);

  ActivityWindowDStream.print();

  jssc.start();
  jssc.awaitTermination();
  // jssc.close();
  sc.stop();
  sc.close();
}
public static void main(String[] args) throws IOException, ParseException {
  KafkaConsumerHelper helper = new KafkaConsumerHelper(args);
  JavaStreamingContext jssc = null;
  try {
    jssc = helper.createJavaStreamingContext(APP_NAME);
    JavaPairReceiverInputDStream<String, String> rs = helper.createReceiverStream(jssc);

    // Hadoop configuration for writing the stream to a local single-node HDFS.
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", "hdfs://localhost:9000");
    conf.set("dfs.replication", "1");
    conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

    // Key and value classes match the <String, String> Kafka stream.
    rs.saveAsNewAPIHadoopFiles(
        "prefix", "txt", String.class, String.class, TextOutputFormat.class, conf);

    // rs.saveAsHadoopFiles("qwe_prefix_", "_qwe_suffix", );
    // JavaDStream<Text> map = rs.map(t -> new Text(t._2()));
    // rs.foreachRDD(new VoidFunction<JavaPairRDD<String, String>>() {
    //   @Override
    //   public void call(JavaPairRDD<String, String> pair) throws Exception {
    //     pair.saveAsHadoopFile();
    //   }
    // });

    Utils.consolog("start...awaitTermination... (type 'Q' to finish)");
    helper.startAndWait(jssc);
  } catch (Exception e) {
    e.printStackTrace();
  } finally {
    if (jssc != null) {
      Utils.consolog("stopping...closing...");
      helper.stopAndClose(jssc);
      System.out.println("~~~~~~~~~~~~~~~~~~~~~~kafkaStream.saveAsHadoopFiles");
    }
    Utils.consolog("~~ DONE ~~");
  }
}
/**
 * We get pairs of {@code <key, message>}: the key is a simple UUID and the message format is
 * {@code <producer-id>_<msg-value>}. Ignore the keys and work on the messages: split each
 * message on "_", create a new set of pairs of the form {@code <producer-id, msg-value>}, and
 * count the messages per producer.
 */
protected static void countMessagesPerProducer(
    JavaPairReceiverInputDStream<String, String> messages) {
  messages.foreachRDD(
      new VoidFunction2<JavaPairRDD<String, String>, Time>() {
        @Override
        public void call(JavaPairRDD<String, String> pairs, Time time) throws Exception {
          if (pairs.count() > 0) {
            Utils.consolog("outer ==> pairs.count=[" + pairs.count() + "] time=[" + time + "]");

            // Re-key each record by producer id, keeping only the message value.
            JavaPairRDD<String, String> messageAndProducerId =
                pairs.<String, String>mapToPair(
                    new PairFunction<Tuple2<String, String>, String, String>() {
                      @Override
                      public Tuple2<String, String> call(Tuple2<String, String> t)
                          throws Exception {
                        String[] split = t._2().split("_");
                        String producerId = split[0];
                        String msgValue = split[1].trim();
                        return new Tuple2<>(producerId, msgValue);
                      }
                    });

            // Count messages per producer id and log them in ascending numeric key order.
            Map<String, Object> counters = messageAndProducerId.countByKey();
            if (!counters.isEmpty()) {
              StringBuilder buf = new StringBuilder();
              buf.append("{ ");
              ArrayList<String> sortedKeys = new ArrayList<>(counters.keySet());
              sortedKeys.sort((o1, o2) -> Integer.valueOf(o1).compareTo(Integer.valueOf(o2)));
              for (String key : sortedKeys) {
                Object value = counters.get(key);
                buf.append("(" + key + "," + value + "),");
              }
              buf.append(" }");
              Utils.consolog("inner ==> " + buf.toString());
            }
          }
        }
      });
}
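A minimal driver sketch for the helper above, assuming the same receiver-based KafkaUtils.createStream API used in the other examples; the application name, batch interval, topic, ZooKeeper quorum, and consumer group below are illustrative placeholders, not values taken from this code.

public static void main(String[] args) {
  // Hypothetical wiring for countMessagesPerProducer; all connection details are placeholders.
  SparkConf conf = new SparkConf().setAppName("ProducerMessageCounter");
  JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));

  Map<String, Integer> topicMap = new HashMap<String, Integer>();
  topicMap.put("producer-topic", 1); // placeholder topic, one receiver thread

  // Keys are UUIDs, values follow the "<producer-id>_<msg-value>" format described above.
  JavaPairReceiverInputDStream<String, String> messages =
      KafkaUtils.createStream(jssc, "localhost:2181", "counter-group", topicMap);

  countMessagesPerProducer(messages);

  jssc.start();
  jssc.awaitTermination();
}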
public static void main(String[] args) {
  if (args.length < 4) {
    System.err.println("Usage: JavaKafkaWordCount <zkQuorum> <group> <topics> <numThreads>");
    System.exit(1);
  }

  StreamingExamples.setStreamingLogLevels();

  // SparkConf sparkConf = new SparkConf().setAppName("JavaKafkaWordCount");
  // sparkConf.setMaster("spark://60f81dc6426c:7077");
  // SparkConf sparkConf =
  //     new SparkConf().setAppName("JavaKafkaWordCount").setMaster("spark://60f81dc6426c:7077");

  // Create the context with a 2 second batch interval, running locally with four threads.
  JavaStreamingContext jssc =
      new JavaStreamingContext("local[4]", "JavaKafkaWordCount", new Duration(2000));

  int numThreads = Integer.parseInt(args[3]);
  Logger.getLogger("org").setLevel(Level.OFF);
  Logger.getLogger("akka").setLevel(Level.OFF);

  Map<String, Integer> topicMap = new HashMap<String, Integer>();
  String[] topics = args[2].split(",");
  for (String topic : topics) {
    topicMap.put(topic, numThreads);
  }
  /* for (String t : topic) { topicMap.put(t, new Integer(3)); } */

  // NotSerializable notSerializable = new NotSerializable();
  // JavaRDD<String> rdd = sc.textFile("/tmp/myfile");
  // rdd.map(s -> notSerializable.doSomething(s)).collect();

  JavaPairReceiverInputDStream<String, String> messages =
      KafkaUtils.createStream(jssc, args[0], args[1], topicMap);
  // JavaPairReceiverInputDStream<String, String> kafkaStream =
  //     KafkaUtils.createStream(jssc, "localhost:2181", "streamingContext", topicMap);
  System.out.println("Connection !!!!");

  /* JavaDStream<String> data = messages.map(new Function<Tuple2<String, String>, String>() {
       public String call(Tuple2<String, String> message) { return message._2(); }
     }); */

  // Keep only the message value from each (key, value) Kafka record.
  JavaDStream<String> lines =
      messages.map(
          new Function<Tuple2<String, String>, String>() {
            @Override
            public String call(Tuple2<String, String> tuple2) {
              return tuple2._2();
            }
          });

  // Split each line into words.
  JavaDStream<String> words =
      lines.flatMap(
          new FlatMapFunction<String, String>() {
            @Override
            public Iterable<String> call(String x) {
              return Lists.newArrayList(SPACE.split(x));
            }
          });

  // Classic word count: map each word to (word, 1) and sum the counts per word.
  JavaPairDStream<String, Integer> wordCounts =
      words
          .mapToPair(
              new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String s) {
                  return new Tuple2<String, Integer>(s, 1);
                }
              })
          .reduceByKey(
              new Function2<Integer, Integer, Integer>() {
                @Override
                public Integer call(Integer i1, Integer i2) {
                  return i1 + i2;
                }
              });

  wordCounts.print();
  jssc.start();
  jssc.awaitTermination();
}