public static void main(String[] args) { String master = System.getenv("MASTER"); if (master == null) { master = "local[2]"; } SparkConf conf = new SparkConf().setAppName("Voter Application").setMaster(master); Logger.getLogger("org").setLevel(Level.ERROR); Logger.getLogger("akka").setLevel(Level.ERROR); final Long batch_duration = Long.valueOf(args[0]); JavaStreamingContext jssc = new JavaStreamingContext(conf, new Duration(Integer.valueOf(args[0]))); jssc.checkpoint("."); JavaReceiverInputDStream<String> votes = jssc.receiverStream(new Voter("localhost", 6789)); // transform text line stream to PhoneCall stream JavaDStream<PhoneCall> phoneCalls = votes.map( new Function<String, PhoneCall>() { public PhoneCall call(String s) { return getPhoneCall(s); } }); JavaDStream<Long> counts = votes.count(); counts.print(); // create updateFunction which is used to update the total call count for each phone number Function2<List<Integer>, Optional<Integer>, Optional<Integer>> updateFunction = new Function2<List<Integer>, Optional<Integer>, Optional<Integer>>() { public Optional<Integer> call(List<Integer> values, Optional<Integer> state) { // add the new values with the previous running count to get the // new count Integer sum = 0; for (Integer i : values) { sum += i; } Integer newSum = sum + state.or(0); return Optional.of(newSum); } }; // JavaPairDStream<Long, Integer> calls = phoneCalls.mapToPair( new PairFunction<PhoneCall, Long, Integer>() { public Tuple2<Long, Integer> call(PhoneCall x) { return new Tuple2<Long, Integer>(x.phoneNumber, 1); } }); // generate the accumulated count for phone numbers final JavaPairDStream<Long, Integer> callNumberCounts = calls.updateStateByKey(updateFunction); // callNumberCounts.print(); JavaPairDStream<Long, PhoneCall> pairVotes = phoneCalls.mapToPair( new PairFunction<PhoneCall, Long, PhoneCall>() { public Tuple2<Long, PhoneCall> call(PhoneCall call) throws Exception { return new Tuple2<Long, PhoneCall>(call.voteId, call); } }); // generate the validate phone numbers, which is still allowed to send vote JavaPairDStream<Long, Integer> allowedCalls = callNumberCounts.filter( new Function<Tuple2<Long, Integer>, Boolean>() { public Boolean call(Tuple2<Long, Integer> v1) throws Exception { if (v1._2() > Voter.MAX_VOTES) return false; return true; } }); // allowedCalls.print(); // get validate contestant phone calls JavaDStream<PhoneCall> validContestantPhoneCalls = phoneCalls.filter( new Function<PhoneCall, Boolean>() { public Boolean call(PhoneCall call) { if (call.contestantNumber > Voter.NUM_CONTESTANTS) return false; return true; } }); JavaPairDStream<Long, PhoneCall> anotherTemporyPhoneCalls = validContestantPhoneCalls.mapToPair( new PairFunction<PhoneCall, Long, PhoneCall>() { public Tuple2<Long, PhoneCall> call(PhoneCall x) { return new Tuple2<Long, PhoneCall>(x.phoneNumber, x); } }); // get validate phone call records JavaPairDStream<Long, Tuple2<PhoneCall, Integer>> validatePhoneCalls = anotherTemporyPhoneCalls.join(allowedCalls); // validatePhoneCalls.print(); JavaDStream<PhoneCall> validateCalls = validatePhoneCalls.transform( new Function<JavaPairRDD<Long, Tuple2<PhoneCall, Integer>>, JavaRDD<PhoneCall>>() { public JavaRDD<PhoneCall> call(JavaPairRDD<Long, Tuple2<PhoneCall, Integer>> v1) throws Exception { JavaRDD<PhoneCall> item = v1.map( new Function<Tuple2<Long, Tuple2<PhoneCall, Integer>>, PhoneCall>() { public PhoneCall call(Tuple2<Long, Tuple2<PhoneCall, Integer>> validItem) throws Exception { return validItem._2()._1(); } }); return item; } }); // validateCalls.print(); // save all votes with redis validateCalls.foreachRDD( new Function<JavaRDD<PhoneCall>, Void>() { public Void call(JavaRDD<PhoneCall> rdd) throws Exception { rdd.foreach( new VoidFunction<PhoneCall>() { public void call(PhoneCall call) throws Exception { // System.out.println(call.toString()); String key = String.valueOf(call.voteId); String value = call.getContent(); // save <key,value> using redis JedisPool pool = new JedisPool(new JedisPoolConfig(), "localhost"); Jedis jedis = pool.getResource(); try { jedis.set(key, value); } finally { if (null != jedis) { jedis.close(); } } /// ... when closing your application: pool.destroy(); } }); return null; } }); // validate calls JavaPairDStream<Integer, Integer> contestantVotes = validateCalls.mapToPair( new PairFunction<PhoneCall, Integer, Integer>() { public Tuple2<Integer, Integer> call(PhoneCall x) { return new Tuple2<Integer, Integer>(x.contestantNumber, 1); } }); // use window to get generate leaderboard Integer size = Integer.valueOf(args[1]); Integer slide = Integer.valueOf(args[2]); JavaDStream<PhoneCall> windowCalls = validateCalls.window(new Duration(size), new Duration(slide)); // windowCalls.print(); // generate window contestant count JavaPairDStream<Integer, Integer> windowContestantNums = windowCalls.mapToPair( new PairFunction<PhoneCall, Integer, Integer>() { public Tuple2<Integer, Integer> call(PhoneCall x) { return new Tuple2<Integer, Integer>(x.contestantNumber, 1); } }); JavaPairDStream<Integer, Integer> windContestantCounts = windowContestantNums.reduceByKey( new Function2<Integer, Integer, Integer>() { public Integer call(Integer i1, Integer i2) throws Exception { return i1 + i2; } }); windContestantCounts.print(); // generate the accumulated count for contestants JavaPairDStream<Integer, Integer> totalContestantCounts = contestantVotes.updateStateByKey(updateFunction); // used for sorting PairFunction<Tuple2<Integer, Integer>, Integer, Integer> swapFunction = new PairFunction<Tuple2<Integer, Integer>, Integer, Integer>() { public Tuple2<Integer, Integer> call(Tuple2<Integer, Integer> in) { return in.swap(); } }; JavaPairDStream<Integer, Integer> swappedTotalContestantCounts = totalContestantCounts.mapToPair(swapFunction); JavaPairDStream<Integer, Integer> sortedTotalContestantCounts = swappedTotalContestantCounts.transformToPair( new Function<JavaPairRDD<Integer, Integer>, JavaPairRDD<Integer, Integer>>() { public JavaPairRDD<Integer, Integer> call(JavaPairRDD<Integer, Integer> in) throws Exception { return in.sortByKey(false); } }); sortedTotalContestantCounts.print(); // make some statistics phoneCalls.foreachRDD( new Function<JavaRDD<PhoneCall>, Void>() { public Void call(JavaRDD<PhoneCall> rdd) throws Exception { Long count = rdd.count(); // System.out.println( "count : " + count ); Double throughput = (count.doubleValue() * 1000 / batch_duration.doubleValue()); System.out.println("Current rate = " + throughput + " records / second"); XMemcachedClientBuilder builder = new XMemcachedClientBuilder(AddrUtil.getAddresses("localhost:11211")); XMemcachedClient client = (XMemcachedClient) builder.build(); client.setPrimitiveAsString(true); Long currentTimeStamp = System.currentTimeMillis(); // System.out.println("End time: " + currentTimeStamp); client.add(currentTimeStamp.toString(), 0, throughput); return null; } }); jssc.start(); // Start the computation jssc.awaitTermination(); // Wait for the computation to terminate }
public static void main(String[] args) { if (args.length < 5) { System.out.println( "Usage: StreamProducerJava <infinispan_host> <twitter4j.oauth.consumerKey> <twitter4j.oauth.consumerSecret> <twitter4j.oauth.accessToken> <twitter4j.oauth.accessTokenSecret>"); System.exit(1); } String infinispanHost = args[0]; System.setProperty("twitter4j.oauth.consumerKey", args[1]); System.setProperty("twitter4j.oauth.consumerSecret", args[2]); System.setProperty("twitter4j.oauth.accessToken", args[3]); System.setProperty("twitter4j.oauth.accessTokenSecret", args[4]); // Reduce the log level in the driver Logger.getLogger("org").setLevel(Level.WARN); SparkConf conf = new SparkConf().setAppName("spark-infinispan-stream-producer-java"); // Create the streaming context JavaStreamingContext javaStreamingContext = new JavaStreamingContext(conf, Seconds.apply(1)); // Populate infinispan properties Properties infinispanProperties = new Properties(); infinispanProperties.put("infinispan.client.hotrod.server_list", infinispanHost); JavaReceiverInputDStream<Status> twitterDStream = TwitterUtils.createStream(javaStreamingContext); // Transform from twitter4j.Status to our domain model org.infinispan.spark.demo.twitter.Tweet JavaDStream<Tuple2<Long, Tweet>> kvPair = twitterDStream.map( status -> new Tuple2<>( status.getId(), new Tweet( status.getId(), status.getUser().getScreenName(), Optional.ofNullable(status.getPlace()) .map(Place::getCountry) .orElseGet(() -> "N/A"), status.getRetweetCount(), status.getText()))); // Write the stream to infinispan InfinispanJavaDStream.writeToInfinispan(kvPair, infinispanProperties); // Create InfinispanInputDStream JavaInputDStream<Tuple3<Long, Tweet, ClientEvent.Type>> infinispanInputDStream = InfinispanJavaDStream.createInfinispanInputDStream( javaStreamingContext, MEMORY_ONLY(), infinispanProperties); // Apply a transformation to the RDDs to aggregate by country JavaPairDStream<String, Integer> countryDStream = infinispanInputDStream.transformToPair( rdd -> { return rdd.filter(ev -> !ev._2().getCountry().equals("N/A")) .mapToPair(event -> new Tuple2<>(event._2().getCountry(), 1)) .reduceByKey((a, b) -> a + b); }); // Since we are interested in the last 60 seconds only, we restrict the DStream by window, // collapsing all the RDDs: JavaPairDStream<String, Integer> lastMinuteStream = countryDStream.reduceByKeyAndWindow((a, b) -> a + b, new Duration(60 * 1000)); lastMinuteStream.foreachRDD( (rdd, time) -> { System.out.format("---------- %s ----------\n", time.toString()); List<Tuple2<String, Integer>> results = rdd.collect(); results .stream() .sorted((o1, o2) -> o2._2().compareTo(o1._2())) .forEach(t -> System.out.format("[%s,%d]\n", t._1(), t._2())); return null; }); // Start the processing javaStreamingContext.start(); javaStreamingContext.awaitTermination(); }
public static void main(String[] args) { Logger logger = Logger.getRootLogger(); logger.setLevel(Level.OFF); String consumerKey = "JqQ1lAWg90PVD9U8XoDWedCm8"; String consumerSecret = "QaUe7V9HuYQvC031MVqpUuuP2OjieI0BBDEHLpFOR221zjQ0xp"; String accessToken = "3299869044-UVd8CwTfnDgcGFGPro2yGXKWhArKtXRxC6iekmH"; String accessTokenSecret = "3XtGQi1naI1V9wCVs2aQgEeVWr65vXDczOwGvqa3iGlEG"; System.setProperty("twitter4j.oauth.consumerKey", consumerKey); System.setProperty("twitter4j.oauth.consumerSecret", consumerSecret); System.setProperty("twitter4j.oauth.accessToken", accessToken); System.setProperty("twitter4j.oauth.accessTokenSecret", accessTokenSecret); String[] filters = {"bulling", "bullied", "bulling", "bullyed", "bully", "teased"}; SparkConf sparkConf = new SparkConf().setAppName("bullyhunter"); System.out.println("Started bullyhunter..."); JavaStreamingContext sc = new JavaStreamingContext(sparkConf, Durations.seconds(2)); JavaReceiverInputDStream<Status> stream = TwitterUtils.createStream(sc, filters); JavaDStream<String> text = stream.map( new Function<Status, String>() { public String call(Status status) { // String msg = status.getText(); // String filtered_msg = Enrichment.filter(msg); // if (filtered_msg == null) { // return null; // } // TweetRecord tr = new TweetRecord(); // tr.setMsg(filtered_msg); // //tr.setGeo(status.getGeoLocation().getLatitude()); // String fullName = status.getPlace().getFullName(); // if (fullName == null) // return null; // String[] fields = fullName.spilt(DELIMITER); // tr.setCity(fullName.split()); String msg = status.getText(); double ind = Classification.classifyTweet(msg); if (ind > 0) { return status.getText(); } else { return null; } } }); // text = text.filter(new Function<String, Boolean>() { // public Boolean call(String msg) { // boolean containKeyword = false; // String lowerCase = msg.toLowerCase(); // for (String k : keywords) // if (lowerCase.contains(k)) { // containKeyword = true; // break; // } // if (containKeyword == true && lowerCase.contains("bull") // && !lowerCase.contains("RT")) { // return true; // } // return false; // } // // }); text = text.filter( new Function<String, Boolean>() { public Boolean call(String msg) { return (msg == null) ? false : true; } }); text.print(); sc.start(); sc.awaitTermination(); }