public static void main(String[] args) { SparkConf conf = new SparkConf().setAppName("Example").setMaster("local[2]"); // Create a StreamingContext with a 1-second batch size from a SparkConf JavaStreamingContext jssc = new JavaStreamingContext( conf, Durations.seconds(10)); // Create a DStream from all the input on port 7777 JavaDStream<String> lines = jssc.socketTextStream("localhost", 7777); // Filter our DStream for lines with "error" // Split up into words. final JavaDStream<String> wordDStream = lines.flatMap( new FlatMapFunction<String, String>() { public Iterable<String> call(String x) { return Arrays.asList(x.split(" ")); } }); final JavaPairDStream<String, Integer> wordPairDStream = wordDStream.mapToPair( new PairFunction<String, String, Integer>() { @Override public Tuple2<String, Integer> call(String s) throws Exception { return new Tuple2<String, Integer>(s, 1); } }); final JavaPairDStream<String, Integer> totalWordPairDStream = wordPairDStream.reduceByKeyAndWindow( new Function2<Integer, Integer, Integer>() { @Override public Integer call(Integer a, Integer b) throws Exception { return a + b; } }, Durations.seconds(30), Durations.seconds(20)); // totalWordPairDStream.foreach(new Function2<JavaPairRDD<String, Integer>, Time, Void>() // { // @Override // public Void call(JavaPairRDD<String, Integer> wordPairRDD, Time time) throws // Exception { // // final List<Tuple2<String, Integer>> collect = wordPairRDD.collect(); // for (Tuple2<String, Integer> t : collect) { // System.out.println("the value is t" + t._1() + "," + t._2()); // } // // return null; // } // }); totalWordPairDStream.print(); // Start our streaming context and wait for it to "finish" jssc.start(); // Wait for the job to finish jssc.awaitTermination(); }
public static void main(String[] args) { SparkConf conf = new SparkConf() .setMaster("local[4]") .setAppName("SparkStreamingPullDataFromFlume for Java"); JavaStreamingContext jsc = new JavaStreamingContext(conf, Durations.seconds(30)); // JavaReceiverInputDStream<SparkFlumeEvent> lines = FlumeUtils.createStream(jsc,"master1", // 9999); flume push data to Spark Streaming JavaReceiverInputDStream<SparkFlumeEvent> lines = FlumeUtils.createPollingStream( jsc, "master1", 9999); // Spark Streaming pull data from flume JavaDStream<String> words = lines.flatMap( new FlatMapFunction<SparkFlumeEvent, String>() { private static final long serialVersionUID = 1L; @Override public Iterable<String> call(SparkFlumeEvent event) throws Exception { String line = new String(event.event().getBody().array()); return Arrays.asList(line.split(" ")); } }); JavaPairDStream<String, Integer> pairs = words.mapToPair( new PairFunction<String, String, Integer>() { private static final long serialVersionUID = 1L; @Override public Tuple2<String, Integer> call(String word) throws Exception { return new Tuple2<String, Integer>(word, 1); } }); JavaPairDStream<String, Integer> wordsCount = pairs.reduceByKey( new Function2< Integer, Integer, Integer>() { // 对相同的Key,进行Value的累计(包括Local和Reducer级别同时Reduce) private static final long serialVersionUID = 1L; @Override public Integer call(Integer v1, Integer v2) throws Exception { return v1 + v2; } }); wordsCount.print(); jsc.start(); jsc.awaitTermination(); jsc.close(); }
public static void main(String[] args) throws Exception {
  if (args.length < 1) {
    System.err.println("Usage: JavaQueueStream <master>");
    System.exit(1);
  }

  StreamingExamples.setStreamingLogLevels();

  // Create the context
  JavaStreamingContext ssc =
      new JavaStreamingContext(
          args[0],
          "QueueStream",
          new Duration(1000),
          System.getenv("SPARK_HOME"),
          JavaStreamingContext.jarOfClass(JavaQueueStream.class));

  // Create the queue through which RDDs can be pushed to a QueueInputDStream
  Queue<JavaRDD<Integer>> rddQueue = new LinkedList<JavaRDD<Integer>>();

  // Create and push some RDDs into the queue
  List<Integer> list = Lists.newArrayList();
  for (int i = 0; i < 1000; i++) {
    list.add(i);
  }
  for (int i = 0; i < 30; i++) {
    rddQueue.add(ssc.sparkContext().parallelize(list));
  }

  // Create the QueueInputDStream and use it to do some processing
  JavaDStream<Integer> inputStream = ssc.queueStream(rddQueue);
  JavaPairDStream<Integer, Integer> mappedStream =
      inputStream.mapToPair(
          new PairFunction<Integer, Integer, Integer>() {
            @Override
            public Tuple2<Integer, Integer> call(Integer i) {
              return new Tuple2<Integer, Integer>(i % 10, 1);
            }
          });
  JavaPairDStream<Integer, Integer> reducedStream =
      mappedStream.reduceByKey(
          new Function2<Integer, Integer, Integer>() {
            @Override
            public Integer call(Integer i1, Integer i2) {
              return i1 + i2;
            }
          });

  reducedStream.print();
  ssc.start();
  ssc.awaitTermination();
}
public static void main(String[] args) throws IOException {
  SparkConf conf = new SparkConf().setAppName("facebookStream");
  JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(15));

  // Monitor a local directory for new files and stream their contents
  JavaDStream<String> stream =
      jssc.textFileStream(
          "/Users/sboynenpalli/Desktop/shashankOfficeMackbook/sparkinputfolder");
  stream.print(10);

  ProcessVideoStreamData processData = new ProcessVideoStreamData();
  processData.processData(stream);

  jssc.start();
  jssc.awaitTermination();
}
private static JavaStreamingContext createContext(String input, String checkpointDirectory) {
  System.out.println("Creating new context");

  // final File outputFile = new File("/flume_recover");
  // if (outputFile.exists()) {
  //   outputFile.delete();
  // }

  SparkConf conf =
      new SparkConf()
          .setMaster("local[2]")
          .setAppName("Stream File")
          .set("spark.driver.allowMultipleContexts", "true");
  conf.set("spark.serializer", KryoSerializer.class.getName());
  conf.set("es.index.auto.create", "true");
  conf.set("es.nodes", "10.26.1.134:9200");
  conf.set("es.resource", "flume/test");
  conf.set("es.input.json", "true");

  JavaStreamingContext jssc = new JavaStreamingContext(conf, new Duration(3000));
  jssc.checkpoint(checkpointDirectory);

  JavaDStream<String> textFile = jssc.textFileStream(input);
  JavaDStream<String> jsonStr =
      textFile.map(
          new Function<String, String>() {
            public String call(String arg0) throws Exception {
              Matcher m = log.matcher(arg0);
              if (m.find()) {
                return transferJson(m);
              }
              return "";
            }
          });
  jsonStr.print();

  jsonStr.foreach(
      new Function<JavaRDD<String>, Void>() {
        public Void call(JavaRDD<String> arg0) throws Exception {
          // Check for null before calling isEmpty() to avoid a NullPointerException
          if (arg0 != null && !arg0.isEmpty()) {
            JavaEsSpark.saveToEs(arg0, "flume/test");
          }
          return null;
        }
      });

  return jssc;
}
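// Note: createContext() is written as a factory so it can be paired with checkpoint recovery.
// A minimal sketch, assuming the Spark 1.x JavaStreamingContextFactory API (Spark 2.x replaces
// it with a Function0<JavaStreamingContext> overload); the input path and checkpoint directory
// shown here are placeholders.
public static void main(String[] args) throws Exception {
  final String input = "/flume_input";
  final String checkpointDirectory = "/flume_checkpoint";
  JavaStreamingContext jssc =
      JavaStreamingContext.getOrCreate(
          checkpointDirectory,
          new JavaStreamingContextFactory() {
            @Override
            public JavaStreamingContext create() {
              // Only called when no checkpoint exists; otherwise the context is rebuilt from it
              return createContext(input, checkpointDirectory);
            }
          });
  jssc.start();
  jssc.awaitTermination();
}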
public static void main(String[] args) {
  // Create a Spark context.
  SparkConf conf =
      new SparkConf().setAppName("Activity").set("spark.eventLog.enabled", "true");
  JavaSparkContext sc = new JavaSparkContext(conf);
  JavaStreamingContext jssc = new JavaStreamingContext(sc, STREAM_INTERVAL);

  String TOPIC = "activityevent";
  String zkQuorum = "localhost:2181";
  String group = "1";
  Map<String, Integer> topicMap = new HashMap<String, Integer>();
  topicMap.put(TOPIC, 1);

  JavaPairReceiverInputDStream<String, String> messages =
      KafkaUtils.createStream(jssc, zkQuorum, group, topicMap);
  // messages.print();

  JavaDStream<String> activitydatastream =
      messages.map(
          new Function<Tuple2<String, String>, String>() {
            @Override
            public String call(Tuple2<String, String> tuple2) {
              return tuple2._2();
            }
          });

  final Long teamWindowDurationMs = Durations.minutes(1).milliseconds();
  JavaDStream<Activity> ActivityEntryDStream = activitydatastream.map(Activity::parseFromLine);
  JavaPairDStream<WithTimestamp<String>, Double> ActivityWindowDStream =
      ActivityEntryDStream.mapToPair(
              windows ->
                  new Tuple2<>(
                      WithTimestamp.create(
                          windows.getActivity(),
                          // Apply a fixed window by rounding the timestamp down to the nearest
                          // multiple of the window size
                          (convertMillsecs(windows.getTimestamp()) / teamWindowDurationMs)
                              * teamWindowDurationMs),
                      windows.getXaxis()))
          .reduceByKey(SUM_REDUCER);

  ActivityWindowDStream.print();

  jssc.start();
  jssc.awaitTermination();
  // jssc.close();
  sc.stop();
  sc.close();
}
@Test @SuppressWarnings("unchecked") public void testJavaDStreamFunctions() throws Exception { JavaDStream<String> mockJavaDStream = mock(JavaDStream.class); DStream<String> mockDStream = mock(DStream.class); when(mockJavaDStream.dstream()).thenReturn(mockDStream); GemFireJavaDStreamFunctions wrapper = javaFunctions(mockJavaDStream); assertTrue(mockDStream == wrapper.dsf.dstream()); Tuple3<SparkContext, GemFireConnectionConf, GemFireConnection> tuple3 = createCommonMocks(); PairFunction<String, String, Integer> mockPairFunc = mock(PairFunction.class); String regionPath = "testregion"; wrapper.saveToGemfire(regionPath, mockPairFunc, tuple3._2()); verify(tuple3._2()).getConnection(); verify(tuple3._3()).validateRegion(regionPath); verify(mockDStream).foreachRDD(any(Function1.class)); }
@Override
public void configureOutput(JavaDStream<AggregateResult> aggregatedOutputs) throws IOException {
  aggregatedOutputs.foreachRDD(
      (aggregateResultRDD, time) -> {
        aggregateResultRDD.foreachPartition(
            aggregateResultIterator ->
                writeToPubsubWithRetry(
                    time, TaskContext.getPartitionId(), aggregateResultIterator, 3));
        return null;
      });
}
public static void main(String[] args) {
  if (args.length != 3) {
    System.err.println("Usage: JavaFlumeEventCount <master> <host> <port>");
    System.exit(1);
  }

  String master = args[0];
  String host = args[1];
  int port = Integer.parseInt(args[2]);

  Duration batchInterval = new Duration(2000);
  JavaStreamingContext ssc =
      new JavaStreamingContext(
          master,
          "FlumeEventCount",
          batchInterval,
          System.getenv("SPARK_HOME"),
          JavaStreamingContext.jarOfClass(JavaFlumeEventCount.class));

  // Bind the Flume receiver to the host passed on the command line
  JavaDStream<SparkFlumeEvent> flumeStream = FlumeUtils.createStream(ssc, host, port);

  flumeStream
      .count()
      .map(
          new Function<Long, String>() {
            @Override
            public String call(Long in) {
              return "Received " + in + " flume events.";
            }
          })
      .print();

  ssc.start();
  // Keep the driver alive while the stream runs
  ssc.awaitTermination();
}
public static void main(String[] args) throws Exception { if (args.length < 2) { System.err.println("Usage: JavaNetworkWordCount <hostname> <port>"); System.exit(1); } StreamingExamples.setStreamingLogLevels(); // Create the context with a 1 second batch size SparkConf sparkConf = new SparkConf().setAppName("JavaSqlNetworkWordCount"); JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1)); // Create a JavaReceiverInputDStream on target ip:port and count the // words in input stream of \n delimited text (eg. generated by 'nc') // Note that no duplication in storage level only for running locally. // Replication necessary in distributed scenario for fault tolerance. JavaReceiverInputDStream<String> lines = ssc.socketTextStream(args[0], Integer.parseInt(args[1]), StorageLevels.MEMORY_AND_DISK_SER); JavaDStream<String> words = lines.flatMap( new FlatMapFunction<String, String>() { @Override public Iterator<String> call(String x) { return Arrays.asList(SPACE.split(x)).iterator(); } }); // Convert RDDs of the words DStream to DataFrame and run SQL query words.foreachRDD( new VoidFunction2<JavaRDD<String>, Time>() { @Override public void call(JavaRDD<String> rdd, Time time) { SparkSession spark = JavaSparkSessionSingleton.getInstance(rdd.context().getConf()); // Convert JavaRDD[String] to JavaRDD[bean class] to DataFrame JavaRDD<JavaRecord> rowRDD = rdd.map( new Function<String, JavaRecord>() { @Override public JavaRecord call(String word) { JavaRecord record = new JavaRecord(); record.setWord(word); return record; } }); Dataset<Row> wordsDataFrame = spark.createDataFrame(rowRDD, JavaRecord.class); // Register as table wordsDataFrame.createOrReplaceTempView("words"); // Do word count on table using SQL and print it Dataset<Row> wordCountsDataFrame = spark.sql("select word, count(*) as total from words group by word"); System.out.println("========= " + time + "========="); wordCountsDataFrame.show(); } }); ssc.start(); ssc.awaitTermination(); }
public static void main(String[] args) throws Exception { if (args.length < 2) { System.err.println( "Usage: JavaDirectKafkaWordCount <brokers> <topics>\n" + " <brokers> is a list of one or more Kafka brokers\n" + " <topics> is a list of one or more kafka topics to consume from\n\n"); System.exit(1); } String brokers = args[0]; String topics = args[1]; // Create context with a 2 seconds batch interval SparkConf sparkConf = new SparkConf().setAppName("JavaDirectKafkaWordCount"); JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(2)); Set<String> topicsSet = new HashSet<>(Arrays.asList(topics.split(","))); Map<String, String> kafkaParams = new HashMap<>(); kafkaParams.put("metadata.broker.list", brokers); // Create direct kafka stream with brokers and topics JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream( jssc, String.class, String.class, StringDecoder.class, StringDecoder.class, kafkaParams, topicsSet); // Get the lines, split them into words, count the words and print JavaDStream<String> lines = messages.map( new Function<Tuple2<String, String>, String>() { @Override public String call(Tuple2<String, String> tuple2) { return tuple2._2(); } }); JavaDStream<String> words = lines.flatMap( new FlatMapFunction<String, String>() { @Override public Iterable<String> call(String x) { return Arrays.asList(SPACE.split(x)); } }); JavaPairDStream<String, Integer> wordCounts = words .mapToPair( new PairFunction<String, String, Integer>() { @Override public Tuple2<String, Integer> call(String s) { return new Tuple2<>(s, 1); } }) .reduceByKey( new Function2<Integer, Integer, Integer>() { @Override public Integer call(Integer i1, Integer i2) { return i1 + i2; } }); wordCounts.print(); // Start the computation jssc.start(); jssc.awaitTermination(); }
public JavaStreamingContext createContext() {
  CassandraConnectionContext connectionContext = connectToCassandra();
  JavaDStream<String> rdd = connectionContext.getRDD();
  JavaStreamingContext sparkContext = connectionContext.getStreamingContext();
  final CassandraConnector connector = connectionContext.getCassandraConnector();
  final SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");

  // Parse each CSV line into a (taxId, Transaction) pair
  JavaDStream<Tuple2<String, Transaction>> transactionList =
      rdd.map(
          new Function<String, Tuple2<String, Transaction>>() {
            @Override
            public Tuple2<String, Transaction> call(String line) throws Exception {
              String[] columns = line.split(",");
              String taxId = columns[0];
              String name = columns[1];
              String merchant = columns[2];
              BigDecimal amount = new BigDecimal(columns[3]);
              Date transactionDate;
              try {
                transactionDate = format.parse(columns[4]);
              } catch (ParseException e) {
                e.printStackTrace();
                throw new RuntimeException(e);
              }
              String tranId = columns[5];
              System.out.println(line);
              Tuple2<String, Transaction> transaction =
                  new Tuple2<>(
                      taxId, new Transaction(name, merchant, amount, transactionDate, tranId));
              return transaction;
            }
          });
  transactionList.cache();

  final String warningsTableName = "warnings";
  try (Session session = connector.openSession()) {
    session.execute(
        String.format("DROP TABLE IF EXISTS %s.%s", getKeySpaceName(), warningsTableName));
    session.execute(
        String.format(
            "CREATE TABLE IF NOT EXISTS %s.%s (ssn text, "
                + "id uuid, amount decimal, rule text, PRIMARY KEY(ssn, id))",
            getKeySpaceName(), warningsTableName));
  }

  // Set up a warning when a user's total transaction amount exceeds a threshold
  // within a 60 second window, evaluated every 10 seconds
  JavaPairDStream<String, BigDecimal> warnings =
      transactionList
          .window(new Duration(60000), new Duration(10000))
          .mapToPair(
              new PairFunction<Tuple2<String, Transaction>, String, BigDecimal>() {
                @Override
                public Tuple2<String, BigDecimal> call(Tuple2<String, Transaction> transaction)
                    throws Exception {
                  String taxId = transaction._1();
                  BigDecimal amount = transaction._2().getAmount();
                  return new Tuple2<>(taxId, amount);
                }
              })
          .reduceByKey(
              new Function2<BigDecimal, BigDecimal, BigDecimal>() {
                @Override
                public BigDecimal call(
                    BigDecimal transactionAmount1, BigDecimal transactionAmount2)
                    throws Exception {
                  return transactionAmount1.add(transactionAmount2);
                }
              })
          .filter(
              new Function<Tuple2<String, BigDecimal>, Boolean>() {
                @Override
                public Boolean call(Tuple2<String, BigDecimal> transactionSumByTaxId)
                    throws Exception {
                  // returns true if the total is greater than 999
                  Boolean result =
                      transactionSumByTaxId._2().compareTo(new BigDecimal(999)) == 1;
                  System.out.println(
                      "tran " + transactionSumByTaxId._1() + " with amount "
                          + transactionSumByTaxId._2());
                  if (result) {
                    System.out.println(
                        "warning " + transactionSumByTaxId._1() + " has value "
                            + transactionSumByTaxId._2() + " is greater than 999");
                  }
                  return result;
                }
              });

  // Map each (taxId, total) pair to a Warning row and save it to Cassandra
  JavaDStream<Warning> mappedWarnings =
      warnings.map(
          new Function<Tuple2<String, BigDecimal>, Warning>() {
            @Override
            public Warning call(Tuple2<String, BigDecimal> warnings) throws Exception {
              Warning warning = new Warning();
              warning.setSsn(warnings._1());
              warning.setId(UUIDs.timeBased());
              warning.setAmount(warnings._2());
              warning.setRule("OVER_DOLLAR_AMOUNT");
              return warning;
            }
          });
  javaFunctions(mappedWarnings)
      .writerBuilder(getKeySpaceName(), warningsTableName, mapToRow(Warning.class))
      .saveToCassandra();

  return sparkContext;
}
@Test
public void testKafkaStream() throws InterruptedException {
  final String topic1 = "topic1";
  final String topic2 = "topic2";
  // hold a reference to the current offset ranges, so it can be used downstream
  final AtomicReference<OffsetRange[]> offsetRanges = new AtomicReference<>();

  String[] topic1data = createTopicAndSendData(topic1);
  String[] topic2data = createTopicAndSendData(topic2);

  Set<String> sent = new HashSet<>();
  sent.addAll(Arrays.asList(topic1data));
  sent.addAll(Arrays.asList(topic2data));

  Map<String, String> kafkaParams = new HashMap<>();
  kafkaParams.put("metadata.broker.list", kafkaTestUtils.brokerAddress());
  kafkaParams.put("auto.offset.reset", "smallest");

  JavaDStream<String> stream1 =
      KafkaUtils.createDirectStream(
              ssc,
              String.class,
              String.class,
              StringDecoder.class,
              StringDecoder.class,
              kafkaParams,
              topicToSet(topic1))
          .transformToPair(
              // Make sure you can get offset ranges from the rdd
              new Function<JavaPairRDD<String, String>, JavaPairRDD<String, String>>() {
                @Override
                public JavaPairRDD<String, String> call(JavaPairRDD<String, String> rdd) {
                  OffsetRange[] offsets = ((HasOffsetRanges) rdd.rdd()).offsetRanges();
                  offsetRanges.set(offsets);
                  Assert.assertEquals(topic1, offsets[0].topic());
                  return rdd;
                }
              })
          .map(
              new Function<Tuple2<String, String>, String>() {
                @Override
                public String call(Tuple2<String, String> kv) {
                  return kv._2();
                }
              });

  JavaDStream<String> stream2 =
      KafkaUtils.createDirectStream(
          ssc,
          String.class,
          String.class,
          StringDecoder.class,
          StringDecoder.class,
          String.class,
          kafkaParams,
          topicOffsetToMap(topic2, 0L),
          new Function<MessageAndMetadata<String, String>, String>() {
            @Override
            public String call(MessageAndMetadata<String, String> msgAndMd) {
              return msgAndMd.message();
            }
          });

  JavaDStream<String> unifiedStream = stream1.union(stream2);

  final Set<String> result = Collections.synchronizedSet(new HashSet<String>());
  unifiedStream.foreachRDD(
      new VoidFunction<JavaRDD<String>>() {
        @Override
        public void call(JavaRDD<String> rdd) {
          result.addAll(rdd.collect());
          for (OffsetRange o : offsetRanges.get()) {
            System.out.println(
                o.topic() + " " + o.partition() + " " + o.fromOffset() + " " + o.untilOffset());
          }
        }
      });

  ssc.start();

  long startTime = System.currentTimeMillis();
  boolean matches = false;
  while (!matches && System.currentTimeMillis() - startTime < 20000) {
    matches = sent.size() == result.size();
    Thread.sleep(50);
  }
  Assert.assertEquals(sent, result);
  ssc.stop();
}
public static void main(String[] args) { String master = System.getenv("MASTER"); if (master == null) { master = "local[2]"; } SparkConf conf = new SparkConf().setAppName("Voter Application").setMaster(master); Logger.getLogger("org").setLevel(Level.ERROR); Logger.getLogger("akka").setLevel(Level.ERROR); final Long batch_duration = Long.valueOf(args[0]); JavaStreamingContext jssc = new JavaStreamingContext(conf, new Duration(Integer.valueOf(args[0]))); jssc.checkpoint("."); JavaReceiverInputDStream<String> votes = jssc.receiverStream(new Voter("localhost", 6789)); // transform text line stream to PhoneCall stream JavaDStream<PhoneCall> phoneCalls = votes.map( new Function<String, PhoneCall>() { public PhoneCall call(String s) { return getPhoneCall(s); } }); JavaDStream<Long> counts = votes.count(); counts.print(); // create updateFunction which is used to update the total call count for each phone number Function2<List<Integer>, Optional<Integer>, Optional<Integer>> updateFunction = new Function2<List<Integer>, Optional<Integer>, Optional<Integer>>() { public Optional<Integer> call(List<Integer> values, Optional<Integer> state) { // add the new values with the previous running count to get the // new count Integer sum = 0; for (Integer i : values) { sum += i; } Integer newSum = sum + state.or(0); return Optional.of(newSum); } }; // JavaPairDStream<Long, Integer> calls = phoneCalls.mapToPair( new PairFunction<PhoneCall, Long, Integer>() { public Tuple2<Long, Integer> call(PhoneCall x) { return new Tuple2<Long, Integer>(x.phoneNumber, 1); } }); // generate the accumulated count for phone numbers final JavaPairDStream<Long, Integer> callNumberCounts = calls.updateStateByKey(updateFunction); // callNumberCounts.print(); JavaPairDStream<Long, PhoneCall> pairVotes = phoneCalls.mapToPair( new PairFunction<PhoneCall, Long, PhoneCall>() { public Tuple2<Long, PhoneCall> call(PhoneCall call) throws Exception { return new Tuple2<Long, PhoneCall>(call.voteId, call); } }); // generate the validate phone numbers, which is still allowed to send vote JavaPairDStream<Long, Integer> allowedCalls = callNumberCounts.filter( new Function<Tuple2<Long, Integer>, Boolean>() { public Boolean call(Tuple2<Long, Integer> v1) throws Exception { if (v1._2() > Voter.MAX_VOTES) return false; return true; } }); // allowedCalls.print(); // get validate contestant phone calls JavaDStream<PhoneCall> validContestantPhoneCalls = phoneCalls.filter( new Function<PhoneCall, Boolean>() { public Boolean call(PhoneCall call) { if (call.contestantNumber > Voter.NUM_CONTESTANTS) return false; return true; } }); JavaPairDStream<Long, PhoneCall> anotherTemporyPhoneCalls = validContestantPhoneCalls.mapToPair( new PairFunction<PhoneCall, Long, PhoneCall>() { public Tuple2<Long, PhoneCall> call(PhoneCall x) { return new Tuple2<Long, PhoneCall>(x.phoneNumber, x); } }); // get validate phone call records JavaPairDStream<Long, Tuple2<PhoneCall, Integer>> validatePhoneCalls = anotherTemporyPhoneCalls.join(allowedCalls); // validatePhoneCalls.print(); JavaDStream<PhoneCall> validateCalls = validatePhoneCalls.transform( new Function<JavaPairRDD<Long, Tuple2<PhoneCall, Integer>>, JavaRDD<PhoneCall>>() { public JavaRDD<PhoneCall> call(JavaPairRDD<Long, Tuple2<PhoneCall, Integer>> v1) throws Exception { JavaRDD<PhoneCall> item = v1.map( new Function<Tuple2<Long, Tuple2<PhoneCall, Integer>>, PhoneCall>() { public PhoneCall call(Tuple2<Long, Tuple2<PhoneCall, Integer>> validItem) throws Exception { return validItem._2()._1(); } }); return item; } }); 
  // validateCalls.print();

  // save all votes to redis
  validateCalls.foreachRDD(
      new Function<JavaRDD<PhoneCall>, Void>() {
        public Void call(JavaRDD<PhoneCall> rdd) throws Exception {
          rdd.foreach(
              new VoidFunction<PhoneCall>() {
                public void call(PhoneCall call) throws Exception {
                  // System.out.println(call.toString());
                  String key = String.valueOf(call.voteId);
                  String value = call.getContent();
                  // save <key, value> using redis
                  JedisPool pool = new JedisPool(new JedisPoolConfig(), "localhost");
                  Jedis jedis = pool.getResource();
                  try {
                    jedis.set(key, value);
                  } finally {
                    if (null != jedis) {
                      jedis.close();
                    }
                  }
                  // tear the pool down again (a pool per record is expensive; kept as written)
                  pool.destroy();
                }
              });
          return null;
        }
      });

  // count votes per contestant
  JavaPairDStream<Integer, Integer> contestantVotes =
      validateCalls.mapToPair(
          new PairFunction<PhoneCall, Integer, Integer>() {
            public Tuple2<Integer, Integer> call(PhoneCall x) {
              return new Tuple2<Integer, Integer>(x.contestantNumber, 1);
            }
          });

  // use a window to generate the leaderboard
  Integer size = Integer.valueOf(args[1]);
  Integer slide = Integer.valueOf(args[2]);
  JavaDStream<PhoneCall> windowCalls =
      validateCalls.window(new Duration(size), new Duration(slide));
  // windowCalls.print();

  // generate the per-window contestant counts
  JavaPairDStream<Integer, Integer> windowContestantNums =
      windowCalls.mapToPair(
          new PairFunction<PhoneCall, Integer, Integer>() {
            public Tuple2<Integer, Integer> call(PhoneCall x) {
              return new Tuple2<Integer, Integer>(x.contestantNumber, 1);
            }
          });
  JavaPairDStream<Integer, Integer> windContestantCounts =
      windowContestantNums.reduceByKey(
          new Function2<Integer, Integer, Integer>() {
            public Integer call(Integer i1, Integer i2) throws Exception {
              return i1 + i2;
            }
          });
  windContestantCounts.print();

  // generate the accumulated count for each contestant
  JavaPairDStream<Integer, Integer> totalContestantCounts =
      contestantVotes.updateStateByKey(updateFunction);

  // used for sorting: swap (contestant, count) to (count, contestant)
  PairFunction<Tuple2<Integer, Integer>, Integer, Integer> swapFunction =
      new PairFunction<Tuple2<Integer, Integer>, Integer, Integer>() {
        public Tuple2<Integer, Integer> call(Tuple2<Integer, Integer> in) {
          return in.swap();
        }
      };
  JavaPairDStream<Integer, Integer> swappedTotalContestantCounts =
      totalContestantCounts.mapToPair(swapFunction);
  JavaPairDStream<Integer, Integer> sortedTotalContestantCounts =
      swappedTotalContestantCounts.transformToPair(
          new Function<JavaPairRDD<Integer, Integer>, JavaPairRDD<Integer, Integer>>() {
            public JavaPairRDD<Integer, Integer> call(JavaPairRDD<Integer, Integer> in)
                throws Exception {
              return in.sortByKey(false);
            }
          });
  sortedTotalContestantCounts.print();

  // compute some throughput statistics
  phoneCalls.foreachRDD(
      new Function<JavaRDD<PhoneCall>, Void>() {
        public Void call(JavaRDD<PhoneCall> rdd) throws Exception {
          Long count = rdd.count();
          // System.out.println("count : " + count);
          Double throughput = (count.doubleValue() * 1000 / batch_duration.doubleValue());
          System.out.println("Current rate = " + throughput + " records / second");

          XMemcachedClientBuilder builder =
              new XMemcachedClientBuilder(AddrUtil.getAddresses("localhost:11211"));
          XMemcachedClient client = (XMemcachedClient) builder.build();
          client.setPrimitiveAsString(true);

          Long currentTimeStamp = System.currentTimeMillis();
          // System.out.println("End time: " + currentTimeStamp);
          client.add(currentTimeStamp.toString(), 0, throughput);
          return null;
        }
      });

  jssc.start(); // Start the computation
  jssc.awaitTermination(); // Wait for the computation to terminate
}
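// Note: the voter snippet above assumes a PhoneCall value class and a getPhoneCall(String)
// parser that are not shown. A minimal hypothetical sketch follows; the field names match how
// they are used above, but the "voteId,phoneNumber,contestantNumber" line format is an
// assumption about what the Voter receiver emits.
public static class PhoneCall implements java.io.Serializable {
  public long voteId;
  public long phoneNumber;
  public int contestantNumber;

  public String getContent() {
    // content stored in redis, keyed by voteId
    return phoneNumber + "," + contestantNumber;
  }
}

public static PhoneCall getPhoneCall(String line) {
  String[] fields = line.split(",");
  PhoneCall call = new PhoneCall();
  call.voteId = Long.parseLong(fields[0]);
  call.phoneNumber = Long.parseLong(fields[1]);
  call.contestantNumber = Integer.parseInt(fields[2]);
  return call;
}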
public static void main(String[] args) {
  Logger logger = Logger.getRootLogger();
  logger.setLevel(Level.OFF);

  String consumerKey = "JqQ1lAWg90PVD9U8XoDWedCm8";
  String consumerSecret = "QaUe7V9HuYQvC031MVqpUuuP2OjieI0BBDEHLpFOR221zjQ0xp";
  String accessToken = "3299869044-UVd8CwTfnDgcGFGPro2yGXKWhArKtXRxC6iekmH";
  String accessTokenSecret = "3XtGQi1naI1V9wCVs2aQgEeVWr65vXDczOwGvqa3iGlEG";
  System.setProperty("twitter4j.oauth.consumerKey", consumerKey);
  System.setProperty("twitter4j.oauth.consumerSecret", consumerSecret);
  System.setProperty("twitter4j.oauth.accessToken", accessToken);
  System.setProperty("twitter4j.oauth.accessTokenSecret", accessTokenSecret);

  String[] filters = {"bulling", "bullied", "bulling", "bullyed", "bully", "teased"};

  SparkConf sparkConf = new SparkConf().setAppName("bullyhunter");
  System.out.println("Started bullyhunter...");

  JavaStreamingContext sc = new JavaStreamingContext(sparkConf, Durations.seconds(2));
  JavaReceiverInputDStream<Status> stream = TwitterUtils.createStream(sc, filters);

  // Keep the text of tweets the classifier flags as bullying; return null otherwise
  JavaDStream<String> text =
      stream.map(
          new Function<Status, String>() {
            public String call(Status status) {
              // String msg = status.getText();
              // String filtered_msg = Enrichment.filter(msg);
              // if (filtered_msg == null) {
              //   return null;
              // }
              // TweetRecord tr = new TweetRecord();
              // tr.setMsg(filtered_msg);
              // // tr.setGeo(status.getGeoLocation().getLatitude());
              // String fullName = status.getPlace().getFullName();
              // if (fullName == null) return null;
              // String[] fields = fullName.split(DELIMITER);
              String msg = status.getText();
              double ind = Classification.classifyTweet(msg);
              if (ind > 0) {
                return status.getText();
              } else {
                return null;
              }
            }
          });

  // text = text.filter(new Function<String, Boolean>() {
  //   public Boolean call(String msg) {
  //     boolean containKeyword = false;
  //     String lowerCase = msg.toLowerCase();
  //     for (String k : keywords)
  //       if (lowerCase.contains(k)) {
  //         containKeyword = true;
  //         break;
  //       }
  //     if (containKeyword == true && lowerCase.contains("bull") && !lowerCase.contains("RT")) {
  //       return true;
  //     }
  //     return false;
  //   }
  // });

  // drop the tweets that were not classified as bullying
  text =
      text.filter(
          new Function<String, Boolean>() {
            public Boolean call(String msg) {
              return msg != null;
            }
          });

  text.print();
  sc.start();
  sc.awaitTermination();
}
public static void main(String[] args) {
  // Create the context with a 10 second batch size
  SparkConf sparkConf = new SparkConf().setAppName("Assignment");
  JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, new Duration(10000));

  // Create a JavaReceiverInputDStream on target ip:port and count the
  // words in input stream of \n delimited text (eg. generated by 'nc')
  // Note that no duplication in storage level only for running locally.
  // Replication necessary in distributed scenario for fault tolerance.
  JavaReceiverInputDStream<String> lines =
      ssc.socketTextStream(
          "localhost", Integer.parseInt("9999"), StorageLevels.MEMORY_AND_DISK_SER);
  JavaDStream<String> words =
      lines.flatMap(
          new FlatMapFunction<String, String>() {
            @Override
            public Iterable<String> call(String x) {
              List<String> allMatches = new ArrayList<String>();
              Matcher matcher = SPACE.matcher(x);
              while (matcher.find()) {
                allMatches.add(matcher.group().toLowerCase());
              }
              return Lists.newArrayList(allMatches.toArray(new String[0]));
            }
          });
  JavaPairDStream<String, Integer> wordCounts =
      words.mapToPair(
          new PairFunction<String, String, Integer>() {
            @Override
            public Tuple2<String, Integer> call(String s) {
              return new Tuple2<String, Integer>(s, 1);
            }
          });

  // Reduce function adding two integers, defined separately for clarity
  Function2<Integer, Integer, Integer> reduceFunc =
      new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) throws Exception {
          return i1 + i2;
        }
      };

  // Count words over a 30 second window, sliding every 10 seconds
  JavaPairDStream<String, Integer> windowedWordCounts =
      wordCounts.reduceByKeyAndWindow(reduceFunc, new Duration(30000), new Duration(10000));

  windowedWordCounts.print();
  ssc.start();
  ssc.awaitTermination();
}
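// Note: a sketch of the more efficient windowed count that also subtracts the counts of the
// batch leaving the window. It reuses wordCounts and reduceFunc from the snippet above and
// assumes checkpointing has been enabled on the context (required by this variant of
// reduceByKeyAndWindow); the result is the same as the simple windowed count.
Function2<Integer, Integer, Integer> subtractFunc =
    new Function2<Integer, Integer, Integer>() {
      @Override
      public Integer call(Integer i1, Integer i2) throws Exception {
        return i1 - i2;
      }
    };
JavaPairDStream<String, Integer> incrementalWindowedWordCounts =
    wordCounts.reduceByKeyAndWindow(
        reduceFunc, subtractFunc, new Duration(30000), new Duration(10000));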
public static void main(String[] args) {
  if (args.length < 4) {
    System.err.println("Usage: JavaKafkaWordCount <zkQuorum> <group> <topics> <numThreads>");
    System.exit(1);
  }

  StreamingExamples.setStreamingLogLevels();

  // SparkConf sparkConf = new SparkConf().setAppName("JavaKafkaWordCount");
  // sparkConf.setMaster("spark://60f81dc6426c:7077");
  // SparkConf sparkConf = new SparkConf().setAppName("JavaKafkaWordCount")
  //     .setMaster("spark://60f81dc6426c:7077");

  // Create the context with a 2 second batch size
  JavaStreamingContext jssc =
      new JavaStreamingContext("local[4]", "JavaKafkaWordCount", new Duration(2000));

  int numThreads = Integer.parseInt(args[3]);
  Logger.getLogger("org").setLevel(Level.OFF);
  Logger.getLogger("akka").setLevel(Level.OFF);

  Map<String, Integer> topicMap = new HashMap<String, Integer>();
  String[] topics = args[2].split(",");
  for (String topic : topics) {
    topicMap.put(topic, numThreads);
  }
  /* for (String t : topics) {
    topicMap.put(t, new Integer(3));
  } */

  // NotSerializable notSerializable = new NotSerializable();
  // JavaRDD<String> rdd = sc.textFile("/tmp/myfile");
  // rdd.map(s -> notSerializable.doSomething(s)).collect();

  JavaPairReceiverInputDStream<String, String> messages =
      KafkaUtils.createStream(jssc, args[0], args[1], topicMap);
  // JavaPairReceiverInputDStream<String, String> kafkaStream =
  //     KafkaUtils.createStream(jssc, "localhost:2181", "streamingContext", topicMap);
  System.out.println("Connection !!!!");

  /* JavaDStream<String> data = messages.map(new Function<Tuple2<String, String>, String>() {
    public String call(Tuple2<String, String> message) {
      return message._2();
    }
  }); */

  JavaDStream<String> lines =
      messages.map(
          new Function<Tuple2<String, String>, String>() {
            @Override
            public String call(Tuple2<String, String> tuple2) {
              return tuple2._2();
            }
          });
  JavaDStream<String> words =
      lines.flatMap(
          new FlatMapFunction<String, String>() {
            @Override
            public Iterable<String> call(String x) {
              return Lists.newArrayList(SPACE.split(x));
            }
          });
  JavaPairDStream<String, Integer> wordCounts =
      words
          .mapToPair(
              new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String s) {
                  return new Tuple2<String, Integer>(s, 1);
                }
              })
          .reduceByKey(
              new Function2<Integer, Integer, Integer>() {
                @Override
                public Integer call(Integer i1, Integer i2) {
                  return i1 + i2;
                }
              });

  wordCounts.print();
  jssc.start();
  jssc.awaitTermination();
}