@Override
public WorkloadOperator<String> stringStreamFromKafka(
    String zkConStr,
    String kafkaServers,
    String group,
    String topics,
    String offset,
    String componentId,
    int parallelism) {
  HashSet<String> topicsSet = new HashSet<>(Arrays.asList(topics.split(",")));

  HashMap<String, String> kafkaParams = new HashMap<>();
  kafkaParams.put("metadata.broker.list", kafkaServers);
  kafkaParams.put("auto.offset.reset", offset);
  kafkaParams.put("zookeeper.connect", zkConStr);
  kafkaParams.put("group.id", group);

  // Create direct kafka stream with brokers and topics
  JavaPairDStream<String, String> messages =
      KafkaUtils.createDirectStream(
          jssc,
          String.class,
          String.class,
          StringDecoder.class,
          StringDecoder.class,
          kafkaParams,
          topicsSet);

  JavaDStream<String> lines = messages.map(mapFunction);
  return new SparkWorkloadOperator<>(lines, parallelism);
}
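The snippet above relies on a jssc streaming context and a mapFunction field that are defined elsewhere in the class. A minimal sketch of what mapFunction presumably looks like, extracting the message value from each Kafka (key, value) tuple with the Spark 1.x Function API; the field name and types are taken from the call site above, the body is an assumption:

// Hypothetical definition of the mapFunction field referenced above:
// maps a Kafka (key, value) pair to just the value string.
private static final Function<Tuple2<String, String>, String> mapFunction =
    new Function<Tuple2<String, String>, String>() {
      @Override
      public String call(Tuple2<String, String> kv) throws Exception {
        return kv._2();
      }
    };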
public static void main(String[] args) {
  SparkConf conf = new SparkConf().setAppName("Example").setMaster("local[2]");
  // Create a StreamingContext with a 10-second batch interval from a SparkConf
  JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(10));

  // Create a DStream from all the input on port 7777
  JavaDStream<String> lines = jssc.socketTextStream("localhost", 7777);

  // Split each line into words.
  final JavaDStream<String> wordDStream =
      lines.flatMap(
          new FlatMapFunction<String, String>() {
            public Iterable<String> call(String x) {
              return Arrays.asList(x.split(" "));
            }
          });

  // Pair each word with an initial count of 1.
  final JavaPairDStream<String, Integer> wordPairDStream =
      wordDStream.mapToPair(
          new PairFunction<String, String, Integer>() {
            @Override
            public Tuple2<String, Integer> call(String s) throws Exception {
              return new Tuple2<String, Integer>(s, 1);
            }
          });

  // Sum the counts over a 30-second window that slides every 20 seconds.
  final JavaPairDStream<String, Integer> totalWordPairDStream =
      wordPairDStream.reduceByKeyAndWindow(
          new Function2<Integer, Integer, Integer>() {
            @Override
            public Integer call(Integer a, Integer b) throws Exception {
              return a + b;
            }
          },
          Durations.seconds(30),
          Durations.seconds(20));

  totalWordPairDStream.print();

  // Start our streaming context and wait for it to "finish"
  jssc.start();
  // Wait for the job to finish
  jssc.awaitTermination();
}
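The original comments mentioned filtering the DStream for lines containing "error", but no such step appears in the code. A minimal sketch of what that filter presumably looked like, placed between the socket stream and the word split; the predicate and placement are assumptions:

// Hypothetical filter step suggested by the original comment:
// keep only lines that mention "error" before splitting into words.
JavaDStream<String> errorLines =
    lines.filter(
        new Function<String, Boolean>() {
          @Override
          public Boolean call(String line) throws Exception {
            return line.contains("error");
          }
        });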
public static void main(String[] args) {
  SparkConf conf =
      new SparkConf()
          .setMaster("local[4]")
          .setAppName("SparkStreamingPullDataFromFlume for Java");
  JavaStreamingContext jsc = new JavaStreamingContext(conf, Durations.seconds(30));

  // JavaReceiverInputDStream<SparkFlumeEvent> lines =
  //     FlumeUtils.createStream(jsc, "master1", 9999); // Flume pushes data to Spark Streaming
  JavaReceiverInputDStream<SparkFlumeEvent> lines =
      FlumeUtils.createPollingStream(jsc, "master1", 9999); // Spark Streaming pulls data from Flume

  JavaDStream<String> words =
      lines.flatMap(
          new FlatMapFunction<SparkFlumeEvent, String>() {
            private static final long serialVersionUID = 1L;

            @Override
            public Iterable<String> call(SparkFlumeEvent event) throws Exception {
              String line = new String(event.event().getBody().array());
              return Arrays.asList(line.split(" "));
            }
          });

  JavaPairDStream<String, Integer> pairs =
      words.mapToPair(
          new PairFunction<String, String, Integer>() {
            private static final long serialVersionUID = 1L;

            @Override
            public Tuple2<String, Integer> call(String word) throws Exception {
              return new Tuple2<String, Integer>(word, 1);
            }
          });

  JavaPairDStream<String, Integer> wordsCount =
      pairs.reduceByKey(
          // For identical keys, accumulate the values (combined both locally and at the reducer level).
          new Function2<Integer, Integer, Integer>() {
            private static final long serialVersionUID = 1L;

            @Override
            public Integer call(Integer v1, Integer v2) throws Exception {
              return v1 + v2;
            }
          });

  wordsCount.print();

  jsc.start();
  jsc.awaitTermination();
  jsc.close();
}
public static void main(String[] args) throws Exception {
  if (args.length < 1) {
    System.err.println("Usage: JavaQueueStream <master>");
    System.exit(1);
  }

  StreamingExamples.setStreamingLogLevels();

  // Create the context
  JavaStreamingContext ssc =
      new JavaStreamingContext(
          args[0],
          "QueueStream",
          new Duration(1000),
          System.getenv("SPARK_HOME"),
          JavaStreamingContext.jarOfClass(JavaQueueStream.class));

  // Create the queue through which RDDs can be pushed to a QueueInputDStream
  Queue<JavaRDD<Integer>> rddQueue = new LinkedList<JavaRDD<Integer>>();

  // Create and push some RDDs into the queue
  List<Integer> list = Lists.newArrayList();
  for (int i = 0; i < 1000; i++) {
    list.add(i);
  }
  for (int i = 0; i < 30; i++) {
    rddQueue.add(ssc.sparkContext().parallelize(list));
  }

  // Create the QueueInputDStream and use it to do some processing
  JavaDStream<Integer> inputStream = ssc.queueStream(rddQueue);
  JavaPairDStream<Integer, Integer> mappedStream =
      inputStream.mapToPair(
          new PairFunction<Integer, Integer, Integer>() {
            @Override
            public Tuple2<Integer, Integer> call(Integer i) {
              return new Tuple2<Integer, Integer>(i % 10, 1);
            }
          });
  JavaPairDStream<Integer, Integer> reducedStream =
      mappedStream.reduceByKey(
          new Function2<Integer, Integer, Integer>() {
            @Override
            public Integer call(Integer i1, Integer i2) {
              return i1 + i2;
            }
          });

  reducedStream.print();
  ssc.start();
  ssc.awaitTermination();
}
@Test
@SuppressWarnings("unchecked")
public void testJavaPairDStreamFunctions() throws Exception {
  JavaPairDStream<String, String> mockJavaDStream = mock(JavaPairDStream.class);
  DStream<Tuple2<String, String>> mockDStream = mock(DStream.class);
  when(mockJavaDStream.dstream()).thenReturn(mockDStream);

  GemFireJavaPairDStreamFunctions wrapper = javaFunctions(mockJavaDStream);
  assertTrue(mockDStream == wrapper.dsf.dstream());

  Tuple3<SparkContext, GemFireConnectionConf, GemFireConnection> tuple3 = createCommonMocks();
  String regionPath = "testregion";
  wrapper.saveToGemfire(regionPath, tuple3._2());

  verify(tuple3._2()).getConnection();
  verify(tuple3._3()).validateRegion(regionPath);
  verify(mockDStream).foreachRDD(any(Function1.class));
}
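The test calls a createCommonMocks() helper that is not shown. A minimal sketch of what it presumably sets up, inferred from how the returned Tuple3 is used above (a mocked SparkContext, and a GemFireConnectionConf stubbed to hand back a mocked GemFireConnection); the exact wiring is an assumption:

// Hypothetical helper assumed by the test above: builds the three mocks and
// stubs the connection conf so getConnection() returns the mocked connection.
private Tuple3<SparkContext, GemFireConnectionConf, GemFireConnection> createCommonMocks() {
  SparkContext mockSparkContext = mock(SparkContext.class);
  GemFireConnectionConf mockConnConf = mock(GemFireConnectionConf.class);
  GemFireConnection mockConnection = mock(GemFireConnection.class);
  when(mockConnConf.getConnection()).thenReturn(mockConnection);
  return new Tuple3<>(mockSparkContext, mockConnConf, mockConnection);
}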
public static void main(String[] args) throws Exception {
  if (args.length < 2) {
    System.err.println(
        "Usage: JavaDirectKafkaWordCount <brokers> <topics>\n"
            + "  <brokers> is a list of one or more Kafka brokers\n"
            + "  <topics> is a list of one or more kafka topics to consume from\n\n");
    System.exit(1);
  }

  String brokers = args[0];
  String topics = args[1];

  // Create context with a 2-second batch interval
  SparkConf sparkConf = new SparkConf().setAppName("JavaDirectKafkaWordCount");
  JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(2));

  Set<String> topicsSet = new HashSet<>(Arrays.asList(topics.split(",")));
  Map<String, String> kafkaParams = new HashMap<>();
  kafkaParams.put("metadata.broker.list", brokers);

  // Create direct kafka stream with brokers and topics
  JavaPairInputDStream<String, String> messages =
      KafkaUtils.createDirectStream(
          jssc,
          String.class,
          String.class,
          StringDecoder.class,
          StringDecoder.class,
          kafkaParams,
          topicsSet);

  // Get the lines, split them into words, count the words and print
  JavaDStream<String> lines =
      messages.map(
          new Function<Tuple2<String, String>, String>() {
            @Override
            public String call(Tuple2<String, String> tuple2) {
              return tuple2._2();
            }
          });
  JavaDStream<String> words =
      lines.flatMap(
          new FlatMapFunction<String, String>() {
            @Override
            public Iterable<String> call(String x) {
              return Arrays.asList(SPACE.split(x));
            }
          });
  JavaPairDStream<String, Integer> wordCounts =
      words
          .mapToPair(
              new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String s) {
                  return new Tuple2<>(s, 1);
                }
              })
          .reduceByKey(
              new Function2<Integer, Integer, Integer>() {
                @Override
                public Integer call(Integer i1, Integer i2) {
                  return i1 + i2;
                }
              });
  wordCounts.print();

  // Start the computation
  jssc.start();
  jssc.awaitTermination();
}
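This class splits lines with a SPACE pattern declared elsewhere in the file; in the stock Spark JavaDirectKafkaWordCount example that constant is a single-space regex. The declaration it presumably relies on (an assumption here, not part of the snippet):

// Assumed regex constant used by SPACE.split(x) above (java.util.regex.Pattern).
private static final Pattern SPACE = Pattern.compile(" ");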
public static void main(String[] args) {
  if (args.length < 5) {
    System.out.println(
        "Usage: StreamProducerJava <infinispan_host> <twitter4j.oauth.consumerKey>"
            + " <twitter4j.oauth.consumerSecret> <twitter4j.oauth.accessToken>"
            + " <twitter4j.oauth.accessTokenSecret>");
    System.exit(1);
  }

  String infinispanHost = args[0];
  System.setProperty("twitter4j.oauth.consumerKey", args[1]);
  System.setProperty("twitter4j.oauth.consumerSecret", args[2]);
  System.setProperty("twitter4j.oauth.accessToken", args[3]);
  System.setProperty("twitter4j.oauth.accessTokenSecret", args[4]);

  // Reduce the log level in the driver
  Logger.getLogger("org").setLevel(Level.WARN);

  SparkConf conf = new SparkConf().setAppName("spark-infinispan-stream-producer-java");

  // Create the streaming context
  JavaStreamingContext javaStreamingContext = new JavaStreamingContext(conf, Seconds.apply(1));

  // Populate infinispan properties
  Properties infinispanProperties = new Properties();
  infinispanProperties.put("infinispan.client.hotrod.server_list", infinispanHost);

  JavaReceiverInputDStream<Status> twitterDStream =
      TwitterUtils.createStream(javaStreamingContext);

  // Transform from twitter4j.Status to our domain model org.infinispan.spark.demo.twitter.Tweet
  JavaDStream<Tuple2<Long, Tweet>> kvPair =
      twitterDStream.map(
          status ->
              new Tuple2<>(
                  status.getId(),
                  new Tweet(
                      status.getId(),
                      status.getUser().getScreenName(),
                      Optional.ofNullable(status.getPlace())
                          .map(Place::getCountry)
                          .orElseGet(() -> "N/A"),
                      status.getRetweetCount(),
                      status.getText())));

  // Write the stream to infinispan
  InfinispanJavaDStream.writeToInfinispan(kvPair, infinispanProperties);

  // Create InfinispanInputDStream
  JavaInputDStream<Tuple3<Long, Tweet, ClientEvent.Type>> infinispanInputDStream =
      InfinispanJavaDStream.createInfinispanInputDStream(
          javaStreamingContext, MEMORY_ONLY(), infinispanProperties);

  // Apply a transformation to the RDDs to aggregate by country
  JavaPairDStream<String, Integer> countryDStream =
      infinispanInputDStream.transformToPair(
          rdd ->
              rdd.filter(ev -> !ev._2().getCountry().equals("N/A"))
                  .mapToPair(event -> new Tuple2<>(event._2().getCountry(), 1))
                  .reduceByKey((a, b) -> a + b));

  // Since we are interested in the last 60 seconds only, we restrict the DStream by window,
  // collapsing all the RDDs:
  JavaPairDStream<String, Integer> lastMinuteStream =
      countryDStream.reduceByKeyAndWindow((a, b) -> a + b, new Duration(60 * 1000));

  lastMinuteStream.foreachRDD(
      (rdd, time) -> {
        System.out.format("---------- %s ----------\n", time.toString());
        List<Tuple2<String, Integer>> results = rdd.collect();
        results
            .stream()
            .sorted((o1, o2) -> o2._2().compareTo(o1._2()))
            .forEach(t -> System.out.format("[%s,%d]\n", t._1(), t._2()));
        return null;
      });

  // Start the processing
  javaStreamingContext.start();
  javaStreamingContext.awaitTermination();
}
public JavaStreamingContext createContext() {
  CassandraConnectionContext connectionContext = connectToCassandra();
  JavaDStream<String> rdd = connectionContext.getRDD();
  JavaStreamingContext sparkContext = connectionContext.getStreamingContext();
  final CassandraConnector connector = connectionContext.getCassandraConnector();
  final SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");

  // Parse each CSV line into a (taxId, Transaction) pair
  JavaDStream<Tuple2<String, Transaction>> transactionList =
      rdd.map(
          new Function<String, Tuple2<String, Transaction>>() {
            @Override
            public Tuple2<String, Transaction> call(String line) throws Exception {
              String[] columns = line.split(",");
              String taxId = columns[0];
              String name = columns[1];
              String merchant = columns[2];
              BigDecimal amount = new BigDecimal(columns[3]);
              Date transactionDate;
              try {
                transactionDate = format.parse(columns[4]);
              } catch (ParseException e) {
                e.printStackTrace();
                throw new RuntimeException(e);
              }
              String tranId = columns[5];
              System.out.println(line);
              Tuple2<String, Transaction> transaction =
                  new Tuple2<>(
                      taxId, new Transaction(name, merchant, amount, transactionDate, tranId));
              return transaction;
            }
          });
  transactionList.cache();

  final String warningsTableName = "warnings";
  try (Session session = connector.openSession()) {
    session.execute(
        String.format("DROP TABLE IF EXISTS %s.%s", getKeySpaceName(), warningsTableName));
    session.execute(
        String.format(
            "CREATE TABLE IF NOT EXISTS %s.%s (ssn text, "
                + "id uuid, amount decimal, rule text, PRIMARY KEY(ssn, id))",
            getKeySpaceName(), warningsTableName));
  }

  // Raise a warning when a user's transactions exceed a dollar total within a 60-second window,
  // evaluated every 10 seconds
  JavaPairDStream<String, BigDecimal> warnings =
      transactionList
          .window(new Duration(60000), new Duration(10000))
          .mapToPair(
              new PairFunction<Tuple2<String, Transaction>, String, BigDecimal>() {
                @Override
                public Tuple2<String, BigDecimal> call(Tuple2<String, Transaction> transaction)
                    throws Exception {
                  String taxId = transaction._1();
                  BigDecimal amount = transaction._2().getAmount();
                  return new Tuple2<>(taxId, amount);
                }
              })
          .reduceByKey(
              new Function2<BigDecimal, BigDecimal, BigDecimal>() {
                @Override
                public BigDecimal call(
                    BigDecimal transactionAmount1, BigDecimal transactionAmount2)
                    throws Exception {
                  return transactionAmount1.add(transactionAmount2);
                }
              })
          .filter(
              new Function<Tuple2<String, BigDecimal>, Boolean>() {
                @Override
                public Boolean call(Tuple2<String, BigDecimal> transactionSumByTaxId)
                    throws Exception {
                  // returns true if the total is greater than 999
                  Boolean result =
                      transactionSumByTaxId._2().compareTo(new BigDecimal(999)) > 0;
                  System.out.println(
                      "tran "
                          + transactionSumByTaxId._1()
                          + " with amount "
                          + transactionSumByTaxId._2());
                  if (result) {
                    System.out.println(
                        "warning "
                            + transactionSumByTaxId._1()
                            + " has value "
                            + transactionSumByTaxId._2()
                            + " which is greater than 999");
                  }
                  return result;
                }
              });

  // Map each (taxId, total) pair to a Warning entity and save it to Cassandra
  JavaDStream<Warning> mappedWarnings =
      warnings.map(
          new Function<Tuple2<String, BigDecimal>, Warning>() {
            @Override
            public Warning call(Tuple2<String, BigDecimal> warnings) throws Exception {
              Warning warning = new Warning();
              warning.setSsn(warnings._1());
              warning.setId(UUIDs.timeBased());
              warning.setAmount(warnings._2());
              warning.setRule("OVER_DOLLAR_AMOUNT");
              return warning;
            }
          });

  javaFunctions(mappedWarnings)
      .writerBuilder(getKeySpaceName(), warningsTableName, mapToRow(Warning.class))
      .saveToCassandra();

  return sparkContext;
}
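The factory-style createContext() name suggests this method is meant to be handed to Spark's checkpoint-aware context creation. A hedged sketch of how a driver might obtain the context with JavaStreamingContext.getOrCreate; the checkpoint directory and the enclosing class name are assumptions, not shown in the original:

// Hypothetical driver entry point: recover from the checkpoint if one exists,
// otherwise build a fresh context via createContext().
final String checkpointDir = "/tmp/transaction-checkpoint"; // assumed location
JavaStreamingContext context =
    JavaStreamingContext.getOrCreate(
        checkpointDir,
        new Function0<JavaStreamingContext>() {
          @Override
          public JavaStreamingContext call() throws Exception {
            // "TransactionConsumer" is an assumed name for the enclosing class
            JavaStreamingContext jssc = new TransactionConsumer().createContext();
            jssc.checkpoint(checkpointDir);
            return jssc;
          }
        });
context.start();
context.awaitTermination();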
public static void main(String[] args) {
  String master = System.getenv("MASTER");
  if (master == null) {
    master = "local[2]";
  }

  SparkConf conf = new SparkConf().setAppName("Voter Application").setMaster(master);
  Logger.getLogger("org").setLevel(Level.ERROR);
  Logger.getLogger("akka").setLevel(Level.ERROR);

  final Long batch_duration = Long.valueOf(args[0]);
  JavaStreamingContext jssc =
      new JavaStreamingContext(conf, new Duration(Integer.valueOf(args[0])));
  jssc.checkpoint(".");

  JavaReceiverInputDStream<String> votes = jssc.receiverStream(new Voter("localhost", 6789));

  // Transform the text-line stream into a PhoneCall stream
  JavaDStream<PhoneCall> phoneCalls =
      votes.map(
          new Function<String, PhoneCall>() {
            public PhoneCall call(String s) {
              return getPhoneCall(s);
            }
          });

  JavaDStream<Long> counts = votes.count();
  counts.print();

  // Update function used to maintain the running call count for each phone number
  Function2<List<Integer>, Optional<Integer>, Optional<Integer>> updateFunction =
      new Function2<List<Integer>, Optional<Integer>, Optional<Integer>>() {
        public Optional<Integer> call(List<Integer> values, Optional<Integer> state) {
          // Add the new values to the previous running count to get the new count
          Integer sum = 0;
          for (Integer i : values) {
            sum += i;
          }
          Integer newSum = sum + state.or(0);
          return Optional.of(newSum);
        }
      };

  JavaPairDStream<Long, Integer> calls =
      phoneCalls.mapToPair(
          new PairFunction<PhoneCall, Long, Integer>() {
            public Tuple2<Long, Integer> call(PhoneCall x) {
              return new Tuple2<Long, Integer>(x.phoneNumber, 1);
            }
          });

  // Accumulated call count per phone number
  final JavaPairDStream<Long, Integer> callNumberCounts = calls.updateStateByKey(updateFunction);
  // callNumberCounts.print();

  JavaPairDStream<Long, PhoneCall> pairVotes =
      phoneCalls.mapToPair(
          new PairFunction<PhoneCall, Long, PhoneCall>() {
            public Tuple2<Long, PhoneCall> call(PhoneCall call) throws Exception {
              return new Tuple2<Long, PhoneCall>(call.voteId, call);
            }
          });

  // Phone numbers that are still allowed to vote
  JavaPairDStream<Long, Integer> allowedCalls =
      callNumberCounts.filter(
          new Function<Tuple2<Long, Integer>, Boolean>() {
            public Boolean call(Tuple2<Long, Integer> v1) throws Exception {
              return v1._2() <= Voter.MAX_VOTES;
            }
          });
  // allowedCalls.print();

  // Keep only calls that reference a valid contestant
  JavaDStream<PhoneCall> validContestantPhoneCalls =
      phoneCalls.filter(
          new Function<PhoneCall, Boolean>() {
            public Boolean call(PhoneCall call) {
              return call.contestantNumber <= Voter.NUM_CONTESTANTS;
            }
          });

  JavaPairDStream<Long, PhoneCall> anotherTemporaryPhoneCalls =
      validContestantPhoneCalls.mapToPair(
          new PairFunction<PhoneCall, Long, PhoneCall>() {
            public Tuple2<Long, PhoneCall> call(PhoneCall x) {
              return new Tuple2<Long, PhoneCall>(x.phoneNumber, x);
            }
          });

  // Valid phone-call records: calls for valid contestants from numbers still allowed to vote
  JavaPairDStream<Long, Tuple2<PhoneCall, Integer>> validatePhoneCalls =
      anotherTemporaryPhoneCalls.join(allowedCalls);
  // validatePhoneCalls.print();

  JavaDStream<PhoneCall> validateCalls =
      validatePhoneCalls.transform(
          new Function<JavaPairRDD<Long, Tuple2<PhoneCall, Integer>>, JavaRDD<PhoneCall>>() {
            public JavaRDD<PhoneCall> call(JavaPairRDD<Long, Tuple2<PhoneCall, Integer>> v1)
                throws Exception {
              JavaRDD<PhoneCall> item =
                  v1.map(
                      new Function<Tuple2<Long, Tuple2<PhoneCall, Integer>>, PhoneCall>() {
                        public PhoneCall call(Tuple2<Long, Tuple2<PhoneCall, Integer>> validItem)
                            throws Exception {
                          return validItem._2()._1();
                        }
                      });
              return item;
            }
          });
  // validateCalls.print();

  // Save all valid votes to Redis
  validateCalls.foreachRDD(
      new Function<JavaRDD<PhoneCall>, Void>() {
        public Void call(JavaRDD<PhoneCall> rdd) throws Exception {
          rdd.foreach(
              new VoidFunction<PhoneCall>() {
                public void call(PhoneCall call) throws Exception {
                  String key = String.valueOf(call.voteId);
                  String value = call.getContent();
                  // Save the <key, value> pair using Redis.
                  // Note: this opens and destroys a pool for every record; see the
                  // foreachPartition sketch after this snippet for a cheaper pattern.
                  JedisPool pool = new JedisPool(new JedisPoolConfig(), "localhost");
                  Jedis jedis = pool.getResource();
                  try {
                    jedis.set(key, value);
                  } finally {
                    if (null != jedis) {
                      jedis.close();
                    }
                  }
                  pool.destroy();
                }
              });
          return null;
        }
      });

  // Per-contestant votes from the validated calls
  JavaPairDStream<Integer, Integer> contestantVotes =
      validateCalls.mapToPair(
          new PairFunction<PhoneCall, Integer, Integer>() {
            public Tuple2<Integer, Integer> call(PhoneCall x) {
              return new Tuple2<Integer, Integer>(x.contestantNumber, 1);
            }
          });

  // Use a window to generate the leaderboard
  Integer size = Integer.valueOf(args[1]);
  Integer slide = Integer.valueOf(args[2]);
  JavaDStream<PhoneCall> windowCalls =
      validateCalls.window(new Duration(size), new Duration(slide));
  // windowCalls.print();

  // Per-contestant counts within the window
  JavaPairDStream<Integer, Integer> windowContestantNums =
      windowCalls.mapToPair(
          new PairFunction<PhoneCall, Integer, Integer>() {
            public Tuple2<Integer, Integer> call(PhoneCall x) {
              return new Tuple2<Integer, Integer>(x.contestantNumber, 1);
            }
          });
  JavaPairDStream<Integer, Integer> windContestantCounts =
      windowContestantNums.reduceByKey(
          new Function2<Integer, Integer, Integer>() {
            public Integer call(Integer i1, Integer i2) throws Exception {
              return i1 + i2;
            }
          });
  windContestantCounts.print();

  // Accumulated count per contestant
  JavaPairDStream<Integer, Integer> totalContestantCounts =
      contestantVotes.updateStateByKey(updateFunction);

  // Swap (contestant, count) to (count, contestant) so the pairs can be sorted by count
  PairFunction<Tuple2<Integer, Integer>, Integer, Integer> swapFunction =
      new PairFunction<Tuple2<Integer, Integer>, Integer, Integer>() {
        public Tuple2<Integer, Integer> call(Tuple2<Integer, Integer> in) {
          return in.swap();
        }
      };
  JavaPairDStream<Integer, Integer> swappedTotalContestantCounts =
      totalContestantCounts.mapToPair(swapFunction);
  JavaPairDStream<Integer, Integer> sortedTotalContestantCounts =
      swappedTotalContestantCounts.transformToPair(
          new Function<JavaPairRDD<Integer, Integer>, JavaPairRDD<Integer, Integer>>() {
            public JavaPairRDD<Integer, Integer> call(JavaPairRDD<Integer, Integer> in)
                throws Exception {
              return in.sortByKey(false);
            }
          });
  sortedTotalContestantCounts.print();

  // Collect some statistics: records per second for each batch, stored in memcached
  phoneCalls.foreachRDD(
      new Function<JavaRDD<PhoneCall>, Void>() {
        public Void call(JavaRDD<PhoneCall> rdd) throws Exception {
          Long count = rdd.count();
          Double throughput = (count.doubleValue() * 1000 / batch_duration.doubleValue());
          System.out.println("Current rate = " + throughput + " records / second");

          XMemcachedClientBuilder builder =
              new XMemcachedClientBuilder(AddrUtil.getAddresses("localhost:11211"));
          XMemcachedClient client = (XMemcachedClient) builder.build();
          client.setPrimitiveAsString(true);
          Long currentTimeStamp = System.currentTimeMillis();
          client.add(currentTimeStamp.toString(), 0, throughput);
          return null;
        }
      });

  jssc.start(); // Start the computation
  jssc.awaitTermination(); // Wait for the computation to terminate
}
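Opening and destroying a JedisPool for every record, as the snippet above does, is expensive. A hedged alternative sketch using foreachPartition so that each partition shares one pool; it reuses the names from the snippet (validateCalls, PhoneCall, voteId, getContent()) and is illustrative rather than part of the original code:

// Sketch: one Redis pool per partition instead of one per record.
validateCalls.foreachRDD(
    new Function<JavaRDD<PhoneCall>, Void>() {
      public Void call(JavaRDD<PhoneCall> rdd) throws Exception {
        rdd.foreachPartition(
            new VoidFunction<Iterator<PhoneCall>>() {
              public void call(Iterator<PhoneCall> calls) throws Exception {
                JedisPool pool = new JedisPool(new JedisPoolConfig(), "localhost");
                try (Jedis jedis = pool.getResource()) {
                  while (calls.hasNext()) {
                    PhoneCall call = calls.next();
                    jedis.set(String.valueOf(call.voteId), call.getContent());
                  }
                } finally {
                  pool.destroy();
                }
              }
            });
        return null;
      }
    });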
public static void main(String[] args) {
  // Create the context with a 10-second batch size
  SparkConf sparkConf = new SparkConf().setAppName("Assignment");
  JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, new Duration(10000));

  // Create a JavaReceiverInputDStream on the target ip:port and count the
  // words in the input stream of \n-delimited text (e.g. generated by 'nc').
  // Note: skipping replication in the storage level is acceptable only when running locally;
  // in a distributed deployment replication is needed for fault tolerance.
  JavaReceiverInputDStream<String> lines =
      ssc.socketTextStream(
          "localhost", Integer.parseInt("9999"), StorageLevels.MEMORY_AND_DISK_SER);

  JavaDStream<String> words =
      lines.flatMap(
          new FlatMapFunction<String, String>() {
            @Override
            public Iterable<String> call(String x) {
              // Collect each token matched by the SPACE pattern, lower-cased
              List<String> allMatches = new ArrayList<String>();
              Matcher matcher = SPACE.matcher(x);
              while (matcher.find()) {
                allMatches.add(matcher.group().toLowerCase());
              }
              return Lists.newArrayList(allMatches.toArray(new String[0]));
            }
          });

  JavaPairDStream<String, Integer> wordCounts =
      words.mapToPair(
          new PairFunction<String, String, Integer>() {
            @Override
            public Tuple2<String, Integer> call(String s) {
              return new Tuple2<String, Integer>(s, 1);
            }
          });

  // Reduce function adding two integers, defined separately for clarity
  Function2<Integer, Integer, Integer> reduceFunc =
      new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) throws Exception {
          return i1 + i2;
        }
      };

  // Count words over a 30-second window that slides every 10 seconds
  JavaPairDStream<String, Integer> windowedWordCounts =
      wordCounts.reduceByKeyAndWindow(reduceFunc, new Duration(30000), new Duration(10000));

  windowedWordCounts.print();
  ssc.start();
  ssc.awaitTermination();
}
public static void main(String[] args) {
  if (args.length < 4) {
    System.err.println("Usage: JavaKafkaWordCount <zkQuorum> <group> <topics> <numThreads>");
    System.exit(1);
  }

  StreamingExamples.setStreamingLogLevels();

  // Create the context with a 2-second batch size
  JavaStreamingContext jssc =
      new JavaStreamingContext("local[4]", "JavaKafkaWordCount", new Duration(2000));

  int numThreads = Integer.parseInt(args[3]);
  Logger.getLogger("org").setLevel(Level.OFF);
  Logger.getLogger("akka").setLevel(Level.OFF);

  // Map each topic to the number of receiver threads that should consume it
  Map<String, Integer> topicMap = new HashMap<String, Integer>();
  String[] topics = args[2].split(",");
  for (String topic : topics) {
    topicMap.put(topic, numThreads);
  }

  // Create a receiver-based Kafka stream: args[0] is the ZooKeeper quorum, args[1] the consumer group
  JavaPairReceiverInputDStream<String, String> messages =
      KafkaUtils.createStream(jssc, args[0], args[1], topicMap);

  System.out.println("Connection !!!!");

  JavaDStream<String> lines =
      messages.map(
          new Function<Tuple2<String, String>, String>() {
            @Override
            public String call(Tuple2<String, String> tuple2) {
              return tuple2._2();
            }
          });

  JavaDStream<String> words =
      lines.flatMap(
          new FlatMapFunction<String, String>() {
            @Override
            public Iterable<String> call(String x) {
              return Lists.newArrayList(SPACE.split(x));
            }
          });

  JavaPairDStream<String, Integer> wordCounts =
      words
          .mapToPair(
              new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String s) {
                  return new Tuple2<String, Integer>(s, 1);
                }
              })
          .reduceByKey(
              new Function2<Integer, Integer, Integer>() {
                @Override
                public Integer call(Integer i1, Integer i2) {
                  return i1 + i2;
                }
              });

  wordCounts.print();
  jssc.start();
  jssc.awaitTermination();
}