public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("Example").setMaster("local[2]");
    // Create a StreamingContext with a 10-second batch interval from a SparkConf
    JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(10));
    // Create a DStream from all the input on port 7777
    JavaDStream<String> lines = jssc.socketTextStream("localhost", 7777);

    // Split up into words.
    final JavaDStream<String> wordDStream =
        lines.flatMap(
            new FlatMapFunction<String, String>() {
              public Iterable<String> call(String x) {
                return Arrays.asList(x.split(" "));
              }
            });

    final JavaPairDStream<String, Integer> wordPairDStream =
        wordDStream.mapToPair(
            new PairFunction<String, String, Integer>() {
              @Override
              public Tuple2<String, Integer> call(String s) throws Exception {
                return new Tuple2<String, Integer>(s, 1);
              }
            });

    final JavaPairDStream<String, Integer> totalWordPairDStream =
        wordPairDStream.reduceByKeyAndWindow(
            new Function2<Integer, Integer, Integer>() {
              @Override
              public Integer call(Integer a, Integer b) throws Exception {
                return a + b;
              }
            },
            Durations.seconds(30),
            Durations.seconds(20));
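    // The 30-second window with a 20-second slide above means each output covers the most recent
    // 30 seconds of data and is recomputed every 20 seconds; both durations must be multiples of
    // the 10-second batch interval.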

    //    totalWordPairDStream.foreach(
    //        new Function2<JavaPairRDD<String, Integer>, Time, Void>() {
    //          @Override
    //          public Void call(JavaPairRDD<String, Integer> wordPairRDD, Time time) throws Exception {
    //            final List<Tuple2<String, Integer>> collect = wordPairRDD.collect();
    //            for (Tuple2<String, Integer> t : collect) {
    //              System.out.println("the value is " + t._1() + "," + t._2());
    //            }
    //            return null;
    //          }
    //        });

    totalWordPairDStream.print();

    // Start our streaming context and wait for it to "finish"
    jssc.start();
    // Wait for the job to finish
    jssc.awaitTermination();
  }
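
  // For reference: the same windowed word count written with Java 8 lambdas. This is a minimal
  // sketch assuming Spark 2.x, where FlatMapFunction returns an Iterator (the anonymous-class
  // form above, returning an Iterable, is what Spark 1.x expects). The method name is a
  // hypothetical wrapper, not part of the original snippet.
  public static void windowedWordCountWithLambdas() throws InterruptedException {
    SparkConf conf = new SparkConf().setAppName("Example").setMaster("local[2]");
    JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(10));
    JavaDStream<String> lines = jssc.socketTextStream("localhost", 7777);
    JavaPairDStream<String, Integer> windowedCounts =
        lines
            .flatMap(x -> Arrays.asList(x.split(" ")).iterator())
            .mapToPair(w -> new Tuple2<>(w, 1))
            .reduceByKeyAndWindow((a, b) -> a + b, Durations.seconds(30), Durations.seconds(20));
    windowedCounts.print();
    jssc.start();
    jssc.awaitTermination();
  }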
  public static void main(String[] args) {
    SparkConf conf =
        new SparkConf()
            .setMaster("local[4]")
            .setAppName("SparkStreamingPullDataFromFlume for Java");
    JavaStreamingContext jsc = new JavaStreamingContext(conf, Durations.seconds(30));
    // Push model (alternative): Flume pushes data to Spark Streaming.
    // JavaReceiverInputDStream<SparkFlumeEvent> lines =
    //     FlumeUtils.createStream(jsc, "master1", 9999);

    // Pull model: Spark Streaming pulls data from the Flume agent's Spark sink.
    JavaReceiverInputDStream<SparkFlumeEvent> lines =
        FlumeUtils.createPollingStream(jsc, "master1", 9999);
    JavaDStream<String> words =
        lines.flatMap(
            new FlatMapFunction<SparkFlumeEvent, String>() {
              private static final long serialVersionUID = 1L;

              @Override
              public Iterable<String> call(SparkFlumeEvent event) throws Exception {
                String line = new String(event.event().getBody().array());
                return Arrays.asList(line.split(" "));
              }
            });

    JavaPairDStream<String, Integer> pairs =
        words.mapToPair(
            new PairFunction<String, String, Integer>() {

              private static final long serialVersionUID = 1L;

              @Override
              public Tuple2<String, Integer> call(String word) throws Exception {
                return new Tuple2<String, Integer>(word, 1);
              }
            });

    JavaPairDStream<String, Integer> wordsCount =
        pairs.reduceByKey(
            // Sum the values for each key (reduced both locally and at the reducer level).
            new Function2<Integer, Integer, Integer>() {

              private static final long serialVersionUID = 1L;

              @Override
              public Integer call(Integer v1, Integer v2) throws Exception {
                return v1 + v2;
              }
            });

    wordsCount.print();

    jsc.start();

    jsc.awaitTermination();
    jsc.close();
  }
 public static void main(String[] args) throws IOException {
   SparkConf conf = new SparkConf().setAppName("faebookStream");
   JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(15));
   JavaDStream<String> stream =
       jssc.textFileStream("/Users/sboynenpalli/Desktop/shashankOfficeMackbook/sparkinputfolder");
   stream.print(10);
   ProcessVideoStreamData processData = new ProcessVideoStreamData();
   processData.processData(stream);
   jssc.start();
   jssc.awaitTermination();
 }
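
  // ProcessVideoStreamData is referenced above but its implementation is not shown, so this is a
  // hypothetical placeholder sketch: it assumes the class only needs to register an output
  // operation on the DStream so that each batch has work to execute.
  public static class ProcessVideoStreamData implements java.io.Serializable {
    public void processData(JavaDStream<String> stream) {
      // Placeholder behavior: count the records in each batch and print the count.
      stream.count().print();
    }
  }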
  public static void main(String[] args) throws Exception {
    if (args.length < 2) {
      System.err.println("Usage: JavaNetworkWordCount <hostname> <port>");
      System.exit(1);
    }

    StreamingExamples.setStreamingLogLevels();

    // Create the context with a 1 second batch size
    SparkConf sparkConf = new SparkConf().setAppName("JavaSqlNetworkWordCount");
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));

    // Create a JavaReceiverInputDStream on the target ip:port and count the
    // words in the input stream of \n-delimited text (e.g. generated by 'nc').
    // Note that skipping replication in the storage level is acceptable only when running
    // locally; replication is needed in a distributed setup for fault tolerance.
    JavaReceiverInputDStream<String> lines =
        ssc.socketTextStream(args[0], Integer.parseInt(args[1]), StorageLevels.MEMORY_AND_DISK_SER);
    JavaDStream<String> words =
        lines.flatMap(
            new FlatMapFunction<String, String>() {
              @Override
              public Iterator<String> call(String x) {
                return Arrays.asList(SPACE.split(x)).iterator();
              }
            });

    // Convert RDDs of the words DStream to DataFrame and run SQL query
    words.foreachRDD(
        new VoidFunction2<JavaRDD<String>, Time>() {
          @Override
          public void call(JavaRDD<String> rdd, Time time) {
            SparkSession spark = JavaSparkSessionSingleton.getInstance(rdd.context().getConf());

            // Convert JavaRDD[String] to JavaRDD[bean class] to DataFrame
            JavaRDD<JavaRecord> rowRDD =
                rdd.map(
                    new Function<String, JavaRecord>() {
                      @Override
                      public JavaRecord call(String word) {
                        JavaRecord record = new JavaRecord();
                        record.setWord(word);
                        return record;
                      }
                    });
            Dataset<Row> wordsDataFrame = spark.createDataFrame(rowRDD, JavaRecord.class);

            // Register as table
            wordsDataFrame.createOrReplaceTempView("words");

            // Do word count on table using SQL and print it
            Dataset<Row> wordCountsDataFrame =
                spark.sql("select word, count(*) as total from words group by word");
            System.out.println("========= " + time + "=========");
            wordCountsDataFrame.show();
          }
        });

    ssc.start();
    ssc.awaitTermination();
  }
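
  // Helpers referenced by the snippet above (SPACE, JavaRecord, JavaSparkSessionSingleton) but not
  // shown there. These follow the pattern used in Spark's official JavaSqlNetworkWordCount
  // example and should be read as a sketch rather than the snippet's exact source; SPACE needs
  // java.util.regex.Pattern.
  private static final Pattern SPACE = Pattern.compile(" ");

  /** Java bean used to convert each word into a DataFrame row. */
  public static class JavaRecord implements java.io.Serializable {
    private String word;

    public String getWord() {
      return word;
    }

    public void setWord(String word) {
      this.word = word;
    }
  }

  /** Lazily instantiated singleton instance of SparkSession. */
  static class JavaSparkSessionSingleton {
    private static transient SparkSession instance = null;

    static SparkSession getInstance(SparkConf sparkConf) {
      if (instance == null) {
        instance = SparkSession.builder().config(sparkConf).getOrCreate();
      }
      return instance;
    }
  }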
  public void run() {

    System.setProperty("spark.hadoop.dfs.replication", "2");

    Logger.getLogger("org").setLevel(Level.OFF);
    Logger.getLogger("akka").setLevel(Level.OFF);

    SparkConf conf = new SparkConf().setAppName("WindowingKafkaWordCountWithFaultTolerance");
    conf.set("spark.master", PropertiesStack.getProperty("spark.master"));
    conf.set("spark.executor.memory", PropertiesStack.getProperty("spark.executor.memory"));
    conf.set("spark.driver.memory", PropertiesStack.getProperty("spark.driver.memory"));
    conf.set(
        "spark.driver.maxResultSize", PropertiesStack.getProperty("spark.driver.maxResultSize"));
    // .setAppName("WindowingKafkaWordCountWithoutFaultTolerance");
    JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(10));

    HashSet<String> topicsSet = new HashSet<String>(Arrays.asList(PropertiesStack.getKafkaTopic()));

    HashMap<String, String> kafkaParams = new HashMap<String, String>();
    kafkaParams.put("metadata.broker.list", PropertiesStack.getKafkaBootstrapServers());
    kafkaParams.put("zookeeper.connect", PropertiesStack.getZookeeperConnect());
    kafkaParams.put("auto.offset.reset", "smallest");
    kafkaParams.put("group.id", PropertiesStack.getKafkaGroupId());
    kafkaParams.put("auto.commit.enable", "false");

    Map<String, Integer> topicMap = new HashMap<String, Integer>();
    topicMap.put(PropertiesStack.getKafkaTopic(), 1);
    //		Map<kafka.common.TopicAndPartition, java.lang.Long> fromOffsets = new HashMap<>();
    //		fromOffsets.put(new TopicAndPartition(PropertiesStack.getKafkaTopic(),
    //				1), 1000L);
    // Create direct kafka stream with brokers and topics
    //		JavaInputDStream<String> messages = KafkaUtils
    //				.createDirectStream(
    //						jssc,
    //						String.class,
    //						String.class,
    //						StringDecoder.class,
    //						StringDecoder.class,
    //						String.class,
    //						kafkaParams,
    //						fromOffsets,
    //						new Function<kafka.message.MessageAndMetadata<String, String>, String>() {
    //							@Override
    //							public String call(
    //									MessageAndMetadata<String, String> v1)
    //									throws Exception {
    //								return v1.message();
    //							}
    //						});
    JavaPairInputDStream<String, String> messages =
        KafkaUtils.createDirectStream(
            jssc,
            String.class,
            String.class,
            StringDecoder.class,
            StringDecoder.class,
            kafkaParams,
            topicsSet);
    messages.count().print();
    // Alternative (receiver-based): KafkaUtils.createStream(jssc, PropertiesStack.getZookeeperConnect(),
    //     PropertiesStack.getKafkaGroupId(), topicMap);

    // Start the computation
    jssc.start();
    jssc.awaitTermination();
  }
  public static void main(String[] args) throws Exception {
    if (args.length < 2) {
      System.err.println(
          "Usage: JavaDirectKafkaWordCount <brokers> <topics>\n"
              + "  <brokers> is a list of one or more Kafka brokers\n"
              + "  <topics> is a list of one or more kafka topics to consume from\n\n");
      System.exit(1);
    }

    String brokers = args[0];
    String topics = args[1];

    // Create context with a 2 seconds batch interval
    SparkConf sparkConf = new SparkConf().setAppName("JavaDirectKafkaWordCount");
    JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(2));

    Set<String> topicsSet = new HashSet<>(Arrays.asList(topics.split(",")));
    Map<String, String> kafkaParams = new HashMap<>();
    kafkaParams.put("metadata.broker.list", brokers);

    // Create direct kafka stream with brokers and topics
    JavaPairInputDStream<String, String> messages =
        KafkaUtils.createDirectStream(
            jssc,
            String.class,
            String.class,
            StringDecoder.class,
            StringDecoder.class,
            kafkaParams,
            topicsSet);

    // Get the lines, split them into words, count the words and print
    JavaDStream<String> lines =
        messages.map(
            new Function<Tuple2<String, String>, String>() {
              @Override
              public String call(Tuple2<String, String> tuple2) {
                return tuple2._2();
              }
            });

    JavaDStream<String> words =
        lines.flatMap(
            new FlatMapFunction<String, String>() {
              @Override
              public Iterable<String> call(String x) {
                return Arrays.asList(SPACE.split(x));
              }
            });
    JavaPairDStream<String, Integer> wordCounts =
        words
            .mapToPair(
                new PairFunction<String, String, Integer>() {
                  @Override
                  public Tuple2<String, Integer> call(String s) {
                    return new Tuple2<>(s, 1);
                  }
                })
            .reduceByKey(
                new Function2<Integer, Integer, Integer>() {
                  @Override
                  public Integer call(Integer i1, Integer i2) {
                    return i1 + i2;
                  }
                });
    wordCounts.print();

    // Start the computation
    jssc.start();
    jssc.awaitTermination();
  }
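
  // The snippet above uses the Kafka 0.8 direct API (spark-streaming-kafka-0-8), which is
  // deprecated in newer Spark releases. Below is a minimal sketch of the equivalent subscription
  // with the spark-streaming-kafka-0-10 integration; the method name and the group id are
  // illustrative assumptions, and KafkaUtils, LocationStrategies, ConsumerStrategies here come
  // from org.apache.spark.streaming.kafka010.
  private static JavaInputDStream<ConsumerRecord<String, String>> createKafka010Stream(
      JavaStreamingContext jssc, String brokers, Set<String> topicsSet) {
    Map<String, Object> kafkaParams = new HashMap<>();
    kafkaParams.put("bootstrap.servers", brokers);
    kafkaParams.put("key.deserializer", StringDeserializer.class);
    kafkaParams.put("value.deserializer", StringDeserializer.class);
    kafkaParams.put("group.id", "word-count-example"); // assumed consumer group id
    kafkaParams.put("auto.offset.reset", "latest");
    kafkaParams.put("enable.auto.commit", false);
    return KafkaUtils.createDirectStream(
        jssc,
        LocationStrategies.PreferConsistent(),
        ConsumerStrategies.<String, String>Subscribe(topicsSet, kafkaParams));
  }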
  public static void main(String[] args) {
    Logger logger = Logger.getRootLogger();
    logger.setLevel(Level.OFF);

    String consumerKey = "JqQ1lAWg90PVD9U8XoDWedCm8";
    String consumerSecret = "QaUe7V9HuYQvC031MVqpUuuP2OjieI0BBDEHLpFOR221zjQ0xp";
    String accessToken = "3299869044-UVd8CwTfnDgcGFGPro2yGXKWhArKtXRxC6iekmH";
    String accessTokenSecret = "3XtGQi1naI1V9wCVs2aQgEeVWr65vXDczOwGvqa3iGlEG";

    System.setProperty("twitter4j.oauth.consumerKey", consumerKey);
    System.setProperty("twitter4j.oauth.consumerSecret", consumerSecret);
    System.setProperty("twitter4j.oauth.accessToken", accessToken);
    System.setProperty("twitter4j.oauth.accessTokenSecret", accessTokenSecret);

    String[] filters = {"bullying", "bullied", "bulling", "bullyed", "bully", "teased"};

    SparkConf sparkConf = new SparkConf().setAppName("bullyhunter");
    System.out.println("Started bullyhunter...");
    JavaStreamingContext sc = new JavaStreamingContext(sparkConf, Durations.seconds(2));
    JavaReceiverInputDStream<Status> stream = TwitterUtils.createStream(sc, filters);

    JavaDStream<String> text =
        stream.map(
            new Function<Status, String>() {
              public String call(Status status) {
                //                        String msg = status.getText();
                //                        String filtered_msg = Enrichment.filter(msg);
                //                        if (filtered_msg == null) {
                //                            return null;
                //                        }
                //                        TweetRecord tr = new TweetRecord();
                //                        tr.setMsg(filtered_msg);
                //                        //tr.setGeo(status.getGeoLocation().getLatitude());
                //                        String fullName = status.getPlace().getFullName();
                //                        if (fullName == null)
                //                            return null;
                //                        String[] fields = fullName.split(DELIMITER);
                //                        tr.setCity(fullName.split());
                String msg = status.getText();
                double ind = Classification.classifyTweet(msg);
                if (ind > 0) {
                  return status.getText();
                } else {
                  return null;
                }
              }
            });

    //        text = text.filter(new Function<String, Boolean>() {
    //            public Boolean call(String msg) {
    //                boolean containKeyword = false;
    //                String lowerCase = msg.toLowerCase();
    //                for (String k : keywords)
    //                    if (lowerCase.contains(k)) {
    //                        containKeyword = true;
    //                        break;
    //                    }
    //                if (containKeyword == true && lowerCase.contains("bull")
    //                        && !lowerCase.contains("RT")) {
    //                    return true;
    //                }
    //                return false;
    //            }
    //
    //        });
    text =
        text.filter(
            new Function<String, Boolean>() {
              public Boolean call(String msg) {
                // Drop tweets the classifier mapped to null (not flagged as bullying).
                return msg != null;
              }
            });

    text.print();
    sc.start();
    sc.awaitTermination();
  }