/**
   * Registers the performance listener, enables checkpointing on HDFS, starts the streaming
   * context and then blocks until the context terminates.
   */
  @Override
  public void Start() {
    // Location where the streaming context persists its checkpoint data.
    final String checkpointLocation = "hdfs://master:8020/usr/warehouse/wordcount/checkpoint";

    final PerformanceStreamingListener performanceListener = new PerformanceStreamingListener();
    jssc.addStreamingListener(performanceListener);
    jssc.checkpoint(checkpointLocation);

    // Kick off the computation and wait for it to finish.
    jssc.start();
    jssc.awaitTermination();
  }
  /**
   * Builds a streaming context that watches {@code input} for new text files, converts every line
   * matching the {@code log} pattern to JSON via {@code transferJson}, prints each batch and
   * writes every non-empty batch to Elasticsearch.
   *
   * @param input directory passed to {@code textFileStream}
   * @param checkpointDirectory directory passed to {@code JavaStreamingContext.checkpoint}
   * @return the configured streaming context; this method does not start it
   */
  private static JavaStreamingContext createContext(String input, String checkpointDirectory) {
    System.out.println("Creating new context");

    // Single definition of the target resource so the configuration and the save call
    // cannot drift apart.
    final String esResource = "flume/test";

    SparkConf conf =
        new SparkConf()
            .setMaster("local[2]")
            .setAppName("Stream File")
            .set("spark.driver.allowMultipleContexts", "true")
            .set("spark.serializer", KryoSerializer.class.getName())
            .set("es.index.auto.create", "true")
            .set("es.nodes", "10.26.1.134:9200")
            .set("es.resource", esResource)
            .set("es.input.json", "true");

    // 3000 ms batch interval.
    JavaStreamingContext jssc = new JavaStreamingContext(conf, new Duration(3000));
    jssc.checkpoint(checkpointDirectory);

    JavaDStream<String> jsonStr =
        jssc.textFileStream(input)
            .map(
                new Function<String, String>() {
                  @Override
                  public String call(String line) throws Exception {
                    Matcher m = log.matcher(line);
                    // NOTE(review): lines that do not match are mapped to "" and are still
                    // forwarded to Elasticsearch below - confirm whether they should be
                    // filtered out instead.
                    return m.find() ? transferJson(m) : "";
                  }
                });
    jsonStr.print();

    // foreachRDD replaces the deprecated foreach; the null check must precede isEmpty(),
    // otherwise it can never protect against a null RDD.
    jsonStr.foreachRDD(
        new Function<JavaRDD<String>, Void>() {
          @Override
          public Void call(JavaRDD<String> rdd) throws Exception {
            if (rdd != null && !rdd.isEmpty()) {
              JavaEsSpark.saveToEs(rdd, esResource);
            }
            return null;
          }
        });

    return jssc;
  }
  // Example no. 3
  // 0
  /**
   * Entry point of the voter streaming application.
   *
   * <p>Reads vote lines from a {@code Voter} receiver on {@code localhost:6789}, turns them into
   * {@code PhoneCall}s, rejects calls whose phone number has a running call count above
   * {@code Voter.MAX_VOTES} or whose contestant number is above {@code Voter.NUM_CONTESTANTS},
   * stores accepted calls in Redis on {@code localhost} (key: vote id, value: call content),
   * prints a windowed and a running (sorted) per-contestant vote count, and records the
   * per-batch throughput in memcached on {@code localhost:11211}.
   *
   * <p>The Spark master is taken from the {@code MASTER} environment variable and defaults to
   * {@code local[2]}.
   *
   * @param args {@code args[0]} batch duration, {@code args[1]} window length, {@code args[2]}
   *     window slide interval; all three are passed to {@code Duration} and the throughput
   *     calculation treats the batch duration as milliseconds
   * @throws IllegalArgumentException if fewer than three arguments are supplied
   * @throws NumberFormatException if an argument is not a valid integer
   */
  public static void main(String[] args) {
    if (args == null || args.length < 3) {
      throw new IllegalArgumentException(
          "Expected 3 arguments: <batchDurationMs> <windowSizeMs> <windowSlideMs>");
    }
    // Parse every argument exactly once, up front, so bad input fails before Spark starts.
    final long batchDurationMs = Long.parseLong(args[0]);
    final long windowSizeMs = Long.parseLong(args[1]);
    final long windowSlideMs = Long.parseLong(args[2]);

    String master = System.getenv("MASTER");
    if (master == null) {
      master = "local[2]";
    }

    SparkConf conf = new SparkConf().setAppName("Voter Application").setMaster(master);

    Logger.getLogger("org").setLevel(Level.ERROR);
    Logger.getLogger("akka").setLevel(Level.ERROR);

    JavaStreamingContext jssc = new JavaStreamingContext(conf, new Duration(batchDurationMs));

    // updateStateByKey below requires a checkpoint directory.
    jssc.checkpoint(".");

    JavaReceiverInputDStream<String> votes = jssc.receiverStream(new Voter("localhost", 6789));

    // Transform the text line stream into a PhoneCall stream.
    JavaDStream<PhoneCall> phoneCalls =
        votes.map(
            new Function<String, PhoneCall>() {
              @Override
              public PhoneCall call(String line) {
                return getPhoneCall(line);
              }
            });

    // Output 1: number of raw vote lines per batch.
    votes.count().print();

    // Adds the counts of the current batch to the previous running total of a key.
    Function2<List<Integer>, Optional<Integer>, Optional<Integer>> updateFunction =
        new Function2<List<Integer>, Optional<Integer>, Optional<Integer>>() {
          @Override
          public Optional<Integer> call(List<Integer> values, Optional<Integer> state) {
            int total = state.or(0);
            for (Integer value : values) {
              total += value;
            }
            return Optional.of(total);
          }
        };

    // (phoneNumber, 1) for every incoming call.
    JavaPairDStream<Long, Integer> calls =
        phoneCalls.mapToPair(
            new PairFunction<PhoneCall, Long, Integer>() {
              @Override
              public Tuple2<Long, Integer> call(PhoneCall phoneCall) {
                return new Tuple2<Long, Integer>(phoneCall.phoneNumber, 1);
              }
            });

    // Running number of calls per phone number.
    final JavaPairDStream<Long, Integer> callNumberCounts = calls.updateStateByKey(updateFunction);

    // Phone numbers that are still allowed to vote.
    JavaPairDStream<Long, Integer> allowedCalls =
        callNumberCounts.filter(
            new Function<Tuple2<Long, Integer>, Boolean>() {
              @Override
              public Boolean call(Tuple2<Long, Integer> phoneAndCount) throws Exception {
                return phoneAndCount._2() <= Voter.MAX_VOTES;
              }
            });

    // Calls that name an existing contestant.
    JavaDStream<PhoneCall> validContestantPhoneCalls =
        phoneCalls.filter(
            new Function<PhoneCall, Boolean>() {
              @Override
              public Boolean call(PhoneCall phoneCall) {
                return phoneCall.contestantNumber <= Voter.NUM_CONTESTANTS;
              }
            });

    JavaPairDStream<Long, PhoneCall> callsByPhoneNumber =
        validContestantPhoneCalls.mapToPair(
            new PairFunction<PhoneCall, Long, PhoneCall>() {
              @Override
              public Tuple2<Long, PhoneCall> call(PhoneCall phoneCall) {
                return new Tuple2<Long, PhoneCall>(phoneCall.phoneNumber, phoneCall);
              }
            });

    // Keep only calls whose phone number is still allowed to vote.
    JavaPairDStream<Long, Tuple2<PhoneCall, Integer>> validatePhoneCalls =
        callsByPhoneNumber.join(allowedCalls);

    // Drop the join bookkeeping and keep the PhoneCall itself.
    JavaDStream<PhoneCall> validateCalls =
        validatePhoneCalls.map(
            new Function<Tuple2<Long, Tuple2<PhoneCall, Integer>>, PhoneCall>() {
              @Override
              public PhoneCall call(Tuple2<Long, Tuple2<PhoneCall, Integer>> joined)
                  throws Exception {
                return joined._2()._1();
              }
            });

    // Output 2: save all accepted votes in Redis.
    validateCalls.foreachRDD(
        new Function<JavaRDD<PhoneCall>, Void>() {
          @Override
          public Void call(JavaRDD<PhoneCall> rdd) throws Exception {
            // One pool and one connection per partition instead of per record; both are
            // released even when a write fails.
            rdd.foreachPartition(
                new VoidFunction<java.util.Iterator<PhoneCall>>() {
                  @Override
                  public void call(java.util.Iterator<PhoneCall> partition) throws Exception {
                    if (!partition.hasNext()) {
                      return; // nothing to write, do not open a connection
                    }
                    JedisPool pool = new JedisPool(new JedisPoolConfig(), "localhost");
                    try {
                      Jedis jedis = pool.getResource();
                      try {
                        while (partition.hasNext()) {
                          PhoneCall phoneCall = partition.next();
                          String key = String.valueOf(phoneCall.voteId);
                          String value = phoneCall.getContent();
                          jedis.set(key, value);
                        }
                      } finally {
                        jedis.close();
                      }
                    } finally {
                      pool.destroy();
                    }
                  }
                });
            return null;
          }
        });

    // (contestantNumber, 1) for every accepted call; shared by the windowed and running counts.
    PairFunction<PhoneCall, Integer, Integer> toContestantVote =
        new PairFunction<PhoneCall, Integer, Integer>() {
          @Override
          public Tuple2<Integer, Integer> call(PhoneCall phoneCall) {
            return new Tuple2<Integer, Integer>(phoneCall.contestantNumber, 1);
          }
        };

    JavaPairDStream<Integer, Integer> contestantVotes = validateCalls.mapToPair(toContestantVote);

    // Windowed leaderboard.
    JavaDStream<PhoneCall> windowCalls =
        validateCalls.window(new Duration(windowSizeMs), new Duration(windowSlideMs));

    JavaPairDStream<Integer, Integer> windowContestantNums =
        windowCalls.mapToPair(toContestantVote);

    JavaPairDStream<Integer, Integer> windContestantCounts =
        windowContestantNums.reduceByKey(
            new Function2<Integer, Integer, Integer>() {
              @Override
              public Integer call(Integer left, Integer right) throws Exception {
                return left + right;
              }
            });

    // Output 3: votes per contestant within the window.
    windContestantCounts.print();

    // Running number of votes per contestant.
    JavaPairDStream<Integer, Integer> totalContestantCounts =
        contestantVotes.updateStateByKey(updateFunction);

    // Swap to (count, contestant) so the stream can be sorted by count.
    PairFunction<Tuple2<Integer, Integer>, Integer, Integer> swapFunction =
        new PairFunction<Tuple2<Integer, Integer>, Integer, Integer>() {
          @Override
          public Tuple2<Integer, Integer> call(Tuple2<Integer, Integer> in) {
            return in.swap();
          }
        };

    JavaPairDStream<Integer, Integer> swappedTotalContestantCounts =
        totalContestantCounts.mapToPair(swapFunction);

    JavaPairDStream<Integer, Integer> sortedTotalContestantCounts =
        swappedTotalContestantCounts.transformToPair(
            new Function<JavaPairRDD<Integer, Integer>, JavaPairRDD<Integer, Integer>>() {
              @Override
              public JavaPairRDD<Integer, Integer> call(JavaPairRDD<Integer, Integer> in)
                  throws Exception {
                return in.sortByKey(false); // descending
              }
            });

    // Output 4: running totals, highest count first.
    sortedTotalContestantCounts.print();

    // Output 5: per-batch throughput statistics.
    phoneCalls.foreachRDD(
        new Function<JavaRDD<PhoneCall>, Void>() {
          @Override
          public Void call(JavaRDD<PhoneCall> rdd) throws Exception {
            long count = rdd.count();
            Double throughput = count * 1000.0 / batchDurationMs;
            System.out.println("Current rate = " + throughput + " records / second");

            XMemcachedClientBuilder builder =
                new XMemcachedClientBuilder(AddrUtil.getAddresses("localhost:11211"));
            XMemcachedClient client = (XMemcachedClient) builder.build();
            try {
              client.setPrimitiveAsString(true);
              String key = String.valueOf(System.currentTimeMillis());
              client.add(key, 0, throughput);
            } finally {
              // A client is built for every batch, so it must be shut down every batch;
              // otherwise its connections accumulate for the lifetime of the driver.
              client.shutdown();
            }

            return null;
          }
        });

    jssc.start(); // Start the computation
    jssc.awaitTermination(); // Wait for the computation to terminate
  }