Code Example #1
  public static void main(String[] args) {
    if (args.length < 4) {
      System.err.println("Usage: PDCKafkaConsumer <zkQuorum> <group> <topics> <numThreads>");
      System.exit(1);
    }

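    // Parse the ZooKeeper quorum, consumer group, topic list, and thread count.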
    String zkQuorum = args[0];
    String kfGrp = args[1];
    String[] topics = args[2].split(",");
    int numThreads = Integer.parseInt(args[3]);

    Map<String, Integer> topicMap = new HashMap<>();
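    // Give every topic the same number of consumer threads.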
    for (String topic : topics) {
      topicMap.put(topic, numThreads);
    }

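    // Configure Spark; the streaming context uses a 10-second batch interval.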
    SparkConf conf = new SparkConf().setAppName("PDCKafkaConsumer");
    conf.set("spark.ui.port", "4040");
    JavaStreamingContext ctx = new JavaStreamingContext(conf, new Duration(10000));
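    // Open a receiver-based Kafka stream through the ZooKeeper quorum.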
    JavaPairReceiverInputDStream<String, String> kfStream =
        KafkaUtils.createStream(ctx, zkQuorum, kfGrp, topicMap);
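
    // Persist each batch to HDFS as text files under /phasor/pmu/pdc.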
    kfStream.saveAsHadoopFiles(
        "/phasor/pmu/pdc", "in", Text.class, Text.class, TextOutputFormat.class);

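    // Start the job and block until the streaming context terminates.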
    ctx.start();
    ctx.awaitTermination();
  }
Code Example #2
  public static void main(String[] args) {
    // Create a Spark Context.
    SparkConf conf = new SparkConf().setAppName("Activity").set("spark.eventLog.enabled", "true");
    ;
    JavaSparkContext sc = new JavaSparkContext(conf);
    JavaStreamingContext jssc = new JavaStreamingContext(sc, STREAM_INTERVAL);
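    // Kafka connection settings: topic, ZooKeeper quorum, and consumer group.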
    String TOPIC = "activityevent";
    String zkQuorum = "localhost:2181";
    String group = "1";
    Map<String, Integer> topicMap = new HashMap<String, Integer>();
    topicMap.put(TOPIC, 1);

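    // Create a receiver-based Kafka stream of (key, message) pairs.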
    JavaPairReceiverInputDStream<String, String> messages =
        KafkaUtils.createStream(jssc, zkQuorum, group, topicMap);
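    // Keep only the message payload from each (key, message) pair.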
    JavaDStream<String> activityDataStream =
        messages.map(
            new Function<Tuple2<String, String>, String>() {
              @Override
              public String call(Tuple2<String, String> tuple2) {
                return tuple2._2();
              }
            });

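    // Sum x-axis values into fixed one-minute windows keyed by activity and window start.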
    final long teamWindowDurationMs = Durations.minutes(1).milliseconds();
    JavaDStream<Activity> activityEntryDStream = activityDataStream.map(Activity::parseFromLine);
    JavaPairDStream<WithTimestamp<String>, Double> activityWindowDStream =
        activityEntryDStream.mapToPair(
                activity ->
                    new Tuple2<>(
                        WithTimestamp.create(
                            activity.getActivity(),
                            // Apply a fixed (tumbling) window by rounding the timestamp down to
                            // the nearest multiple of the window size.
                            (convertMillsecs(activity.getTimestamp()) / teamWindowDurationMs)
                                * teamWindowDurationMs),
                        activity.getXaxis()))
            .reduceByKey(SUM_REDUCER);

    activityWindowDStream.print();

    jssc.start();
    jssc.awaitTermination();
    // Release the underlying SparkContext once the streaming job terminates.
    sc.stop();
    sc.close();
  }
Code Example #3
  public static void main(String[] args) throws IOException, ParseException {
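    // The helper wraps streaming-context creation and Kafka receiver setup.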
    KafkaConsumerHelper helper = new KafkaConsumerHelper(args);
    JavaStreamingContext jssc = null;
    try {
      jssc = helper.createJavaStreamingContext(APP_NAME);
      JavaPairReceiverInputDStream<String, String> rs = helper.createReceiverStream(jssc);

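      // Point Hadoop at the local single-node HDFS instance.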
      Configuration conf = new Configuration();
      conf.set("fs.defaultFS", "hdfs://localhost:9000");
      conf.set("dfs.replication", "1");
      conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
      conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

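      // Write each batch to HDFS using the new Hadoop output API.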
      rs.saveAsNewAPIHadoopFiles(
          "prefix", "txt", Text.class, Text.class, TextOutputFormat.class, conf);

      Utils.consolog("start...awaitTermination...    (type 'Q' to finish)");
      helper.startAndWait(jssc);

    } catch (Exception e) {
      e.printStackTrace();
    } finally {
      if (jssc != null) {
        Utils.consolog("stopping...closing...");
        helper.stopAndClose(jssc);

      }
      Utils.consolog("~~ DONE ~~");
    }
  }
Code Example #4
  /**
   * We receive <key, message> pairs where the key is a simple UUID and the message has the format
   * <producer-id>_<msg-value>. Ignore the keys, split each message on "_", and build a new set of
   * <producer-id, msg-value> pairs so the messages can be counted per producer.
   */
  protected static void countMessagesPerProducer(
      JavaPairReceiverInputDStream<String, String> messages) {
    messages.foreachRDD(
        new VoidFunction2<JavaPairRDD<String, String>, Time>() {
          @Override
          public void call(JavaPairRDD<String, String> pairs, Time time) throws Exception {
            // count() triggers a Spark job, so compute it once and reuse the result.
            long count = pairs.count();
            if (count > 0) {
              Utils.consolog("outer ==> pairs.count=[" + count + "] time=[" + time + "]");

              JavaPairRDD<String, String> messageAndProducerId =
                  pairs.mapToPair(
                      new PairFunction<Tuple2<String, String>, String, String>() {
                        @Override
                        public Tuple2<String, String> call(Tuple2<String, String> t)
                            throws Exception {
                          String[] split = t._2().split("_");
                          String producerId = split[0];
                          String msgValue = split[1].trim();
                          return new Tuple2<>(producerId, msgValue);
                        }
                      });
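              // countByKey yields the number of messages per producer id
              // (typed as Object by the Java API of older Spark releases).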
              Map<String, Object> counters = messageAndProducerId.countByKey();
              if (!counters.isEmpty()) {
                StringBuilder buf = new StringBuilder();
                buf.append("{ ");
                ArrayList<String> sortedKeys = new ArrayList<>(counters.keySet());
                // Sort producer ids numerically rather than lexicographically.
                sortedKeys.sort(Comparator.comparingInt(Integer::parseInt));
                for (String key : sortedKeys) {
                  Object value = counters.get(key);
                  buf.append("(" + key + "," + value + "),");
                }
                buf.append(" }");
                Utils.consolog("inner ==> " + buf.toString());
              }
            }
          }
        });
  }
Code Example #5
  public static void main(String[] args) {
    if (args.length < 4) {
      System.err.println("Usage: JavaKafkaWordCount <zkQuorum> <group> <topics> <numThreads>");
      System.exit(1);
    }

    StreamingExamples.setStreamingLogLevels();

    // Create a local streaming context with 4 worker threads and a 2-second batch interval.
    JavaStreamingContext jssc =
        new JavaStreamingContext("local[4]", "JavaKafkaWordCount", new Duration(2000));

    int numThreads = Integer.parseInt(args[3]);
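    // Silence noisy Spark and Akka logging.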
    Logger.getLogger("org").setLevel(Level.OFF);
    Logger.getLogger("akka").setLevel(Level.OFF);
    Map<String, Integer> topicMap = new HashMap<>();
    String[] topics = args[2].split(",");
    for (String topic : topics) {
      topicMap.put(topic, numThreads);
    }
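
    // Create a receiver-based Kafka stream of (key, message) pairs.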
    JavaPairReceiverInputDStream<String, String> messages =
        KafkaUtils.createStream(jssc, args[0], args[1], topicMap);

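    // Keep only the message payload from each (key, message) pair.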
    JavaDStream<String> lines =
        messages.map(
            new Function<Tuple2<String, String>, String>() {
              @Override
              public String call(Tuple2<String, String> tuple2) {
                return tuple2._2();
              }
            });

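    // Split each line into words on whitespace.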
    JavaDStream<String> words =
        lines.flatMap(
            new FlatMapFunction<String, String>() {
              @Override
              public Iterable<String> call(String x) {
                return Lists.newArrayList(SPACE.split(x));
              }
            });

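    // Pair each word with 1, then sum the counts per word within each batch.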
    JavaPairDStream<String, Integer> wordCounts =
        words
            .mapToPair(
                new PairFunction<String, String, Integer>() {
                  @Override
                  public Tuple2<String, Integer> call(String s) {
                    return new Tuple2<String, Integer>(s, 1);
                  }
                })
            .reduceByKey(
                new Function2<Integer, Integer, Integer>() {
                  @Override
                  public Integer call(Integer i1, Integer i2) {
                    return i1 + i2;
                  }
                });

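    // Print a sample of the word counts for each batch, then run until terminated.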
    wordCounts.print();
    jssc.start();
    jssc.awaitTermination();
  }