@Override
  public SparkWorkloadOperator<WithTime<String>> stringStreamFromKafkaWithTime(
      String zkConStr,
      String kafkaServers,
      String group,
      String topics,
      String offset,
      String componentId,
      int parallelism) {
    HashSet<String> topicsSet = new HashSet<>(Arrays.asList(topics.split(",")));
    HashMap<String, String> kafkaParams = new HashMap<>();
    kafkaParams.put("metadata.broker.list", kafkaServers);
    kafkaParams.put("auto.offset.reset", offset);
    kafkaParams.put("zookeeper.connect", zkConStr);
    kafkaParams.put("group.id", group);

    // Create direct kafka stream with brokers and topics
    JavaPairInputDStream<String, String> messages =
        KafkaUtils.createDirectStream(
            jssc,
            String.class,
            String.class,
            StringDecoder.class,
            StringDecoder.class,
            kafkaParams,
            topicsSet);

    JavaDStream<WithTime<String>> lines = messages.map(mapFunctionWithTime);

    return new SparkWorkloadOperator<>(lines, parallelism);
  }
  public static void main(String[] args) {
    if (args.length < 4) {
      System.err.println("Usage: PDCKafkaConsumer <zkQuorum> <group> <topics> <numThreads>");
      System.exit(1);
    }

    String zkQuorum = args[0];
    String kfGrp = args[1];
    String[] topics = args[2].split(",");
    int numThreads = Integer.valueOf(args[3]);

    Map<String, Integer> topicMap = new HashMap<String, Integer>();
    for (String topic : topics) {
      topicMap.put(topic, numThreads);
    }

    SparkConf conf = new SparkConf().setAppName("PDCKafkaConsumer");
    conf.set("spark.ui.port", "4040");
    JavaStreamingContext ctx = new JavaStreamingContext(conf, new Duration(10000));
    JavaPairReceiverInputDStream<String, String> kfStream =
        KafkaUtils.createStream(ctx, zkQuorum, kfGrp, topicMap);
    kfStream.saveAsHadoopFiles(
        "/phasor/pmu/pdc", "in", Text.class, Text.class, TextOutputFormat.class);

    ctx.start();
    ctx.awaitTermination();
  }
  public static void main(String[] args) {
    // Create a Spark Context.
    SparkConf conf = new SparkConf().setAppName("Activity").set("spark.eventLog.enabled", "true");
    ;
    JavaSparkContext sc = new JavaSparkContext(conf);
    JavaStreamingContext jssc = new JavaStreamingContext(sc, STREAM_INTERVAL);
    String TOPIC = "activityevent";
    String zkQuorum = "localhost:2181";
    String group = "1";
    Map<String, Integer> topicMap = new HashMap<String, Integer>();
    topicMap.put(TOPIC, 1);

    JavaPairReceiverInputDStream<String, String> messages =
        KafkaUtils.createStream(jssc, zkQuorum, group, topicMap);
    // messages.print();
    JavaDStream<String> activitydatastream =
        messages.map(
            new Function<Tuple2<String, String>, String>() {
              @Override
              public String call(Tuple2<String, String> tuple2) {
                return tuple2._2();
              }
            });

    final Long teamWindowDurationMs = Durations.minutes(1).milliseconds();
    JavaDStream<Activity> ActivityEntryDStream = activitydatastream.map(Activity::parseFromLine);
    JavaPairDStream<WithTimestamp<String>, Double> ActivityWindowDStream =
        ActivityEntryDStream.mapToPair(
                windows ->
                    new Tuple2<>(
                        WithTimestamp.create(
                            windows.getActivity(),
                            // Apply Fixed Window by rounding the timestamp down to the nearest
                            // multiple of the window size
                            (convertMillsecs(windows.getTimestamp()) / teamWindowDurationMs)
                                * teamWindowDurationMs),
                        windows.getXaxis()))
            .reduceByKey(SUM_REDUCER);

    ActivityWindowDStream.print();

    jssc.start();
    jssc.awaitTermination();
    // jssc.close();
    sc.stop();
    sc.close();
  }
  public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("kafka-sandbox").setMaster("local[*]");
    JavaSparkContext sc = new JavaSparkContext(conf);
    JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(2000));

    Set<String> topics = Collections.singleton("mytopic");
    Map<String, String> kafkaParams = new HashMap<>();
    kafkaParams.put("metadata.broker.list", "sandbox.hortonworks.com:6667");

    JavaPairInputDStream<String, byte[]> directKafkaStream =
        KafkaUtils.createDirectStream(
            ssc,
            String.class,
            byte[].class,
            StringDecoder.class,
            DefaultDecoder.class,
            kafkaParams,
            topics);

    directKafkaStream.foreachRDD(
        rdd -> {
          rdd.foreach(
              avroRecord -> {
                Schema.Parser parser = new Schema.Parser();
                Schema schema = parser.parse(AvroVulabProducer.USER_SCHEMA);
                Injection<GenericRecord, byte[]> recordInjection =
                    GenericAvroCodecs.toBinary(schema);
                GenericRecord record = recordInjection.invert(avroRecord._2).get();

                System.out.println(
                    "str1= "
                        + record.get("str1")
                        + ", str2= "
                        + record.get("str2")
                        + ", int1="
                        + record.get("int1"));
              });
        });

    ssc.start();
    ssc.awaitTermination();
  }
Exemplo n.º 5
0
  public void run() {

    System.setProperty("spark.hadoop.dfs.replication", "2");

    Logger.getLogger("org").setLevel(Level.OFF);
    Logger.getLogger("akka").setLevel(Level.OFF);

    SparkConf conf = new SparkConf().setAppName("WindowingKafkaWordCountWithFaultTolerance");
    conf.set("spark.master", PropertiesStack.getProperty("spark.master"));
    conf.set("spark.executor.memory", PropertiesStack.getProperty("spark.executor.memory"));
    conf.set("spark.driver.memory", PropertiesStack.getProperty("spark.driver.memory"));
    conf.set(
        "spark.driver.maxResultSize", PropertiesStack.getProperty("spark.driver.maxResultSize"));
    // .setAppName("WindowingKafkaWordCountWithoutFaultTolerance");
    JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(10));

    HashSet<String> topicsSet = new HashSet<String>(Arrays.asList(PropertiesStack.getKafkaTopic()));

    HashMap<String, String> kafkaParams = new HashMap<String, String>();
    kafkaParams.put("metadata.broker.list", PropertiesStack.getKafkaBootstrapServers());
    kafkaParams.put("zookeeper.connect", PropertiesStack.getZookeeperConnect());
    kafkaParams.put("auto.offset.reset", "smallest");
    kafkaParams.put("group.id", PropertiesStack.getKafkaGroupId());
    kafkaParams.put("auto.commit.enable", "false");

    Map<String, Integer> topicMap = new HashMap<String, Integer>();
    topicMap.put(PropertiesStack.getKafkaTopic(), 1);
    //		Map<kafka.common.TopicAndPartition, java.lang.Long> fromOffsets = new HashMap<>();
    //		fromOffsets.put(new TopicAndPartition(PropertiesStack.getKafkaTopic(),
    //				1), 1000L);
    // Create direct kafka stream with brokers and topics
    //		JavaInputDStream<String> messages = KafkaUtils
    //				.createDirectStream(
    //						jssc,
    //						String.class,
    //						String.class,
    //						StringDecoder.class,
    //						StringDecoder.class,
    //						String.class,
    //						kafkaParams,
    //						fromOffsets,
    //						new Function<kafka.message.MessageAndMetadata<String, String>, String>() {
    //							@Override
    //							public String call(
    //									MessageAndMetadata<String, String> v1)
    //									throws Exception {
    //								return v1.message();
    //							}
    //						});
    JavaPairInputDStream<String, String> messages =
        KafkaUtils.createDirectStream(
            jssc,
            String.class,
            String.class,
            StringDecoder.class,
            StringDecoder.class,
            kafkaParams,
            topicsSet);
    messages.count().print();
    // .createStream(jssc, PropertiesStack.getZookeeperConnect(),
    // PropertiesStack.getKafkaGroupId(), topicMap);

    // Start the computation
    jssc.start();
    jssc.awaitTermination();
  }
  public static void main(String[] args) throws Exception {
    if (args.length < 2) {
      System.err.println(
          "Usage: JavaDirectKafkaWordCount <brokers> <topics>\n"
              + "  <brokers> is a list of one or more Kafka brokers\n"
              + "  <topics> is a list of one or more kafka topics to consume from\n\n");
      System.exit(1);
    }

    String brokers = args[0];
    String topics = args[1];

    // Create context with a 2 seconds batch interval
    SparkConf sparkConf = new SparkConf().setAppName("JavaDirectKafkaWordCount");
    JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(2));

    Set<String> topicsSet = new HashSet<>(Arrays.asList(topics.split(",")));
    Map<String, String> kafkaParams = new HashMap<>();
    kafkaParams.put("metadata.broker.list", brokers);

    // Create direct kafka stream with brokers and topics
    JavaPairInputDStream<String, String> messages =
        KafkaUtils.createDirectStream(
            jssc,
            String.class,
            String.class,
            StringDecoder.class,
            StringDecoder.class,
            kafkaParams,
            topicsSet);

    // Get the lines, split them into words, count the words and print
    JavaDStream<String> lines =
        messages.map(
            new Function<Tuple2<String, String>, String>() {
              @Override
              public String call(Tuple2<String, String> tuple2) {
                return tuple2._2();
              }
            });

    JavaDStream<String> words =
        lines.flatMap(
            new FlatMapFunction<String, String>() {
              @Override
              public Iterable<String> call(String x) {
                return Arrays.asList(SPACE.split(x));
              }
            });
    JavaPairDStream<String, Integer> wordCounts =
        words
            .mapToPair(
                new PairFunction<String, String, Integer>() {
                  @Override
                  public Tuple2<String, Integer> call(String s) {
                    return new Tuple2<>(s, 1);
                  }
                })
            .reduceByKey(
                new Function2<Integer, Integer, Integer>() {
                  @Override
                  public Integer call(Integer i1, Integer i2) {
                    return i1 + i2;
                  }
                });
    wordCounts.print();

    // Start the computation
    jssc.start();
    jssc.awaitTermination();
  }
  public static void main(String[] args) {
    if (args.length < 4) {
      System.err.println("Usage: JavaKafkaWordCount <zkQuorum> <group> <topics> <numThreads>");
      System.exit(1);
    }

    StreamingExamples.setStreamingLogLevels();
    // SparkConf sparkConf = new SparkConf().setAppName("JavaKafkaWordCount");
    // sparkConf.setMaster("spark://60f81dc6426c:7077");
    // SparkConf sparkConf = new
    // SparkConf().setAppName("JavaKafkaWordCount").setMaster("spark://60f81dc6426c:7077");

    // Create the context with a 1 second batch size
    JavaStreamingContext jssc =
        new JavaStreamingContext("local[4]", "JavaKafkaWordCount", new Duration(2000));

    int numThreads = Integer.parseInt(args[3]);
    Logger.getLogger("org").setLevel(Level.OFF);
    Logger.getLogger("akka").setLevel(Level.OFF);
    Map<String, Integer> topicMap = new HashMap<String, Integer>();
    String[] topics = args[2].split(",");
    for (String topic : topics) {
      topicMap.put(topic, numThreads);
    }
    /* for(String t: topic)
    {
        topicMap.put(t, new Integer(3));
    }*/
    // NotSerializable notSerializable = new NotSerializable();
    // JavaRDD<String> rdd = sc.textFile("/tmp/myfile");

    // rdd.map(s -> notSerializable.doSomething(s)).collect();
    JavaPairReceiverInputDStream<String, String> messages =
        KafkaUtils.createStream(jssc, args[0], args[1], topicMap);
    // JavaPairReceiverInputDStream<String, String> kafkaStream =
    //   KafkaUtils.createStream(jssc, "localhost:2181","streamingContext",
    //		  topicMap);

    System.out.println("Connection !!!!");
    /*JavaDStream<String> data = messages.map(new Function<Tuple2<String, String>, String>()
    {
        public String call(Tuple2<String, String> message)
        {
            return message._2();
        }
    }
    );*/

    JavaDStream<String> lines =
        messages.map(
            new Function<Tuple2<String, String>, String>() {
              @Override
              public String call(Tuple2<String, String> tuple2) {
                return tuple2._2();
              }
            });

    JavaDStream<String> words =
        lines.flatMap(
            new FlatMapFunction<String, String>() {
              @Override
              public Iterable<String> call(String x) {
                return Lists.newArrayList(SPACE.split(x));
              }
            });

    JavaPairDStream<String, Integer> wordCounts =
        words
            .mapToPair(
                new PairFunction<String, String, Integer>() {
                  @Override
                  public Tuple2<String, Integer> call(String s) {
                    return new Tuple2<String, Integer>(s, 1);
                  }
                })
            .reduceByKey(
                new Function2<Integer, Integer, Integer>() {
                  @Override
                  public Integer call(Integer i1, Integer i2) {
                    return i1 + i2;
                  }
                });

    wordCounts.print();
    jssc.start();
    jssc.awaitTermination();
  }