@SuppressWarnings("ConstantConditions")
 JavaDStream<WindowedValue<T>> getDStream() {
   if (dStream == null) {
     WindowedValue.ValueOnlyWindowedValueCoder<T> windowCoder =
         WindowedValue.getValueOnlyCoder(coder);
      // build the queue of RDDs that the DStream will be created from
     Queue<JavaRDD<WindowedValue<T>>> rddQueue = new LinkedBlockingQueue<>();
     JavaRDD<WindowedValue<T>> lastRDD = null;
     for (Iterable<T> v : values) {
       Iterable<WindowedValue<T>> windowedValues =
           Iterables.transform(v, WindowingHelpers.<T>windowValueFunction());
       JavaRDD<WindowedValue<T>> rdd =
           jssc.sc()
               .parallelize(CoderHelpers.toByteArrays(windowedValues, windowCoder))
               .map(CoderHelpers.fromByteFunction(windowCoder));
       rddQueue.offer(rdd);
       lastRDD = rdd;
     }
      // Create the DStream from the queue, one RDD at a time, with the last RDD as the
      // default in case batches repeat (e.g. on graceful stops).
      // If the stream is empty, avoid creating a default empty RDD.
      // This is mainly used for unit tests, so there is no reason to make it configurable.
     dStream =
         lastRDD != null
             ? jssc.queueStream(rddQueue, true, lastRDD)
             : jssc.queueStream(rddQueue, true);
   }
   return dStream;
 }
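
The method above ultimately relies on Spark Streaming's queueStream API: with oneAtATime set to true, each queued RDD feeds exactly one batch, and the optional default RDD is replayed once the queue is drained. Below is a minimal, self-contained sketch of that pattern with plain Integer RDDs; the class name, local master, batch contents, and timeout are illustrative assumptions, not part of the snippet above.

import java.util.Arrays;
import java.util.LinkedList;
import java.util.Queue;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

// Illustrative sketch of the queueStream-with-default pattern (not part of the snippet above).
public class QueueStreamDefaultSketch {
  public static void main(String[] args) throws Exception {
    SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("QueueStreamDefaultSketch");
    JavaStreamingContext jssc = new JavaStreamingContext(conf, new Duration(1000));

    // Build a small queue of RDDs and remember the last one as the default.
    Queue<JavaRDD<Integer>> rddQueue = new LinkedList<>();
    JavaRDD<Integer> lastRdd = null;
    for (int batch = 0; batch < 3; batch++) {
      JavaRDD<Integer> rdd = jssc.sparkContext().parallelize(Arrays.asList(batch, batch + 1));
      rddQueue.add(rdd);
      lastRdd = rdd;
    }

    // oneAtATime = true: one queued RDD per batch interval; once the queue is drained,
    // the default RDD (here the last one offered) is replayed for every further batch.
    JavaDStream<Integer> stream = jssc.queueStream(rddQueue, true, lastRdd);
    stream.print();

    jssc.start();
    jssc.awaitTerminationOrTimeout(5000);
    jssc.stop();
  }
}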
Example #2
  public static void main(String[] args) throws Exception {
    if (args.length < 1) {
      System.err.println("Usage: JavaQueueStream <master>");
      System.exit(1);
    }

    StreamingExamples.setStreamingLogLevels();

    // Create the context
    JavaStreamingContext ssc =
        new JavaStreamingContext(
            args[0],
            "QueueStream",
            new Duration(1000),
            System.getenv("SPARK_HOME"),
            JavaStreamingContext.jarOfClass(JavaQueueStream.class));

    // Create the queue of RDDs that will be pushed into a QueueInputDStream
    Queue<JavaRDD<Integer>> rddQueue = new LinkedList<JavaRDD<Integer>>();

    // Create and push some RDDs into the queue
    List<Integer> list = Lists.newArrayList();
    for (int i = 0; i < 1000; i++) {
      list.add(i);
    }

    for (int i = 0; i < 30; i++) {
      rddQueue.add(ssc.sparkContext().parallelize(list));
    }

    // Create the QueueInputDStream and use it to do some processing
    JavaDStream<Integer> inputStream = ssc.queueStream(rddQueue);
    JavaPairDStream<Integer, Integer> mappedStream =
        inputStream.mapToPair(
            new PairFunction<Integer, Integer, Integer>() {
              @Override
              public Tuple2<Integer, Integer> call(Integer i) {
                return new Tuple2<Integer, Integer>(i % 10, 1);
              }
            });
    JavaPairDStream<Integer, Integer> reducedStream =
        mappedStream.reduceByKey(
            new Function2<Integer, Integer, Integer>() {
              @Override
              public Integer call(Integer i1, Integer i2) {
                return i1 + i2;
              }
            });

    reducedStream.print();
    ssc.start();
    ssc.awaitTermination();
  }
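
Since PairFunction and Function2 are single-abstract-method interfaces, the mapToPair/reduceByKey pipeline above can also be written with Java 8 lambdas. A minimal sketch of the equivalent transformations, assuming the same inputStream and a Java 8 toolchain:

    // Same pipeline as above, expressed with lambdas (illustrative alternative).
    JavaPairDStream<Integer, Integer> reducedStream =
        inputStream
            .mapToPair(i -> new Tuple2<>(i % 10, 1))
            .reduceByKey((i1, i2) -> i1 + i2);
    reducedStream.print();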