Code example #1
  private static JavaStreamingContext createContext(String input, String checkpointDirectory) {
    System.out.println("Creating new context");
    // final File outputFile = new File("/flume_recover");
    // if (outputFile.exists()) {
    // outputFile.delete();
    // }

    SparkConf conf =
        new SparkConf()
            .setMaster("local[2]")
            .setAppName("Stream File")
            .set("spark.driver.allowMultipleContexts", "true");
    conf.set("spark.serializer", KryoSerializer.class.getName());
    conf.set("es.index.auto.create", "true");
    conf.set("es.nodes", "10.26.1.134:9200");
    conf.set("es.resource", "flume/test");
    conf.set("es.input.json", "true");

    JavaStreamingContext jssc = new JavaStreamingContext(conf, new Duration(3000));
    jssc.checkpoint(checkpointDirectory);

    JavaDStream<String> textFile = jssc.textFileStream(input);
    JavaDStream<String> jsonStr =
        textFile.map(
            new Function<String, String>() {
              public String call(String arg0) throws Exception {
                Matcher m = log.matcher(arg0);
                if (m.find()) {
                  return transferJson(m);
                }
                return "";
              }
            });
    jsonStr.print();

    jsonStr.foreachRDD(
        new VoidFunction<JavaRDD<String>>() {
          public void call(JavaRDD<String> rdd) throws Exception {
            if (rdd != null && !rdd.isEmpty()) {
              JavaEsSpark.saveToEs(rdd, "flume/test");
            }
          }
        });

    return jssc;
  }
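
For context, a minimal sketch of how a factory method like this is typically combined with JavaStreamingContext.getOrCreate so the job can be restored from the checkpoint directory after a driver restart. This is an illustrative assumption, not part of the original example: it presumes the Spark 1.4+ Java API (org.apache.spark.api.java.function.Function0), and the input and checkpoint paths are placeholders.

  public static void main(String[] args) throws InterruptedException {
    final String input = "/path/to/input";                    // placeholder input directory
    final String checkpointDirectory = "/path/to/checkpoint"; // placeholder checkpoint path

    // Either restore the context from the checkpoint or build a fresh one via createContext().
    JavaStreamingContext jssc =
        JavaStreamingContext.getOrCreate(
            checkpointDirectory,
            new Function0<JavaStreamingContext>() {
              @Override
              public JavaStreamingContext call() {
                return createContext(input, checkpointDirectory);
              }
            });

    jssc.start();
    jssc.awaitTermination();
  }
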
Code example #2
  public static void main(String[] args) {
    // Create a Spark Context.
    SparkConf conf = new SparkConf().setAppName("Activity").set("spark.eventLog.enabled", "true");
    JavaSparkContext sc = new JavaSparkContext(conf);
    JavaStreamingContext jssc = new JavaStreamingContext(sc, STREAM_INTERVAL);
    String TOPIC = "activityevent";
    String zkQuorum = "localhost:2181";
    String group = "1";
    Map<String, Integer> topicMap = new HashMap<String, Integer>();
    topicMap.put(TOPIC, 1);

    JavaPairReceiverInputDStream<String, String> messages =
        KafkaUtils.createStream(jssc, zkQuorum, group, topicMap);
    // messages.print();
    JavaDStream<String> activityDataStream =
        messages.map(
            new Function<Tuple2<String, String>, String>() {
              @Override
              public String call(Tuple2<String, String> tuple2) {
                return tuple2._2();
              }
            });

    final Long teamWindowDurationMs = Durations.minutes(1).milliseconds();
    JavaDStream<Activity> activityEntryDStream = activityDataStream.map(Activity::parseFromLine);
    JavaPairDStream<WithTimestamp<String>, Double> activityWindowDStream =
        activityEntryDStream.mapToPair(
                windows ->
                    new Tuple2<>(
                        WithTimestamp.create(
                            windows.getActivity(),
                            // Apply a fixed window by rounding the timestamp down to the nearest
                            // multiple of the window size.
                            (convertMillsecs(windows.getTimestamp()) / teamWindowDurationMs)
                                * teamWindowDurationMs),
                        windows.getXaxis()))
            .reduceByKey(SUM_REDUCER);

    activityWindowDStream.print();

    jssc.start();
    jssc.awaitTermination();
    // jssc.close();
    sc.stop();
    sc.close();
  }
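
The snippet above relies on several helpers that are not shown (STREAM_INTERVAL, SUM_REDUCER, convertMillsecs, plus the project's Activity and WithTimestamp classes). Below is a minimal sketch of plausible definitions for the first three, inferred only from how they are used; treat it as an assumption rather than the project's actual code.

  // Assumed batch interval; the real value is not shown in the example.
  private static final Duration STREAM_INTERVAL = Durations.seconds(10);

  // reduceByKey over a Double-valued pair stream implies a Double sum reducer.
  private static final Function2<Double, Double, Double> SUM_REDUCER = (a, b) -> a + b;

  // Assumed to normalize an event timestamp to epoch milliseconds (here: from seconds).
  private static long convertMillsecs(long timestampInSeconds) {
    return timestampInSeconds * 1000L;
  }
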
  public JavaStreamingContext createContext() {
    CassandraConnectionContext connectionContext = connectToCassandra();
    JavaDStream<String> lines = connectionContext.getRDD();
    JavaStreamingContext streamingContext = connectionContext.getStreamingContext();
    final CassandraConnector connector = connectionContext.getCassandraConnector();

    final SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
    JavaDStream<Tuple2<String, Transaction>> transactionList =
        lines.map(
            new Function<String, Tuple2<String, Transaction>>() {
              @Override
              public Tuple2<String, Transaction> call(String line) throws Exception {
                String[] columns = line.split(",");
                String taxId = columns[0];
                String name = columns[1];
                String merchant = columns[2];
                BigDecimal amount = new BigDecimal(columns[3]);
                Date transactionDate;
                try {
                  transactionDate = format.parse(columns[4]);
                } catch (ParseException e) {
                  e.printStackTrace();
                  throw new RuntimeException(e);
                }
                String tranId = columns[5];
                System.out.println(line);
                Tuple2<String, Transaction> transaction =
                    new Tuple2<>(
                        taxId, new Transaction(name, merchant, amount, transactionDate, tranId));
                return transaction;
              }
            });
    transactionList.cache();

    final String warningsTableName = "warnings";
    try (Session session = connector.openSession()) {
      session.execute(
          String.format("DROP TABLE IF EXISTS %s.%s", getKeySpaceName(), warningsTableName));
      session.execute(
          String.format(
              "CREATE TABLE IF NOT EXISTS %s.%s (ssn text, "
                  + "id uuid, amount decimal, rule text, PRIMARY KEY(ssn, id))",
              getKeySpaceName(), warningsTableName));
    }

    // setup warning on more than certain number of transactions by user in a 60 second window,
    // every 10 seconds
    JavaPairDStream<String, BigDecimal> warnings =
        transactionList
            .window(new Duration(60000), new Duration(10000))
            .mapToPair(
                new PairFunction<Tuple2<String, Transaction>, String, BigDecimal>() {
                  @Override
                  public Tuple2<String, BigDecimal> call(Tuple2<String, Transaction> transaction)
                      throws Exception {
                    String taxId = transaction._1();
                    BigDecimal amount = transaction._2().getAmount();
                    return new Tuple2<>(taxId, amount);
                  }
                })
            .reduceByKey(
                new Function2<BigDecimal, BigDecimal, BigDecimal>() {
                  @Override
                  public BigDecimal call(
                      BigDecimal transactionAmount1, BigDecimal transactionAmount2)
                      throws Exception {
                    return transactionAmount1.add(transactionAmount2);
                  }
                })
            .filter(
                new Function<Tuple2<String, BigDecimal>, Boolean>() {
                  @Override
                  public Boolean call(Tuple2<String, BigDecimal> transactionSumByTaxId)
                      throws Exception {
                    // returns true if total is greater than 999
                    Boolean result = transactionSumByTaxId._2().compareTo(new BigDecimal(999)) > 0;
                    System.out.println(
                        "tran "
                            + transactionSumByTaxId._1()
                            + " with amount "
                            + transactionSumByTaxId._2());
                    if (result) {
                      System.out.println(
                          "warning "
                              + transactionSumByTaxId._1()
                              + " has value "
                              + transactionSumByTaxId._2()
                              + " is greater than 999");
                    }
                    return result;
                  }
                });
    JavaDStream<Warning> mappedWarnings =
        warnings.map(
            new Function<Tuple2<String, BigDecimal>, Warning>() {
              @Override
              public Warning call(Tuple2<String, BigDecimal> warningEntry) throws Exception {
                Warning warning = new Warning();
                warning.setSsn(warningEntry._1());
                warning.setId(UUIDs.timeBased());
                warning.setAmount(warningEntry._2());
                warning.setRule("OVER_DOLLAR_AMOUNT");
                return warning;
              }
            });
    javaFunctions(mappedWarnings)
        .writerBuilder(getKeySpaceName(), warningsTableName, mapToRow(Warning.class))
        .saveToCassandra();
    return streamingContext;
  }
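
A minimal driver sketch showing how the streaming context returned by createContext() would typically be started. This is an illustrative assumption: the method is presumed to live in the same class as createContext(), which in turn relies on connectToCassandra(), getKeySpaceName(), and the Warning/Transaction beans not shown here, with javaFunctions and mapToRow presumably statically imported from the spark-cassandra-connector japi utilities.

  public void run() throws InterruptedException {
    JavaStreamingContext streamingContext = createContext();
    streamingContext.start();            // begin consuming the transaction stream
    streamingContext.awaitTermination(); // block until the job is stopped or fails
  }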