private static JavaStreamingContext createContext(String input, String checkpointDirectory) {
  System.out.println("Creating new context");
  // final File outputFile = new File("/flume_recover");
  // if (outputFile.exists()) {
  //   outputFile.delete();
  // }
  SparkConf conf =
      new SparkConf()
          .setMaster("local[2]")
          .setAppName("Stream File")
          .set("spark.driver.allowMultipleContexts", "true");
  conf.set("spark.serializer", KryoSerializer.class.getName());
  conf.set("es.index.auto.create", "true");
  conf.set("es.nodes", "10.26.1.134:9200");
  conf.set("es.resource", "flume/test");
  conf.set("es.input.json", "true");

  JavaStreamingContext jssc = new JavaStreamingContext(conf, new Duration(3000));
  jssc.checkpoint(checkpointDirectory);

  // Convert each matching log line to a JSON string; non-matching lines become "".
  JavaDStream<String> textFile = jssc.textFileStream(input);
  JavaDStream<String> jsonStr =
      textFile.map(
          new Function<String, String>() {
            public String call(String arg0) throws Exception {
              Matcher m = log.matcher(arg0);
              if (m.find()) {
                return transferJson(m);
              }
              return "";
            }
          });
  jsonStr.print();

  // Push each micro-batch to Elasticsearch via the output operation foreachRDD.
  jsonStr.foreachRDD(
      new VoidFunction<JavaRDD<String>>() {
        public void call(JavaRDD<String> rdd) throws Exception {
          // Check for null before dereferencing, then skip empty batches.
          if (rdd != null && !rdd.isEmpty()) {
            JavaEsSpark.saveToEs(rdd, "flume/test");
          }
        }
      });
  return jssc;
}
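// The factory method above is designed to be paired with checkpoint-based recovery:
// the driver should only invoke createContext when no checkpoint exists. A minimal
// sketch of such a driver, assuming Spark 1.4+ (where getOrCreate accepts a
// Function0) and hypothetical input/checkpoint paths:
public static void main(String[] args) throws Exception {
  final String input = "/flume_in"; // hypothetical input directory
  final String checkpointDir = "/flume_checkpoint"; // hypothetical checkpoint directory
  // Rebuild the context from the checkpoint if one exists; otherwise create it fresh.
  JavaStreamingContext jssc =
      JavaStreamingContext.getOrCreate(
          checkpointDir,
          new Function0<JavaStreamingContext>() {
            @Override
            public JavaStreamingContext call() {
              return createContext(input, checkpointDir);
            }
          });
  jssc.start();
  jssc.awaitTermination();
}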
public static void main(String[] args) {
  // Create a Spark context and a streaming context on top of it.
  SparkConf conf =
      new SparkConf().setAppName("Activity").set("spark.eventLog.enabled", "true");
  JavaSparkContext sc = new JavaSparkContext(conf);
  JavaStreamingContext jssc = new JavaStreamingContext(sc, STREAM_INTERVAL);

  String TOPIC = "activityevent";
  String zkQuorum = "localhost:2181";
  String group = "1";
  Map<String, Integer> topicMap = new HashMap<String, Integer>();
  topicMap.put(TOPIC, 1);

  JavaPairReceiverInputDStream<String, String> messages =
      KafkaUtils.createStream(jssc, zkQuorum, group, topicMap);
  // messages.print();

  // Extract the message payload (value) from each Kafka (key, value) pair.
  JavaDStream<String> activityDataStream =
      messages.map(
          new Function<Tuple2<String, String>, String>() {
            @Override
            public String call(Tuple2<String, String> tuple2) {
              return tuple2._2();
            }
          });

  final Long teamWindowDurationMs = Durations.minutes(1).milliseconds();
  JavaDStream<Activity> activityEntryDStream = activityDataStream.map(Activity::parseFromLine);
  JavaPairDStream<WithTimestamp<String>, Double> activityWindowDStream =
      activityEntryDStream
          .mapToPair(
              activity ->
                  new Tuple2<>(
                      WithTimestamp.create(
                          activity.getActivity(),
                          // Apply a fixed window by rounding the timestamp down to the
                          // nearest multiple of the window size.
                          (convertMillsecs(activity.getTimestamp()) / teamWindowDurationMs)
                              * teamWindowDurationMs),
                      activity.getXaxis()))
          .reduceByKey(SUM_REDUCER);
  activityWindowDStream.print();

  jssc.start();
  jssc.awaitTermination();
  // jssc.close();
  sc.stop();
  sc.close();
}
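// For reference, minimal sketches of the helpers the fixed-window code above relies
// on (WithTimestamp, SUM_REDUCER, convertMillsecs). These are assumptions about
// their shape, not the original definitions:
public static class WithTimestamp<T> implements Serializable {
  private final T value;
  private final long timestamp; // window start, in epoch milliseconds

  private WithTimestamp(T value, long timestamp) {
    this.value = value;
    this.timestamp = timestamp;
  }

  public static <T> WithTimestamp<T> create(T value, long timestamp) {
    return new WithTimestamp<>(value, timestamp);
  }

  // equals/hashCode over (value, timestamp) are required for reduceByKey grouping.
  @Override
  public boolean equals(Object o) {
    if (!(o instanceof WithTimestamp)) return false;
    WithTimestamp<?> other = (WithTimestamp<?>) o;
    return timestamp == other.timestamp && Objects.equals(value, other.value);
  }

  @Override
  public int hashCode() {
    return Objects.hash(value, timestamp);
  }
}

// Sums the per-window values; used as the reduce function in reduceByKey.
private static final Function2<Double, Double, Double> SUM_REDUCER = (a, b) -> a + b;

// Assumed to normalize the parsed Activity timestamp to epoch milliseconds.
private static long convertMillsecs(long timestampSecs) {
  return timestampSecs * 1000L;
}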
public JavaStreamingContext createContext() {
  CassandraConnectionContext connectionContext = connectToCassandra();
  JavaDStream<String> rdd = connectionContext.getRDD();
  JavaStreamingContext sparkContext = connectionContext.getStreamingContext();
  final CassandraConnector connector = connectionContext.getCassandraConnector();
  final SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");

  // Parse each CSV line into a (taxId, Transaction) pair.
  JavaDStream<Tuple2<String, Transaction>> transactionList =
      rdd.map(
          new Function<String, Tuple2<String, Transaction>>() {
            @Override
            public Tuple2<String, Transaction> call(String line) throws Exception {
              String[] columns = line.split(",");
              String taxId = columns[0];
              String name = columns[1];
              String merchant = columns[2];
              BigDecimal amount = new BigDecimal(columns[3]);
              Date transactionDate;
              try {
                transactionDate = format.parse(columns[4]);
              } catch (ParseException e) {
                e.printStackTrace();
                throw new RuntimeException(e);
              }
              String tranId = columns[5];
              System.out.println(line);
              return new Tuple2<>(
                  taxId, new Transaction(name, merchant, amount, transactionDate, tranId));
            }
          });
  transactionList.cache();

  final String warningsTableName = "warnings";
  try (Session session = connector.openSession()) {
    session.execute(
        String.format("DROP TABLE IF EXISTS %s.%s", getKeySpaceName(), warningsTableName));
    session.execute(
        String.format(
            "CREATE TABLE IF NOT EXISTS %s.%s (ssn text, "
                + "id uuid, amount decimal, rule text, PRIMARY KEY(ssn, id))",
            getKeySpaceName(), warningsTableName));
  }

  // Raise a warning when a user's transactions total more than 999 within a
  // 60-second window, evaluated every 10 seconds.
  JavaPairDStream<String, BigDecimal> warnings =
      transactionList
          .window(new Duration(60000), new Duration(10000))
          .mapToPair(
              new PairFunction<Tuple2<String, Transaction>, String, BigDecimal>() {
                @Override
                public Tuple2<String, BigDecimal> call(Tuple2<String, Transaction> transaction)
                    throws Exception {
                  String taxId = transaction._1();
                  BigDecimal amount = transaction._2().getAmount();
                  return new Tuple2<>(taxId, amount);
                }
              })
          .reduceByKey(
              new Function2<BigDecimal, BigDecimal, BigDecimal>() {
                @Override
                public BigDecimal call(
                    BigDecimal transactionAmount1, BigDecimal transactionAmount2)
                    throws Exception {
                  return transactionAmount1.add(transactionAmount2);
                }
              })
          .filter(
              new Function<Tuple2<String, BigDecimal>, Boolean>() {
                @Override
                public Boolean call(Tuple2<String, BigDecimal> transactionSumByTaxId)
                    throws Exception {
                  // compareTo only guarantees the sign of its result, so test > 0
                  // rather than == 1: true when the total is greater than 999.
                  Boolean result =
                      transactionSumByTaxId._2().compareTo(new BigDecimal(999)) > 0;
                  System.out.println(
                      "tran " + transactionSumByTaxId._1()
                          + " with amount " + transactionSumByTaxId._2());
                  if (result) {
                    System.out.println(
                        "warning " + transactionSumByTaxId._1()
                            + " has value " + transactionSumByTaxId._2()
                            + " is greater than 999");
                  }
                  return result;
                }
              });

  // Map each (ssn, total) pair to a Warning row and save it to Cassandra.
  JavaDStream<Warning> mappedWarnings =
      warnings.map(
          new Function<Tuple2<String, BigDecimal>, Warning>() {
            @Override
            public Warning call(Tuple2<String, BigDecimal> warnings) throws Exception {
              Warning warning = new Warning();
              warning.setSsn(warnings._1());
              warning.setId(UUIDs.timeBased());
              warning.setAmount(warnings._2());
              warning.setRule("OVER_DOLLAR_AMOUNT");
              return warning;
            }
          });
  javaFunctions(mappedWarnings)
      .writerBuilder(getKeySpaceName(), warningsTableName, mapToRow(Warning.class))
      .saveToCassandra();

  return sparkContext;
}
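// The saveToCassandra call above relies on mapToRow(Warning.class) mapping JavaBean
// properties onto the warnings table columns (ssn, id, amount, rule). A minimal
// sketch of such a bean, assuming the connector's default bean-property mapping;
// the original Warning class may differ:
public static class Warning implements Serializable {
  private String ssn;
  private UUID id;
  private BigDecimal amount;
  private String rule;

  public String getSsn() { return ssn; }
  public void setSsn(String ssn) { this.ssn = ssn; }
  public UUID getId() { return id; }
  public void setId(UUID id) { this.id = id; }
  public BigDecimal getAmount() { return amount; }
  public void setAmount(BigDecimal amount) { this.amount = amount; }
  public String getRule() { return rule; }
  public void setRule(String rule) { this.rule = rule; }
}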