public static void main(String[] args) {
  SparkConf sparkConf = new SparkConf().setAppName("JavaLogQuery");
  JavaSparkContext jsc = new JavaSparkContext(sparkConf);

  JavaRDD<String> dataSet =
      (args.length == 1) ? jsc.textFile(args[0]) : jsc.parallelize(exampleApacheLogs);

  JavaPairRDD<Tuple3<String, String, String>, Stats> extracted = dataSet.mapToPair(
      new PairFunction<String, Tuple3<String, String, String>, Stats>() {
        @Override
        public Tuple2<Tuple3<String, String, String>, Stats> call(String s) {
          return new Tuple2<Tuple3<String, String, String>, Stats>(extractKey(s), extractStats(s));
        }
      });

  JavaPairRDD<Tuple3<String, String, String>, Stats> counts = extracted.reduceByKey(
      new Function2<Stats, Stats, Stats>() {
        @Override
        public Stats call(Stats stats, Stats stats2) {
          return stats.merge(stats2);
        }
      });

  List<Tuple2<Tuple3<String, String, String>, Stats>> output = counts.collect();
  for (Tuple2<?, ?> t : output) {
    System.out.println(t._1() + "\t" + t._2());
  }
  jsc.stop();
}
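// For contrast, the same extract/reduce pipeline as a Java 8 lambda sketch (the helper
// name is illustrative; it assumes the extractKey/extractStats helpers and Stats.merge
// from the surrounding class are available - Spark's Java function interfaces are
// single-abstract-method, so lambdas and method references satisfy them):
static JavaPairRDD<Tuple3<String, String, String>, Stats> logStats(JavaRDD<String> dataSet) {
  return dataSet
      .mapToPair(s -> new Tuple2<>(extractKey(s), extractStats(s)))
      .reduceByKey(Stats::merge);
}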
@After
public void tearDown() throws Exception {
  if (sc != null) {
    sc.stop();
    sc.close();
  }
}
@Override
public void kill() throws InterruptedException {
  if (started) {
    sparkContext.stop();
    set(PipelineResult.EMPTY);
  }
}
public static void main(String[] args) { if (args.length < 2) { System.err.println("Usage: KMeansMP <input_file> <results>"); System.exit(1); } String inputFile = args[0]; String results_path = args[1]; JavaPairRDD<Integer, Iterable<String>> results; int k = 4; int iterations = 100; int runs = 1; long seed = 0; final KMeansModel model; SparkConf sparkConf = new SparkConf().setAppName("KMeans MP"); JavaSparkContext sc = new JavaSparkContext(sparkConf); JavaRDD<String> lines = sc.textFile(inputFile); JavaRDD<Vector> points = lines.map(new ParsePoint()); JavaRDD<String> titles = lines.map(new ParseTitle()); model = KMeans.train(points.rdd(), k, iterations, runs, KMeans.RANDOM(), 0); results = titles.zip(points).mapToPair(new ClusterCars(model)).groupByKey(); results.saveAsTextFile(results_path); sc.stop(); }
@AfterClass
public static void clean() throws Exception {
  if (sc != null) {
    sc.stop();
    // wait for jetty & spark to properly shut down
    Thread.sleep(TimeUnit.SECONDS.toMillis(2));
  }
}
public static void setJsscStartFlag() {
  // If the plain JavaSparkContext was flagged as started, stop and clear it first,
  // so only one context is active before the streaming context is flagged on.
  if (jscStartFlag) {
    m_jsc.stop();
    jscStartFlag = false;
    m_jsc = null;
  }
  jsscStartFlag = true;
}
public static void main(String[] args) {
  SparkConf conf = new SparkConf().setAppName("KeyValueTest").setMaster("local");
  JavaSparkContext jsc = new JavaSparkContext(conf);

  JavaRDD<String> lines = jsc.textFile("/home/piyushm/samplejson.json");
  List<Person> persons = lines.mapPartitions(new ParseJson()).collect();
  JavaRDD<Person> personJavaRDD = jsc.parallelize(persons);

  JavaRDD<String> csvFileContent = jsc.textFile("/opt/sample.csv");
  System.out.println(csvFileContent.map(new ParseLine()).collect());
  System.out.println(persons);
  System.out.println(personJavaRDD.mapPartitions(new WriteJson()).collect());

  jsc.stop();
}
public static void main(String[] args) throws Exception {
  if (args.length < 2) {
    throw new IllegalArgumentException(
        "The number of arguments is incorrect. Usage:\n"
            + " <configuration file (conf.xml) path> <job file (.analysis.xml) path> [properties file path]\n"
            + "Got: " + Arrays.toString(args));
  }

  final SparkConf conf = new SparkConf().setAppName("DataCleaner-spark");
  final JavaSparkContext sparkContext = new JavaSparkContext(conf);

  final URI confXmlPath = URI.create(args[0]);
  final URI analysisJobXmlPath = URI.create(args[1]);
  final URI propertiesPath;
  if (args.length > 2) {
    propertiesPath = URI.create(args[2]);
  } else {
    propertiesPath = null;
  }

  final SparkJobContext sparkJobContext =
      new SparkJobContext(confXmlPath, analysisJobXmlPath, propertiesPath, sparkContext);

  final ServiceLoader<SparkJobLifeCycleListener> listenerLoaders =
      ServiceLoader.load(SparkJobLifeCycleListener.class);
  for (SparkJobLifeCycleListener listener : listenerLoaders) {
    sparkJobContext.addSparkJobLifeCycleListener(listener);
  }

  final SparkAnalysisRunner sparkAnalysisRunner =
      new SparkAnalysisRunner(sparkContext, sparkJobContext);

  try {
    final AnalysisResultFuture result = sparkAnalysisRunner.run();
    result.await();

    if (sparkJobContext.isResultEnabled()) {
      final Resource resultResource =
          ResultFilePathUtils.getResultResource(sparkContext, sparkJobContext);
      logger.info("DataCleaner result will be written to: {}", resultResource);
      saveResult(result, resultResource);
    } else {
      logger.info("DataCleaner result will not be written - disabled");
    }
  } finally {
    sparkContext.stop();
  }
}
public static void main(String[] args) {
  JavaSparkContext javaSparkContext = SparkConfSetup.getJavaSparkContext();
  CassandraConnector connector = SparkConfSetup.getCassandraConnector();

  basicCassandraSession(connector);
  writePeopleToCassandra(javaSparkContext);
  readPeopleFromCassandra(javaSparkContext);

  javaSparkContext.stop();
}
public static void main(String[] args) {
  // create spark and sql context
  JavaSparkContext ctx = CreateSparkContext.create(args);
  SQLContext sqlContext = new SQLContext(ctx);

  WorkflowContext workflowContext = new WorkflowContext();
  chartswf(ctx, sqlContext, workflowContext);

  // stop the context
  ctx.stop();
}
public static void main(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("Usage: JavaWordCount <input_file> <output_file>");
    System.exit(1);
  }

  SparkConf sparkConf = new SparkConf().setAppName("JavaWordCount");
  JavaSparkContext ctx = new JavaSparkContext(sparkConf);
  JavaRDD<String> lines = ctx.textFile(args[0], 1);

  JavaRDD<String> words = lines.flatMap(
      new FlatMapFunction<String, String>() {
        @Override
        public Iterator<String> call(String s) {
          return Arrays.asList(SPACE.split(s)).iterator();
        }
      });

  JavaPairRDD<String, Integer> ones = words.mapToPair(
      new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
          return new Tuple2<String, Integer>(s, 1);
        }
      });

  JavaPairRDD<String, Integer> counts = ones.reduceByKey(
      new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
          return i1 + i2;
        }
      });

  /*
  List<Tuple2<String, Integer>> output = counts.collect();
  for (Tuple2<?, ?> tuple : output) {
    System.out.println(tuple._1() + ": " + tuple._2());
  }
  */
  counts.saveAsTextFile(args[1]);
  ctx.stop();
}
public static void main(String[] args) {
  // Create a Spark Context.
  SparkConf conf = new SparkConf().setAppName("Activity").set("spark.eventLog.enabled", "true");
  JavaSparkContext sc = new JavaSparkContext(conf);
  JavaStreamingContext jssc = new JavaStreamingContext(sc, STREAM_INTERVAL);

  String TOPIC = "activityevent";
  String zkQuorum = "localhost:2181";
  String group = "1";
  Map<String, Integer> topicMap = new HashMap<String, Integer>();
  topicMap.put(TOPIC, 1);

  JavaPairReceiverInputDStream<String, String> messages =
      KafkaUtils.createStream(jssc, zkQuorum, group, topicMap);
  // messages.print();

  JavaDStream<String> activitydatastream = messages.map(
      new Function<Tuple2<String, String>, String>() {
        @Override
        public String call(Tuple2<String, String> tuple2) {
          return tuple2._2();
        }
      });

  final Long teamWindowDurationMs = Durations.minutes(1).milliseconds();
  JavaDStream<Activity> ActivityEntryDStream = activitydatastream.map(Activity::parseFromLine);

  JavaPairDStream<WithTimestamp<String>, Double> ActivityWindowDStream =
      ActivityEntryDStream.mapToPair(
              windows -> new Tuple2<>(
                  WithTimestamp.create(
                      windows.getActivity(),
                      // Apply a fixed window by rounding the timestamp down to the
                      // nearest multiple of the window size
                      (convertMillsecs(windows.getTimestamp()) / teamWindowDurationMs)
                          * teamWindowDurationMs),
                  windows.getXaxis()))
          .reduceByKey(SUM_REDUCER);

  ActivityWindowDStream.print();

  jssc.start();
  jssc.awaitTermination();
  // jssc.close();
  sc.stop();
  sc.close();
}
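// Minimal sketch of the fixed-window rounding used above (helper name is illustrative):
// integer division maps a timestamp to the start of its window; with a 60,000 ms window,
// 1,234,567 ms -> (1234567 / 60000) * 60000 = 1,200,000 ms.
static long windowStart(long tsMillis, long windowMillis) {
  return (tsMillis / windowMillis) * windowMillis;
}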
public JavaApiDemo() {
  DemoApp demoApp = DemoApp$.MODULE$.apply();
  JavaSparkContext sc = new JavaSparkContext(demoApp.sc());

  // Here we are going to save some data to Cassandra...
  List<Person> people = Arrays.asList(
      Person.newInstance(1, "John", new Date()),
      Person.newInstance(2, "Anna", new Date()),
      Person.newInstance(3, "Andrew", new Date()));
  JavaRDD<Person> rdd = sc.parallelize(people);
  javaFunctions(rdd, Person.class).saveToCassandra("test", "people");

  // then, we want to read that data as an RDD of CassandraRows and convert them to strings...
  JavaRDD<String> cassandraRowsRDD = javaFunctions(sc)
      .cassandraTable("test", "people")
      .toJavaRDD()
      .map(
          new Function<CassandraRow, String>() {
            @Override
            public String call(CassandraRow cassandraRow) throws Exception {
              return cassandraRow.toString();
            }
          });
  // note: join takes the elements first and the separator second
  System.out.println("Data as CassandraRows: \n" + StringUtils.join(cassandraRowsRDD.toArray(), "\n"));

  // finally, we want to read that data as an RDD of Person beans and also convert them to strings...
  JavaRDD<String> rdd2 = javaFunctions(sc)
      .cassandraTable("test", "people", Person.class)
      .toJavaRDD()
      .map(
          new Function<Person, String>() {
            @Override
            public String call(Person person) throws Exception {
              return person.toString();
            }
          });
  System.out.println("Data as Person beans: \n" + StringUtils.join(rdd2.toArray(), "\n"));

  sc.stop();
}
public static void main(String[] args) { if (args.length < 2) { System.err.println("Usage: NaiveBayesExample <training_data> <test_data>"); System.exit(1); } String training_data_path = args[0]; // https://class.coursera.org/cloudapplications-001/forum/thread?thread_id=1387 // String test_data_path = args[0]; String test_data_path = args[1]; SparkConf sparkConf = new SparkConf().setAppName("NaiveBayesExample"); JavaSparkContext sc = new JavaSparkContext(sparkConf); JavaRDD<LabeledPoint> train = sc.textFile(training_data_path).map(new DataToPoint()); // JavaRDD<LabeledPoint> test = sc.textFile(training_data_path).map(new DataToPoint()); JavaRDD<LabeledPoint> test = sc.textFile(test_data_path).map(new DataToPoint()); final NaiveBayesModel model = NaiveBayes.train(train.rdd(), 1.0); JavaPairRDD<Double, Double> predictionAndLabel = test.mapToPair( new PairFunction<LabeledPoint, Double, Double>() { public Tuple2<Double, Double> call(LabeledPoint p) { return new Tuple2<Double, Double>(model.predict(p.features()), p.label()); } }); double accuracy = predictionAndLabel .filter( new Function<Tuple2<Double, Double>, Boolean>() { public Boolean call(Tuple2<Double, Double> pl) { return pl._1().equals(pl._2()); } }) .count() / (double) test.count(); System.out.println(accuracy); sc.stop(); }
public static void main(String[] args) { SparkConf conf = new SparkConf().setAppName("JavaNormalizerExample"); JavaSparkContext jsc = new JavaSparkContext(conf); SQLContext jsql = new SQLContext(jsc); // $example on$ DataFrame dataFrame = jsql.read().format("libsvm").load("data/mllib/sample_libsvm_data.txt"); // Normalize each Vector using $L^1$ norm. Normalizer normalizer = new Normalizer().setInputCol("features").setOutputCol("normFeatures").setP(1.0); DataFrame l1NormData = normalizer.transform(dataFrame); l1NormData.show(); // Normalize each Vector using $L^\infty$ norm. DataFrame lInfNormData = normalizer.transform(dataFrame, normalizer.p().w(Double.POSITIVE_INFINITY)); lInfNormData.show(); // $example off$ jsc.stop(); }
public static void main(String[] args) {
  SparkConf sparkconf = new SparkConf()
      .setAppName("Simple Application")
      .setMaster("spark://1.245.77.10:7077")
      .set("spark.driver.extraClassPath", "E:/installprogram/spark-1.5.2-bin-hadoop2.4/libthirdparty/*")
      .set("spark.executor.extraClassPath", "E:/installprogram/spark-1.5.2-bin-hadoop2.4/libthirdparty/*")
      .set("fs.default.name", "file:///");
  JavaSparkContext sc = new JavaSparkContext(sparkconf);

  Configuration hadoopConfig = sc.hadoopConfiguration();
  hadoopConfig.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
  hadoopConfig.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

  // sc.addJar("e:/installprogram/spark-1.5.2-bin-hadoop2.4/libthirdparty/jmatrw-0.2.jar");
  // sc.addJar("e:/installprogram/spark-1.5.2-bin-hadoop2.4/libthirdparty/jmatrw4spark-0.2.jar");

  /*JavaRDD<Double> matrdd2 = sc.parallelize(Arrays.asList(1.0, 3.0, 2.0));
  System.out.println("Start counting parallelize...");
  long values = matrdd2.count();
  System.out.println("Value count of parallelize is " + values);*/

  JavaPairRDD<Long, Double> matrdd = sc.newAPIHadoopFile(
      "e:/tmp/vecRow03_x256.mat",
      JMATFileInputFormat.class,
      Long.class,
      Double.class,
      hadoopConfig);

  System.out.println("Start job...");
  long values = matrdd.count();
  System.out.println("Value count of hadoop is " + values);

  sc.stop();
  sc.close();
}
public static void main(String[] args) throws Exception { SparkConf sparkConf = new SparkConf().setAppName("ShopJsonParse"); JavaSparkContext ctx = new JavaSparkContext(sparkConf); JavaRDD<String> ciku = ctx.textFile("hdfs://hadoop119:9000/ciku/ciku_zhuyu.txt", 1); JavaRDD<String> zhuyu = ciku.map( new Function<String, String>() { @Override public String call(String s) throws Exception { String[] str = s.split(" "); if (str[1].equals("1")) return str[0]; else return "kaer"; } }) .distinct() .cache(); JavaRDD<String> haoping = ciku.map( new Function<String, String>() { @Override public String call(String s) throws Exception { String[] str = s.split(" "); if (str[1].equals("2")) return str[0]; else return "kaer"; } }) .distinct() .cache(); JavaRDD<String> chaping = ciku.map( new Function<String, String>() { @Override public String call(String s) throws Exception { String[] str = s.split(" "); if (str[1].equals("3")) return str[0]; else return "kaer"; } }) .distinct() .cache(); final List<String> zhuyulist = zhuyu.collect(); final List<String> hplist = haoping.collect(); final List<String> cplist = chaping.collect(); JavaRDD<String> mongoratedata = ctx.textFile("hdfs://hadoop119:9000/shopdata/ratelist.json"); JavaRDD<Map<String, Object>> mongorateall = mongoratedata.map( new Function<String, Map<String, Object>>() { @Override public Map<String, Object> call(String line) throws Exception { return ParseLineToMap(line); } private Map<String, Object> ParseLineToMap(String line) { Map<String, Object> documentMap = new HashMap<String, Object>(); try { JSONObject jsonline = new JSONObject(line); documentMap.put("PlatformItemId", jsonline.get("nid").toString()); Gson gson = new Gson(); rate rate = gson.fromJson(jsonline.get("rate").toString(), rate.class); documentMap.put("ratelist", rate.parsemod()); } catch (JSONException e) { e.printStackTrace(); } return documentMap; } }); JavaPairRDD<String, String> Rates = mongorateall.flatMapToPair( new PairFlatMapFunction<Map<String, Object>, String, String>() { @Override public Iterable<Tuple2<String, String>> call(Map<String, Object> map) throws Exception { ArrayList<Tuple2<String, String>> flatmaps = new ArrayList<Tuple2<String, String>>(); String itemid = (String) map.get("PlatformItemId"); String itempro = ""; Map<String, String> ratelist = (Map<String, String>) map.get("ratelist"); if (ratelist == null) { itempro = "null"; flatmaps.add(new Tuple2<String, String>(itemid, itempro)); } else { for (String value : ratelist.values()) { itempro = value; flatmaps.add(new Tuple2<String, String>(itemid, itempro)); } } return flatmaps; } }); final Pattern SPACES = Pattern.compile("\\s+"); JavaPairRDD<String, String> sentences = Rates.flatMapValues( new Function<String, Iterable<String>>() { @Override public Iterable<String> call(String s) throws Exception { ArrayList<String> list = new ArrayList<String>(); if (s.contains(" ")) { String[] str = SPACES.split(s); int num = 0; while (num < str.length) { if (!str[num].equals("")) list.add(str[num]); num++; } } else { list.add(s); } return list; } }); String filter = "的 也 很 都 了 非常 有些 还 是 点 些 就 看起来 看上去 更 呢 哦 确实 什么的 较 太 啊 吧 得 那么 什么 挺"; final String[] list = filter.split(" "); JavaPairRDD<String, String> words = sentences.mapValues( new Function<String, String>() { @Override public String call(String s) throws Exception { if (s.length() < 3) { return s + " " + "kaer"; } for (int i = 0; i < zhuyulist.size(); i++) { String zhuyu = zhuyulist.get(i); if (s.contains(zhuyu)) { s = s.replace(zhuyu, " "); int size = s.length(); 
int tap = s.lastIndexOf(" "); String ss = "kaer"; if (tap + 1 < size) { ss = s.substring(tap + 1, size); } else { if (tap - 1 > 0) ss = s.substring(0, tap - 1); } for (String tem : list) { if (ss.contains(tem)) ss = ss.replace(tem, ""); } return zhuyu + " " + ss; } } return "long null"; } }); JavaPairRDD<String, String> filterwords = words .mapValues( new Function<String, String>() { @Override public String call(String s) throws Exception { String tempstr; if (s.contains("kaer")) { tempstr = s.substring(0, s.indexOf(" ")); for (int i = 0; i < cplist.size(); i++) { if (tempstr.equals(cplist.get(i))) return "差评 " + "," + tempstr; } for (int i = 0; i < hplist.size(); i++) { if (tempstr.equals(hplist.get(i))) return "好评 " + "," + tempstr; } return "中评 " + "," + tempstr; } else if (s.contains("null")) { return s + ",null"; } else { if (s.endsWith(" ")) return "long null,null"; tempstr = s.split(" ")[1]; for (int i = 0; i < cplist.size(); i++) { if (tempstr.equals(cplist.get(i))) return "差评 " + s.split(" ")[0] + "," + tempstr; } for (int i = 0; i < hplist.size(); i++) { if (tempstr.equals(hplist.get(i))) return "好评 " + s.split(" ")[0] + "," + tempstr; } return "中评 " + s.split(" ")[0] + "," + tempstr; } } }) .filter( new Function<Tuple2<String, String>, Boolean>() { @Override public Boolean call(Tuple2<String, String> line) throws Exception { if (line._2.contains("null")) return false; else return true; } }); JavaPairRDD<String, String> ones = filterwords.mapToPair( new PairFunction<Tuple2<String, String>, String, String>() { @Override public Tuple2<String, String> call(Tuple2<String, String> line) throws Exception { String key = line._1(); String value = "0,0,0", ll = line._2; if (ll.startsWith("好评")) value = "1,0,0"; else if (ll.startsWith("中评")) value = "0,1,0"; else if (ll.startsWith("差评")) value = "0,0,1"; return new Tuple2<String, String>(key, value); } }); JavaPairRDD<String, String> result = ones.reduceByKey( new Function2<String, String, String>() { @Override public String call(String s1, String s2) throws Exception { double h1 = Double.parseDouble(s1.split(",")[0]), h2 = Double.parseDouble(s1.split(",")[1]), h3 = Double.parseDouble(s1.split(",")[2]); double hh1 = Double.parseDouble(s2.split(",")[0]), hh2 = Double.parseDouble(s2.split(",")[1]), hh3 = Double.parseDouble(s2.split(",")[2]); return (h1 + hh1) + "," + (h2 + hh2) + "," + (h3 + hh3); } }); JavaPairRDD<String, Integer> rateresult = result.mapValues( new Function<String, Integer>() { @Override public Integer call(String s1) throws Exception { double h1 = Double.parseDouble(s1.split(",")[0]), h2 = Double.parseDouble(s1.split(",")[1]), h3 = Double.parseDouble(s1.split(",")[2]); if (h1 + h3 == 0) return 50; else { return (int) (h1 / (h1 + h3) * 100); } } }); JavaRDD<String> mongocontentdata = ctx.textFile("hdfs://hadoop119:9000/shopdata/ProductContent.json"); JavaRDD<Map<String, Object>> mongocontentall = mongocontentdata.map( new Function<String, Map<String, Object>>() { @Override public Map<String, Object> call(String line) throws Exception { return new ShopParse().ParseLine(line); } }); JavaPairRDD<String, Map<String, Object>> content = mongocontentall.mapToPair( new PairFunction<Map<String, Object>, String, Map<String, Object>>() { @Override public Tuple2<String, Map<String, Object>> call(Map<String, Object> map) throws Exception { return new Tuple2<String, Map<String, Object>>( map.get("PlatformItemId").toString(), map); } }); JavaRDD<String> mongoproListdata = ctx.textFile("hdfs://hadoop119:9000/shopdata/productList.json"); 
JavaRDD<Map<String, Object>> mongoproListall = mongoproListdata.map( new Function<String, Map<String, Object>>() { @Override public Map<String, Object> call(String line) throws Exception { return new ShopParse().ParseproList(line); } }); System.out.println("mongoproListall counts :" + mongoproListall.count()); JavaPairRDD<String, Map<String, Object>> proList = mongoproListall .mapToPair( new PairFunction<Map<String, Object>, String, Map<String, Object>>() { @Override public Tuple2<String, Map<String, Object>> call(Map<String, Object> map) throws Exception { return new Tuple2<String, Map<String, Object>>( map.get("PlatformItemId").toString(), map); } }) .filter( new Function<Tuple2<String, Map<String, Object>>, Boolean>() { @Override public Boolean call(Tuple2<String, Map<String, Object>> line) throws Exception { if (line._2.get("isdownloads").toString().equals("true")) return true; else return false; } }); System.out.println("proList counts :" + proList.count()); JavaRDD<Map<String, Object>> ContJoinPro = content .join(proList) .map( new Function< Tuple2<String, Tuple2<Map<String, Object>, Map<String, Object>>>, Map<String, Object>>() { @Override public Map<String, Object> call( Tuple2<String, Tuple2<Map<String, Object>, Map<String, Object>>> line) throws Exception { Map<String, Object> mapprod = line._2._1; mapprod.put("Name", line._2._2.get("Name")); mapprod.put("Photo", line._2._2.get("Photo")); mapprod.put("SellerId", line._2._2.get("SellerId")); mapprod.put("StoreName", line._2._2.get("StoreName")); mapprod.put("Url", line._2._2.get("Url")); mapprod.put("TaokeUrl", line._2._2.get("TaokeUrl")); return mapprod; } }); JavaPairRDD<String, String> Messages = ContJoinPro.mapToPair( new PairFunction<Map<String, Object>, String, String>() { @Override public Tuple2<String, String> call(Map<String, Object> map) throws Exception { String itemid = (String) map.get("PlatformItemId"); String itempro = ""; String From = (String) map.get("isTmall"); if (From.equals("true")) From = "2"; else From = "1"; String Quantity = (String) map.get("Quantity"); String CmtCount = (String) map.get("ratecount"); String ImgPath = (String) map.get("detailmessage"); String[] ImgPaths = ImgPath.split("@=@=@"); // 1-5 String mobprice = (String) map.get("mobmessage"); String pcprice = (String) map.get("pcpricemessage"); String minmaxPrice = (String) map.get("MaxMinPrice"); String OriginalPrice = (String) map.get("OriginalPrice"); double p1 = Double.parseDouble(mobprice); double p2 = Double.parseDouble(pcprice.split("@=@=@")[0]); double min = Double.parseDouble(minmaxPrice.split(",")[0]); double max = Double.parseDouble(minmaxPrice.split(",")[1]); double origin = Double.parseDouble(OriginalPrice); double Price = p1; if (Price > p2) Price = p2; if (Price == 100000.00) Price = min; if (origin < max) OriginalPrice = max + ""; String IsPost = "0"; if (!pcprice.endsWith("@=@=@") && pcprice.split("@=@=@")[1].startsWith("0.00")) IsPost = "1"; String Name = (String) map.get("Name"); String SellerId = (String) map.get("SellerId"); String StoreName = (String) map.get("StoreName"); String Photo = (String) map.get("Photo"); String Url = (String) map.get("Url"); String TaokeUrl = (String) map.get("TaokeUrl"); DecimalFormat ddf = new DecimalFormat("#0.00"); String Discount = ddf.format(Price / Double.parseDouble(OriginalPrice)) + ""; SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); String AddTime = df.format(new Date()); String IsSell = "1"; String Type = "2"; String IsChangeImgPath = "0"; String HotKeyId = "0"; 
String OpenIid = "0"; itempro = From + "@=@=@" + Quantity + "@=@=@" + CmtCount + "@=@=@" + ImgPaths[0] + "@=@=@" + ImgPaths[1] + "@=@=@" + ImgPaths[2] + "@=@=@" + ImgPaths[3] + "@=@=@" + ImgPaths[4] + "@=@=@" + Price + "@=@=@" + IsPost + "@=@=@" + Name + "@=@=@" + SellerId + "@=@=@" + StoreName + "@=@=@" + OriginalPrice + "@=@=@" + Photo + "@=@=@" + Url + "@=@=@" + Discount + "@=@=@" + AddTime + "@=@=@" + IsSell + "@=@=@" + Type + "@=@=@" + IsChangeImgPath + "@=@=@" + HotKeyId + "@=@=@" + TaokeUrl + "@=@=@" + OpenIid; return new Tuple2<String, String>(itemid, itempro); } }); JavaRDD<String> MessagesAll = Messages.leftOuterJoin(rateresult) .map( new Function<Tuple2<String, Tuple2<String, Optional<Integer>>>, String>() { @Override public String call(Tuple2<String, Tuple2<String, Optional<Integer>>> line) throws Exception { Optional<Integer> possible = line._2._2; int fenshu = 50; if (possible.isPresent()) fenshu = line._2._2.get(); return line._1 + "@=@=@" + line._2._1 + "@=@=@" + fenshu; } }); List<String> messages = MessagesAll.collect(); new MessageToMysql().insert(messages); ctx.stop(); }
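// A hedged alternative sketch (not part of the original job): the "1,0,0" comma-separated
// string counters above could be carried as int arrays instead, avoiding the repeated
// split/parseDouble work inside reduceByKey. Hypothetical helper, assumes Java 8+.
static JavaPairRDD<String, int[]> countsByItem(JavaPairRDD<String, String> filterwords) {
  return filterwords
      .mapToPair(line -> {
        int[] c = new int[3]; // {好评 good, 中评 neutral, 差评 bad}
        if (line._2.startsWith("好评")) c[0] = 1;
        else if (line._2.startsWith("中评")) c[1] = 1;
        else if (line._2.startsWith("差评")) c[2] = 1;
        return new Tuple2<>(line._1(), c);
      })
      .reduceByKey((a, b) -> new int[] {a[0] + b[0], a[1] + b[1], a[2] + b[2]});
}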
@After
public void tearDown() {
  sc.stop();
  sc = null;
  System.clearProperty("spark.driver.port");
}
static synchronized void stopSparkContext(JavaSparkContext context) {
  if (!Boolean.getBoolean(TEST_REUSE_SPARK_CONTEXT)) {
    context.stop();
  }
}
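// Usage sketch (an assumption: TEST_REUSE_SPARK_CONTEXT holds a system-property name,
// which is what Boolean.getBoolean implies). Setting the property to "true" turns
// stopSparkContext into a no-op, so a whole test suite can share one JavaSparkContext.
System.setProperty(TEST_REUSE_SPARK_CONTEXT, "true"); // keep the shared context alive
stopSparkContext(context); // no-op while the property is "true"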
public static void main(String[] args) {
  // STEP 1: validate the arguments
  if (args.length < 1) {
    log.fatal("Syntax Error: there must be one argument (a file name or a directory)");
    throw new RuntimeException();
  }

  // STEP 2: create a SparkConf object
  SparkConf sparkConf = new SparkConf().setAppName("Trending Topic");

  // STEP 3: create a Java Spark context
  JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);

  // STEP 4: read lines of files
  JavaRDD<String> lines = sparkContext.textFile(args[0]);

  JavaRDD<String> words;
  words = lines.flatMap(
      new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String s) throws Exception {
          return Arrays.asList(s.split("\t")[2].split(" "));
        }
      });

  JavaPairRDD<String, Integer> ones;
  ones = words.mapToPair(
      new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String string) {
          return new Tuple2<>(string, 1);
        }
      });

  JavaPairRDD<String, Integer> counts;
  counts = ones.reduceByKey(
      new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer integer, Integer integer2) throws Exception {
          return integer + integer2;
        }
      });

  // We need to swap the tuples because we cannot sort by value, only by key
  JavaPairRDD<Integer, String> swapped;
  swapped = counts.mapToPair(
      new PairFunction<Tuple2<String, Integer>, Integer, String>() {
        @Override
        public Tuple2<Integer, String> call(Tuple2<String, Integer> tupla) throws Exception {
          return tupla.swap();
        }
      });

  // STEP 7: sort the results by key, descending, so the most frequent words come
  // first (the ascending default would put the rarest words at the head of the list)
  List<Tuple2<Integer, String>> output = swapped.sortByKey(false).collect();

  // The exercise says to remove words that carry no meaning. We could keep them in a
  // file and read them in; we hard-code a small list here since the idea is clear and
  // reading the file is not the point of the exercise.
  List<String> excluyentes = new LinkedList<>();
  excluyentes.add("rt");
  excluyentes.add("http");
  excluyentes.add("https");
  excluyentes.add("www");
  // Note: the list returned by collect() must not be mutated while iterating over it
  // (remove() inside the loop fails), so we filter into a fresh list instead.
  List<Tuple2<Integer, String>> filtered = new LinkedList<>();
  for (Tuple2<Integer, String> t : output) {
    if (!excluyentes.contains(t._2)) {
      filtered.add(t);
    }
  }

  // STEP 8: print the results
  for (int i = 0; i < 10; i++) {
    Tuple2<Integer, String> tuple;
    tuple = filtered.get(i);
    System.out.println(tuple._2() + ": " + tuple._1());
  }

  // STEP 9: stop the spark context
  sparkContext.stop();
}
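// The same swap-and-sort idiom as a compact Java 8 sketch (hypothetical helper):
// take(10) ships only the first ten tuples to the driver instead of collecting everything.
static List<Tuple2<Integer, String>> top10(JavaPairRDD<String, Integer> counts) {
  return counts.mapToPair(Tuple2::swap).sortByKey(false).take(10);
}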
public static void main(String[] args) {
  // parse the arguments
  Params params = parse(args);
  SparkConf conf = new SparkConf().setAppName("JavaOneVsRestExample");
  JavaSparkContext jsc = new JavaSparkContext(conf);
  SQLContext jsql = new SQLContext(jsc);

  // configure the base classifier
  LogisticRegression classifier = new LogisticRegression()
      .setMaxIter(params.maxIter)
      .setTol(params.tol)
      .setFitIntercept(params.fitIntercept);
  if (params.regParam != null) {
    classifier.setRegParam(params.regParam);
  }
  if (params.elasticNetParam != null) {
    classifier.setElasticNetParam(params.elasticNetParam);
  }

  // instantiate the One Vs Rest Classifier
  OneVsRest ovr = new OneVsRest().setClassifier(classifier);

  String input = params.input;
  RDD<LabeledPoint> inputData = MLUtils.loadLibSVMFile(jsc.sc(), input);
  RDD<LabeledPoint> train;
  RDD<LabeledPoint> test;

  // compute the train/test split: if testInput is not provided use part of input
  String testInput = params.testInput;
  if (testInput != null) {
    train = inputData;
    // compute the number of features in the training set.
    int numFeatures = inputData.first().features().size();
    test = MLUtils.loadLibSVMFile(jsc.sc(), testInput, numFeatures);
  } else {
    double f = params.fracTest;
    RDD<LabeledPoint>[] tmp = inputData.randomSplit(new double[] {1 - f, f}, 12345);
    train = tmp[0];
    test = tmp[1];
  }

  // train the multiclass model
  DataFrame trainingDataFrame = jsql.createDataFrame(train, LabeledPoint.class);
  OneVsRestModel ovrModel = ovr.fit(trainingDataFrame.cache());

  // score the model on test data
  DataFrame testDataFrame = jsql.createDataFrame(test, LabeledPoint.class);
  DataFrame predictions = ovrModel.transform(testDataFrame.cache()).select("prediction", "label");

  // obtain metrics
  MulticlassMetrics metrics = new MulticlassMetrics(predictions);
  StructField predictionColSchema = predictions.schema().apply("prediction");
  Integer numClasses = (Integer) MetadataUtils.getNumClasses(predictionColSchema).get();

  // compute the false positive rate per label
  StringBuilder results = new StringBuilder();
  results.append("label\tfpr\n");
  for (int label = 0; label < numClasses; label++) {
    results.append(label);
    results.append("\t");
    results.append(metrics.falsePositiveRate((double) label));
    results.append("\n");
  }

  Matrix confusionMatrix = metrics.confusionMatrix();
  // output the Confusion Matrix
  System.out.println("Confusion Matrix");
  System.out.println(confusionMatrix);
  System.out.println();
  System.out.println(results);

  jsc.stop();
}
@After
public void tearDown() {
  sc.stop();
  sc = null;
}
@After
public void stopSparkContext() {
  jsc.stop();
  jsc = null;
  jsql = null;
}
public static void main(String[] args) {
  // Results path
  String pathResults = "results";
  String pathToCategories = "values.txt";
  String pathToWords = "words.txt";
  File file = new File(pathToWords);

  HashMap<Double, String> categoriesDict = new HashMap<>();
  HashMap<String, String> resultado = new HashMap<>();

  FileInputStream fis = null;
  try {
    fis = new FileInputStream(pathToCategories);
    // Construct BufferedReader from InputStreamReader
    BufferedReader br = new BufferedReader(new InputStreamReader(fis));
    String line = null;
    while ((line = br.readLine()) != null) {
      String[] words = line.split(" ");
      categoriesDict.put(Double.valueOf(words[0]), words[1]);
    }
    br.close();
  } catch (FileNotFoundException e) {
    e.printStackTrace();
  } catch (IOException e) {
    e.printStackTrace();
  }

  // Path where the category files live
  String pathCategories = "src/main/resources/categoriestest/";

  // Basic application configuration
  SparkConf sparkConf = new SparkConf().setAppName("NaiveBayesTest").setMaster("local[*]");

  // Create the context
  JavaSparkContext jsc = new JavaSparkContext(sparkConf);

  NaiveBayesModel model = NaiveBayesModel.load(jsc.sc(), pathResults);
  HashMap<String, String> dictionary = loadDictionary();

  JavaRDD<String> fileWords = null;
  if (file.exists()) {
    JavaRDD<String> input = jsc.textFile(pathToWords);
    fileWords = input.flatMap(
        new FlatMapFunction<String, String>() {
          @Override
          public Iterable<String> call(String s) throws Exception {
            return Arrays.asList(s.split(" "));
          }
        });
  } else {
    System.out.println("Error: there are no words");
    System.exit(-1);
  }
  // Copy into a fresh ArrayList; the List returned by collect() is not guaranteed
  // to be an ArrayList, so a direct cast could fail at runtime.
  ArrayList<String> aFileWords = new ArrayList<>(fileWords.collect());

  // Take the directory that holds the category files
  File dir = new File(pathCategories);
  for (File f : dir.listFiles()) {
    JavaRDD<String> input = jsc.textFile(f.getPath());
    JavaRDD<String> words = input.flatMap(
        new FlatMapFunction<String, String>() {
          @Override
          public Iterable<String> call(String s) throws Exception {
            return Arrays.asList(s.split(" "));
          }
        });
    JavaPairRDD<String, Double> wordCount = Reducer.parseWords(words, dictionary);
    List<Tuple2<String, Double>> total = wordCount.collect();
    List<Tuple2<String, Double>> elementsRemoved = new ArrayList<>();
    for (Tuple2<String, Double> t : total) {
      if (!t._1.equals("")) {
        elementsRemoved.add(new Tuple2<>(t._1, t._2 / wordCount.count()));
      }
    }
    ArrayList<Tuple2<String, Double>> freqFinal = new ArrayList<>();
    for (String s : aFileWords) {
      boolean found = false;
      for (Tuple2<String, Double> t : elementsRemoved) {
        if (t._1.equals(s)) {
          found = true;
          freqFinal.add(t);
          break;
        }
      }
      if (!found) {
        freqFinal.add(new Tuple2<String, Double>(s, 0.0));
      }
    }
    double[] v = new double[freqFinal.size()];
    for (int i = 0; i < freqFinal.size(); i++) {
      Tuple2<String, Double> t = freqFinal.get(i);
      v[i] = t._2;
    }
    org.apache.spark.mllib.linalg.Vector vector = Vectors.dense(v);
    double d = model.predict(vector);
    System.out.println(categoriesDict.get(d));
    resultado.put(f.getName(), categoriesDict.get(d));
  }

  jsc.stop();

  try {
    Thread.sleep(2000);
  } catch (InterruptedException e) {
    e.printStackTrace();
  }
  for (String key : resultado.keySet()) {
    System.out.println(key + " - " + resultado.get(key));
  }
}
public void run() throws IOException, URISyntaxException {

  final SparkConf conf = new SparkConf().setAppName("FixMipmapUrlClient");
  final JavaSparkContext sparkContext = new JavaSparkContext(conf);

  final String sparkAppId = sparkContext.getConf().getAppId();
  final String executorsJson = LogUtilities.getExecutorsApiJson(sparkAppId);

  LOG.info("run: appId is {}, executors data is {}", sparkAppId, executorsJson);

  final RenderDataClient sourceDataClient =
      new RenderDataClient(parameters.baseDataUrl, parameters.owner, parameters.project);

  final List<Double> zValues =
      sourceDataClient.getStackZValues(parameters.stack, parameters.minZ, parameters.maxZ);

  if (zValues.size() == 0) {
    throw new IllegalArgumentException("source stack does not contain any matching z values");
  }

  final RenderDataClient targetDataClient = new RenderDataClient(
      parameters.baseDataUrl, parameters.getTargetOwner(), parameters.getTargetProject());

  final StackMetaData targetStackMetaData =
      targetDataClient.getStackMetaData(parameters.getTargetStack());
  if (!targetStackMetaData.isLoading()) {
    throw new IllegalArgumentException(
        "target stack must be in the loading state, meta data is " + targetStackMetaData);
  }

  final LinkedHashMap<Pattern, String> replacementData =
      loadReplacementData(parameters.replacementDataFile);

  final JavaRDD<Double> rddZValues = sparkContext.parallelize(zValues);

  final Function<Double, Integer> transformFunction = new Function<Double, Integer>() {

    @Override
    public final Integer call(final Double z) throws Exception {

      LogUtilities.setupExecutorLog4j("z " + z);

      // get the source client
      final RenderDataClient sourceDataClient =
          new RenderDataClient(parameters.baseDataUrl, parameters.owner, parameters.project);

      // get the target client (which can be the same as the source)
      final RenderDataClient targetDataClient = new RenderDataClient(
          parameters.baseDataUrl, parameters.getTargetOwner(), parameters.getTargetProject());

      final ResolvedTileSpecCollection sourceCollection =
          sourceDataClient.getResolvedTiles(parameters.stack, z);

      final boolean fixImage =
          UrlType.BOTH.equals(parameters.urlType) || UrlType.IMAGE.equals(parameters.urlType);
      final boolean fixMask =
          UrlType.BOTH.equals(parameters.urlType) || UrlType.MASK.equals(parameters.urlType);

      boolean fixedAtLeastOneSpec = false;
      ImageAndMask imageAndMask;
      ImageAndMask fixedImageAndMask;
      String imageUrl;
      String maskUrl;

      for (final TileSpec tileSpec : sourceCollection.getTileSpecs()) {
        final Map.Entry<Integer, ImageAndMask> maxEntry =
            tileSpec.getFloorMipmapEntry(Integer.MAX_VALUE);
        if (maxEntry != null) {
          for (int level = maxEntry.getKey(); level >= 0; level--) {
            imageAndMask = tileSpec.getMipmap(level);
            if (imageAndMask != null) {

              if (fixImage) {
                imageUrl = imageAndMask.getImageUrl();
                for (final Pattern p : replacementData.keySet()) {
                  imageUrl = fixUrl(p, imageUrl, replacementData.get(p));
                }
              } else {
                imageUrl = imageAndMask.getImageUrl();
              }

              if (fixMask) {
                maskUrl = imageAndMask.getMaskUrl();
                for (final Pattern p : replacementData.keySet()) {
                  maskUrl = fixUrl(p, maskUrl, replacementData.get(p));
                }
              } else {
                maskUrl = imageAndMask.getMaskUrl();
              }

              fixedImageAndMask = new ImageAndMask(imageUrl, maskUrl);
              fixedImageAndMask.validate();

              final boolean imagePathChanged =
                  fixImage && (!imageUrl.equals(imageAndMask.getImageUrl()));
              final boolean maskPathChanged =
                  fixMask && (!maskUrl.equals(imageAndMask.getMaskUrl()));
              if (imagePathChanged || maskPathChanged) {
                fixedAtLeastOneSpec = true;
                tileSpec.putMipmap(level, fixedImageAndMask);
              }
            }
          }
        }
      }

      if (fixedAtLeastOneSpec) {
        targetDataClient.saveResolvedTiles(sourceCollection, parameters.getTargetStack(), z);
      } else {
        LOG.info("no changes necessary for z {}", z);
      }

      return sourceCollection.getTileCount();
    }
  };

  // assign a transformation to the RDD
  final JavaRDD<Integer> rddTileCounts = rddZValues.map(transformFunction);

  // use an action to get the results
  final List<Integer> tileCountList = rddTileCounts.collect();
  long total = 0;
  for (final Integer tileCount : tileCountList) {
    total += tileCount;
  }

  LOG.info("run: collected stats");
  LOG.info("run: saved {} tiles and transforms", total);

  sparkContext.stop();
}