public static void main(String[] args) throws Exception { SparkConf sparkConf = new SparkConf().setAppName("ShopJsonParse"); JavaSparkContext ctx = new JavaSparkContext(sparkConf); JavaRDD<String> ciku = ctx.textFile("hdfs://hadoop119:9000/ciku/ciku_zhuyu.txt", 1); JavaRDD<String> zhuyu = ciku.map( new Function<String, String>() { @Override public String call(String s) throws Exception { String[] str = s.split(" "); if (str[1].equals("1")) return str[0]; else return "kaer"; } }) .distinct() .cache(); JavaRDD<String> haoping = ciku.map( new Function<String, String>() { @Override public String call(String s) throws Exception { String[] str = s.split(" "); if (str[1].equals("2")) return str[0]; else return "kaer"; } }) .distinct() .cache(); JavaRDD<String> chaping = ciku.map( new Function<String, String>() { @Override public String call(String s) throws Exception { String[] str = s.split(" "); if (str[1].equals("3")) return str[0]; else return "kaer"; } }) .distinct() .cache(); final List<String> zhuyulist = zhuyu.collect(); final List<String> hplist = haoping.collect(); final List<String> cplist = chaping.collect(); JavaRDD<String> mongoratedata = ctx.textFile("hdfs://hadoop119:9000/shopdata/ratelist.json"); JavaRDD<Map<String, Object>> mongorateall = mongoratedata.map( new Function<String, Map<String, Object>>() { @Override public Map<String, Object> call(String line) throws Exception { return ParseLineToMap(line); } private Map<String, Object> ParseLineToMap(String line) { Map<String, Object> documentMap = new HashMap<String, Object>(); try { JSONObject jsonline = new JSONObject(line); documentMap.put("PlatformItemId", jsonline.get("nid").toString()); Gson gson = new Gson(); rate rate = gson.fromJson(jsonline.get("rate").toString(), rate.class); documentMap.put("ratelist", rate.parsemod()); } catch (JSONException e) { e.printStackTrace(); } return documentMap; } }); JavaPairRDD<String, String> Rates = mongorateall.flatMapToPair( new PairFlatMapFunction<Map<String, Object>, String, String>() { @Override public Iterable<Tuple2<String, String>> call(Map<String, Object> map) throws Exception { ArrayList<Tuple2<String, String>> flatmaps = new ArrayList<Tuple2<String, String>>(); String itemid = (String) map.get("PlatformItemId"); String itempro = ""; Map<String, String> ratelist = (Map<String, String>) map.get("ratelist"); if (ratelist == null) { itempro = "null"; flatmaps.add(new Tuple2<String, String>(itemid, itempro)); } else { for (String value : ratelist.values()) { itempro = value; flatmaps.add(new Tuple2<String, String>(itemid, itempro)); } } return flatmaps; } }); final Pattern SPACES = Pattern.compile("\\s+"); JavaPairRDD<String, String> sentences = Rates.flatMapValues( new Function<String, Iterable<String>>() { @Override public Iterable<String> call(String s) throws Exception { ArrayList<String> list = new ArrayList<String>(); if (s.contains(" ")) { String[] str = SPACES.split(s); int num = 0; while (num < str.length) { if (!str[num].equals("")) list.add(str[num]); num++; } } else { list.add(s); } return list; } }); String filter = "的 也 很 都 了 非常 有些 还 是 点 些 就 看起来 看上去 更 呢 哦 确实 什么的 较 太 啊 吧 得 那么 什么 挺"; final String[] list = filter.split(" "); JavaPairRDD<String, String> words = sentences.mapValues( new Function<String, String>() { @Override public String call(String s) throws Exception { if (s.length() < 3) { return s + " " + "kaer"; } for (int i = 0; i < zhuyulist.size(); i++) { String zhuyu = zhuyulist.get(i); if (s.contains(zhuyu)) { s = s.replace(zhuyu, " "); int size = s.length(); int tap = s.lastIndexOf(" "); String ss = "kaer"; if (tap + 1 < size) { ss = s.substring(tap + 1, size); } else { if (tap - 1 > 0) ss = s.substring(0, tap - 1); } for (String tem : list) { if (ss.contains(tem)) ss = ss.replace(tem, ""); } return zhuyu + " " + ss; } } return "long null"; } }); JavaPairRDD<String, String> filterwords = words .mapValues( new Function<String, String>() { @Override public String call(String s) throws Exception { String tempstr; if (s.contains("kaer")) { tempstr = s.substring(0, s.indexOf(" ")); for (int i = 0; i < cplist.size(); i++) { if (tempstr.equals(cplist.get(i))) return "差评 " + "," + tempstr; } for (int i = 0; i < hplist.size(); i++) { if (tempstr.equals(hplist.get(i))) return "好评 " + "," + tempstr; } return "中评 " + "," + tempstr; } else if (s.contains("null")) { return s + ",null"; } else { if (s.endsWith(" ")) return "long null,null"; tempstr = s.split(" ")[1]; for (int i = 0; i < cplist.size(); i++) { if (tempstr.equals(cplist.get(i))) return "差评 " + s.split(" ")[0] + "," + tempstr; } for (int i = 0; i < hplist.size(); i++) { if (tempstr.equals(hplist.get(i))) return "好评 " + s.split(" ")[0] + "," + tempstr; } return "中评 " + s.split(" ")[0] + "," + tempstr; } } }) .filter( new Function<Tuple2<String, String>, Boolean>() { @Override public Boolean call(Tuple2<String, String> line) throws Exception { if (line._2.contains("null")) return false; else return true; } }); JavaPairRDD<String, String> ones = filterwords.mapToPair( new PairFunction<Tuple2<String, String>, String, String>() { @Override public Tuple2<String, String> call(Tuple2<String, String> line) throws Exception { String key = line._1(); String value = "0,0,0", ll = line._2; if (ll.startsWith("好评")) value = "1,0,0"; else if (ll.startsWith("中评")) value = "0,1,0"; else if (ll.startsWith("差评")) value = "0,0,1"; return new Tuple2<String, String>(key, value); } }); JavaPairRDD<String, String> result = ones.reduceByKey( new Function2<String, String, String>() { @Override public String call(String s1, String s2) throws Exception { double h1 = Double.parseDouble(s1.split(",")[0]), h2 = Double.parseDouble(s1.split(",")[1]), h3 = Double.parseDouble(s1.split(",")[2]); double hh1 = Double.parseDouble(s2.split(",")[0]), hh2 = Double.parseDouble(s2.split(",")[1]), hh3 = Double.parseDouble(s2.split(",")[2]); return (h1 + hh1) + "," + (h2 + hh2) + "," + (h3 + hh3); } }); JavaPairRDD<String, Integer> rateresult = result.mapValues( new Function<String, Integer>() { @Override public Integer call(String s1) throws Exception { double h1 = Double.parseDouble(s1.split(",")[0]), h2 = Double.parseDouble(s1.split(",")[1]), h3 = Double.parseDouble(s1.split(",")[2]); if (h1 + h3 == 0) return 50; else { return (int) (h1 / (h1 + h3) * 100); } } }); JavaRDD<String> mongocontentdata = ctx.textFile("hdfs://hadoop119:9000/shopdata/ProductContent.json"); JavaRDD<Map<String, Object>> mongocontentall = mongocontentdata.map( new Function<String, Map<String, Object>>() { @Override public Map<String, Object> call(String line) throws Exception { return new ShopParse().ParseLine(line); } }); JavaPairRDD<String, Map<String, Object>> content = mongocontentall.mapToPair( new PairFunction<Map<String, Object>, String, Map<String, Object>>() { @Override public Tuple2<String, Map<String, Object>> call(Map<String, Object> map) throws Exception { return new Tuple2<String, Map<String, Object>>( map.get("PlatformItemId").toString(), map); } }); JavaRDD<String> mongoproListdata = ctx.textFile("hdfs://hadoop119:9000/shopdata/productList.json"); JavaRDD<Map<String, Object>> mongoproListall = mongoproListdata.map( new Function<String, Map<String, Object>>() { @Override public Map<String, Object> call(String line) throws Exception { return new ShopParse().ParseproList(line); } }); System.out.println("mongoproListall counts :" + mongoproListall.count()); JavaPairRDD<String, Map<String, Object>> proList = mongoproListall .mapToPair( new PairFunction<Map<String, Object>, String, Map<String, Object>>() { @Override public Tuple2<String, Map<String, Object>> call(Map<String, Object> map) throws Exception { return new Tuple2<String, Map<String, Object>>( map.get("PlatformItemId").toString(), map); } }) .filter( new Function<Tuple2<String, Map<String, Object>>, Boolean>() { @Override public Boolean call(Tuple2<String, Map<String, Object>> line) throws Exception { if (line._2.get("isdownloads").toString().equals("true")) return true; else return false; } }); System.out.println("proList counts :" + proList.count()); JavaRDD<Map<String, Object>> ContJoinPro = content .join(proList) .map( new Function< Tuple2<String, Tuple2<Map<String, Object>, Map<String, Object>>>, Map<String, Object>>() { @Override public Map<String, Object> call( Tuple2<String, Tuple2<Map<String, Object>, Map<String, Object>>> line) throws Exception { Map<String, Object> mapprod = line._2._1; mapprod.put("Name", line._2._2.get("Name")); mapprod.put("Photo", line._2._2.get("Photo")); mapprod.put("SellerId", line._2._2.get("SellerId")); mapprod.put("StoreName", line._2._2.get("StoreName")); mapprod.put("Url", line._2._2.get("Url")); mapprod.put("TaokeUrl", line._2._2.get("TaokeUrl")); return mapprod; } }); JavaPairRDD<String, String> Messages = ContJoinPro.mapToPair( new PairFunction<Map<String, Object>, String, String>() { @Override public Tuple2<String, String> call(Map<String, Object> map) throws Exception { String itemid = (String) map.get("PlatformItemId"); String itempro = ""; String From = (String) map.get("isTmall"); if (From.equals("true")) From = "2"; else From = "1"; String Quantity = (String) map.get("Quantity"); String CmtCount = (String) map.get("ratecount"); String ImgPath = (String) map.get("detailmessage"); String[] ImgPaths = ImgPath.split("@=@=@"); // 1-5 String mobprice = (String) map.get("mobmessage"); String pcprice = (String) map.get("pcpricemessage"); String minmaxPrice = (String) map.get("MaxMinPrice"); String OriginalPrice = (String) map.get("OriginalPrice"); double p1 = Double.parseDouble(mobprice); double p2 = Double.parseDouble(pcprice.split("@=@=@")[0]); double min = Double.parseDouble(minmaxPrice.split(",")[0]); double max = Double.parseDouble(minmaxPrice.split(",")[1]); double origin = Double.parseDouble(OriginalPrice); double Price = p1; if (Price > p2) Price = p2; if (Price == 100000.00) Price = min; if (origin < max) OriginalPrice = max + ""; String IsPost = "0"; if (!pcprice.endsWith("@=@=@") && pcprice.split("@=@=@")[1].startsWith("0.00")) IsPost = "1"; String Name = (String) map.get("Name"); String SellerId = (String) map.get("SellerId"); String StoreName = (String) map.get("StoreName"); String Photo = (String) map.get("Photo"); String Url = (String) map.get("Url"); String TaokeUrl = (String) map.get("TaokeUrl"); DecimalFormat ddf = new DecimalFormat("#0.00"); String Discount = ddf.format(Price / Double.parseDouble(OriginalPrice)) + ""; SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); String AddTime = df.format(new Date()); String IsSell = "1"; String Type = "2"; String IsChangeImgPath = "0"; String HotKeyId = "0"; String OpenIid = "0"; itempro = From + "@=@=@" + Quantity + "@=@=@" + CmtCount + "@=@=@" + ImgPaths[0] + "@=@=@" + ImgPaths[1] + "@=@=@" + ImgPaths[2] + "@=@=@" + ImgPaths[3] + "@=@=@" + ImgPaths[4] + "@=@=@" + Price + "@=@=@" + IsPost + "@=@=@" + Name + "@=@=@" + SellerId + "@=@=@" + StoreName + "@=@=@" + OriginalPrice + "@=@=@" + Photo + "@=@=@" + Url + "@=@=@" + Discount + "@=@=@" + AddTime + "@=@=@" + IsSell + "@=@=@" + Type + "@=@=@" + IsChangeImgPath + "@=@=@" + HotKeyId + "@=@=@" + TaokeUrl + "@=@=@" + OpenIid; return new Tuple2<String, String>(itemid, itempro); } }); JavaRDD<String> MessagesAll = Messages.leftOuterJoin(rateresult) .map( new Function<Tuple2<String, Tuple2<String, Optional<Integer>>>, String>() { @Override public String call(Tuple2<String, Tuple2<String, Optional<Integer>>> line) throws Exception { Optional<Integer> possible = line._2._2; int fenshu = 50; if (possible.isPresent()) fenshu = line._2._2.get(); return line._1 + "@=@=@" + line._2._1 + "@=@=@" + fenshu; } }); List<String> messages = MessagesAll.collect(); new MessageToMysql().insert(messages); ctx.stop(); }