Пример #1
0
  /**
   * Entry point of the "ShopJsonParse" Spark job.
   *
   * <p>The job runs these steps:
   *
   * <ol>
   *   <li>Reads the word dictionary from HDFS and collects three word lists to the driver, one per
   *       tag ("1", "2", "3").
   *   <li>Derives an integer score per item id from {@code ratelist.json} (see
   *       {@link #computeRateScores}).
   *   <li>Builds one "@=@=@"-delimited product row per item id from {@code ProductContent.json}
   *       joined with {@code productList.json} (see {@link #buildProductMessages}).
   *   <li>Left-outer-joins the product rows with the scores (default score 50 when an item has
   *       none), collects the rows to the driver and hands them to {@code MessageToMysql.insert}.
   * </ol>
   *
   * <p>The Spark context is stopped in a {@code finally} block so it is released even when one of
   * the steps throws.
   *
   * @param args command-line arguments; not used
   * @throws Exception if any Spark stage or the final insert fails
   */
  public static void main(String[] args) throws Exception {

    SparkConf sparkConf = new SparkConf().setAppName("ShopJsonParse");
    JavaSparkContext ctx = new JavaSparkContext(sparkConf);

    try {
      JavaRDD<String> ciku = ctx.textFile("hdfs://hadoop119:9000/ciku/ciku_zhuyu.txt", 1);
      final List<String> zhuyulist = collectTaggedWords(ciku, "1");
      final List<String> hplist = collectTaggedWords(ciku, "2");
      final List<String> cplist = collectTaggedWords(ciku, "3");

      JavaPairRDD<String, Integer> rateresult = computeRateScores(ctx, zhuyulist, hplist, cplist);
      JavaPairRDD<String, String> Messages = buildProductMessages(ctx);

      JavaRDD<String> MessagesAll =
          Messages.leftOuterJoin(rateresult)
              .map(
                  new Function<Tuple2<String, Tuple2<String, Optional<Integer>>>, String>() {
                    @Override
                    public String call(Tuple2<String, Tuple2<String, Optional<Integer>>> line)
                        throws Exception {
                      Optional<Integer> possible = line._2._2;
                      // Items without any scored review fall back to 50.
                      int fenshu = possible.isPresent() ? possible.get() : 50;
                      return line._1 + "@=@=@" + line._2._1 + "@=@=@" + fenshu;
                    }
                  });

      List<String> messages = MessagesAll.collect();
      new MessageToMysql().insert(messages);
    } finally {
      // Release the context even if a stage or the insert failed.
      ctx.stop();
    }
  }

  /**
   * Collects the distinct dictionary words carrying the given tag.
   *
   * <p>Each dictionary line is split on a single space; the first token is the word and the second
   * token is the tag. Lines whose tag differs, and lines that have no second token at all, are
   * mapped to the placeholder {@code "kaer"}, so the returned list may contain that placeholder.
   *
   * @param ciku the dictionary lines
   * @param tag the tag value to select
   * @return the distinct selected words (plus possibly the placeholder), collected to the driver
   */
  private static List<String> collectTaggedWords(JavaRDD<String> ciku, final String tag) {
    return ciku.map(
            new Function<String, String>() {
              @Override
              public String call(String s) throws Exception {
                String[] str = s.split(" ");
                // Guard against lines without a tag column (e.g. blank lines).
                if (str.length > 1 && str[1].equals(tag)) {
                  return str[0];
                }
                return "kaer";
              }
            })
        .distinct()
        .collect();
  }

  /**
   * Computes an integer score per item id from {@code ratelist.json}.
   *
   * <p>Every rate text is split on whitespace into fragments. Each fragment is reduced to a
   * "subject word" pair using {@code zhuyulist} and labelled "好评", "中评" or "差评" by looking the
   * word up in {@code cplist} first and {@code hplist} second. Fragments whose intermediate value
   * contains "null" are dropped. The labels are counted per item id, and the score is
   * {@code (int) (good / (good + bad) * 100)}, or 50 when both counts are zero.
   *
   * @param ctx the Spark context used to read the input
   * @param zhuyulist dictionary words with tag "1"
   * @param hplist dictionary words with tag "2"
   * @param cplist dictionary words with tag "3"
   * @return pairs of item id and score
   */
  private static JavaPairRDD<String, Integer> computeRateScores(
      JavaSparkContext ctx,
      final List<String> zhuyulist,
      final List<String> hplist,
      final List<String> cplist) {

    JavaRDD<String> mongoratedata = ctx.textFile("hdfs://hadoop119:9000/shopdata/ratelist.json");

    JavaRDD<Map<String, Object>> mongorateall =
        mongoratedata.map(
            new Function<String, Map<String, Object>>() {
              @Override
              public Map<String, Object> call(String line) throws Exception {
                Map<String, Object> documentMap = new HashMap<String, Object>();
                try {
                  JSONObject jsonline = new JSONObject(line);
                  documentMap.put("PlatformItemId", jsonline.get("nid").toString());

                  Gson gson = new Gson();
                  rate parsedRate = gson.fromJson(jsonline.get("rate").toString(), rate.class);
                  documentMap.put("ratelist", parsedRate.parsemod());
                } catch (JSONException e) {
                  // Best effort: a malformed line yields a partially filled map. A missing
                  // "ratelist" becomes the text "null" below and is filtered out later.
                  e.printStackTrace();
                }
                return documentMap;
              }
            });

    JavaPairRDD<String, String> Rates =
        mongorateall.flatMapToPair(
            new PairFlatMapFunction<Map<String, Object>, String, String>() {
              @Override
              public Iterable<Tuple2<String, String>> call(Map<String, Object> map)
                  throws Exception {
                List<Tuple2<String, String>> pairs = new ArrayList<Tuple2<String, String>>();
                String itemid = (String) map.get("PlatformItemId");

                @SuppressWarnings("unchecked")
                Map<String, String> ratelist = (Map<String, String>) map.get("ratelist");
                if (ratelist == null) {
                  pairs.add(new Tuple2<String, String>(itemid, "null"));
                  return pairs;
                }
                for (String value : ratelist.values()) {
                  pairs.add(new Tuple2<String, String>(itemid, value));
                }
                return pairs;
              }
            });

    final Pattern SPACES = Pattern.compile("\\s+");
    JavaPairRDD<String, String> sentences =
        Rates.flatMapValues(
            new Function<String, Iterable<String>>() {
              @Override
              public Iterable<String> call(String s) throws Exception {
                List<String> fragments = new ArrayList<String>();
                if (!s.contains(" ")) {
                  fragments.add(s);
                  return fragments;
                }
                for (String piece : SPACES.split(s)) {
                  if (!piece.equals("")) {
                    fragments.add(piece);
                  }
                }
                return fragments;
              }
            });

    String filter = "的 也 很 都 了 非常 有些 还 是 点 些 就 看起来 看上去 更 呢 哦 确实 什么的 较 太 啊 吧 得 那么 什么 挺";
    final String[] list = filter.split(" ");
    JavaPairRDD<String, String> words =
        sentences.mapValues(
            new Function<String, String>() {
              @Override
              public String call(String s) throws Exception {
                if (s.length() < 3) {
                  return s + " " + "kaer";
                }
                for (String zhuyu : zhuyulist) {
                  if (!s.contains(zhuyu)) {
                    continue;
                  }
                  // Blank out the matched word, then take the text after the last blank or,
                  // when nothing follows it, the text before it minus its last character.
                  String stripped = s.replace(zhuyu, " ");
                  int size = stripped.length();
                  int tap = stripped.lastIndexOf(" ");
                  String ss = "kaer";
                  if (tap + 1 < size) {
                    ss = stripped.substring(tap + 1, size);
                  } else if (tap - 1 > 0) {
                    ss = stripped.substring(0, tap - 1);
                  }
                  for (String tem : list) {
                    ss = ss.replace(tem, "");
                  }
                  return zhuyu + " " + ss;
                }
                return "long null";
              }
            });

    JavaPairRDD<String, String> filterwords =
        words
            .mapValues(
                new Function<String, String>() {
                  @Override
                  public String call(String s) throws Exception {
                    if (s.contains("kaer")) {
                      String word = s.substring(0, s.indexOf(" "));
                      return classify(word) + " " + "," + word;
                    }
                    if (s.contains("null")) {
                      return s + ",null";
                    }
                    if (s.endsWith(" ")) {
                      return "long null,null";
                    }
                    String[] parts = s.split(" ");
                    String word = parts[1];
                    return classify(word) + " " + parts[0] + "," + word;
                  }

                  /** Looks the word up in cplist first, then hplist; otherwise "中评". */
                  private String classify(String word) {
                    if (cplist.contains(word)) {
                      return "差评";
                    }
                    if (hplist.contains(word)) {
                      return "好评";
                    }
                    return "中评";
                  }
                })
            .filter(
                new Function<Tuple2<String, String>, Boolean>() {
                  @Override
                  public Boolean call(Tuple2<String, String> line) throws Exception {
                    return !line._2.contains("null");
                  }
                });

    // Encode each label as a "good,neutral,bad" one-hot triple so the triples can be summed.
    JavaPairRDD<String, String> ones =
        filterwords.mapToPair(
            new PairFunction<Tuple2<String, String>, String, String>() {
              @Override
              public Tuple2<String, String> call(Tuple2<String, String> line) throws Exception {
                String labelled = line._2;
                String value = "0,0,0";
                if (labelled.startsWith("好评")) {
                  value = "1,0,0";
                } else if (labelled.startsWith("中评")) {
                  value = "0,1,0";
                } else if (labelled.startsWith("差评")) {
                  value = "0,0,1";
                }
                return new Tuple2<String, String>(line._1(), value);
              }
            });

    JavaPairRDD<String, String> result =
        ones.reduceByKey(
            new Function2<String, String, String>() {
              @Override
              public String call(String s1, String s2) throws Exception {
                String[] left = s1.split(",");
                String[] right = s2.split(",");
                double good = Double.parseDouble(left[0]) + Double.parseDouble(right[0]);
                double neutral = Double.parseDouble(left[1]) + Double.parseDouble(right[1]);
                double bad = Double.parseDouble(left[2]) + Double.parseDouble(right[2]);
                return good + "," + neutral + "," + bad;
              }
            });

    return result.mapValues(
        new Function<String, Integer>() {
          @Override
          public Integer call(String s1) throws Exception {
            String[] counts = s1.split(",");
            double good = Double.parseDouble(counts[0]);
            double bad = Double.parseDouble(counts[2]);
            if (good + bad == 0) {
              return 50;
            }
            return (int) (good / (good + bad) * 100);
          }
        });
  }

  /**
   * Builds one "@=@=@"-delimited product row per item id.
   *
   * <p>{@code ProductContent.json} is parsed with {@code ShopParse.ParseLine} and
   * {@code productList.json} with {@code ShopParse.ParseproList}. Product-list entries are kept
   * only when their "isdownloads" value prints as "true". Both sides are joined on
   * "PlatformItemId", and selected product-list fields are copied into the content map before the
   * row is formatted.
   *
   * <p>Two record counts are printed to standard output; each {@code count()} is a Spark action
   * and therefore evaluates its RDD separately from the final job.
   *
   * @param ctx the Spark context used to read the inputs
   * @return pairs of item id and formatted row (24 fields, without the item id itself)
   */
  private static JavaPairRDD<String, String> buildProductMessages(JavaSparkContext ctx) {

    JavaRDD<String> mongocontentdata =
        ctx.textFile("hdfs://hadoop119:9000/shopdata/ProductContent.json");

    JavaRDD<Map<String, Object>> mongocontentall =
        mongocontentdata.map(
            new Function<String, Map<String, Object>>() {
              @Override
              public Map<String, Object> call(String line) throws Exception {
                return new ShopParse().ParseLine(line);
              }
            });

    JavaPairRDD<String, Map<String, Object>> content =
        mongocontentall.mapToPair(
            new PairFunction<Map<String, Object>, String, Map<String, Object>>() {
              @Override
              public Tuple2<String, Map<String, Object>> call(Map<String, Object> map)
                  throws Exception {
                return new Tuple2<String, Map<String, Object>>(
                    map.get("PlatformItemId").toString(), map);
              }
            });

    JavaRDD<String> mongoproListdata =
        ctx.textFile("hdfs://hadoop119:9000/shopdata/productList.json");

    JavaRDD<Map<String, Object>> mongoproListall =
        mongoproListdata.map(
            new Function<String, Map<String, Object>>() {
              @Override
              public Map<String, Object> call(String line) throws Exception {
                return new ShopParse().ParseproList(line);
              }
            });
    System.out.println("mongoproListall counts :" + mongoproListall.count());

    JavaPairRDD<String, Map<String, Object>> proList =
        mongoproListall
            .mapToPair(
                new PairFunction<Map<String, Object>, String, Map<String, Object>>() {
                  @Override
                  public Tuple2<String, Map<String, Object>> call(Map<String, Object> map)
                      throws Exception {
                    return new Tuple2<String, Map<String, Object>>(
                        map.get("PlatformItemId").toString(), map);
                  }
                })
            .filter(
                new Function<Tuple2<String, Map<String, Object>>, Boolean>() {
                  @Override
                  public Boolean call(Tuple2<String, Map<String, Object>> line) throws Exception {
                    return line._2.get("isdownloads").toString().equals("true");
                  }
                });
    System.out.println("proList counts :" + proList.count());

    JavaRDD<Map<String, Object>> ContJoinPro =
        content
            .join(proList)
            .map(
                new Function<
                    Tuple2<String, Tuple2<Map<String, Object>, Map<String, Object>>>,
                    Map<String, Object>>() {
                  @Override
                  public Map<String, Object> call(
                      Tuple2<String, Tuple2<Map<String, Object>, Map<String, Object>>> line)
                      throws Exception {
                    Map<String, Object> mapprod = line._2._1;
                    Map<String, Object> listing = line._2._2;
                    String[] copiedKeys = {
                      "Name", "Photo", "SellerId", "StoreName", "Url", "TaokeUrl"
                    };
                    for (String key : copiedKeys) {
                      mapprod.put(key, listing.get(key));
                    }
                    return mapprod;
                  }
                });

    return ContJoinPro.mapToPair(
        new PairFunction<Map<String, Object>, String, String>() {
          @Override
          public Tuple2<String, String> call(Map<String, Object> map) throws Exception {
            final String sep = "@=@=@";

            String itemid = (String) map.get("PlatformItemId");
            // Null-safe: a missing "isTmall" value is treated like any non-"true" value.
            String isTmall = (String) map.get("isTmall");
            String From = "true".equals(isTmall) ? "2" : "1";
            String Quantity = (String) map.get("Quantity");
            String CmtCount = (String) map.get("ratecount");
            String[] ImgPaths = firstFiveImagePaths((String) map.get("detailmessage"));

            String mobprice = (String) map.get("mobmessage");
            String pcprice = (String) map.get("pcpricemessage");
            String minmaxPrice = (String) map.get("MaxMinPrice");
            String OriginalPrice = (String) map.get("OriginalPrice");

            String[] pcParts = pcprice.split(sep);
            String[] minmaxParts = minmaxPrice.split(",");
            double p1 = Double.parseDouble(mobprice);
            double p2 = Double.parseDouble(pcParts[0]);
            double min = Double.parseDouble(minmaxParts[0]);
            double max = Double.parseDouble(minmaxParts[1]);
            double origin = Double.parseDouble(OriginalPrice);

            // Lower of the two prices; the exact value 100000.00 is replaced by the minimum.
            double Price = p1;
            if (Price > p2) {
              Price = p2;
            }
            if (Price == 100000.00) {
              Price = min;
            }
            if (origin < max) {
              OriginalPrice = max + "";
            }

            // The second pc-price field starting with "0.00" sets the flag; the field may be
            // absent entirely, in which case the flag stays "0".
            String IsPost = "0";
            if (pcParts.length > 1 && !pcprice.endsWith(sep) && pcParts[1].startsWith("0.00")) {
              IsPost = "1";
            }

            String Name = (String) map.get("Name");
            String SellerId = (String) map.get("SellerId");
            String StoreName = (String) map.get("StoreName");
            String Photo = (String) map.get("Photo");
            String Url = (String) map.get("Url");
            String TaokeUrl = (String) map.get("TaokeUrl");

            DecimalFormat ddf = new DecimalFormat("#0.00");
            String Discount = ddf.format(Price / Double.parseDouble(OriginalPrice)) + "";

            SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
            String AddTime = df.format(new Date());
            String IsSell = "1";
            String Type = "2";
            String IsChangeImgPath = "0";
            String HotKeyId = "0";
            String OpenIid = "0";

            String[] fields = {
              From,
              Quantity,
              CmtCount,
              ImgPaths[0],
              ImgPaths[1],
              ImgPaths[2],
              ImgPaths[3],
              ImgPaths[4],
              String.valueOf(Price),
              IsPost,
              Name,
              SellerId,
              StoreName,
              OriginalPrice,
              Photo,
              Url,
              Discount,
              AddTime,
              IsSell,
              Type,
              IsChangeImgPath,
              HotKeyId,
              TaokeUrl,
              OpenIid
            };
            StringBuilder itempro = new StringBuilder();
            for (int i = 0; i < fields.length; i++) {
              if (i > 0) {
                itempro.append(sep);
              }
              itempro.append(fields[i]);
            }
            return new Tuple2<String, String>(itemid, itempro.toString());
          }
        });
  }

  /**
   * Splits a "@=@=@"-delimited image-path string into exactly five entries.
   *
   * <p>{@code String.split} drops trailing empty fields, so the input may yield fewer than five
   * parts; missing entries (and a {@code null} input) are filled with empty strings so the output
   * row keeps a fixed number of columns. Parts beyond the fifth are ignored.
   *
   * @param detailMessage the delimited image paths, possibly {@code null}
   * @return an array of length five
   */
  private static String[] firstFiveImagePaths(String detailMessage) {
    String[] padded = {"", "", "", "", ""};
    if (detailMessage == null) {
      return padded;
    }
    String[] parts = detailMessage.split("@=@=@");
    for (int i = 0; i < parts.length && i < padded.length; i++) {
      padded[i] = parts[i];
    }
    return padded;
  }