@Override
  /**
   * Executes the map-side matrix-multiplication chain on Spark and stores the aggregated block
   * under the output variable.
   *
   * @param ec execution context; cast to {@link SparkExecutionContext}
   * @throws DMLRuntimeException declared by the instruction contract
   * @throws DMLUnsupportedOperationException declared by the instruction contract
   */
  public void processInstruction(ExecutionContext ec)
      throws DMLRuntimeException, DMLUnsupportedOperationException {
    final SparkExecutionContext sparkCtx = (SparkExecutionContext) ec;

    // Resolve the RDD input (first operand) and the broadcast input (second operand).
    final JavaPairRDD<MatrixIndexes, MatrixBlock> blocksX =
        sparkCtx.getBinaryBlockRDDHandleForVariable(_input1.getName());
    final PartitionedBroadcastMatrix bcastV = sparkCtx.getBroadcastForVariable(_input2.getName());

    // Build the per-block partial results for the requested chain type.
    final JavaPairRDD<MatrixIndexes, MatrixBlock> partials;
    if (_chainType != ChainType.XtXv) {
      // ChainType.XtwXv: the third operand is needed as an additional broadcast.
      final PartitionedBroadcastMatrix bcastW =
          sparkCtx.getBroadcastForVariable(_input3.getName());
      partials = blocksX.mapToPair(new RDDMapMMChainFunction2(bcastV, bcastW));
    } else {
      partials = blocksX.mapValues(new RDDMapMMChainFunction(bcastV));
    }

    // The chain is guaranteed to yield a single output block, so all partials are summed into one.
    final MatrixBlock result = RDDAggregateUtils.sumStable(partials);

    // Register the block in the symbol table (no lineage because it is a single block); this
    // also implicitly maintains the matrix characteristics.
    sparkCtx.setMatrixOutput(_output.getName(), result);
  }
  /**
   * Populates {@code invertedLexicon} from {@code lexicon} by swapping every (word, id) pair
   * into an (id, word) pair. Despite the "get" prefix nothing is returned; the result is stored
   * in the field.
   */
  private void getInvertedLexicon() {
    final PairFunction<Tuple2<String, Long>, Long, String> swapKeyAndValue =
        new PairFunction<Tuple2<String, Long>, Long, String>() {

          private static final long serialVersionUID = 1L;

          @Override
          public Tuple2<Long, String> call(Tuple2<String, Long> wordEntry) throws Exception {
            final Long id = wordEntry._2;
            final String word = wordEntry._1;
            return new Tuple2<Long, String>(id, word);
          }
        };

    this.invertedLexicon = lexicon.mapToPair(swapKeyAndValue);
  }
  /**
   * Executes a map-side matrix multiplication between an RDD input and a broadcast input, then
   * aggregates the products according to {@code _aggtype} and registers the output.
   *
   * @param ec execution context; cast to {@link SparkExecutionContext}
   * @throws DMLRuntimeException declared by the instruction contract
   * @throws DMLUnsupportedOperationException declared by the instruction contract
   */
  @Override
  public void processInstruction(ExecutionContext ec)
      throws DMLRuntimeException, DMLUnsupportedOperationException {
    final SparkExecutionContext sparkCtx = (SparkExecutionContext) ec;

    // _type selects which operand is the broadcast: for LEFT the first input is broadcast and
    // the second input is the RDD; otherwise the roles are swapped.
    final boolean leftIsBroadcast = (_type == CacheType.LEFT);
    final String rddVar = leftIsBroadcast ? input2.getName() : input1.getName();
    final String bcastVar = leftIsBroadcast ? input1.getName() : input2.getName();
    final MatrixCharacteristics rddStats = sparkCtx.getMatrixCharacteristics(rddVar);
    final MatrixCharacteristics bcastStats = sparkCtx.getMatrixCharacteristics(bcastVar);

    // Resolve both operands.
    JavaPairRDD<MatrixIndexes, MatrixBlock> rddBlocks =
        sparkCtx.getBinaryBlockRDDHandleForVariable(rddVar);
    final PartitionedBroadcastMatrix bcast = sparkCtx.getBroadcastForVariable(bcastVar);

    // Unless empty output blocks are requested, drop empty blocks before multiplying.
    final boolean dropEmptyBlocks = !_outputEmpty;
    if (dropEmptyBlocks) {
      rddBlocks = rddBlocks.filter(new FilterNonEmptyBlocksFunction());
    }

    // Pick the multiplication strategy.
    JavaPairRDD<MatrixIndexes, MatrixBlock> product;
    if (requiresFlatMapFunction(_type, bcastStats)) {
      product = rddBlocks.flatMapToPair(new RDDFlatMapMMFunction(_type, bcast));
    } else if (preservesPartitioning(rddStats, _type)) {
      product = rddBlocks.mapPartitionsToPair(new RDDMapMMPartitionFunction(_type, bcast), true);
    } else {
      product = rddBlocks.mapToPair(new RDDMapMMFunction(_type, bcast));
    }

    // ... and drop empty blocks again after multiplying.
    if (dropEmptyBlocks) {
      product = product.filter(new FilterNonEmptyBlocksFunction());
    }

    if (_aggtype == SparkAggType.SINGLE_BLOCK) {
      // Single output block: sum everything and store the block itself (no lineage because it
      // is a single block); this also implicitly maintains the matrix characteristics.
      final MatrixBlock singleBlock = RDDAggregateUtils.sumStable(product);
      sparkCtx.setMatrixOutput(output.getName(), singleBlock);
      return;
    }

    // MULTI_BLOCK or NONE: only MULTI_BLOCK needs a per-key aggregation.
    if (_aggtype == SparkAggType.MULTI_BLOCK) {
      product = RDDAggregateUtils.sumByKeyStable(product);
    }

    // Register the output RDD handle together with its lineage.
    sparkCtx.setRDDHandleForVariable(output.getName(), product);
    sparkCtx.addLineageRDD(output.getName(), rddVar);
    sparkCtx.addLineageBroadcast(output.getName(), bcastVar);

    // Update output statistics if they were not inferred.
    updateBinaryMMOutputMatrixCharacteristics(sparkCtx, true);
  }
Пример #4
0
  /**
   * Computes the transitive closure of the graph returned by {@code generateGraph()} and prints
   * the number of edges in the closure.
   *
   * @param args {@code <host> [<slices>]}; slices defaults to 2
   */
  public static void main(String[] args) {
    if (args.length < 1) {
      System.err.println("Usage: JavaTC <host> [<slices>]");
      System.exit(1);
    }

    final String master = args[0];
    final JavaSparkContext sc =
        new JavaSparkContext(
            master,
            "JavaTC",
            System.getenv("SPARK_HOME"),
            JavaSparkContext.jarOfClass(JavaTC.class));

    final int numSlices;
    if (args.length > 1) {
      numSlices = Integer.parseInt(args[1]);
    } else {
      numSlices = 2;
    }
    JavaPairRDD<Integer, Integer> closure =
        sc.parallelizePairs(generateGraph(), numSlices).cache();

    // Linear transitive closure: every round extends the known paths by one edge by joining
    // the graph's edges with the paths discovered so far, e.g. joining the path (y, z) from
    // the closure with the edge (x, y) from the graph yields the path (x, z).

    // join() matches on keys, hence the edges are kept in reversed order.
    final PairFunction<Tuple2<Integer, Integer>, Integer, Integer> reverseEdge =
        new PairFunction<Tuple2<Integer, Integer>, Integer, Integer>() {
          @Override
          public Tuple2<Integer, Integer> call(Tuple2<Integer, Integer> edge) {
            final Integer source = edge._1();
            final Integer target = edge._2();
            return new Tuple2<Integer, Integer>(target, source);
          }
        };
    final JavaPairRDD<Integer, Integer> reversedEdges = closure.mapToPair(reverseEdge);

    // Iterate until a round adds no new path.
    long knownPaths = closure.count();
    while (true) {
      // The join produces (y, (z, x)) pairs, which ProjectFn turns into the new (x, z) paths.
      final JavaPairRDD<Integer, Integer> extended =
          closure.join(reversedEdges).mapToPair(ProjectFn.INSTANCE);
      closure = closure.union(extended).distinct().cache();

      final long pathsAfterRound = closure.count();
      if (pathsAfterRound == knownPaths) {
        break;
      }
      knownPaths = pathsAfterRound;
    }

    System.out.println("TC has " + closure.count() + " edges.");
    System.exit(0);
  }
  /**
   * Applies the cumulative-aggregate function to every input block, merges the results by key
   * and registers the resulting RDD (with lineage to the input) under the output variable.
   *
   * @param ec execution context; cast to {@link SparkExecutionContext}
   * @throws DMLRuntimeException declared by the instruction contract
   * @throws DMLUnsupportedOperationException declared by the instruction contract
   */
  @Override
  public void processInstruction(ExecutionContext ec)
      throws DMLRuntimeException, DMLUnsupportedOperationException {
    final SparkExecutionContext sparkCtx = (SparkExecutionContext) ec;
    final String inputName = input1.getName();

    // Dimensions of the input: total rows plus the block size in both directions.
    final MatrixCharacteristics inputStats = sparkCtx.getMatrixCharacteristics(inputName);
    final long totalRows = inputStats.getRows();
    final int rowsPerBlock = inputStats.getRowsPerBlock();
    final int colsPerBlock = inputStats.getColsPerBlock();

    final JavaPairRDD<MatrixIndexes, MatrixBlock> inputBlocks =
        sparkCtx.getBinaryBlockRDDHandleForVariable(inputName);

    // Unary aggregate per block (with implicit drop of the correction), then merge by key.
    final AggregateUnaryOperator aggregateOp = (AggregateUnaryOperator) _optr;
    final JavaPairRDD<MatrixIndexes, MatrixBlock> perBlock =
        inputBlocks.mapToPair(
            new RDDCumAggFunction(aggregateOp, totalRows, rowsPerBlock, colsPerBlock));
    final JavaPairRDD<MatrixIndexes, MatrixBlock> merged = RDDAggregateUtils.mergeByKey(perBlock);

    // Register the output handle and its lineage in the symbol table.
    final String outputName = output.getName();
    sparkCtx.setRDDHandleForVariable(outputName, merged);
    sparkCtx.addLineageRDD(outputName, inputName);
  }
  /**
   * Runs a Bloom-filter assisted star join over four pipe-delimited text inputs and writes the
   * aggregated result to {@code param.output}.
   *
   * <p>Outline, as implemented below:
   *
   * <ol>
   *   <li>The three dimension inputs (supplier, customer, date) are filtered, keyed by their
   *       first column, collected to the driver and their keys are added to one Bloom filter
   *       each, which is then broadcast.
   *   <li>The fact input is pre-filtered with the three Bloom filters and joined successively
   *       with the supplier, customer and date pairs.
   *   <li>The values parsed from fact column 12 are summed per
   *       {@code customerValue,supplierValue,dateValue} key, the result is sorted with {@code
   *       Q3Comparator} and saved as text.
   * </ol>
   *
   * @param args not used; all settings are read from {@code Parameters}
   * @throws IOException if the output path cannot be accessed or deleted
   */
  public static void main(String[] args) throws IOException {
    Parameters param = new Parameters();
    long initTime = System.currentTimeMillis();

    // Spark clones the SparkConf when the context is constructed, so every setting, including
    // the Kryo ones, has to be applied BEFORE the JavaSparkContext is created; setting them
    // afterwards has no effect on the running context.
    SparkConf conf = new SparkConf().setAppName("StarJoin");
    if (param.useKryo) {
      conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
      conf.set("spark.kryo.registrator", MyBloomFilter.BloomFilterRegistrator.class.getName());
      conf.set("spark.kryoserializer.buffer.mb", param.buffer);
    }
    JavaSparkContext sc = new JavaSparkContext(conf);

    try {
      MyBloomFilter.BloomFilter<String> BFS =
          new MyBloomFilter.BloomFilter<String>(1.0, param.bitsS, param.hashes);
      MyBloomFilter.BloomFilter<String> BFD =
          new MyBloomFilter.BloomFilter<String>(1.0, param.bitsD, param.hashes);
      MyBloomFilter.BloomFilter<String> BFC =
          new MyBloomFilter.BloomFilter<String>(1.0, param.bitsC, param.hashes);

      // Supplier dimension: keep rows whose column 3 matches, emit (column 0, column 3).
      JavaPairRDD<String, String> supps =
          sc.textFile(param.suppPath)
              .map(
                  new Function<String, String[]>() {
                    @Override
                    public String[] call(String line) {
                      return line.split("\\|");
                    }
                  })
              .filter(
                  new Function<String[], Boolean>() {
                    @Override
                    public Boolean call(String[] s) {
                      return s[3].equals("UNITED KI1") || s[3].equals("UNITED KI5");
                    }
                  })
              .mapToPair(
                  new PairFunction<String[], String, String>() {
                    @Override
                    public Tuple2<String, String> call(String[] s) {
                      return new Tuple2<String, String>(s[0], s[3]);
                    }
                  });

      // Register every surviving supplier key in its Bloom filter and ship it to the executors.
      for (Tuple2<String, String> supplier : supps.collect()) {
        BFS.add(supplier._1);
      }
      final Broadcast<MyBloomFilter.BloomFilter<String>> varS = sc.broadcast(BFS);

      // Customer dimension: same shape as the supplier dimension.
      JavaPairRDD<String, String> custs =
          sc.textFile(param.custPath)
              .map(
                  new Function<String, String[]>() {
                    @Override
                    public String[] call(String line) {
                      return line.split("\\|");
                    }
                  })
              .filter(
                  new Function<String[], Boolean>() {
                    @Override
                    public Boolean call(String[] s) {
                      return s[3].equals("UNITED KI1") || s[3].equals("UNITED KI5");
                    }
                  })
              .mapToPair(
                  new PairFunction<String[], String, String>() {
                    @Override
                    public Tuple2<String, String> call(String[] s) {
                      return new Tuple2<String, String>(s[0], s[3]);
                    }
                  });

      for (Tuple2<String, String> customer : custs.collect()) {
        BFC.add(customer._1);
      }
      final Broadcast<MyBloomFilter.BloomFilter<String>> varC = sc.broadcast(BFC);

      // Date dimension: keep rows whose column 6 is "Dec1997", emit (column 0, column 4).
      JavaPairRDD<String, String> dates =
          sc.textFile(param.datePath)
              .map(
                  new Function<String, String[]>() {
                    @Override
                    public String[] call(String line) {
                      return line.split("\\|");
                    }
                  })
              .filter(
                  new Function<String[], Boolean>() {
                    @Override
                    public Boolean call(String[] s) {
                      return s[6].equals("Dec1997");
                    }
                  })
              .mapToPair(
                  new PairFunction<String[], String, String>() {
                    @Override
                    public Tuple2<String, String> call(String[] s) {
                      return new Tuple2<String, String>(s[0], s[4]);
                    }
                  });

      for (Tuple2<String, String> date : dates.collect()) {
        BFD.add(date._1);
      }
      final Broadcast<MyBloomFilter.BloomFilter<String>> varD = sc.broadcast(BFD);

      // Fact input: discard rows that cannot match any dimension before the expensive joins.
      // Output is keyed by column 4 with value {column 2, column 5, column 12}.
      // NOTE(review): keys are added to the filters as String but probed as byte[] here;
      // confirm that MyBloomFilter hashes both forms identically.
      JavaPairRDD<String, String[]> lines =
          sc.textFile(param.linePath)
              .map(
                  new Function<String, String[]>() {
                    @Override
                    public String[] call(String line) {
                      return line.split("\\|");
                    }
                  })
              .filter(
                  new Function<String[], Boolean>() {
                    @Override
                    public Boolean call(String[] s) {
                      return varC.value().contains(s[2].getBytes())
                          && varS.value().contains(s[4].getBytes())
                          && varD.value().contains(s[5].getBytes());
                    }
                  })
              .mapToPair(
                  new PairFunction<String[], String, String[]>() {
                    @Override
                    public Tuple2<String, String[]> call(String[] s) {
                      String[] v = {s[2], s[5], s[12]};
                      return new Tuple2<String, String[]>(s[4], v);
                    }
                  });

      // Join with suppliers; re-key by the first value element (fact column 2) and append the
      // supplier value, so the value becomes {fact column 5, fact column 12, supplier value}.
      JavaPairRDD<String, String[]> result =
          lines
              .join(supps)
              .mapToPair(
                  new PairFunction<Tuple2<String, Tuple2<String[], String>>, String, String[]>() {
                    @Override
                    public Tuple2<String, String[]> call(
                        Tuple2<String, Tuple2<String[], String>> s) {
                      String[] v = {s._2._1[1], s._2._1[2], s._2._2};
                      return new Tuple2<String, String[]>(s._2._1[0], v);
                    }
                  });

      // Join with customers; re-key by fact column 5, value becomes
      // {fact column 12, supplier value, customer value}.
      result =
          result
              .join(custs)
              .mapToPair(
                  new PairFunction<Tuple2<String, Tuple2<String[], String>>, String, String[]>() {
                    @Override
                    public Tuple2<String, String[]> call(
                        Tuple2<String, Tuple2<String[], String>> s) {
                      String[] v = {s._2._1[1], s._2._1[2], s._2._2};
                      return new Tuple2<String, String[]>(s._2._1[0], v);
                    }
                  });

      // Join with dates, build the "customer,supplier,date" grouping key and sum the numeric
      // value parsed from fact column 12 per key.
      JavaPairRDD<String, Long> final_result =
          result
              .join(dates)
              .mapToPair(
                  new PairFunction<Tuple2<String, Tuple2<String[], String>>, String, Long>() {
                    @Override
                    public Tuple2<String, Long> call(Tuple2<String, Tuple2<String[], String>> s) {
                      return new Tuple2<String, Long>(
                          s._2._1[2] + "," + s._2._1[1] + "," + s._2._2,
                          Long.parseLong(s._2._1[0]));
                    }
                  })
              .reduceByKey(
                  new Function2<Long, Long, Long>() {
                    @Override
                    public Long call(Long i1, Long i2) {
                      return i1 + i2;
                    }
                  });

      // sortByKey can only order by key, so the sum is folded into the key for sorting ...
      JavaPairRDD<String, String> sub_result =
          final_result.mapToPair(
              new PairFunction<Tuple2<String, Long>, String, String>() {
                @Override
                public Tuple2<String, String> call(Tuple2<String, Long> line) {
                  return new Tuple2<String, String>(line._1 + "," + line._2.toString(), null);
                }
              });

      // ... and split back into (key, sum) once the rows are ordered.
      final_result =
          sub_result
              .sortByKey(new Q3Comparator())
              .mapToPair(
                  new PairFunction<Tuple2<String, String>, String, Long>() {
                    @Override
                    public Tuple2<String, Long> call(Tuple2<String, String> line) {
                      String[] s = line._1.split(",");
                      return new Tuple2<String, Long>(
                          s[0] + "," + s[1] + "," + s[2], Long.parseLong(s[3]));
                    }
                  });

      // Remove any previous output directory before saving the new result.
      Configuration HDFSconf = new Configuration();
      FileSystem fs = FileSystem.get(HDFSconf);
      fs.delete(new Path(param.output), true);

      final_result.saveAsTextFile(param.output);

      long finalTime = System.currentTimeMillis();
      System.out.print("Tempo total(ms): ");
      System.out.println(finalTime - initTime);
    } finally {
      // Release the Spark context even when one of the jobs above fails.
      sc.close();
    }
  }
Пример #7
0
  /**
   * Prints the ten most frequent words found in the third tab-separated field of every input
   * line, skipping a small list of uninformative words.
   *
   * @param args {@code args[0]} is the input file or directory
   * @throws RuntimeException if no input path is given
   */
  public static void main(String[] args) {

    // STEP 1: validate the command line
    if (args.length < 1) {
      log.fatal("Syntax Error: there must be one argument (a file name or a directory)");
      throw new RuntimeException();
    }

    // STEP 2: create a SparkConf object
    SparkConf sparkConf = new SparkConf().setAppName("Trending Topic");

    // STEP 3: create a Java Spark context
    JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);

    try {
      // STEP 4: read lines of files
      JavaRDD<String> lines = sparkContext.textFile(args[0]);

      // STEP 5: split the third tab-separated field into words
      JavaRDD<String> words =
          lines.flatMap(
              new FlatMapFunction<String, String>() {
                @Override
                public Iterable<String> call(String s) throws Exception {
                  String[] fields = s.split("\t");
                  if (fields.length < 3) {
                    // A malformed line contributes no words instead of failing the whole job.
                    return Arrays.<String>asList();
                  }
                  return Arrays.asList(fields[2].split(" "));
                }
              });

      // STEP 6: count the occurrences of every word
      JavaPairRDD<String, Integer> ones =
          words.mapToPair(
              new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String string) {
                  return new Tuple2<>(string, 1);
                }
              });

      JavaPairRDD<String, Integer> counts =
          ones.reduceByKey(
              new Function2<Integer, Integer, Integer>() {
                @Override
                public Integer call(Integer integer, Integer integer2) throws Exception {
                  return integer + integer2;
                }
              });

      // The tuples have to be swapped because sorting is only possible by key, not by value.
      JavaPairRDD<Integer, String> swapped =
          counts.mapToPair(
              new PairFunction<Tuple2<String, Integer>, Integer, String>() {
                @Override
                public Tuple2<Integer, String> call(Tuple2<String, Integer> tupla)
                    throws Exception {
                  return tupla.swap();
                }
              });

      // STEP 7: sort the results by key, highest count first, so that the head of the list
      // holds the most frequent words (an ascending sort would yield the rarest ones).
      List<Tuple2<Integer, String>> output = swapped.sortByKey(false).collect();

      // The exercise asks to leave out words that carry no information. They could be read
      // from a file; a fixed list is enough for the purpose of this exercise.
      List<String> excluyentes = Arrays.asList("rt", "http", "https", "www");

      // STEP 8: print the first ten results that are not excluded. Excluded words are skipped
      // while iterating rather than removed from the list: removing during a for-each throws
      // ConcurrentModificationException and the collected list may be unmodifiable. Counting
      // the printed entries also copes with fewer than ten distinct words.
      int printed = 0;
      for (Tuple2<Integer, String> tuple : output) {
        if (printed == 10) {
          break;
        }
        if (excluyentes.contains(tuple._2())) {
          continue;
        }
        System.out.println(tuple._2() + ": " + tuple._1());
        printed++;
      }
    } finally {
      // STEP 9: stop the spark context, also when the job fails
      sparkContext.stop();
    }
  }
Пример #8
0
  /**
   * Spark driver that derives a rating score per product from review text, merges product
   * content with product-list data, and hands the combined records to {@code MessageToMysql}.
   *
   * <p>Stages, in order:
   *
   * <ol>
   *   <li>Load the word list and split it into three driver-side lists by the code in the
   *       second column ("1", "2", "3").
   *   <li>Parse the rate JSON lines, explode them into (item id, review fragment) pairs and
   *       classify every fragment as positive ("好评"), neutral ("中评") or negative ("差评").
   *   <li>Count the classifications per item and turn them into an integer score.
   *   <li>Parse product content and product list, join them and build one "@=@=@"-delimited
   *       record per item.
   *   <li>Left-outer-join the records with the scores (default 50), collect them on the driver
   *       and insert them via {@code MessageToMysql}.
   * </ol>
   *
   * <p>The strings "kaer", "null" and "long null" are used as in-band sentinels between the
   * stages; see the comments at each stage.
   *
   * @param args not used; all input paths are hard-coded HDFS locations
   * @throws Exception propagated from any stage
   */
  public static void main(String[] args) throws Exception {

    SparkConf sparkConf = new SparkConf().setAppName("ShopJsonParse");
    JavaSparkContext ctx = new JavaSparkContext(sparkConf);

    // ---- Stage 1: word lists ------------------------------------------------------------
    // Every line of the word file is split on a single space: column 0 is the word, column 1
    // a category code. Lines of another category are mapped to the sentinel "kaer", so each
    // of the three lists below may contain "kaer" as an element.
    // NOTE(review): "zhuyu"/"haoping"/"chaping" look like pinyin for subject / positive
    // review / negative review; confirm with the data owner.
    JavaRDD<String> ciku = ctx.textFile("hdfs://hadoop119:9000/ciku/ciku_zhuyu.txt", 1);
    // Words with category code "1".
    JavaRDD<String> zhuyu =
        ciku.map(
                new Function<String, String>() {
                  @Override
                  public String call(String s) throws Exception {
                    String[] str = s.split(" ");
                    if (str[1].equals("1")) return str[0];
                    else return "kaer";
                  }
                })
            .distinct()
            .cache();
    // Words with category code "2".
    JavaRDD<String> haoping =
        ciku.map(
                new Function<String, String>() {
                  @Override
                  public String call(String s) throws Exception {
                    String[] str = s.split(" ");
                    if (str[1].equals("2")) return str[0];
                    else return "kaer";
                  }
                })
            .distinct()
            .cache();
    // Words with category code "3".
    JavaRDD<String> chaping =
        ciku.map(
                new Function<String, String>() {
                  @Override
                  public String call(String s) throws Exception {
                    String[] str = s.split(" ");
                    if (str[1].equals("3")) return str[0];
                    else return "kaer";
                  }
                })
            .distinct()
            .cache();
    // The lists are collected to the driver and captured by the closures further down.
    final List<String> zhuyulist = zhuyu.collect();
    final List<String> hplist = haoping.collect();
    final List<String> cplist = chaping.collect();

    // ---- Stage 2: reviews ---------------------------------------------------------------
    JavaRDD<String> mongoratedata = ctx.textFile("hdfs://hadoop119:9000/shopdata/ratelist.json");

    // One map per JSON line with the keys "PlatformItemId" (from "nid") and "ratelist"
    // (from the parsed "rate" object).
    JavaRDD<Map<String, Object>> mongorateall =
        mongoratedata.map(
            new Function<String, Map<String, Object>>() {
              @Override
              public Map<String, Object> call(String line) throws Exception {
                return ParseLineToMap(line);
              }

              // On a JSONException the stack trace is printed and the map is returned with
              // whatever keys were stored before the failure (possibly none).
              private Map<String, Object> ParseLineToMap(String line) {
                Map<String, Object> documentMap = new HashMap<String, Object>();
                try {
                  JSONObject jsonline = new JSONObject(line);
                  documentMap.put("PlatformItemId", jsonline.get("nid").toString());

                  Gson gson = new Gson();
                  rate rate = gson.fromJson(jsonline.get("rate").toString(), rate.class);
                  documentMap.put("ratelist", rate.parsemod());
                } catch (JSONException e) {
                  e.printStackTrace();
                }
                return documentMap;
              }
            });

    // Explode every item into (item id, review text) pairs, one per value of "ratelist".
    // Items without a "ratelist" entry yield a single pair with the sentinel text "null".
    JavaPairRDD<String, String> Rates =
        mongorateall.flatMapToPair(
            new PairFlatMapFunction<Map<String, Object>, String, String>() {
              @Override
              public Iterable<Tuple2<String, String>> call(Map<String, Object> map)
                  throws Exception {
                ArrayList<Tuple2<String, String>> flatmaps =
                    new ArrayList<Tuple2<String, String>>();

                String itemid = (String) map.get("PlatformItemId");
                String itempro = "";

                Map<String, String> ratelist = (Map<String, String>) map.get("ratelist");
                if (ratelist == null) {
                  itempro = "null";
                  flatmaps.add(new Tuple2<String, String>(itemid, itempro));
                } else {
                  for (String value : ratelist.values()) {
                    itempro = value;
                    flatmaps.add(new Tuple2<String, String>(itemid, itempro));
                  }
                }
                return flatmaps;
              }
            });

    // Split every review text on whitespace into fragments, dropping empty fragments; a text
    // without a space character is passed through unchanged.
    final Pattern SPACES = Pattern.compile("\\s+");
    JavaPairRDD<String, String> sentences =
        Rates.flatMapValues(
            new Function<String, Iterable<String>>() {
              @Override
              public Iterable<String> call(String s) throws Exception {
                ArrayList<String> list = new ArrayList<String>();
                if (s.contains(" ")) {
                  String[] str = SPACES.split(s);
                  int num = 0;
                  while (num < str.length) {
                    if (!str[num].equals("")) list.add(str[num]);
                    num++;
                  }
                } else {
                  list.add(s);
                }
                return list;
              }
            });

    // Filler words that are stripped from the extracted part of a fragment.
    String filter = "的 也 很 都 了 非常 有些 还 是 点 些 就 看起来 看上去 更 呢 哦 确实 什么的 较 太 啊 吧 得 那么 什么 挺";
    final String[] list = filter.split(" ");
    // Rewrite every fragment into one of three forms:
    //   "<fragment> kaer"  - the fragment is shorter than 3 characters
    //   "<word> <rest>"    - <word> is the first entry of zhuyulist contained in the fragment
    //                        and <rest> the text extracted around it (may be empty or "kaer")
    //   "long null"        - no entry of zhuyulist is contained in the fragment
    JavaPairRDD<String, String> words =
        sentences.mapValues(
            new Function<String, String>() {
              @Override
              public String call(String s) throws Exception {
                if (s.length() < 3) {
                  return s + " " + "kaer";
                }
                for (int i = 0; i < zhuyulist.size(); i++) {
                  String zhuyu = zhuyulist.get(i);
                  if (s.contains(zhuyu)) {
                    // Replace every occurrence of the matched word with a space and take the
                    // text after the last space; when the fragment ends with the matched
                    // word, take the text in front of it instead.
                    s = s.replace(zhuyu, " ");
                    int size = s.length();
                    int tap = s.lastIndexOf(" ");
                    String ss = "kaer";
                    if (tap + 1 < size) {
                      ss = s.substring(tap + 1, size);
                    } else {
                      // NOTE(review): substring(0, tap - 1) also drops the character right
                      // before the space; confirm whether substring(0, tap) was intended.
                      if (tap - 1 > 0) ss = s.substring(0, tap - 1);
                    }
                    for (String tem : list) {
                      if (ss.contains(tem)) ss = ss.replace(tem, "");
                    }
                    return zhuyu + " " + ss;
                  }
                }
                return "long null";
              }
            });

    // Classify every rewritten fragment and drop the ones that could not be classified.
    // The labels are Chinese: "差评" = negative, "好评" = positive, "中评" = neutral review.
    // The resulting value has the form "<label> <word>,<matched text>".
    JavaPairRDD<String, String> filterwords =
        words
            .mapValues(
                new Function<String, String>() {
                  @Override
                  public String call(String s) throws Exception {
                    String tempstr;
                    if (s.contains("kaer")) {
                      // Only the text before the first space is available; it is looked up
                      // in the category-3 list first, then in the category-2 list.
                      tempstr = s.substring(0, s.indexOf(" "));
                      for (int i = 0; i < cplist.size(); i++) {
                        if (tempstr.equals(cplist.get(i))) return "差评 " + "," + tempstr;
                      }
                      for (int i = 0; i < hplist.size(); i++) {
                        if (tempstr.equals(hplist.get(i))) return "好评 " + "," + tempstr;
                      }
                      return "中评 " + "," + tempstr;
                    } else if (s.contains("null")) {
                      // Sentinel values stay recognisable so that the filter below drops them.
                      return s + ",null";
                    } else {
                      // "<word> " with nothing after the space cannot be classified.
                      if (s.endsWith(" ")) return "long null,null";
                      // The second token is looked up in the category-3 list first, then in
                      // the category-2 list; the first token is kept after the label.
                      tempstr = s.split(" ")[1];
                      for (int i = 0; i < cplist.size(); i++) {
                        if (tempstr.equals(cplist.get(i)))
                          return "差评 " + s.split(" ")[0] + "," + tempstr;
                      }
                      for (int i = 0; i < hplist.size(); i++) {
                        if (tempstr.equals(hplist.get(i)))
                          return "好评 " + s.split(" ")[0] + "," + tempstr;
                      }
                      return "中评 " + s.split(" ")[0] + "," + tempstr;
                    }
                  }
                })
            .filter(
                new Function<Tuple2<String, String>, Boolean>() {
                  @Override
                  public Boolean call(Tuple2<String, String> line) throws Exception {
                    // Drops every value that contains the substring "null".
                    if (line._2.contains("null")) return false;
                    else return true;
                  }
                });

    // ---- Stage 3: score per item --------------------------------------------------------
    // Encode every classification as a "positive,neutral,negative" counter string.
    JavaPairRDD<String, String> ones =
        filterwords.mapToPair(
            new PairFunction<Tuple2<String, String>, String, String>() {
              @Override
              public Tuple2<String, String> call(Tuple2<String, String> line) throws Exception {
                String key = line._1();
                String value = "0,0,0", ll = line._2;
                if (ll.startsWith("好评")) value = "1,0,0";
                else if (ll.startsWith("中评")) value = "0,1,0";
                else if (ll.startsWith("差评")) value = "0,0,1";
                return new Tuple2<String, String>(key, value);
              }
            });

    // Sum the three counters per item. The counters are parsed and re-formatted as doubles,
    // so the reduced strings look like "3.0,1.0,0.0".
    JavaPairRDD<String, String> result =
        ones.reduceByKey(
            new Function2<String, String, String>() {
              @Override
              public String call(String s1, String s2) throws Exception {
                double h1 = Double.parseDouble(s1.split(",")[0]),
                    h2 = Double.parseDouble(s1.split(",")[1]),
                    h3 = Double.parseDouble(s1.split(",")[2]);
                double hh1 = Double.parseDouble(s2.split(",")[0]),
                    hh2 = Double.parseDouble(s2.split(",")[1]),
                    hh3 = Double.parseDouble(s2.split(",")[2]);
                return (h1 + hh1) + "," + (h2 + hh2) + "," + (h3 + hh3);
              }
            });

    // Score = share of positive among positive + negative counts, as a truncated integer
    // percentage; 50 when an item has neither. The neutral counter (h2) is parsed but unused.
    JavaPairRDD<String, Integer> rateresult =
        result.mapValues(
            new Function<String, Integer>() {
              @Override
              public Integer call(String s1) throws Exception {
                double h1 = Double.parseDouble(s1.split(",")[0]),
                    h2 = Double.parseDouble(s1.split(",")[1]),
                    h3 = Double.parseDouble(s1.split(",")[2]);
                if (h1 + h3 == 0) return 50;
                else {
                  return (int) (h1 / (h1 + h3) * 100);
                }
              }
            });

    // ---- Stage 4: product content and product list --------------------------------------
    JavaRDD<String> mongocontentdata =
        ctx.textFile("hdfs://hadoop119:9000/shopdata/ProductContent.json");

    // Parsing is delegated to ShopParse.ParseLine; a new ShopParse is created per line.
    JavaRDD<Map<String, Object>> mongocontentall =
        mongocontentdata.map(
            new Function<String, Map<String, Object>>() {
              @Override
              public Map<String, Object> call(String line) throws Exception {
                return new ShopParse().ParseLine(line);
              }
            });

    // Key the content maps by their "PlatformItemId" entry.
    JavaPairRDD<String, Map<String, Object>> content =
        mongocontentall.mapToPair(
            new PairFunction<Map<String, Object>, String, Map<String, Object>>() {
              @Override
              public Tuple2<String, Map<String, Object>> call(Map<String, Object> map)
                  throws Exception {
                return new Tuple2<String, Map<String, Object>>(
                    map.get("PlatformItemId").toString(), map);
              }
            });

    JavaRDD<String> mongoproListdata =
        ctx.textFile("hdfs://hadoop119:9000/shopdata/productList.json");

    // Parsing is delegated to ShopParse.ParseproList; a new ShopParse is created per line.
    JavaRDD<Map<String, Object>> mongoproListall =
        mongoproListdata.map(
            new Function<String, Map<String, Object>>() {
              @Override
              public Map<String, Object> call(String line) throws Exception {
                return new ShopParse().ParseproList(line);
              }
            });
    // count() is an action: it runs a job over the (uncached) RDD just to print its size.
    System.out.println("mongoproListall counts :" + mongoproListall.count());

    // Key the product-list maps by "PlatformItemId" and keep only entries whose
    // "isdownloads" value prints as "true".
    JavaPairRDD<String, Map<String, Object>> proList =
        mongoproListall
            .mapToPair(
                new PairFunction<Map<String, Object>, String, Map<String, Object>>() {
                  @Override
                  public Tuple2<String, Map<String, Object>> call(Map<String, Object> map)
                      throws Exception {
                    return new Tuple2<String, Map<String, Object>>(
                        map.get("PlatformItemId").toString(), map);
                  }
                })
            .filter(
                new Function<Tuple2<String, Map<String, Object>>, Boolean>() {
                  @Override
                  public Boolean call(Tuple2<String, Map<String, Object>> line) throws Exception {
                    if (line._2.get("isdownloads").toString().equals("true")) return true;
                    else return false;
                  }
                });
    // Another action that runs a job only to print the size.
    System.out.println("proList counts :" + proList.count());

    // Inner join on the item id; six product-list fields are copied into the content map,
    // which is then used as the merged record.
    JavaRDD<Map<String, Object>> ContJoinPro =
        content
            .join(proList)
            .map(
                new Function<
                    Tuple2<String, Tuple2<Map<String, Object>, Map<String, Object>>>,
                    Map<String, Object>>() {
                  @Override
                  public Map<String, Object> call(
                      Tuple2<String, Tuple2<Map<String, Object>, Map<String, Object>>> line)
                      throws Exception {
                    Map<String, Object> mapprod = line._2._1;
                    mapprod.put("Name", line._2._2.get("Name"));
                    mapprod.put("Photo", line._2._2.get("Photo"));
                    mapprod.put("SellerId", line._2._2.get("SellerId"));
                    mapprod.put("StoreName", line._2._2.get("StoreName"));
                    mapprod.put("Url", line._2._2.get("Url"));
                    mapprod.put("TaokeUrl", line._2._2.get("TaokeUrl"));
                    return mapprod;
                  }
                });

    // Build one (item id, "@=@=@"-delimited record) pair per merged product.
    // NOTE(review): every map value is cast to String and used without a null check, so a
    // missing key fails the task with a NullPointerException or NumberFormatException.
    JavaPairRDD<String, String> Messages =
        ContJoinPro.mapToPair(
            new PairFunction<Map<String, Object>, String, String>() {
              @Override
              public Tuple2<String, String> call(Map<String, Object> map) throws Exception {
                String itemid = (String) map.get("PlatformItemId");
                String itempro = "";
                // "isTmall" is translated into the code "2" (true) or "1" (anything else).
                String From = (String) map.get("isTmall");
                if (From.equals("true")) From = "2";
                else From = "1";
                String Quantity = (String) map.get("Quantity");
                String CmtCount = (String) map.get("ratecount");
                String ImgPath = (String) map.get("detailmessage");
                // Indices 0-4 are read below, so at least five image paths are required.
                String[] ImgPaths = ImgPath.split("@=@=@"); // 1-5
                String mobprice = (String) map.get("mobmessage");
                String pcprice = (String) map.get("pcpricemessage");
                String minmaxPrice = (String) map.get("MaxMinPrice");
                String OriginalPrice = (String) map.get("OriginalPrice");
                double p1 = Double.parseDouble(mobprice);
                double p2 = Double.parseDouble(pcprice.split("@=@=@")[0]);
                double min = Double.parseDouble(minmaxPrice.split(",")[0]);
                double max = Double.parseDouble(minmaxPrice.split(",")[1]);
                double origin = Double.parseDouble(OriginalPrice);
                // Price is the lower of the two parsed prices.
                // NOTE(review): 100000.00 appears to be a "no price" placeholder that is
                // replaced by the minimum price; confirm against ShopParse.
                double Price = p1;
                if (Price > p2) Price = p2;
                if (Price == 100000.00) Price = min;
                // The original price is raised to the maximum price when it is lower.
                if (origin < max) OriginalPrice = max + "";

                // IsPost becomes "1" when the second "pcpricemessage" field starts with "0.00".
                String IsPost = "0";
                if (!pcprice.endsWith("@=@=@") && pcprice.split("@=@=@")[1].startsWith("0.00"))
                  IsPost = "1";

                String Name = (String) map.get("Name");
                String SellerId = (String) map.get("SellerId");
                String StoreName = (String) map.get("StoreName");
                String Photo = (String) map.get("Photo");
                String Url = (String) map.get("Url");
                String TaokeUrl = (String) map.get("TaokeUrl");

                // Discount is the price / original-price ratio with two decimals.
                DecimalFormat ddf = new DecimalFormat("#0.00");
                String Discount = ddf.format(Price / Double.parseDouble(OriginalPrice)) + "";

                // AddTime is the current time on the executor that processes the record.
                SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
                String AddTime = df.format(new Date());
                // Constant columns of the output record.
                String IsSell = "1";
                String Type = "2";
                String IsChangeImgPath = "0";
                String HotKeyId = "0";
                String OpenIid = "0";

                // Field order of the record; MessageToMysql presumably relies on it.
                itempro =
                    From
                        + "@=@=@"
                        + Quantity
                        + "@=@=@"
                        + CmtCount
                        + "@=@=@"
                        + ImgPaths[0]
                        + "@=@=@"
                        + ImgPaths[1]
                        + "@=@=@"
                        + ImgPaths[2]
                        + "@=@=@"
                        + ImgPaths[3]
                        + "@=@=@"
                        + ImgPaths[4]
                        + "@=@=@"
                        + Price
                        + "@=@=@"
                        + IsPost
                        + "@=@=@"
                        + Name
                        + "@=@=@"
                        + SellerId
                        + "@=@=@"
                        + StoreName
                        + "@=@=@"
                        + OriginalPrice
                        + "@=@=@"
                        + Photo
                        + "@=@=@"
                        + Url
                        + "@=@=@"
                        + Discount
                        + "@=@=@"
                        + AddTime
                        + "@=@=@"
                        + IsSell
                        + "@=@=@"
                        + Type
                        + "@=@=@"
                        + IsChangeImgPath
                        + "@=@=@"
                        + HotKeyId
                        + "@=@=@"
                        + TaokeUrl
                        + "@=@=@"
                        + OpenIid;
                return new Tuple2<String, String>(itemid, itempro);
              }
            });

    // ---- Stage 5: attach the score and persist ------------------------------------------
    // Left outer join keeps products without any classified review; they get the score 50.
    // The final line is "<item id>@=@=@<record>@=@=@<score>".
    JavaRDD<String> MessagesAll =
        Messages.leftOuterJoin(rateresult)
            .map(
                new Function<Tuple2<String, Tuple2<String, Optional<Integer>>>, String>() {
                  @Override
                  public String call(Tuple2<String, Tuple2<String, Optional<Integer>>> line)
                      throws Exception {
                    Optional<Integer> possible = line._2._2;
                    int fenshu = 50;
                    if (possible.isPresent()) fenshu = line._2._2.get();
                    return line._1 + "@=@=@" + line._2._1 + "@=@=@" + fenshu;
                  }
                });

    // All records are pulled to the driver before being inserted.
    List<String> messages = MessagesAll.collect();
    new MessageToMysql().insert(messages);

    ctx.stop();
  }