Beispiel #1
0
  /**
   * Counts the lines of a text file that contain "a" and the lines that contain "b", then prints
   * both totals to standard output.
   *
   * <p>The Spark context is stopped before the method returns, whether the counts succeed or a
   * job throws.
   *
   * @param args command-line arguments; not read by this method
   */
  public static void main(String[] args) {
    String logFile = "YOUR_SPARK_HOME/README.md"; // Should be some file on your system
    SparkConf conf = new SparkConf().setAppName("Simple Application");
    JavaSparkContext sc = new JavaSparkContext(conf);
    try {
      // Cached because the same RDD is scanned by both count() calls below.
      JavaRDD<String> logData = sc.textFile(logFile).cache();

      long numAs =
          logData
              .filter(
                  new Function<String, Boolean>() {
                    @Override
                    public Boolean call(String s) {
                      return s.contains("a");
                    }
                  })
              .count();

      long numBs =
          logData
              .filter(
                  new Function<String, Boolean>() {
                    @Override
                    public Boolean call(String s) {
                      return s.contains("b");
                    }
                  })
              .count();

      System.out.println("Lines with a: " + numAs + ", lines with b: " + numBs);
    } finally {
      // Release the context even when a job fails; the original never stopped it.
      sc.stop();
    }
  }
  /**
   * Trains an ALS matrix-factorization model on ratings read from {@code data/test.csv}, prints
   * the mean squared error of the model's predictions against those same ratings, then saves the
   * model to {@code myModelPath} and loads it back.
   *
   * <p>Each input line is split on commas; the first two fields are parsed as integers (user,
   * product) and the third as a double (rating). A line that does not fit that shape makes the
   * parsing function throw.
   *
   * <p>The Spark context is stopped before the method returns, whether the run succeeds or a job
   * throws.
   *
   * @param args command-line arguments; not read by this method
   */
  public static void main(String[] args) {
    SparkConf conf = new SparkConf().setMaster("local").setAppName("SparkRecommendationEngine");
    JavaSparkContext sc = new JavaSparkContext(conf);
    try {
      // Load and parse the data
      String path = "data/test.csv";
      JavaRDD<String> data = sc.textFile(path);
      JavaRDD<Rating> ratings =
          data.map(
              new Function<String, Rating>() {
                @Override
                public Rating call(String s) {
                  String[] fields = s.split(",");
                  return new Rating(
                      Integer.parseInt(fields[0]),
                      Integer.parseInt(fields[1]),
                      Double.parseDouble(fields[2]));
                }
              });

      // Build the recommendation model using ALS
      int rank = 10;
      int numIterations = 10;
      // Fourth argument of ALS.train; the MLlib API names it lambda (regularization parameter).
      double lambda = 0.01;
      MatrixFactorizationModel model =
          ALS.train(JavaRDD.toRDD(ratings), rank, numIterations, lambda);

      // Evaluate the model on rating data: predict a rating for every (user, product) pair that
      // appears in the input.
      JavaRDD<Tuple2<Object, Object>> userProducts =
          ratings.map(
              new Function<Rating, Tuple2<Object, Object>>() {
                @Override
                public Tuple2<Object, Object> call(Rating r) {
                  return new Tuple2<Object, Object>(r.user(), r.product());
                }
              });

      // Predictions keyed by (user, product) so they can be joined with the observed ratings.
      JavaPairRDD<Tuple2<Integer, Integer>, Double> predictions =
          JavaPairRDD.fromJavaRDD(
              model
                  .predict(JavaRDD.toRDD(userProducts))
                  .toJavaRDD()
                  .map(
                      new Function<Rating, Tuple2<Tuple2<Integer, Integer>, Double>>() {
                        @Override
                        public Tuple2<Tuple2<Integer, Integer>, Double> call(Rating r) {
                          return new Tuple2<Tuple2<Integer, Integer>, Double>(
                              new Tuple2<Integer, Integer>(r.user(), r.product()), r.rating());
                        }
                      }));

      // Join observed ratings with predictions on (user, product); each resulting value pairs
      // the observed rating (_1) with the predicted rating (_2).
      JavaRDD<Tuple2<Double, Double>> ratesAndPreds =
          JavaPairRDD.fromJavaRDD(
                  ratings.map(
                      new Function<Rating, Tuple2<Tuple2<Integer, Integer>, Double>>() {
                        @Override
                        public Tuple2<Tuple2<Integer, Integer>, Double> call(Rating r) {
                          return new Tuple2<Tuple2<Integer, Integer>, Double>(
                              new Tuple2<Integer, Integer>(r.user(), r.product()), r.rating());
                        }
                      }))
              .join(predictions)
              .values();

      // Mean of the squared differences between observed and predicted ratings.
      double mse =
          JavaDoubleRDD.fromRDD(
                  ratesAndPreds
                      .map(
                          new Function<Tuple2<Double, Double>, Object>() {
                            @Override
                            public Object call(Tuple2<Double, Double> pair) {
                              Double err = pair._1() - pair._2();
                              return err * err;
                            }
                          })
                      .rdd())
              .mean();
      System.out.println("Mean Squared Error = " + mse);

      // Save and load model. The reloaded model is not used further in this method.
      model.save(sc.sc(), "myModelPath");
      MatrixFactorizationModel sameModel = MatrixFactorizationModel.load(sc.sc(), "myModelPath");
    } finally {
      // Release the context even when training, evaluation, or save/load fails; the original
      // never stopped it.
      sc.stop();
    }
  }