Ejemplo n.º 1
0
  /**
   * Produces the PMML stand-in for a matrix factorization model. A factored matrix model is too
   * large to serialize into PMML directly, so the document is an ad-hoc one: it carries Extensions
   * that point at the locations ({@code X/} and {@code Y/}) holding the matrix data, plus the
   * training hyperparameters and the user/product IDs.
   *
   * @param model trained model whose user and product features are saved and referenced
   * @param features value recorded under the {@code features} extension
   * @param lambda value recorded under the {@code lambda} extension
   * @param alpha value recorded under the {@code alpha} extension, only when {@code implicit}
   * @param implicit value recorded under the {@code implicit} extension
   * @param candidatePath parent directory under which the {@code X} and {@code Y} data is saved
   * @return the skeleton PMML populated with the extensions described above
   */
  private static PMML mfModelToPMML(
      MatrixFactorizationModel model,
      int features,
      double lambda,
      double alpha,
      boolean implicit,
      Path candidatePath) {
    // Persist the two feature sets next to each other beneath the candidate directory.
    Path userFeaturesPath = new Path(candidatePath, "X");
    saveFeaturesRDD(model.userFeatures(), userFeaturesPath);
    Path productFeaturesPath = new Path(candidatePath, "Y");
    saveFeaturesRDD(model.productFeatures(), productFeaturesPath);

    PMML result = PMMLUtils.buildSkeletonPMML();

    // Relative pointers to the data saved above.
    PMMLUtils.addExtension(result, "X", "X/");
    PMMLUtils.addExtension(result, "Y", "Y/");

    // Hyperparameters; alpha is only recorded for implicit-feedback models.
    PMMLUtils.addExtension(result, "features", String.valueOf(features));
    PMMLUtils.addExtension(result, "lambda", String.valueOf(lambda));
    PMMLUtils.addExtension(result, "implicit", String.valueOf(implicit));
    if (implicit) {
      PMMLUtils.addExtension(result, "alpha", String.valueOf(alpha));
    }

    addIDsExtension(result, "XIDs", model.userFeatures());
    addIDsExtension(result, "YIDs", model.productFeatures());
    return result;
  }
  /**
   * Trains an ALS recommendation model on the ratings in {@code data/test.csv}, prints the mean
   * squared error of its predictions, then saves the model to {@code myModelPath} and loads it
   * back.
   *
   * <p>Each input line is parsed as {@code user,product,rating}. The error is measured on the same
   * ratings the model was trained on, so it is a training error, not a held-out estimate.
   *
   * <p>The Spark context is always stopped before this method returns, including when parsing,
   * training or saving throws.
   *
   * @param args command-line arguments; not used
   */
  public static void main(String[] args) {
    SparkConf conf = new SparkConf().setMaster("local").setAppName("SparkRecommendationEngine");
    JavaSparkContext sc = new JavaSparkContext(conf);
    try {
      // Load and parse the data
      String path = "data/test.csv";
      JavaRDD<String> data = sc.textFile(path);
      JavaRDD<Rating> ratings =
          data.map(
              new Function<String, Rating>() {
                @Override
                public Rating call(String s) {
                  String[] fields = s.split(",");
                  return new Rating(
                      Integer.parseInt(fields[0]),
                      Integer.parseInt(fields[1]),
                      Double.parseDouble(fields[2]));
                }
              });

      // Build the recommendation model using ALS
      int rank = 10;
      int numIterations = 10;
      double lambda = 0.01; // regularization parameter passed to ALS.train
      MatrixFactorizationModel model =
          ALS.train(JavaRDD.toRDD(ratings), rank, numIterations, lambda);

      // Evaluate the model on rating data: predict a rating for every (user, product) pair that
      // appears in the input, then join predictions with the actual ratings on that pair.
      JavaRDD<Tuple2<Object, Object>> userProducts =
          ratings.map(
              new Function<Rating, Tuple2<Object, Object>>() {
                @Override
                public Tuple2<Object, Object> call(Rating r) {
                  return new Tuple2<Object, Object>(r.user(), r.product());
                }
              });
      JavaPairRDD<Tuple2<Integer, Integer>, Double> predictions =
          JavaPairRDD.fromJavaRDD(
              model
                  .predict(JavaRDD.toRDD(userProducts))
                  .toJavaRDD()
                  .map(
                      new Function<Rating, Tuple2<Tuple2<Integer, Integer>, Double>>() {
                        @Override
                        public Tuple2<Tuple2<Integer, Integer>, Double> call(Rating r) {
                          return new Tuple2<Tuple2<Integer, Integer>, Double>(
                              new Tuple2<Integer, Integer>(r.user(), r.product()), r.rating());
                        }
                      }));
      JavaRDD<Tuple2<Double, Double>> ratesAndPreds =
          JavaPairRDD.fromJavaRDD(
                  ratings.map(
                      new Function<Rating, Tuple2<Tuple2<Integer, Integer>, Double>>() {
                        @Override
                        public Tuple2<Tuple2<Integer, Integer>, Double> call(Rating r) {
                          return new Tuple2<Tuple2<Integer, Integer>, Double>(
                              new Tuple2<Integer, Integer>(r.user(), r.product()), r.rating());
                        }
                      }))
              .join(predictions)
              .values();
      double mse =
          JavaDoubleRDD.fromRDD(
                  ratesAndPreds
                      .map(
                          new Function<Tuple2<Double, Double>, Object>() {
                            @Override
                            public Object call(Tuple2<Double, Double> pair) {
                              // Primitive arithmetic; only the returned square is boxed.
                              double err = pair._1() - pair._2();
                              return err * err;
                            }
                          })
                      .rdd())
              .mean();
      System.out.println("Mean Squared Error = " + mse);

      // Save and load model. The loaded instance is not used further; the call is kept so that
      // a model that cannot be read back still fails here.
      model.save(sc.sc(), "myModelPath");
      MatrixFactorizationModel.load(sc.sc(), "myModelPath");
    } finally {
      // Release the Spark context on every exit path so a failure does not leak it.
      sc.stop();
    }
  }