/**
 * Builds the PMML descriptor for a factored matrix model.
 *
 * <p>The factor matrices themselves are not embedded in the PMML document. They are written
 * under {@code candidatePath} (sub-paths {@code X} and {@code Y}) and the document only carries
 * Extensions that point at those locations, together with the hyperparameters and the ID
 * extensions for both sides of the model.
 *
 * @param model the trained model whose user and product features are saved
 * @param features value recorded in the "features" extension
 * @param lambda value recorded in the "lambda" extension
 * @param alpha value recorded in the "alpha" extension; written only when {@code implicit} is true
 * @param implicit value recorded in the "implicit" extension
 * @param candidatePath parent path under which the "X" and "Y" feature data are saved
 * @return the skeleton PMML document populated with the extensions described above
 */
private static PMML mfModelToPMML(
    MatrixFactorizationModel model,
    int features,
    double lambda,
    double alpha,
    boolean implicit,
    Path candidatePath) {
  // Persist both factor matrices first; the document below refers to them by relative path.
  Path userFeaturesPath = new Path(candidatePath, "X");
  saveFeaturesRDD(model.userFeatures(), userFeaturesPath);
  Path productFeaturesPath = new Path(candidatePath, "Y");
  saveFeaturesRDD(model.productFeatures(), productFeaturesPath);

  PMML document = PMMLUtils.buildSkeletonPMML();

  // Pointers to the saved matrix data.
  PMMLUtils.addExtension(document, "X", "X/");
  PMMLUtils.addExtension(document, "Y", "Y/");

  // Hyperparameters; alpha is recorded only for implicit models.
  PMMLUtils.addExtension(document, "features", String.valueOf(features));
  PMMLUtils.addExtension(document, "lambda", String.valueOf(lambda));
  PMMLUtils.addExtension(document, "implicit", String.valueOf(implicit));
  if (implicit) {
    PMMLUtils.addExtension(document, "alpha", String.valueOf(alpha));
  }

  addIDsExtension(document, "XIDs", model.userFeatures());
  addIDsExtension(document, "YIDs", model.productFeatures());
  return document;
}
public static void main(String[] args) { SparkConf conf = new SparkConf().setMaster("local").setAppName("SparkRecommendationEngine"); JavaSparkContext sc = new JavaSparkContext(conf); // Load and parse the data String path = "data/test.csv"; JavaRDD<String> data = sc.textFile(path); JavaRDD<Rating> ratings = data.map( new Function<String, Rating>() { public Rating call(String s) { String[] sarray = s.split(","); return new Rating( Integer.parseInt(sarray[0]), Integer.parseInt(sarray[1]), Double.parseDouble(sarray[2])); } }); // Build the recommendation model using ALS int rank = 10; int numIterations = 10; MatrixFactorizationModel model = ALS.train(JavaRDD.toRDD(ratings), rank, numIterations, 0.01); // Evaluate the model on rating data JavaRDD<Tuple2<Object, Object>> userProducts = ratings.map( new Function<Rating, Tuple2<Object, Object>>() { public Tuple2<Object, Object> call(Rating r) { return new Tuple2<Object, Object>(r.user(), r.product()); } }); JavaPairRDD<Tuple2<Integer, Integer>, Double> predictions = JavaPairRDD.fromJavaRDD( model .predict(JavaRDD.toRDD(userProducts)) .toJavaRDD() .map( new Function<Rating, Tuple2<Tuple2<Integer, Integer>, Double>>() { public Tuple2<Tuple2<Integer, Integer>, Double> call(Rating r) { return new Tuple2<Tuple2<Integer, Integer>, Double>( new Tuple2<Integer, Integer>(r.user(), r.product()), r.rating()); } })); JavaRDD<Tuple2<Double, Double>> ratesAndPreds = JavaPairRDD.fromJavaRDD( ratings.map( new Function<Rating, Tuple2<Tuple2<Integer, Integer>, Double>>() { public Tuple2<Tuple2<Integer, Integer>, Double> call(Rating r) { return new Tuple2<Tuple2<Integer, Integer>, Double>( new Tuple2<Integer, Integer>(r.user(), r.product()), r.rating()); } })) .join(predictions) .values(); double MSE = JavaDoubleRDD.fromRDD( ratesAndPreds .map( new Function<Tuple2<Double, Double>, Object>() { public Object call(Tuple2<Double, Double> pair) { Double err = pair._1() - pair._2(); return err * err; } }) .rdd()) .mean(); 
System.out.println("Mean Squared Error = " + MSE); // Save and load model model.save(sc.sc(), "myModelPath"); MatrixFactorizationModel sameModel = MatrixFactorizationModel.load(sc.sc(), "myModelPath"); }