/** * Build a rating matrix from the rating data. Each user's ratings are first normalized by * subtracting a baseline score (usually a mean). * * @param userMapping The index mapping of user IDs to column numbers. * @param itemMapping The index mapping of item IDs to row numbers. * @return A matrix storing the <i>normalized</i> user ratings. */ private RealMatrix createRatingMatrix(IdIndexMapping userMapping, IdIndexMapping itemMapping) { final int nusers = userMapping.size(); final int nitems = itemMapping.size(); // Create a matrix with users on rows and items on columns logger.info("creating {} by {} rating matrix", nusers, nitems); RealMatrix matrix = MatrixUtils.createRealMatrix(nusers, nitems); // populate it with data Cursor<UserHistory<Event>> users = userEventDAO.streamEventsByUser(); try { for (UserHistory<Event> user : users) { // Get the row number for this user int u = userMapping.getIndex(user.getUserId()); MutableSparseVector ratings = Ratings.userRatingVector(user.filter(Rating.class)); MutableSparseVector baselines = MutableSparseVector.create(ratings.keySet()); baselineScorer.score(user.getUserId(), baselines); // TODO Populate this user's row with their ratings, minus the baseline scores for (VectorEntry entry : ratings.fast(State.SET)) { long itemid = entry.getKey(); int i = itemMapping.getIndex(itemid); double rating = entry.getValue(); double baseline = baselines.get(itemid); matrix.setEntry(u, i, rating - baseline); } } } finally { users.close(); } return matrix; }
/** * Build the SVD model. * * @return A singular value decomposition recommender model. */ @Override public SVDModel get() { // Create index mappings of user and item IDs. // You can use these to find row and columns in the matrix based on user/item IDs. IdIndexMapping userMapping = IdIndexMapping.create(userDAO.getUserIds()); logger.debug("indexed {} users", userMapping.size()); IdIndexMapping itemMapping = IdIndexMapping.create(itemDAO.getItemIds()); logger.debug("indexed {} items", itemMapping.size()); // We have to do 2 things: // First, prepare a matrix containing the rating data. RealMatrix matrix = createRatingMatrix(userMapping, itemMapping); // Second, compute its factorization // All the work is done in the constructor SingularValueDecomposition svd = new SingularValueDecomposition(matrix); // Third, truncate the decomposed matrix // TODO Truncate the matrices and construct the SVD model RealMatrix userMatrix = svd.getU(); RealMatrix weights = svd.getS(); RealMatrix itemMatrix = svd.getV(); userMatrix = userMatrix.getSubMatrix(0, userMatrix.getRowDimension() - 1, 0, featureCount - 1); weights = weights.getSubMatrix(0, featureCount - 1, 0, featureCount - 1); itemMatrix = itemMatrix.getSubMatrix(0, itemMatrix.getRowDimension() - 1, 0, featureCount - 1); return new SVDModel(userMapping, itemMapping, userMatrix, itemMatrix, weights); }