/** * Build a rating matrix from the rating data. Each user's ratings are first normalized by * subtracting a baseline score (usually a mean). * * @param userMapping The index mapping of user IDs to column numbers. * @param itemMapping The index mapping of item IDs to row numbers. * @return A matrix storing the <i>normalized</i> user ratings. */ private RealMatrix createRatingMatrix(IdIndexMapping userMapping, IdIndexMapping itemMapping) { final int nusers = userMapping.size(); final int nitems = itemMapping.size(); // Create a matrix with users on rows and items on columns logger.info("creating {} by {} rating matrix", nusers, nitems); RealMatrix matrix = MatrixUtils.createRealMatrix(nusers, nitems); // populate it with data Cursor<UserHistory<Event>> users = userEventDAO.streamEventsByUser(); try { for (UserHistory<Event> user : users) { // Get the row number for this user int u = userMapping.getIndex(user.getUserId()); MutableSparseVector ratings = Ratings.userRatingVector(user.filter(Rating.class)); MutableSparseVector baselines = MutableSparseVector.create(ratings.keySet()); baselineScorer.score(user.getUserId(), baselines); // TODO Populate this user's row with their ratings, minus the baseline scores for (VectorEntry entry : ratings.fast(State.SET)) { long itemid = entry.getKey(); int i = itemMapping.getIndex(itemid); double rating = entry.getValue(); double baseline = baselines.get(itemid); matrix.setEntry(u, i, rating - baseline); } } } finally { users.close(); } return matrix; }
private DataSource downsample(DataSource data, LongSet testUsers) throws IOException { String fileName = getFileName(data); File output = new File(fileName); UpToDateChecker checker = new UpToDateChecker(); checker.addInput(data.lastModified()); checker.addOutput(output); if (!checker.isUpToDate()) { RandomOrder<Rating> order = new RandomOrder<Rating>(); Random rng = new Random(); // write datasource CSVWriter csv = null; try { csv = CSVWriter.open(output, null); Cursor<UserHistory<Rating>> histories = data.getUserEventDAO().streamEventsByUser(Rating.class); for (UserHistory<Rating> ratings : histories) { List<Rating> rats = new ArrayList<Rating>(ratings); order.apply(rats, rng); for (int i = 0; i < rats.size(); i++) { if (!testUsers.contains(ratings.getUserId()) || i < retain) { Rating rating = rats.get(i); Preference pref = rating.getPreference(); csv.writeRow( Lists.newArrayList( rating.getUserId(), rating.getItemId(), rating.getValue(), rating.getTimestamp())); } } } } finally { if (csv != null) { csv.close(); } } } CSVDataSourceBuilder builder = new CSVDataSourceBuilder(data.getName()); builder.setDomain(data.getPreferenceDomain()); builder.setFile(output); return builder.build(); }