/** Test falling back to an empty user. */ @Test public void testUserMeanBaselineFallback() { ItemScorer mean = makeGlobalMean(); ItemScorer pred = new UserMeanItemScorer( new PrefetchingUserEventDAO(dao), mean, new RatingVectorUserHistorySummarizer(), 0); // unseen user - should be global mean assertThat(pred.score(10, 10), closeTo(RATINGS_DAT_MEAN, 0.001)); }
@Test public void testItemMeanBaseline() { ItemScorer pred = new ItemMeanRatingItemScorer.Builder(dao, 0.0).get(); long[] items = {5, 7, 10}; double[] values = {3, 6, 4}; SparseVector map = MutableSparseVector.wrap(items, values).freeze(); // unseen item, should be global mean assertThat(pred.score(10, 2), closeTo(RATINGS_DAT_MEAN, 0.001)); // seen item - should be item average assertThat(pred.score(10, 5), closeTo(3.0, 0.001)); }
@Test public void testUserItemMeanBaseline() { ItemScorer base = new ItemMeanRatingItemScorer.Builder(dao, 0.0).get(); ItemScorer pred = new UserMeanItemScorer( new PrefetchingUserEventDAO(dao), base, new RatingVectorUserHistorySummarizer(), 0); // we use user 8 - their average offset is 0.5 // unseen item, should be global mean + user offset assertThat(pred.score(8, 10), closeTo(RATINGS_DAT_MEAN + 0.5, 0.001)); // seen item - should be item average + user offset assertThat(pred.score(8, 5), closeTo(3.5, 0.001)); }
/** * Build a rating matrix from the rating data. Each user's ratings are first normalized by * subtracting a baseline score (usually a mean). * * @param userMapping The index mapping of user IDs to column numbers. * @param itemMapping The index mapping of item IDs to row numbers. * @return A matrix storing the <i>normalized</i> user ratings. */ private RealMatrix createRatingMatrix(IdIndexMapping userMapping, IdIndexMapping itemMapping) { final int nusers = userMapping.size(); final int nitems = itemMapping.size(); // Create a matrix with users on rows and items on columns logger.info("creating {} by {} rating matrix", nusers, nitems); RealMatrix matrix = MatrixUtils.createRealMatrix(nusers, nitems); // populate it with data Cursor<UserHistory<Event>> users = userEventDAO.streamEventsByUser(); try { for (UserHistory<Event> user : users) { // Get the row number for this user int u = userMapping.getIndex(user.getUserId()); MutableSparseVector ratings = Ratings.userRatingVector(user.filter(Rating.class)); MutableSparseVector baselines = MutableSparseVector.create(ratings.keySet()); baselineScorer.score(user.getUserId(), baselines); // TODO Populate this user's row with their ratings, minus the baseline scores for (VectorEntry entry : ratings.fast(State.SET)) { long itemid = entry.getKey(); int i = itemMapping.getIndex(itemid); double rating = entry.getValue(); double baseline = baselines.get(itemid); matrix.setEntry(u, i, rating - baseline); } } } finally { users.close(); } return matrix; }
/**
 * Fill {@code predictions} with the most probable quantized rating for each item.
 * A fresh {@code OrdRecModel} is trained on every call from the user's ratings and the
 * item scorer's scores; when {@code reportDistribution} is set, the per-item probability
 * distribution is also stored in the {@code RATING_PROBABILITY_CHANNEL} channel.
 *
 * @param uid         The user to predict for.
 * @param predictions The vector whose key domain lists the items to predict; filled in place.
 */
@Override
public void predict(long uid, @Nonnull MutableSparseVector predictions) {
    logger.debug("predicting {} items for {}", predictions.keyDomain().size(), uid);

    OrdRecModel model = new OrdRecModel(quantizer);
    SparseVector userRatings = makeUserVector(uid, userEventDao);

    // Scores are needed both for the rated items (training) and the requested items (prediction)
    LongSet scoredItems = LongUtils.setUnion(userRatings.keySet(), predictions.keyDomain());
    MutableSparseVector scores = MutableSparseVector.create(scoredItems);
    itemScorer.score(uid, scores);

    model.train(userRatings, scores);
    logger.debug("trained parameters for {}: {}", uid, model);

    // Scratch vector reused for every item's distribution
    Vector levelProbs = Vector.createLength(model.getLevelCount());
    Long2ObjectMap<IVector> distributions =
            reportDistribution ? predictions.addChannel(RATING_PROBABILITY_CHANNEL) : null;

    for (VectorEntry entry : predictions.fast(VectorEntry.State.EITHER)) {
        long itemId = entry.getKey();
        model.getProbDistribution(scores.get(itemId), levelProbs);

        // Predict the rating level with the highest probability
        predictions.set(entry, quantizer.getIndexValue(levelProbs.maxElementIndex()));

        if (distributions != null) {
            // store an immutable copy, since the scratch vector is overwritten next iteration
            distributions.put(itemId, levelProbs.immutable());
        }
    }
}
/**
 * Compute the predicted score for a user-item pair: the baseline score plus the
 * product of the user vector, the feature weights, and the transposed item vector.
 *
 * <p>If any of the three model lookups returns {@code null}, the baseline score is
 * returned unchanged.  NOTE(review): presumably a {@code null} vector means the model
 * has no data for that user or item; confirm against the model implementation.
 *
 * @param user The user ID.
 * @param item The item ID.
 * @return The baseline score, plus the model product when all model data is available.
 */
private double prediction(long user, long item) {
    double baseline = baselineScorer.score(user, item);

    RealMatrix userFeature = model.getUserVector(user);
    RealMatrix featureWeights = model.getFeatureWeights();
    RealMatrix itemFeature = model.getItemVector(item);

    // Check for missing model data explicitly rather than catching NullPointerException,
    // so that unrelated null-pointer bugs are not silently turned into baseline scores.
    if (userFeature == null || featureWeights == null || itemFeature == null) {
        return baseline;
    }

    double product = userFeature.multiply(featureWeights)
                                .multiply(itemFeature.transpose())
                                .getEntry(0, 0);
    return baseline + product;
}
/**
 * The scorer from {@code makeGlobalMean()} should score an item at the ratings-data mean.
 */
@Test
public void testMeanBaseline() {
    long item = 2L;
    ItemScorer globalMean = makeGlobalMean();

    SparseVector scores = globalMean.score(10L, itemSet(item));

    assertEquals(RATINGS_DAT_MEAN, scores.get(item), 0.00001);
}