Example #1
0
 /**
  * Checks the item-mean scorer against two items for user 10: one the test treats as unseen
  * (expected to score the global mean) and one treated as seen (expected to score its own
  * average).
  */
 @Test
 public void testItemMeanBaseline() {
   ItemScorer pred = new ItemMeanRatingItemScorer.Builder(dao, 0.0).get();
   // unseen item, should be global mean
   assertThat(pred.score(10, 2), closeTo(RATINGS_DAT_MEAN, 0.001));
   // seen item - should be item average
   assertThat(pred.score(10, 5), closeTo(3.0, 0.001));
 }
Example #2
0
 /**
  * Exercises a user-mean scorer stacked on top of the global-mean scorer. User 8 is expected to
  * receive the same score (4.5) for every item, and an unknown user is expected to receive the
  * global mean.
  */
 @Test
 public void testUserMeanBaseline() {
   ItemScorer globalMean = makeGlobalMean();
   PrefetchingUserEventDAO userEvents = new PrefetchingUserEventDAO(dao);
   RatingVectorUserHistorySummarizer summarizer = new RatingVectorUserHistorySummarizer();
   ItemScorer scorer = new UserMeanItemScorer(userEvents, globalMean, summarizer, 0);

   final double tolerance = 0.001;
   final double userEightScore = 4.5;

   // user 8, item they have not rated
   assertThat(scorer.score(8, 4), closeTo(userEightScore, tolerance));
   // user 8, item they have rated - the score must not change
   assertThat(scorer.score(8, 10), closeTo(userEightScore, tolerance));
   // user 10 is unknown, so the global mean is expected
   assertThat(scorer.score(10, 10), closeTo(RATINGS_DAT_MEAN, tolerance));
 }
Example #3
0
  /**
   * Exercises a user-mean scorer stacked on top of the item-mean scorer. The test uses user 8,
   * whose average offset from the underlying scores is expected to be 0.5.
   */
  @Test
  public void testUserItemMeanBaseline() {
    ItemScorer itemMean = new ItemMeanRatingItemScorer.Builder(dao, 0.0).get();
    PrefetchingUserEventDAO userEvents = new PrefetchingUserEventDAO(dao);
    RatingVectorUserHistorySummarizer summarizer = new RatingVectorUserHistorySummarizer();
    ItemScorer scorer = new UserMeanItemScorer(userEvents, itemMean, summarizer, 0);

    final double tolerance = 0.001;

    // item treated as unseen: global mean plus the user's offset
    double unseenScore = scorer.score(8, 10);
    assertThat(unseenScore, closeTo(RATINGS_DAT_MEAN + 0.5, tolerance));

    // item treated as seen: item average plus the user's offset
    double seenScore = scorer.score(8, 5);
    assertThat(seenScore, closeTo(3.5, tolerance));
  }
  /**
   * Builds the normalized rating matrix from the streamed user histories. Each stored value is a
   * user's rating for an item minus the baseline scorer's score for that same user and item.
   *
   * <p>Users are laid out on rows and items on columns. Cells with no rating are left at whatever
   * value the freshly created matrix holds.
   *
   * @param userMapping The index mapping of user IDs to row numbers.
   * @param itemMapping The index mapping of item IDs to column numbers.
   * @return A matrix storing the <i>normalized</i> user ratings.
   */
  private RealMatrix createRatingMatrix(IdIndexMapping userMapping, IdIndexMapping itemMapping) {
    final int userCount = userMapping.size();
    final int itemCount = itemMapping.size();

    // One row per user, one column per item
    logger.info("creating {} by {} rating matrix", userCount, itemCount);
    RealMatrix normalized = MatrixUtils.createRealMatrix(userCount, itemCount);

    Cursor<UserHistory<Event>> histories = userEventDAO.streamEventsByUser();
    try {
      for (UserHistory<Event> history : histories) {
        final long userId = history.getUserId();
        final int row = userMapping.getIndex(userId);

        // Score exactly the items this user has rated
        MutableSparseVector rated = Ratings.userRatingVector(history.filter(Rating.class));
        MutableSparseVector baselineScores = MutableSparseVector.create(rated.keySet());
        baselineScorer.score(userId, baselineScores);

        // Write rating minus baseline into this user's row
        for (VectorEntry cell : rated.fast(State.SET)) {
          final long itemId = cell.getKey();
          normalized.setEntry(
              row, itemMapping.getIndex(itemId), cell.getValue() - baselineScores.get(itemId));
        }
      }
    } finally {
      // Always release the cursor, even if population fails part-way
      histories.close();
    }

    return normalized;
  }
  /**
   * Fills {@code predictions} with a quantized rating for every key in its domain.
   *
   * <p>A fresh {@code OrdRecModel} is trained for the user from their rating vector and the item
   * scorer's scores. For each item, the model's probability distribution at the item's score is
   * computed, and the quantizer value at the index reported by {@code maxElementIndex()} is
   * stored as the prediction. When {@code reportDistribution} is set, the distribution itself is
   * also stored in the {@code RATING_PROBABILITY_CHANNEL} side channel.
   *
   * @param uid The user to predict for.
   * @param predictions The vector whose key domain lists the items to predict; written in place.
   */
  @Override
  public void predict(long uid, @Nonnull MutableSparseVector predictions) {
    logger.debug("predicting {} items for {}", predictions.keyDomain().size(), uid);

    OrdRecModel userModel = new OrdRecModel(quantizer);
    SparseVector userRatings = makeUserVector(uid, userEventDao);

    // Score the union of rated items (for training) and requested items (for prediction)
    LongSet itemsToScore = LongUtils.setUnion(userRatings.keySet(), predictions.keyDomain());
    MutableSparseVector itemScores = MutableSparseVector.create(itemsToScore);
    itemScorer.score(uid, itemScores);

    userModel.train(userRatings, itemScores);
    logger.debug("trained parameters for {}: {}", uid, userModel);

    // Scratch vector reused for every item's distribution
    Vector distribution = Vector.createLength(userModel.getLevelCount());

    Long2ObjectMap<IVector> distributionChannel = null;
    if (reportDistribution) {
      distributionChannel = predictions.addChannel(RATING_PROBABILITY_CHANNEL);
    }

    for (VectorEntry entry : predictions.fast(VectorEntry.State.EITHER)) {
      final long itemId = entry.getKey();
      userModel.getProbDistribution(itemScores.get(itemId), distribution);

      predictions.set(entry, quantizer.getIndexValue(distribution.maxElementIndex()));

      if (distributionChannel != null) {
        // Store an immutable copy, since the scratch vector is overwritten next iteration
        distributionChannel.put(itemId, distribution.immutable());
      }
    }
  }
 /**
  * Predicts a score for a user-item pair as the baseline score plus the product of the user
  * feature vector, the feature weights and the transposed item feature vector.
  *
  * <p>If the model, or any of the three matrices it supplies, is {@code null}, the baseline score
  * is returned on its own. A {@code NullPointerException} raised inside the model is no longer
  * swallowed, so genuine bugs are not hidden.
  *
  * @param user The user ID.
  * @param item The item ID.
  * @return The baseline score, plus the model's contribution when one is available.
  */
 private double prediction(long user, long item) {
   double baseline = baselineScorer.score(user, item);
   if (model == null) {
     return baseline;
   }

   RealMatrix userFeature = model.getUserVector(user);
   RealMatrix featureWeights = model.getFeatureWeights();
   RealMatrix itemFeature = model.getItemVector(item);
   if (userFeature == null || featureWeights == null || itemFeature == null) {
     // Nothing to add for this pair; fall back to the baseline alone
     return baseline;
   }

   // The product's (0, 0) entry is read as the model's contribution
   // (presumably both vectors are single-row matrices - confirm against the model)
   double product =
       userFeature.multiply(featureWeights).multiply(itemFeature.transpose()).getEntry(0, 0);
   return baseline + product;
 }
Example #7
0
 /**
  * Checks that the global-mean scorer, asked to score a single-item set for user 10, reports the
  * overall mean of the ratings data for that item.
  */
 @Test
 public void testMeanBaseline() {
   final long itemId = 2L;
   ItemScorer globalMean = makeGlobalMean();
   SparseVector scores = globalMean.score(10L, itemSet(itemId));
   assertEquals(RATINGS_DAT_MEAN, scores.get(itemId), 0.00001);
 }