/**
   * Build a rating matrix from the rating data. Each user's ratings are first normalized by
   * subtracting a baseline score (usually a mean).
   *
   * @param userMapping The index mapping of user IDs to row numbers.
   * @param itemMapping The index mapping of item IDs to column numbers.
   * @return A matrix storing the <i>normalized</i> user ratings.
   */
  private RealMatrix createRatingMatrix(IdIndexMapping userMapping, IdIndexMapping itemMapping) {
    final int userCount = userMapping.size();
    final int itemCount = itemMapping.size();

    // Layout: one row per user, one column per item.
    logger.info("creating {} by {} rating matrix", userCount, itemCount);
    final RealMatrix result = MatrixUtils.createRealMatrix(userCount, itemCount);

    // Walk every user's event history and fill in that user's row.
    final Cursor<UserHistory<Event>> histories = userEventDAO.streamEventsByUser();
    try {
      for (UserHistory<Event> history : histories) {
        final long userId = history.getUserId();
        final int row = userMapping.getIndex(userId);

        // The user's ratings, plus a baseline score for each rated item.
        final MutableSparseVector rated = Ratings.userRatingVector(history.filter(Rating.class));
        final MutableSparseVector baselineScores = MutableSparseVector.create(rated.keySet());
        baselineScorer.score(userId, baselineScores);

        // Store (rating - baseline) in the cell for each item this user rated.
        for (VectorEntry cell : rated.fast(State.SET)) {
          final long itemId = cell.getKey();
          result.setEntry(
              row, itemMapping.getIndex(itemId), cell.getValue() - baselineScores.get(itemId));
        }
      }
    } finally {
      // Always release the cursor, even if populating the matrix fails.
      histories.close();
    }

    return result;
  }
  // Example #2
  /**
   * Produce a down-sampled copy of a data source, backed by a CSV file.
   *
   * <p>The output file (named by {@code getFileName(data)}) is rewritten only when the up-to-date
   * check of the file against {@code data.lastModified()} fails. Each user's ratings are reordered
   * with a {@code RandomOrder}; users in {@code testUsers} keep only the first {@code retain}
   * ratings of that order, while all other users keep every rating.
   *
   * @param data The data source to down-sample.
   * @param testUsers The IDs of the users whose ratings are truncated.
   * @return A CSV data source over the output file, carrying the name and preference domain of
   *     {@code data}.
   * @throws IOException if an I/O error occurs while writing the output file.
   */
  private DataSource downsample(DataSource data, LongSet testUsers) throws IOException {
    String fileName = getFileName(data);
    File output = new File(fileName);
    UpToDateChecker checker = new UpToDateChecker();

    checker.addInput(data.lastModified());
    checker.addOutput(output);
    if (!checker.isUpToDate()) {
      RandomOrder<Rating> order = new RandomOrder<Rating>();
      Random rng = new Random();
      // write datasource
      CSVWriter csv = CSVWriter.open(output, null);
      try {
        Cursor<UserHistory<Rating>> histories =
            data.getUserEventDAO().streamEventsByUser(Rating.class);
        try {
          for (UserHistory<Rating> ratings : histories) {
            // Users outside the test set keep all of their ratings.
            boolean keepAll = !testUsers.contains(ratings.getUserId());
            List<Rating> rats = new ArrayList<Rating>(ratings);
            order.apply(rats, rng);
            for (int i = 0; i < rats.size(); i++) {
              if (keepAll || i < retain) {
                Rating rating = rats.get(i);
                csv.writeRow(
                    Lists.newArrayList(
                        rating.getUserId(),
                        rating.getItemId(),
                        rating.getValue(),
                        rating.getTimestamp()));
              }
            }
          }
        } finally {
          // Release the event cursor even if writing a row fails.
          histories.close();
        }
      } finally {
        csv.close();
      }
    }

    CSVDataSourceBuilder builder = new CSVDataSourceBuilder(data.getName());
    builder.setDomain(data.getPreferenceDomain());
    builder.setFile(output);
    return builder.build();
  }