Ejemplo n.º 1
0
  /**
   * Builds a CSV-backed copy of {@code data} in which every user listed in
   * {@code testUsers} keeps at most {@code retain} ratings, while all other
   * users keep every rating.
   *
   * <p>Each user's ratings are reordered with a {@link RandomOrder} before the
   * first {@code retain} are kept, so the retained subset is presumably a
   * random sample (confirm against {@code RandomOrder}).  The output file is
   * only rewritten when the {@link UpToDateChecker} reports that it is older
   * than the input data.
   *
   * @param data the source data to downsample
   * @param testUsers IDs of the users whose ratings are truncated
   * @return a data source reading the downsampled CSV file, carrying the same
   *     name and preference domain as {@code data}
   * @throws IOException if the CSV file cannot be written
   */
  private DataSource downsample(DataSource data, LongSet testUsers) throws IOException {
    String fileName = getFileName(data);
    File output = new File(fileName);
    UpToDateChecker checker = new UpToDateChecker();

    checker.addInput(data.lastModified());
    checker.addOutput(output);
    if (!checker.isUpToDate()) {
      RandomOrder<Rating> order = new RandomOrder<Rating>();
      Random rng = new Random();
      // write datasource
      CSVWriter csv = null;
      Cursor<UserHistory<Rating>> histories = null;
      try {
        csv = CSVWriter.open(output, null);
        histories = data.getUserEventDAO().streamEventsByUser(Rating.class);
        for (UserHistory<Rating> ratings : histories) {
          List<Rating> rats = new ArrayList<Rating>(ratings);
          order.apply(rats, rng);
          // Membership is a per-user property; look it up once per history
          // instead of once per rating.
          boolean keepAll = !testUsers.contains(ratings.getUserId());
          for (int i = 0; i < rats.size(); i++) {
            if (!keepAll && i >= retain) {
              // Test user has reached the retention limit; nothing further
              // from this history is written.
              break;
            }
            Rating rating = rats.get(i);
            csv.writeRow(
                Lists.newArrayList(
                    rating.getUserId(),
                    rating.getItemId(),
                    rating.getValue(),
                    rating.getTimestamp()));
          }
        }
      } finally {
        // Release the cursor first, and still close the writer if that fails.
        // LensKit cursors are Closeable and must be closed by their consumer.
        try {
          if (histories != null) {
            histories.close();
          }
        } finally {
          if (csv != null) {
            csv.close();
          }
        }
      }
    }

    CSVDataSourceBuilder builder = new CSVDataSourceBuilder(data.getName());
    builder.setDomain(data.getPreferenceDomain());
    builder.setFile(output);
    return builder.build();
  }