Exemplo n.º 1
0
 /**
  * Builds one downsampled data set per entry in {@code sources}.
  *
  * <p>Each result copies the source's attributes, query data and test data, adds a
  * {@code "Retain"} attribute holding the {@code retain} setting, and replaces the training
  * data with the output of {@code downsample}, keyed on the user IDs found in the source's
  * test data. The result is named after this task when {@code getName()} is non-null, and
  * after the source data set otherwise.
  *
  * @return the rebuilt data sets, in the iteration order of {@code sources}
  * @throws TaskExecutionException wrapping any {@link IOException} raised while processing
  * @throws InterruptedException declared by the overridden method
  */
 @Override
 protected List<TTDataSet> perform() throws TaskExecutionException, InterruptedException {
   Preconditions.checkNotNull(sources);
   final List<TTDataSet> results = new ArrayList<TTDataSet>(sources.size());
   try {
     for (TTDataSet source : sources) {
       final GenericTTDataBuilder ttBuilder = new GenericTTDataBuilder();

       // Prefer the task's own name; fall back to the name of the source data set.
       ttBuilder.setName(getName() == null ? source.getName() : getName());

       // Carry over every existing attribute before recording the retain setting,
       // so that "Retain" wins over any same-named attribute on the source.
       for (Map.Entry<String, Object> attribute : source.getAttributes().entrySet()) {
         ttBuilder.setAttribute(attribute.getKey(), attribute.getValue());
       }
       ttBuilder.setAttribute("Retain", retain);

       ttBuilder.setQuery(source.getQueryData());
       ttBuilder.setTest(source.getTestData());
       ttBuilder.setTrain(
           downsample(
               source.getTrainingData(),
               source.getTestData().getUserDAO().getUserIds()));

       results.add(ttBuilder.build());
     }
   } catch (IOException ioFailure) {
     throw new TaskExecutionException(ioFailure);
   }
   return results;
 }
Exemplo n.º 2
0
  /**
   * Writes a downsampled copy of a data source to a CSV file and returns a data source backed
   * by that file.
   *
   * <p>The output file is named by {@code getFileName(data)}. It is only (re)written when the
   * {@link UpToDateChecker} reports it as out of date relative to {@code data.lastModified()}.
   * When writing, each user's ratings are reordered with a {@link RandomOrder} driven by a
   * freshly created, unseeded {@link Random}. For users contained in {@code testUsers} only the
   * first {@code retain} reordered ratings are written; for all other users every rating is
   * written. Each row holds user ID, item ID, value and timestamp.
   *
   * @param data the data source to downsample
   * @param testUsers IDs of the users whose ratings are cut down to {@code retain} entries
   * @return a CSV data source over the output file, carrying the name and preference domain
   *     of {@code data}
   * @throws IOException propagated from the underlying file and DAO operations
   */
  private DataSource downsample(DataSource data, LongSet testUsers) throws IOException {
    String fileName = getFileName(data);
    File output = new File(fileName);
    UpToDateChecker checker = new UpToDateChecker();

    checker.addInput(data.lastModified());
    checker.addOutput(output);
    if (!checker.isUpToDate()) {
      RandomOrder<Rating> order = new RandomOrder<Rating>();
      Random rng = new Random();
      // write datasource
      CSVWriter csv = CSVWriter.open(output, null);
      try {
        Cursor<UserHistory<Rating>> histories =
            data.getUserEventDAO().streamEventsByUser(Rating.class);
        try {
          for (UserHistory<Rating> ratings : histories) {
            List<Rating> rats = new ArrayList<Rating>(ratings);
            order.apply(rats, rng);
            // Membership is per user, not per rating, so look it up once per history.
            boolean keepAll = !testUsers.contains(ratings.getUserId());
            for (int i = 0; i < rats.size(); i++) {
              if (!keepAll && i >= retain) {
                // Test user has reached the retain quota; the remaining ratings are dropped.
                break;
              }
              Rating rating = rats.get(i);
              csv.writeRow(
                  Lists.newArrayList(
                      rating.getUserId(),
                      rating.getItemId(),
                      rating.getValue(),
                      rating.getTimestamp()));
            }
          }
        } finally {
          // Release the event stream even when writing fails part-way through.
          histories.close();
        }
      } finally {
        csv.close();
      }
    }

    CSVDataSourceBuilder builder = new CSVDataSourceBuilder(data.getName());
    builder.setDomain(data.getPreferenceDomain());
    builder.setFile(output);
    return builder.build();
  }