/**
 * Builds the item-by-genre matrix used as the first factor of the proximity matrix needed for a
 * {@code HIRItemScorer}.
 *
 * <p>The matrix has one row per item ID reported by {@code dao} and one column per genre
 * reported by {@code gDao}. Rows are filled in the iteration order of the item ID set, each
 * with the genre vector that {@code gDao} returns for that item.
 *
 * @param dao The DataAccessObject interfacing with the item data for the model
 * @param gDao The genreDataAccessObject interfacing with the genre data for the model
 */
public RowStochasticFactorOfProximity(ItemDAO dao, ItemGenreDAO gDao) {
    LongSet itemIds = dao.getItemIds();
    int genreCount = gDao.getGenreSize();
    itemSize = itemIds.size();

    // Start from an all-zero itemSize x genreCount matrix and fill it row by row.
    rowStochastic = MatrixUtils.createRealMatrix(new double[itemSize][genreCount]);

    int row = 0;
    for (LongIterator it = itemIds.iterator(); it.hasNext(); row++) {
        long itemId = it.nextLong();
        rowStochastic.setRowVector(row, gDao.getItemGenre(itemId));
    }
}
/**
 * Constructs and returns a {@link HIRModel}.
 *
 * <p>Every ordered pair of items in the build context (each item is also paired with itself)
 * is passed to {@code DAMatrix.putItemPair} together with both item vectors. The model is then
 * created from the matrices produced by {@code DAMatrix}, {@code RSMatrix} and
 * {@code TFMatrix}.
 *
 * @return the newly built model
 */
@Override
public HIRModel get() {
    LongSet itemIds = buildContext.getItems();

    for (LongIterator rows = itemIds.iterator(); rows.hasNext(); ) {
        final long rowItem = rows.nextLong();
        final SparseVector rowVector = buildContext.itemVector(rowItem);

        for (LongIterator cols = itemIds.iterator(); cols.hasNext(); ) {
            final long colItem = cols.nextLong();
            final SparseVector colVector = buildContext.itemVector(colItem);
            DAMatrix.putItemPair(rowItem, rowVector, colItem, colVector);
        }
    }

    return new HIRModel(
            DAMatrix.buildMatrix(),
            RSMatrix.RowStochastic(),
            TFMatrix.ColumnStochastic());
}
private DataSource downsample(DataSource data, LongSet testUsers) throws IOException { String fileName = getFileName(data); File output = new File(fileName); UpToDateChecker checker = new UpToDateChecker(); checker.addInput(data.lastModified()); checker.addOutput(output); if (!checker.isUpToDate()) { RandomOrder<Rating> order = new RandomOrder<Rating>(); Random rng = new Random(); // write datasource CSVWriter csv = null; try { csv = CSVWriter.open(output, null); Cursor<UserHistory<Rating>> histories = data.getUserEventDAO().streamEventsByUser(Rating.class); for (UserHistory<Rating> ratings : histories) { List<Rating> rats = new ArrayList<Rating>(ratings); order.apply(rats, rng); for (int i = 0; i < rats.size(); i++) { if (!testUsers.contains(ratings.getUserId()) || i < retain) { Rating rating = rats.get(i); Preference pref = rating.getPreference(); csv.writeRow( Lists.newArrayList( rating.getUserId(), rating.getItemId(), rating.getValue(), rating.getTimestamp())); } } } } finally { if (csv != null) { csv.close(); } } } CSVDataSourceBuilder builder = new CSVDataSourceBuilder(data.getName()); builder.setDomain(data.getPreferenceDomain()); builder.setFile(output); return builder.build(); }
/**
 * Closes {@code readed} if it is set; does nothing when it is {@code null}.
 */
public void close() {
    if (readed == null) {
        return;
    }
    readed.close();
}
/**
 * Adds an ID to {@code readed}.
 *
 * @param id the ID to add
 * @return the value returned by {@code readed.add(id)}
 */
public boolean addReadedId(long id) {
    final boolean added = readed.add(id);
    return added;
}