/** Two likes of different items by the same user each produce a count of 1. */
@Test
public void testTwoLikes() {
    SparseVector summary =
            sum.summarize(
                    History.forUser(
                            42,
                            Like.create(42, 39),
                            Like.create(42, 67)));

    // one entry per liked item
    assertThat(summary.size(), equalTo(2));
    assertThat(summary.get(39), equalTo(1.0));
    assertThat(summary.get(67), equalTo(1.0));
}
/**
 * Fill {@code predictions} with the most probable quantized rating for each item in its
 * key domain, using a model trained on the user's ratings and the item scorer's output.
 *
 * @param uid The user to predict for.
 * @param predictions The vector whose key domain names the items to predict; every entry
 *     (set or unset) is assigned a value. When distribution reporting is enabled, the
 *     per-item probability vector is also stored in the
 *     {@code RATING_PROBABILITY_CHANNEL} channel.
 */
@Override
public void predict(long uid, @Nonnull MutableSparseVector predictions) {
    logger.debug("predicting {} items for {}", predictions.keyDomain().size(), uid);

    OrdRecModel model = new OrdRecModel(quantizer);
    SparseVector userRatings = makeUserVector(uid, userEventDao);

    // Scores are needed both for the rated items (training) and the requested items.
    LongSet itemsToScore = LongUtils.setUnion(userRatings.keySet(), predictions.keyDomain());
    MutableSparseVector itemScores = MutableSparseVector.create(itemsToScore);
    itemScorer.score(uid, itemScores);

    model.train(userRatings, itemScores);
    logger.debug("trained parameters for {}: {}", uid, model);

    // Scratch vector reused for every item's probability distribution.
    Vector levelProbabilities = Vector.createLength(model.getLevelCount());

    Long2ObjectMap<IVector> distributions = null;
    if (reportDistribution) {
        distributions = predictions.addChannel(RATING_PROBABILITY_CHANNEL);
    }

    for (VectorEntry entry : predictions.fast(VectorEntry.State.EITHER)) {
        final long item = entry.getKey();
        model.getProbDistribution(itemScores.get(item), levelProbabilities);

        final int mostLikelyLevel = levelProbabilities.maxElementIndex();
        predictions.set(entry, quantizer.getIndexValue(mostLikelyLevel));

        if (distributions != null) {
            // store a snapshot, since the scratch vector is overwritten on the next item
            distributions.put(item, levelProbabilities.immutable());
        }
    }
}
/**
 * Look up the value for a key.
 *
 * @param key The key to look up.
 * @return The backing vector's value for {@code key}, or {@link #defaultReturnValue()} when
 *     the vector does not contain the key.
 */
@Override
public Double get(long key) {
    if (!vector.containsKey(key)) {
        return defaultReturnValue();
    }
    return vector.get(key);
}
/** Single likes and a like batch are summed per item. */
@Test
public void testLikeBatch() {
    SparseVector summary =
            sum.summarize(
                    History.forUser(
                            42,
                            Like.create(42, 39),
                            LikeBatch.create(42, 67, 402),
                            Like.create(42, 39)));

    assertThat(summary.size(), equalTo(2));
    // item 39 was liked twice individually
    assertThat(summary.get(39), equalTo(2.0));
    // item 67 only appears in the batch of 402
    assertThat(summary.get(67), equalTo(402.0));
}
/**
 * Check that we score items but do not provide scores for items the user has previously
 * rated. User 5 has rated only item 8 previously.
 */
@Test
public void testItemScorerNoRating() {
    ItemItemScorer scorer = session.get(ItemItemScorer.class);
    assertThat(scorer, notNullValue());

    SparseVector result = scorer.score(5, LongArrayList.wrap(new long[] {7, 8}));
    assertThat(result, notNullValue());

    // only the unrated item (7) gets a score
    assertThat(result.size(), equalTo(1));
    assertThat(result.get(7), not(notANumber()));
    assertThat(result.containsKey(8), equalTo(false));
}
/**
 * Build a transformation that scales by the norm of the reference vector.
 *
 * @param reference The vector whose norm determines the scale.
 * @return A {@code ScalingTransform} built from the reference norm, or the identity
 *     normalizer's transformation when the norm's magnitude is below the tolerance.
 */
@Override
public VectorTransformation makeTransformation(SparseVector reference) {
    final double norm = reference.norm();
    final boolean negligible = Math.abs(norm) < tolerance;
    if (!negligible) {
        return new ScalingTransform(norm);
    }
    // Norm is too small to scale by; fall back to the identity transformation.
    return new IdentityVectorNormalizer().makeTransformation(reference);
}
/** * Get the tag vector for a particular item. * * @param item The item. * @return The item's tag vector. If the item is not known to the model, then this vector is * empty. */ public SparseVector getItemVector(long item) { // Look up the item SparseVector vec = itemVectors.get(item); if (vec == null) { // We don't know the item! Return an empty vector return SparseVector.empty(); } else { return vec; } }
@Override public double similarity(SparseVector vec1, SparseVector vec2) { final double distance; // One of the vector is empty if (Scalars.isZero(vec1.norm()) || Scalars.isZero(vec2.norm())) { return Double.NaN; } LongSet ts = LongUtils.setUnion(vec1.keySet(), vec2.keySet()); MutableSparseVector v1 = MutableSparseVector.create(ts); v1.fill(0); v1.set(vec1); v1.multiply(1.0 / v1.norm()); v1.addScaled(vec2, -1.0 / vec2.norm()); distance = v1.norm(); return 1 - distance; }
@Override public boolean equals(Object o) { if (this == o) { return true; } else if (o instanceof SparseVector) { SparseVector vo = (SparseVector) o; int sz = size(); int osz = vo.size(); if (sz != osz) { return false; } else { if (!this.keySet().equals(vo.keySet())) { return false; // same keys } // we know that sparse vector values are always in key order. so just compare them. return this.values().equals(vo.values()); } } else { return false; } }
/**
 * Measure one user's recommendation list as the ratio of its DCG to the DCG of the ideal
 * ordering of the user's test ratings, and record the result in the accumulator.
 *
 * @param user The user being evaluated.
 * @param context The accumulator that receives the score.
 * @param recommendations The recommendation list; only the first {@code listSize} entries
 *     are considered.
 * @param listSize The maximum list length used for both the actual and the ideal list.
 * @return The ratio of actual gain to ideal gain.
 */
public double measureUser(
        TestUser user, MeanAccumulator context, List<ScoredId> recommendations, int listSize) {
    List<ScoredId> topN = recommendations;
    if (topN.size() > listSize) {
        topN = new ArrayList<ScoredId>(topN.subList(0, listSize));
    }

    SparseVector testRatings = user.getTestRatings();

    // Ideal list: the user's test items by value, truncated to the list size.
    LongList idealOrder = testRatings.keysByValue(true);
    if (idealOrder.size() > listSize) {
        idealOrder = idealOrder.subList(0, listSize);
    }
    final double idealGain = computeDCG(idealOrder, testRatings);

    // Actual list: the recommended item IDs in recommendation order.
    LongList recommendedItems = new LongArrayList(topN.size());
    for (ScoredId rec : topN) {
        recommendedItems.add(rec.getId());
    }
    final double actualGain = computeDCG(recommendedItems, testRatings);

    final double ratio = actualGain / idealGain;
    context.add(ratio);
    return ratio;
}
/**
 * Load the recommender engine, predict for the user and items given in the options, and
 * print one line per prediction. If a print channel is selected and present on the result,
 * its value for each item is appended in parentheses.
 *
 * @throws IOException declared by this command's contract.
 * @throws RecommenderBuildException declared by this command's contract.
 * @throws UnsupportedOperationException if the recommender has no rating predictor.
 */
@Override
@SuppressWarnings({"rawtypes", "unchecked"})
public void execute() throws IOException, RecommenderBuildException {
    LenskitRecommenderEngine engine = loadEngine();
    long userId = options.getLong("user");
    List<Long> itemIds = options.get("items");

    LenskitRecommender recommender = engine.createRecommender();
    RatingPredictor predictor = recommender.getRatingPredictor();
    if (predictor == null) {
        logger.error("recommender has no rating predictor");
        throw new UnsupportedOperationException("no rating predictor");
    }

    logger.info("predicting {} items", itemIds.size());
    Symbol wantedChannel = getPrintChannel();

    Stopwatch stopwatch = Stopwatch.createStarted();
    SparseVector predictions = predictor.predict(userId, itemIds);

    // Find the channel whose raw symbol matches the requested one (the last match wins).
    Long2ObjectMap channelData = null;
    if (wantedChannel != null) {
        for (TypedSymbol candidate : predictions.getChannelSymbols()) {
            if (candidate.getRawSymbol().equals(wantedChannel)) {
                channelData = predictions.getChannel(candidate);
            }
        }
    }

    for (VectorEntry entry : predictions) {
        System.out.format(" %d: %.3f", entry.getKey(), entry.getValue());
        if (channelData != null) {
            System.out.format(" (%s)", channelData.get(entry.getKey()));
        }
        System.out.println();
    }

    stopwatch.stop();
    logger.info("predicted for {} items in {}", itemIds.size(), stopwatch);
}
/**
 * Compute the DCG of a list of items with respect to a value vector.
 *
 * <p>The first item contributes its value undiscounted; an item at rank {@code r >= 2}
 * contributes its value multiplied by {@code log(2) / log(r)}. Items missing from
 * {@code values} contribute 0.
 *
 * @param items The items, in rank order.
 * @param values The value of each item.
 * @return The accumulated gain.
 */
static double computeDCG(LongList items, SparseVector values) {
    final double lg2 = log(2);

    double total = 0;
    int rank = 0;
    for (LongIterator it = items.iterator(); it.hasNext(); ) {
        final double value = values.get(it.nextLong(), 0);
        rank += 1;
        total += (rank < 2) ? value : value * lg2 / log(rank);
    }
    return total;
}
/** The train function of OrdRec. Get all parameters after learning process. */ @SuppressWarnings("ConstantConditions") private void train(SparseVector ratings, MutableSparseVector scores) { Vector dbeta = Vector.createLength(beta.length()); double dt1; // n is the number of iteration; for (int j = 0; j < iterationCount; j++) { for (VectorEntry rating : ratings.fast()) { long iid = rating.getKey(); double score = scores.get(iid); int r = quantizer.index(rating.getValue()); double probEqualR = getProbEQ(score, r); double probLessR = getProbLE(score, r); double probLessR_1 = getProbLE(score, r - 1); dt1 = learningRate / probEqualR * (probLessR * (1 - probLessR) * derivateOfBeta(r, 0, t1) - probLessR_1 * (1 - probLessR_1) * derivateOfBeta(r - 1, 0, t1) - regTerm * t1); double dbetaK; for (int k = 0; k < beta.length(); k++) { dbetaK = learningRate / probEqualR * (probLessR * (1 - probLessR) * derivateOfBeta(r, k + 1, beta.get(k)) - probLessR_1 * (1 - probLessR_1) * derivateOfBeta(r - 1, k + 1, beta.get(k)) - regTerm * beta.get(k)); dbeta.set(k, dbetaK); } t1 = t1 + dt1; beta.add(dbeta); } } }
/**
 * Count the common keys between two vectors.
 *
 * <p>Walks both vectors' fast iterators in parallel, always advancing the side with the
 * smaller key and advancing both when the keys match.
 *
 * @param o The other vector.
 * @return The number of keys appearing in both this and the other vector.
 */
public int countCommonKeys(SparseVector o) {
    Iterator<VectorEntry> mine = fastIterator();
    Iterator<VectorEntry> theirs = o.fastIterator();

    VectorEntry left = mine.hasNext() ? mine.next() : null;
    VectorEntry right = theirs.hasNext() ? theirs.next() : null;

    int common = 0;
    while (left != null && right != null) {
        final long leftKey = left.getKey();
        final long rightKey = right.getKey();
        final boolean advanceLeft = leftKey <= rightKey;
        final boolean advanceRight = rightKey <= leftKey;

        if (advanceLeft && advanceRight) {
            // both flags set means the keys are equal
            common += 1;
        }
        if (advanceLeft) {
            left = mine.hasNext() ? mine.next() : null;
        }
        if (advanceRight) {
            right = theirs.hasNext() ? theirs.next() : null;
        }
    }
    return common;
}
/**
 * Compute the dot product between two vectors.
 *
 * <p>Only keys present in both vectors contribute to the result.
 *
 * @param o The other vector.
 * @return The dot (inner) product between this vector and {@code o}.
 */
public double dot(SparseVector o) {
    Iterator<VectorEntry> mine = fastIterator();
    Iterator<VectorEntry> theirs = o.fastIterator();

    VectorEntry left = mine.hasNext() ? mine.next() : null;
    VectorEntry right = theirs.hasNext() ? theirs.next() : null;

    double product = 0;
    while (left != null && right != null) {
        final long leftKey = left.getKey();
        final long rightKey = right.getKey();

        if (leftKey == rightKey) {
            // Read both values before advancing either iterator.
            product += left.getValue() * right.getValue();
            left = mine.hasNext() ? mine.next() : null;
            right = theirs.hasNext() ? theirs.next() : null;
        } else if (leftKey < rightKey) {
            left = mine.hasNext() ? mine.next() : null;
        } else {
            right = theirs.hasNext() ? theirs.next() : null;
        }
    }
    return product;
}
/** * Check that we score items but do not provide scores for items the user has previously rated. * User 5 has rated only item 8 previously. */ @Test public void testItemScorerChannels() { long[] items = {7, 8}; ItemItemScorer scorer = session.get(ItemItemScorer.class); assertThat(scorer, notNullValue()); SparseVector scores = scorer.score(5, LongArrayList.wrap(items)); assertThat(scores, notNullValue()); assertThat(scores.size(), equalTo(1)); assertThat(scores.get(7), not(notANumber())); assertThat( scores.getChannelVector(ItemItemScorer.NEIGHBORHOOD_SIZE_SYMBOL).get(7), closeTo(1.0, 1.0e-5)); assertThat(scores.containsKey(8), equalTo(false)); long[] items2 = {7, 8, 9}; scorer = session.get(ItemItemScorer.class); assertThat(scorer, notNullValue()); scores = scorer.score(2, LongArrayList.wrap(items2)); assertThat( scores.getChannelVector(ItemItemScorer.NEIGHBORHOOD_SIZE_SYMBOL).get(9), closeTo(3.0, 1.0e-5)); // 1, 7, 8 }
/**
 * Get the number of entries.
 *
 * @return The size reported by the backing vector.
 */
@Override
public int size() {
    return vector.size();
}
/** A default-constructed immutable vector is empty and falls back to the supplied default. */
@Test
public void testEmptyConstructor() {
    SparseVector empty = new ImmutableSparseVector();

    assertThat(empty.isEmpty(), equalTo(true));
    // no key is present, so the NaN default comes back
    assertThat(empty.get(15, Double.NaN), notANumber());
}
/**
 * Query whether a key is present.
 *
 * @param key The key to look for.
 * @return {@code true} if the backing vector contains {@code key}.
 */
@Override
public boolean containsKey(long key) {
    return vector.containsKey(key);
}
/**
 * Get the values as an object collection.
 *
 * @return The backing vector's values, wrapped by {@code CollectionUtils.objectCollection}.
 */
@Override
public ObjectCollection<Double> values() {
    return CollectionUtils.objectCollection(vector.values());
}
/**
 * Get the set of keys.
 *
 * @return The key set of the backing vector.
 */
@Override
public LongSet keySet() {
    return vector.keySet();
}
/** The global-mean scorer scores item 2 for user 10 with the mean of the ratings data. */
@Test
public void testMeanBaseline() {
    ItemScorer scorer = makeGlobalMean();

    SparseVector result = scorer.score(10L, itemSet(2L));

    assertEquals(RATINGS_DAT_MEAN, result.get(2L), 0.00001);
}
/** Summarizing a history with no events yields an empty vector. */
@Test
public void testSummarizeEmpty() {
    SparseVector summary = sum.summarize(History.forUser(42));

    assertThat(summary.size(), equalTo(0));
}
/** A history containing only a rating (no likes) yields an empty vector. */
@Test
public void testSummarizeNoLike() {
    SparseVector summary =
            sum.summarize(History.forUser(42, Rating.create(42, 39, 2.5)));

    assertThat(summary.size(), equalTo(0));
}