/**
 * Builds a {@link LocationSensitiveHash} over {@code Y}, collects the IDs of every candidate it
 * proposes for {@code userVec}, and compares them against the IDs returned by
 * {@code findTopRecommendations(Y, userVec)}.
 *
 * @param Y item ID to item feature vector
 * @param userVec the single user vector to look up candidates for
 * @return a three-element array: the fraction of top recommendations that appear among the
 *     candidates, the rank-discounted score the caller reports as nDCG, and the fraction of all
 *     items in {@code Y} that were proposed as candidates
 */
private static double[] doTestRandomVecs(LongObjectMap<float[]> Y, float[] userVec) {
  // NOTE(review): 0.1 and 20 are presumably the sample ratio and number of hashes -- confirm
  // against the LocationSensitiveHash constructor.
  LocationSensitiveHash lsh = new LocationSensitiveHash(Y, 0.1, 20);
  float[][] userVecs = {userVec};

  // Drain every candidate iterator and remember which item IDs were proposed.
  LongSet candidates = new LongSet();
  for (Iterator<LongObjectMap.MapEntry<float[]>> it : lsh.getCandidateIterator(userVecs)) {
    while (it.hasNext()) {
      LongObjectMap.MapEntry<float[]> candidate = it.next();
      candidates.add(candidate.getKey());
    }
  }

  List<Long> topIDs = findTopRecommendations(Y, userVec);
  int numTop = topIDs.size();

  int hits = 0;
  double score = 0.0;
  double maxScore = 0.0;
  int rank = 0;
  for (long id : topIDs) {
    // Gain shrinks with rank: LN2 / ln(2 + rank)
    double gain = LN2 / Math.log(2.0 + rank);
    if (candidates.contains(id)) {
      hits++;
      score += gain;
    }
    maxScore += gain;
    rank++;
  }

  double percentTopRecsConsidered = (double) hits / numTop;
  double ndcg;
  if (maxScore == 0.0) {
    ndcg = 0.0;
  } else {
    ndcg = score / maxScore;
  }
  double percentAllItemsConsidered = (double) candidates.size() / Y.size();

  double[] results = new double[3];
  results[0] = percentTopRecsConsidered;
  results[1] = ndcg;
  results[2] = percentAllItemsConsidered;
  return results;
}
/**
 * Records that {@code numericID} maps back to {@code id}. The entry is stored in the reverse
 * mapping while holding the write lock.
 *
 * @param id the {@link String} ID to associate with {@code numericID}
 * @param numericID explicit, caller-supplied hash of {@code id}, used as the key in the reverse
 *     mapping
 */
public void addMapping(String id, long numericID) {
  Lock exclusive = lock.writeLock();
  exclusive.lock();
  try {
    reverseMapping.put(numericID, id);
  } finally {
    // Always release, even if the put fails
    exclusive.unlock();
  }
}
/**
 * Looks up the {@link String} ID recorded for a numeric value, reading the reverse mapping under
 * the read lock.
 *
 * @param numericID hash value to map back to a {@link String}
 * @return the {@link String} stored for {@code numericID}, or, when the reverse mapping has no
 *     entry for it, the decimal representation of {@code numericID} itself
 */
public String toString(long numericID) {
  Lock shared = lock.readLock();
  String mapped;
  shared.lock();
  try {
    mapped = reverseMapping.get(numericID);
  } finally {
    shared.unlock();
  }
  if (mapped != null) {
    return mapped;
  }
  // No known mapping: fall back to the number rendered as text
  return Long.toString(numericID);
}
/**
 * Runs {@code ITERATIONS} trials of {@code doTestRandomVecs} over freshly generated random unit
 * vectors, logs each trial's metrics, and asserts on their averages: more than 0.8 of the top
 * recommendations found, the nDCG figure above 0.8, and fewer than 0.09 of all items considered.
 */
@Test
public void testLSH() {
  RandomGenerator random = RandomManager.getRandom();
  Mean topRecsMean = new Mean();
  Mean ndcgMean = new Mean();
  Mean allItemsMean = new Mean();

  for (int iteration = 0; iteration < ITERATIONS; iteration++) {
    // Item vectors are drawn first, then the user vector, all from the same generator
    LongObjectMap<float[]> Y = new LongObjectMap<float[]>();
    for (int itemID = 0; itemID < NUM_ITEMS; itemID++) {
      float[] itemVec = RandomUtils.randomUnitVector(NUM_FEATURES, random);
      Y.put(itemID, itemVec);
    }
    float[] userVec = RandomUtils.randomUnitVector(NUM_FEATURES, random);

    // results = {fraction of top recs found, nDCG, fraction of all items considered}
    double[] results = doTestRandomVecs(Y, userVec);
    double topRecsFraction = results[0];
    double ndcg = results[1];
    double allItemsFraction = results[2];

    log.info(
        "Considered {}% of all candidates, {} nDCG, got {}% recommendations correct",
        100 * allItemsFraction,
        ndcg,
        100 * topRecsFraction);

    topRecsMean.increment(topRecsFraction);
    ndcgMean.increment(ndcg);
    allItemsMean.increment(allItemsFraction);
  }

  double avgTopRecs = topRecsMean.getResult();
  double avgNDCG = ndcgMean.getResult();
  double avgAllItems = allItemsMean.getResult();

  log.info("{}", avgTopRecs);
  log.info("{}", avgNDCG);
  log.info("{}", avgAllItems);

  assertTrue(avgTopRecs > 0.8);
  assertTrue(avgNDCG > 0.8);
  assertTrue(avgAllItems < 0.09);
}
/**
 * Scores every item in {@code Y} by its dot product with {@code userVec} and returns the IDs of
 * the highest-scoring items, best first.
 *
 * <p>IDs are grouped by score rather than stored one-per-score, so items whose dot products are
 * exactly equal are all retained instead of overwriting one another. Items with equal scores
 * appear in the order in which {@code Y.entrySet()} yielded them.
 *
 * @param Y item ID to item feature vector
 * @param userVec user feature vector to score each item against
 * @return item IDs in descending score order; the list is cut off as soon as it holds
 *     {@code NUM_RECS} IDs, and otherwise contains every item in {@code Y}
 */
private static List<Long> findTopRecommendations(LongObjectMap<float[]> Y, float[] userVec) {
  // Reverse natural ordering: iteration visits the largest dot product first
  SortedMap<Double, List<Long>> idsByScore = Maps.newTreeMap(Collections.reverseOrder());
  for (LongObjectMap.MapEntry<float[]> entry : Y.entrySet()) {
    double dot = SimpleVectorMath.dot(entry.getValue(), userVec);
    List<Long> tiedIDs = idsByScore.get(dot);
    if (tiedIDs == null) {
      tiedIDs = Lists.newArrayList();
      idsByScore.put(dot, tiedIDs);
    }
    tiedIDs.add(entry.getKey());
  }

  List<Long> topRecommendations = Lists.newArrayList();
  for (List<Long> tiedIDs : idsByScore.values()) {
    for (Long id : tiedIDs) {
      topRecommendations.add(id);
      if (topRecommendations.size() == NUM_RECS) {
        return topRecommendations;
      }
    }
  }
  return topRecommendations;
}
/** * @return an implementation of {@link CandidateFilter} chosen per above. It will be non-null. * @param Y item-feature matrix * @param yReadLock read lock that should be acquired to access {@code Y} */ public CandidateFilter buildCandidateFilter(LongObjectMap<float[]> Y, Lock yReadLock) { Preconditions.checkNotNull(Y); if (!Y.isEmpty()) { yReadLock.lock(); try { if (candidateFilterClassName != null) { return ClassUtils.loadInstanceOf( candidateFilterClassName, CandidateFilter.class, new Class<?>[] {LongObjectMap.class}, new Object[] {Y}); } // LSH is a bit of a special case, handled here if (lshSampleRatio < 1.0) { return new LocationSensitiveHashFilter(Y, lshSampleRatio, numHashes); } } finally { yReadLock.unlock(); } } return new IdentityCandidateFilter(Y); }
/**
 * Reports how many entries the reverse mapping currently holds.
 *
 * <p>The read lock is held while querying, matching how {@code addMapping} and
 * {@code toString(long)} guard access to the same map, so the count is not read while a write
 * is in progress.
 *
 * @return number of entries in the reverse mapping
 */
public int size() {
  Lock readLock = lock.readLock();
  readLock.lock();
  try {
    return reverseMapping.size();
  } finally {
    readLock.unlock();
  }
}