private double doItemSimilarity(long itemID1, long itemID2, FastIDSet preferring1) throws TasteException { double intersection = 0.0; double union = 0.0; for (Preference pref : getDataModel().getPreferencesForItem(itemID2)) { long userID = pref.getUserID(); double weight = (double) getDataModel().getNumItems() / mUserPrefNum.get(userID); if (preferring1.contains(userID)) { intersection += weight; union -= weight; } union += weight; } for (LongPrimitiveIterator it_user = preferring1.iterator(); it_user.hasNext(); ) { long userID = (long) it_user.nextLong(); double weight = (double) getDataModel().getNumItems() / mUserPrefNum.get(userID); union += weight; } if (intersection == 0) { return Double.NaN; } return Math.log(intersection) / Math.log(union); }
/** * tests {@link UserVectorSplitterMapper} in the special case that some userIDs shall be excluded */ @Test public void testUserVectorSplitterMapperUserExclusion() throws Exception { Mapper<VarLongWritable, VectorWritable, VarIntWritable, VectorOrPrefWritable>.Context context = EasyMock.createMock(Mapper.Context.class); context.write( EasyMock.eq(new VarIntWritable(34)), prefOfVectorOrPrefWritableMatches(123L, 0.5f)); context.write( EasyMock.eq(new VarIntWritable(56)), prefOfVectorOrPrefWritableMatches(123L, 0.7f)); EasyMock.replay(context); FastIDSet usersToRecommendFor = new FastIDSet(); usersToRecommendFor.add(123L); UserVectorSplitterMapper mapper = new UserVectorSplitterMapper(); setField(mapper, "maxPrefsPerUserConsidered", 10); setField(mapper, "usersToRecommendFor", usersToRecommendFor); RandomAccessSparseVector vector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100); vector.set(34, 0.5); vector.set(56, 0.7); mapper.map(new VarLongWritable(123L), new VectorWritable(vector), context); mapper.map(new VarLongWritable(456L), new VectorWritable(vector), context); EasyMock.verify(context); }
private FastIDSet toUserFastIDSet(PreferenceArray array) { FastIDSet fastIDSet = new FastIDSet(); for (Preference preference : array) { fastIDSet.add(preference.getUserID()); } return fastIDSet; }
@Override public Float getPreferenceValue(long userID, long itemID) throws NoSuchUserException { FastIDSet itemIDs = preferenceFromUsers.get(userID); if (itemIDs == null) { throw new NoSuchUserException(userID); } if (itemIDs.contains(itemID)) { return 1.0f; } return null; }
public static FastByIDMap<FastIDSet> toDataMap(FastByIDMap<PreferenceArray> data) { for (Map.Entry<Long, Object> entry : ((FastByIDMap<Object>) (FastByIDMap<?>) data).entrySet()) { PreferenceArray prefArray = (PreferenceArray) entry.getValue(); int size = prefArray.length(); FastIDSet itemIDs = new FastIDSet(size); for (int i = 0; i < size; i++) { itemIDs.add(prefArray.getItemID(i)); } entry.setValue(itemIDs); } return (FastByIDMap<FastIDSet>) (FastByIDMap<?>) data; }
@Override public PreferenceArray getPreferencesForItem(long itemID) throws NoSuchItemException { FastIDSet userIDs = preferenceForItems.get(itemID); if (userIDs == null) { throw new NoSuchItemException(itemID); } PreferenceArray prefArray = new BooleanItemPreferenceArray(userIDs.size()); int i = 0; LongPrimitiveIterator it = userIDs.iterator(); while (it.hasNext()) { prefArray.setUserID(i, it.nextLong()); prefArray.setItemID(i, itemID); i++; } return prefArray; }
@Test public void testStrategy() throws TasteException { FastIDSet itemIDsFromUser123 = new FastIDSet(); itemIDsFromUser123.add(1L); FastIDSet itemIDsFromUser456 = new FastIDSet(); itemIDsFromUser456.add(1L); itemIDsFromUser456.add(2L); List<Preference> prefs = new ArrayList<Preference>(); prefs.add(new GenericPreference(123L, 1L, 1.0f)); prefs.add(new GenericPreference(456L, 1L, 1.0f)); PreferenceArray preferencesForItem1 = new GenericItemPreferenceArray(prefs); DataModel dataModel = EasyMock.createMock(DataModel.class); EasyMock.expect(dataModel.getPreferencesForItem(1L)).andReturn(preferencesForItem1); EasyMock.expect(dataModel.getItemIDsFromUser(123L)).andReturn(itemIDsFromUser123); EasyMock.expect(dataModel.getItemIDsFromUser(456L)).andReturn(itemIDsFromUser456); PreferenceArray prefArrayOfUser123 = new GenericUserPreferenceArray(Arrays.asList(new GenericPreference(123L, 1L, 1.0f))); CandidateItemsStrategy strategy = new PreferredItemsNeighborhoodCandidateItemsStrategy(); EasyMock.replay(dataModel); FastIDSet candidateItems = strategy.getCandidateItems(123L, prefArrayOfUser123, dataModel); assertEquals(1, candidateItems.size()); assertTrue(candidateItems.contains(2L)); EasyMock.verify(dataModel); }
@Override public List<RecommendedItem> recommend( long userID, int howMany, IDRescorer rescorer, boolean includeKnownItems) throws TasteException { Preconditions.checkArgument(howMany >= 1, "howMany must be at least 1"); log.debug("Recommending items for user ID '{}'", userID); PreferenceArray preferencesFromUser = getDataModel().getPreferencesFromUser(userID); FastIDSet possibleItemIDs = getAllOtherItems(userID, preferencesFromUser, includeKnownItems); TopItems.Estimator<Long> estimator = new Estimator(userID); List<RecommendedItem> topItems = TopItems.getTopItems(howMany, possibleItemIDs.iterator(), rescorer, estimator); log.debug("Recommendations are: {}", topItems); return topItems; }
@Override protected void reduce(Vertex vertex, Iterable<Vertex> vertices, Context ctx) throws IOException, InterruptedException { int howmany = 0; FastIDSet bufferedVertexIDs = new FastIDSet(); for (Vertex firstVertexOfMissingEdge : vertices) { LongPrimitiveIterator bufferedVertexIdsIterator = bufferedVertexIDs.iterator(); while (bufferedVertexIdsIterator.hasNext()) { Vertex secondVertexOfMissingEdge = new Vertex(bufferedVertexIdsIterator.nextLong()); UndirectedEdge missingEdge = new UndirectedEdge(firstVertexOfMissingEdge, secondVertexOfMissingEdge); JoinableUndirectedEdge key = new JoinableUndirectedEdge(missingEdge, false); VertexOrMarker value = new VertexOrMarker(vertex); log.trace("{} -> {}", key, value); howmany++; ctx.write(key, value); } bufferedVertexIDs.add(firstVertexOfMissingEdge.getId()); } log.debug("{} triads on vertex {}", howmany, vertex); }
@Override public double userSimilarity(long userID1, long userID2) throws TasteException { FastIDSet prefs1 = dataModel.getItemIDsFromUser(userID1); FastIDSet prefs2 = dataModel.getItemIDsFromUser(userID2); int prefs1Size = prefs1.size(); int prefs2Size = prefs2.size(); int intersectionSize = prefs1Size < prefs2Size ? prefs2.intersectionSize(prefs1) : prefs1.intersectionSize(prefs2); if (intersectionSize == 0) { return Double.NaN; } // int numItems = dataModel.getNumItems(); // int numItems = Math.max(prefs1Size, prefs2Size); // int numItems = prefs1Size; // double distance1 = (double) (prefs1Size - intersectionSize)/(double) 2; // double distance2 = (double) (prefs2Size - intersectionSize)/(double) 2; // double distance = (distance1 + distance2); double distance = (double) (prefs1Size - intersectionSize); double similarity = 1.0 / (1.0 + distance); ////// System.out.println( prefs1Size + ", " + prefs2Size + ", " + intersectionSize + ", " + // similarity); return similarity; }
@Override public int getNumUsersWithPreferenceFor(long itemID1, long itemID2) { FastIDSet userIDs1 = preferenceForItems.get(itemID1); if (userIDs1 == null) { return 0; } FastIDSet userIDs2 = preferenceForItems.get(itemID2); if (userIDs2 == null) { return 0; } return userIDs1.size() < userIDs2.size() ? userIDs2.intersectionSize(userIDs1) : userIDs1.intersectionSize(userIDs2); }
@Override public double userSimilarity(long userID1, long userID2) throws TasteException { FastIDSet prefs1 = dataModel.getItemIDsFromUser(userID1); FastIDSet prefs2 = dataModel.getItemIDsFromUser(userID2); int prefs1Size = prefs1.size(); int prefs2Size = prefs2.size(); int intersectionSize = prefs1Size < prefs2Size ? prefs2.intersectionSize(prefs1) : prefs1.intersectionSize(prefs2); if (intersectionSize == 0) { return Double.NaN; } int numItems = dataModel.getNumItems(); double logLikelihood = twoLogLambda( intersectionSize, prefs1Size - intersectionSize, prefs2Size, numItems - prefs2Size); return 1.0 - 1.0 / (1.0 + logLikelihood); }
/** * Creates a new {@link GenericDataModel} from the given users (and their preferences). This * {@link DataModel} retains all this information in memory and is effectively immutable. * * @param userData users to include * @param timestamps optionally, provided timestamps of preferences as milliseconds since the * epoch. User IDs are mapped to maps of item IDs to Long timestamps. */ public GenericBooleanPrefDataModel( FastByIDMap<FastIDSet> userData, FastByIDMap<FastByIDMap<Long>> timestamps) { Preconditions.checkArgument(userData != null, "userData is null"); this.preferenceFromUsers = userData; this.preferenceForItems = new FastByIDMap<>(); FastIDSet itemIDSet = new FastIDSet(); for (Map.Entry<Long, FastIDSet> entry : preferenceFromUsers.entrySet()) { long userID = entry.getKey(); FastIDSet itemIDs = entry.getValue(); itemIDSet.addAll(itemIDs); LongPrimitiveIterator it = itemIDs.iterator(); while (it.hasNext()) { long itemID = it.nextLong(); FastIDSet userIDs = preferenceForItems.get(itemID); if (userIDs == null) { userIDs = new FastIDSet(2); preferenceForItems.put(itemID, userIDs); } userIDs.add(userID); } } this.itemIDs = itemIDSet.toArray(); itemIDSet = null; // Might help GC -- this is big Arrays.sort(itemIDs); this.userIDs = new long[userData.size()]; int i = 0; LongPrimitiveIterator it = userData.keySetIterator(); while (it.hasNext()) { userIDs[i++] = it.next(); } Arrays.sort(userIDs); this.timestamps = timestamps; }
@Override public double userSimilarity(long userID1, long userID2) throws TasteException { DataModel dataModel = getDataModel(); FastIDSet xPrefs = dataModel.getItemIDsFromUser(userID1); FastIDSet yPrefs = dataModel.getItemIDsFromUser(userID2); int xPrefsSize = xPrefs.size(); int yPrefsSize = yPrefs.size(); if (xPrefsSize == 0 && yPrefsSize == 0) { return Double.NaN; } if (xPrefsSize == 0 || yPrefsSize == 0) { return 0.0; } double intersection = 0.0; double union = 0.0; for (LongPrimitiveIterator it_item = xPrefs.iterator(); it_item.hasNext(); ) { long itemID = (long) it_item.nextLong(); double weight = (double) getDataModel().getNumUsers() / mItemPrefNum.get(itemID); if (yPrefs.contains(itemID)) { intersection += weight; union -= weight; } union += weight; } for (LongPrimitiveIterator it_item = yPrefs.iterator(); it_item.hasNext(); ) { long itemID = (long) it_item.nextLong(); double weight = (double) getDataModel().getNumUsers() / mItemPrefNum.get(itemID); union += weight; } return Math.log(intersection) / Math.log(union); }
@Override public int getNumUsersWithPreferenceFor(long itemID) { FastIDSet userIDs1 = preferenceForItems.get(itemID); return userIDs1 == null ? 0 : userIDs1.size(); }