/**
 * Computes a weighted, log-scaled overlap score between the users in {@code preferring1} and the
 * users holding a preference for {@code itemID2}.
 *
 * <p>Each user contributes a weight of {@code numItems / mUserPrefNum.get(userID)}
 * (NOTE(review): {@code mUserPrefNum} presumably maps a user to its preference count; confirm
 * against its declaration). The result is {@code log(sharedWeight) / log(totalWeight)}, where
 * {@code sharedWeight} sums the weights of users present in both groups and {@code totalWeight}
 * sums the weights of users present in either group.
 *
 * @param itemID1 not read by this method; {@code preferring1} is used instead
 *     (presumably the users preferring this item; verify against the caller)
 * @param itemID2 item whose preferences are fetched from the data model
 * @param preferring1 set of user IDs compared against the users of {@code itemID2}
 * @return the log ratio described above, or {@link Double#NaN} when no user is shared
 * @throws TasteException if the data model cannot be queried
 */
private double doItemSimilarity(long itemID1, long itemID2, FastIDSet preferring1) throws TasteException {
  double sharedWeight = 0.0;
  double totalWeight = 0.0;

  // Pass 1: users of item 2. A shared user is subtracted and re-added here so that its net
  // contribution in this pass is zero; pass 2 then counts it exactly once.
  for (Preference preference : getDataModel().getPreferencesForItem(itemID2)) {
    long user = preference.getUserID();
    double userWeight = (double) getDataModel().getNumItems() / mUserPrefNum.get(user);
    if (preferring1.contains(user)) {
      sharedWeight += userWeight;
      totalWeight -= userWeight;
    }
    totalWeight += userWeight;
  }

  // Pass 2: every user in preferring1 contributes to the total.
  LongPrimitiveIterator users = preferring1.iterator();
  while (users.hasNext()) {
    long user = users.nextLong();
    double userWeight = (double) getDataModel().getNumItems() / mUserPrefNum.get(user);
    totalWeight += userWeight;
  }

  return sharedWeight == 0 ? Double.NaN : Math.log(sharedWeight) / Math.log(totalWeight);
}
/**
 * Computes a weighted, log-scaled overlap score between the item sets of two users.
 *
 * <p>Each item contributes a weight of {@code numUsers / mItemPrefNum.get(itemID)}
 * (NOTE(review): {@code mItemPrefNum} presumably maps an item to its preference count; confirm
 * against its declaration). The result is {@code log(sharedWeight) / log(totalWeight)}, where
 * {@code sharedWeight} sums the weights of items both users have and {@code totalWeight} sums
 * the weights of items either user has.
 *
 * @param userID1 first user
 * @param userID2 second user
 * @return {@link Double#NaN} when both users have no items or when they share no item;
 *     {@code 0.0} when exactly one of them has no items; otherwise the log ratio described above
 * @throws TasteException if the data model cannot be queried
 */
@Override
public double userSimilarity(long userID1, long userID2) throws TasteException {
  DataModel dataModel = getDataModel();
  FastIDSet xPrefs = dataModel.getItemIDsFromUser(userID1);
  FastIDSet yPrefs = dataModel.getItemIDsFromUser(userID2);

  int xPrefsSize = xPrefs.size();
  int yPrefsSize = yPrefs.size();
  if (xPrefsSize == 0 && yPrefsSize == 0) {
    return Double.NaN;
  }
  if (xPrefsSize == 0 || yPrefsSize == 0) {
    return 0.0;
  }

  // Loop-invariant. Both sets are non-empty at this point, so this value is always needed.
  double numUsers = dataModel.getNumUsers();

  double intersection = 0.0;
  double union = 0.0;

  // Pass 1: items of user 1. A shared item is subtracted and re-added here so that its net
  // contribution in this pass is zero; pass 2 then counts it exactly once.
  for (LongPrimitiveIterator itemIterator = xPrefs.iterator(); itemIterator.hasNext(); ) {
    long itemID = itemIterator.nextLong();
    double weight = numUsers / mItemPrefNum.get(itemID);
    if (yPrefs.contains(itemID)) {
      intersection += weight;
      union -= weight;
    }
    union += weight;
  }

  // Pass 2: every item of user 2 contributes to the union.
  for (LongPrimitiveIterator itemIterator = yPrefs.iterator(); itemIterator.hasNext(); ) {
    long itemID = itemIterator.nextLong();
    double weight = numUsers / mItemPrefNum.get(itemID);
    union += weight;
  }

  // Without this guard Math.log(0) yields -Infinity and the method would report an infinite
  // (or accidentally NaN) similarity for users that simply have nothing in common.
  if (intersection == 0) {
    return Double.NaN;
  }
  return Math.log(intersection) / Math.log(union);
}
/**
 * Builds a preference array with one entry per user recorded for the given item.
 *
 * @param itemID item to look up
 * @return a {@link BooleanItemPreferenceArray} whose entries each carry a user ID from the
 *     item's user set together with {@code itemID}
 * @throws NoSuchItemException if no user set is stored for {@code itemID}
 */
@Override
public PreferenceArray getPreferencesForItem(long itemID) throws NoSuchItemException {
  FastIDSet usersOfItem = preferenceForItems.get(itemID);
  if (usersOfItem == null) {
    throw new NoSuchItemException(itemID);
  }

  PreferenceArray result = new BooleanItemPreferenceArray(usersOfItem.size());
  LongPrimitiveIterator userIterator = usersOfItem.iterator();
  for (int index = 0; userIterator.hasNext(); index++) {
    result.setUserID(index, userIterator.nextLong());
    result.setItemID(index, itemID);
  }
  return result;
}
/**
 * Produces up to {@code howMany} recommendations for a user.
 *
 * <p>Candidate items come from {@code getAllOtherItems}; they are scored with an
 * {@code Estimator} bound to the user and ranked by {@code TopItems.getTopItems}.
 *
 * @param userID user to recommend for
 * @param howMany number of recommendations requested; must be at least 1
 * @param rescorer rescorer handed through to {@code TopItems.getTopItems}
 * @param includeKnownItems handed through to {@code getAllOtherItems}
 * @return the list returned by {@code TopItems.getTopItems}
 * @throws TasteException if the data model cannot be queried
 */
@Override
public List<RecommendedItem> recommend(
    long userID, int howMany, IDRescorer rescorer, boolean includeKnownItems)
    throws TasteException {
  Preconditions.checkArgument(howMany >= 1, "howMany must be at least 1");
  log.debug("Recommending items for user ID '{}'", userID);

  PreferenceArray userPrefs = getDataModel().getPreferencesFromUser(userID);
  FastIDSet candidateItemIDs = getAllOtherItems(userID, userPrefs, includeKnownItems);
  TopItems.Estimator<Long> scorer = new Estimator(userID);

  List<RecommendedItem> recommendations =
      TopItems.getTopItems(howMany, candidateItemIDs.iterator(), rescorer, scorer);
  log.debug("Recommendations are: {}", recommendations);
  return recommendations;
}
/**
 * Emits one record for every pair of distinct vertex IDs seen in {@code vertices}.
 *
 * <p>Each incoming vertex is paired with every ID buffered from earlier iterations. For each
 * pair, an {@link UndirectedEdge} between the two is wrapped in a
 * {@link JoinableUndirectedEdge} (constructed with {@code false}) and written as the key, with
 * a {@link VertexOrMarker} wrapping {@code vertex} as the value. Only vertex IDs are buffered,
 * not the {@link Vertex} objects themselves.
 *
 * @param vertex the reduce key; carried in every emitted value
 * @param vertices the vertices to pair up with each other
 * @param ctx context the records are written to
 * @throws IOException propagated from {@code ctx.write}
 * @throws InterruptedException propagated from {@code ctx.write}
 */
@Override
protected void reduce(Vertex vertex, Iterable<Vertex> vertices, Context ctx)
    throws IOException, InterruptedException {
  int emitted = 0;
  FastIDSet seenVertexIDs = new FastIDSet();

  for (Vertex current : vertices) {
    // Pair the current vertex with everything seen before it.
    for (LongPrimitiveIterator earlier = seenVertexIDs.iterator(); earlier.hasNext(); ) {
      Vertex other = new Vertex(earlier.nextLong());
      JoinableUndirectedEdge key =
          new JoinableUndirectedEdge(new UndirectedEdge(current, other), false);
      VertexOrMarker value = new VertexOrMarker(vertex);
      log.trace("{} -> {}", key, value);
      emitted++;
      ctx.write(key, value);
    }
    seenVertexIDs.add(current.getId());
  }

  log.debug("{} triads on vertex {}", emitted, vertex);
}
/**
 * Creates a new {@link GenericBooleanPrefDataModel} from the given users (and their preferences).
 * This {@link DataModel} retains all this information in memory and is effectively immutable.
 *
 * <p>Besides storing {@code userData} as-is, the constructor builds the inverse index
 * (item ID to the set of user IDs that have it) and sorted arrays of all item IDs and all
 * user IDs.
 *
 * @param userData users to include
 * @param timestamps optionally, provided timestamps of preferences as milliseconds since the
 *     epoch. User IDs are mapped to maps of item IDs to Long timestamps.
 * @throws IllegalArgumentException if {@code userData} is null
 */
public GenericBooleanPrefDataModel(
    FastByIDMap<FastIDSet> userData, FastByIDMap<FastByIDMap<Long>> timestamps) {
  Preconditions.checkArgument(userData != null, "userData is null");

  this.preferenceFromUsers = userData;
  this.preferenceForItems = new FastByIDMap<>();

  // Invert user -> items into item -> users while collecting every distinct item ID.
  FastIDSet itemIDSet = new FastIDSet();
  for (Map.Entry<Long, FastIDSet> entry : preferenceFromUsers.entrySet()) {
    long userID = entry.getKey();
    FastIDSet itemsOfUser = entry.getValue();
    itemIDSet.addAll(itemsOfUser);

    LongPrimitiveIterator itemIterator = itemsOfUser.iterator();
    while (itemIterator.hasNext()) {
      long itemID = itemIterator.nextLong();
      FastIDSet usersOfItem = preferenceForItems.get(itemID);
      if (usersOfItem == null) {
        usersOfItem = new FastIDSet(2);
        preferenceForItems.put(itemID, usersOfItem);
      }
      usersOfItem.add(userID);
    }
  }

  this.itemIDs = itemIDSet.toArray();
  itemIDSet = null; // Might help GC -- this is big
  Arrays.sort(this.itemIDs);

  this.userIDs = new long[userData.size()];
  int i = 0;
  LongPrimitiveIterator userIterator = userData.keySetIterator();
  while (userIterator.hasNext()) {
    // nextLong() reads the primitive directly; next() would box to Long and unbox again.
    this.userIDs[i++] = userIterator.nextLong();
  }
  Arrays.sort(this.userIDs);

  this.timestamps = timestamps;
}