/**
 * Computes a weighted, log-scaled overlap similarity between two items.
 *
 * <p>Each user contributes a weight of {@code numItems / mUserPrefNum.get(userID)}. The
 * {@code intersection} sums the weights of users that appear both in {@code preferring1} and among
 * the preferences for {@code itemID2}; the {@code union} sums the weights of users that appear in
 * either. The result is {@code log(intersection) / log(union)}.
 *
 * @param itemID1 ID of the first item; not read by this method, which relies on
 *     {@code preferring1} instead
 * @param itemID2 ID of the second item, whose preferences are fetched from the data model
 * @param preferring1 user IDs to compare against (presumably the users who prefer
 *     {@code itemID1} -- confirm against the caller)
 * @return the similarity, or {@link Double#NaN} when the two user sets share no user
 * @throws TasteException if the data model cannot be read
 */
private double doItemSimilarity(long itemID1, long itemID2, FastIDSet preferring1)
        throws TasteException {
    // Numerator of every user weight; it does not change while iterating, so fetch it once.
    double numItems = getDataModel().getNumItems();

    double intersection = 0.0;
    double union = 0.0;

    for (Preference pref : getDataModel().getPreferencesForItem(itemID2)) {
        long userID = pref.getUserID();
        double weight = numItems / mUserPrefNum.get(userID);
        if (preferring1.contains(userID)) {
            // Shared users are added to the union by the second loop, so only count the overlap here.
            intersection += weight;
        } else {
            union += weight;
        }
    }

    for (LongPrimitiveIterator userIt = preferring1.iterator(); userIt.hasNext(); ) {
        long userID = userIt.nextLong();
        union += numItems / mUserPrefNum.get(userID);
    }

    if (intersection == 0) {
        return Double.NaN;
    }
    return Math.log(intersection) / Math.log(union);
}
/**
 * Builds a map from every user ID in the data model to the item IDs the model reports for that
 * user.
 *
 * @param dataModel source of the user IDs and of each user's item IDs
 * @return a {@link FastByIDMap} keyed by user ID whose values are the {@link FastIDSet}s returned
 *     by {@code dataModel.getItemIDsFromUser(userID)}
 * @throws TasteException if the data model reports an error
 */
public static FastByIDMap<FastIDSet> toDataMap(DataModel dataModel) throws TasteException {
    FastByIDMap<FastIDSet> itemsByUser = new FastByIDMap<>(dataModel.getNumUsers());
    for (LongPrimitiveIterator users = dataModel.getUserIDs(); users.hasNext(); ) {
        long user = users.nextLong();
        FastIDSet items = dataModel.getItemIDsFromUser(user);
        itemsByUser.put(user, items);
    }
    return itemsByUser;
}
/**
 * Copies the keys of the given map into an array, in the order the map's key iterator yields
 * them.
 *
 * @param input map whose keys are copied
 * @return a new array holding one entry per key
 * @throws IllegalStateException via {@code Preconditions.checkState} if the iterator yields fewer
 *     keys than {@code input.size()} reported
 */
private static long[] idsInOrder(FastByIDMap<?> input) {
    int expected = input.size();
    long[] ids = new long[expected];
    int filled = 0;
    for (LongPrimitiveIterator keys = input.keySetIterator(); keys.hasNext(); filled++) {
        ids[filled] = keys.nextLong();
    }
    // The iterator must have produced exactly as many keys as the map claimed to hold.
    Preconditions.checkState(expected == filled);
    return ids;
}
/**
 * Returns one preference entry per user recorded for the given item.
 *
 * @param itemID item whose users are looked up in {@code preferenceForItems}
 * @return a {@code BooleanItemPreferenceArray} with one (user ID, item ID) entry per recorded user
 * @throws NoSuchItemException if {@code preferenceForItems} has no entry for {@code itemID}
 */
@Override
public PreferenceArray getPreferencesForItem(long itemID) throws NoSuchItemException {
    FastIDSet users = preferenceForItems.get(itemID);
    if (users == null) {
        throw new NoSuchItemException(itemID);
    }

    PreferenceArray result = new BooleanItemPreferenceArray(users.size());
    LongPrimitiveIterator userIt = users.iterator();
    for (int slot = 0; userIt.hasNext(); slot++) {
        result.setUserID(slot, userIt.nextLong());
        result.setItemID(slot, itemID);
    }
    return result;
}
/**
 * Loads {@code datafile/item.csv}, builds a user-based recommender on Euclidean-distance
 * similarity, and prints {@code RECOMMENDER_NUM} recommendations for every user on one line each.
 *
 * @param args unused
 * @throws IOException if the data file cannot be read
 * @throws TasteException if the recommender reports an error
 */
public static void main(String[] args) throws IOException, TasteException {
    DataModel model = new FileDataModel(new File("datafile/item.csv"));
    UserSimilarity similarity = new EuclideanDistanceSimilarity(model);
    NearestNUserNeighborhood neighborhood =
            new NearestNUserNeighborhood(NEIGHBORHOOD_NUM, similarity, model);
    Recommender recommender = new GenericUserBasedRecommender(model, neighborhood, similarity);

    for (LongPrimitiveIterator users = model.getUserIDs(); users.hasNext(); ) {
        long uid = users.nextLong();
        List<RecommendedItem> items = recommender.recommend(uid, RECOMMENDER_NUM);

        // Output format: uid:<id>(<item>,<score>)(<item>,<score>)...
        System.out.printf("uid:%s", uid);
        for (RecommendedItem item : items) {
            System.out.printf("(%s,%f)", item.getItemID(), item.getValue());
        }
        System.out.println();
    }
}
/** 对用户性别进行过滤 */ public static void filterGender( long uid, RecommenderBuilder recommenderBuilder, DataModel dataModel, String gender) throws TasteException, IOException { Set<Long> userids = getByGender("datafile/book/user.csv", gender); // 计算指定性别用户打分过的图书 Set<Long> bookids = new HashSet<Long>(); for (long uids : userids) { LongPrimitiveIterator iter = dataModel.getItemIDsFromUser(uids).iterator(); while (iter.hasNext()) { long bookid = iter.next(); bookids.add(bookid); } } IDRescorer rescorer = new FilterRescorer(bookids); List<RecommendedItem> list = recommenderBuilder.buildRecommender(dataModel).recommend(uid, RECOMMENDER_NUM, rescorer); RecommendFactory.showItems(uid, list, false); }
/**
 * Emits one record per pair of vertices received for the given key vertex.
 *
 * <p>Each incoming vertex is paired with every vertex ID buffered from earlier iterations. For
 * each pair an {@code UndirectedEdge} is wrapped in a {@code JoinableUndirectedEdge} (flag
 * {@code false}) and written with the key vertex wrapped in a {@code VertexOrMarker}.
 *
 * @param vertex key vertex, written as the value of every emitted record
 * @param vertices vertices grouped under {@code vertex}
 * @param ctx output context
 * @throws IOException if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
@Override
protected void reduce(Vertex vertex, Iterable<Vertex> vertices, Context ctx)
        throws IOException, InterruptedException {
    FastIDSet seenIds = new FastIDSet();
    int emitted = 0;

    for (Vertex first : vertices) {
        for (LongPrimitiveIterator seen = seenIds.iterator(); seen.hasNext(); ) {
            Vertex second = new Vertex(seen.nextLong());
            JoinableUndirectedEdge key =
                    new JoinableUndirectedEdge(new UndirectedEdge(first, second), false);
            VertexOrMarker value = new VertexOrMarker(vertex);
            log.trace("{} -> {}", key, value);
            emitted++;
            ctx.write(key, value);
        }
        // Buffer only after pairing, so a vertex is never paired with itself.
        seenIds.add(first.getId());
    }

    log.debug("{} triads on vertex {}", emitted, vertex);
}
/**
 * Feeds every preference in the data model into the running averages.
 *
 * <p>While holding the write lock of {@code buildAveragesLock}, each preference value is added to
 * {@code itemAverages} under its item ID, to {@code userAverages} under its user ID, and to
 * {@code overallAveragePrefValue}.
 *
 * @throws TasteException if the data model cannot be read
 */
private void buildAverageDiffs() throws TasteException {
    // Acquire before entering the try block: if lock() itself failed, the finally clause would
    // otherwise call unlock() on a lock this thread does not hold.
    buildAveragesLock.writeLock().lock();
    try {
        DataModel dataModel = getDataModel();
        LongPrimitiveIterator it = dataModel.getUserIDs();
        while (it.hasNext()) {
            long userID = it.nextLong();
            PreferenceArray prefs = dataModel.getPreferencesFromUser(userID);
            int size = prefs.length();
            for (int i = 0; i < size; i++) {
                long itemID = prefs.getItemID(i);
                float value = prefs.getValue(i);
                addDatumAndCreateIfNeeded(itemID, value, itemAverages);
                addDatumAndCreateIfNeeded(userID, value, userAverages);
                overallAveragePrefValue.addDatum(value);
            }
        }
    } finally {
        buildAveragesLock.writeLock().unlock();
    }
}
public static void main(String args[]) { try { // Loading the DATA; DataModel dm = new FileDataModel( new File( "C:\\Users\\bryce\\Course Work\\3. Full Summer\\Big Data\\Final Project\\Yelp\\FINAL CODE\\Mahout\\data\\busirec_new.csv")); // We use the below line to relate businesses. // ItemSimilarity sim = new LogLikelihoodSimilarity(dm); TanimotoCoefficientSimilarity sim = new TanimotoCoefficientSimilarity((dm)); // Using the below line get recommendations GenericItemBasedRecommender recommender = new GenericItemBasedRecommender(dm, sim); // Looping through every business. for (LongPrimitiveIterator items = dm.getItemIDs(); items.hasNext(); ) { long itemId = items.nextLong(); // For each business we recommend 3 businesses. List<RecommendedItem> recommendations = recommender.mostSimilarItems(itemId, 2); for (RecommendedItem recommendation : recommendations) { System.out.println( itemId + "," + recommendation.getItemID() + "," + recommendation.getValue()); } } } catch (IOException | TasteException e) { System.out.println(e); } }
/**
 * Prints up to 10 user-based recommendations for every user in
 * {@code datasets/ratingsForMahout.dat}, resolving item IDs to titles through
 * {@code datasets/moviesForMahout.dat}.
 *
 * <p>The movie file is read as {@code |}-separated lines whose first field is the numeric item ID
 * and whose second field is the title. The recommender uses Pearson correlation over the 3
 * nearest users and is wrapped in a {@code CachingRecommender}.
 *
 * @param args unused
 * @throws FileNotFoundException if the movie file does not exist
 * @throws TasteException if the recommender reports an error
 * @throws IOException if the ratings file cannot be read
 * @throws OptionException declared for compatibility with existing callers
 */
public static void main(String[] args)
        throws FileNotFoundException, TasteException, IOException, OptionException {
    DataModel model = new FileDataModel(new File("datasets/ratingsForMahout.dat"));

    File movieMapFile = new File("datasets/moviesForMahout.dat");
    HashMap<Long, String> movieMap = new HashMap<Long, String>();
    // try-with-resources closes the scanner even when a malformed line makes parsing throw.
    try (Scanner scan = new Scanner(movieMapFile)) {
        while (scan.hasNextLine()) {
            String[] line = scan.nextLine().split("\\|");
            movieMap.put(Long.parseLong(line[0]), line[1]);
        }
    }

    UserSimilarity userSimilarity = new PearsonCorrelationSimilarity(model);
    UserNeighborhood neighborhood = new NearestNUserNeighborhood(3, userSimilarity, model);
    Recommender recommender =
            new GenericUserBasedRecommender(model, neighborhood, userSimilarity);
    Recommender cachingRecommender = new CachingRecommender(recommender);

    for (LongPrimitiveIterator it = model.getUserIDs(); it.hasNext(); ) {
        long userId = it.nextLong();
        List<RecommendedItem> recommendations = cachingRecommender.recommend(userId, 10);
        if (recommendations.isEmpty()) {
            System.out.println("User " + userId + ": no recommendations");
        }
        for (RecommendedItem recommendedItem : recommendations) {
            System.out.println(
                    "User "
                            + userId
                            + ": "
                            + movieMap.get(recommendedItem.getItemID())
                            + "; value="
                            + recommendedItem.getValue());
        }
    }
}
/**
 * Rebuilds the two per-ID counters from the data model.
 *
 * <p>{@code mUserPrefNum} maps each user ID to the length of that user's preference array;
 * {@code mItemPrefNum} maps each item ID to the number of users with a preference for it. Both
 * fields are replaced with fresh maps before being filled.
 *
 * @throws TasteException if the data model cannot be read
 */
private void refreshUserAndItemPrefNum() throws TasteException {
    mUserPrefNum = new FastByIDMap<Integer>();
    mItemPrefNum = new FastByIDMap<Integer>();

    for (LongPrimitiveIterator users = getDataModel().getUserIDs(); users.hasNext(); ) {
        long user = users.nextLong();
        int prefCount = getDataModel().getPreferencesFromUser(user).length();
        mUserPrefNum.put(user, prefCount);
    }

    for (LongPrimitiveIterator items = getDataModel().getItemIDs(); items.hasNext(); ) {
        long item = items.nextLong();
        int userCount = getDataModel().getNumUsersWithPreferenceFor(item);
        mItemPrefNum.put(item, userCount);
    }
}
/**
 * Creates a new {@link GenericBooleanPrefDataModel} from the given users and their item sets.
 *
 * <p>Besides keeping a reference to {@code userData}, the constructor derives the inverse
 * item-to-users index and sorted arrays of all item IDs and all user IDs.
 *
 * @param userData map from user ID to the set of that user's item IDs; must not be null. The map
 *     is stored as given, not copied.
 * @param timestamps optionally, timestamps of preferences as milliseconds since the epoch. User
 *     IDs are mapped to maps of item IDs to Long timestamps. Stored without inspection.
 */
public GenericBooleanPrefDataModel(
        FastByIDMap<FastIDSet> userData, FastByIDMap<FastByIDMap<Long>> timestamps) {
    Preconditions.checkArgument(userData != null, "userData is null");

    this.preferenceFromUsers = userData;
    this.preferenceForItems = new FastByIDMap<>();

    // Invert user -> items into item -> users while collecting every distinct item ID.
    FastIDSet allItems = new FastIDSet();
    for (Map.Entry<Long, FastIDSet> userEntry : preferenceFromUsers.entrySet()) {
        long user = userEntry.getKey();
        FastIDSet userItems = userEntry.getValue();
        allItems.addAll(userItems);
        for (LongPrimitiveIterator itemIt = userItems.iterator(); itemIt.hasNext(); ) {
            long item = itemIt.nextLong();
            FastIDSet usersOfItem = preferenceForItems.get(item);
            if (usersOfItem == null) {
                usersOfItem = new FastIDSet(2);
                preferenceForItems.put(item, usersOfItem);
            }
            usersOfItem.add(user);
        }
    }

    this.itemIDs = allItems.toArray();
    allItems = null; // Might help GC -- this is big
    Arrays.sort(this.itemIDs);

    this.userIDs = new long[userData.size()];
    int next = 0;
    for (LongPrimitiveIterator userIt = userData.keySetIterator(); userIt.hasNext(); next++) {
        this.userIDs[next] = userIt.next();
    }
    Arrays.sort(this.userIDs);

    this.timestamps = timestamps;
}
/**
 * Computes a weighted, log-scaled overlap similarity between two users.
 *
 * <p>Each item contributes a weight of {@code numUsers / mItemPrefNum.get(itemID)}. The
 * {@code intersection} sums the weights of items both users have; the {@code union} sums the
 * weights of items either user has. The result is {@code log(intersection) / log(union)}.
 *
 * @param userID1 first user
 * @param userID2 second user
 * @return {@link Double#NaN} when both users have no items or when they share no item;
 *     {@code 0.0} when exactly one user has no items; otherwise the log ratio described above
 * @throws TasteException if the data model cannot be read
 */
@Override
public double userSimilarity(long userID1, long userID2) throws TasteException {
    DataModel dataModel = getDataModel();
    FastIDSet xPrefs = dataModel.getItemIDsFromUser(userID1);
    FastIDSet yPrefs = dataModel.getItemIDsFromUser(userID2);

    int xPrefsSize = xPrefs.size();
    int yPrefsSize = yPrefs.size();
    if (xPrefsSize == 0 && yPrefsSize == 0) {
        return Double.NaN;
    }
    if (xPrefsSize == 0 || yPrefsSize == 0) {
        return 0.0;
    }

    // Numerator of every item weight; it does not change while iterating, so fetch it once.
    double numUsers = dataModel.getNumUsers();

    double intersection = 0.0;
    double union = 0.0;

    for (LongPrimitiveIterator itemIt = xPrefs.iterator(); itemIt.hasNext(); ) {
        long itemID = itemIt.nextLong();
        double weight = numUsers / mItemPrefNum.get(itemID);
        if (yPrefs.contains(itemID)) {
            // Shared items are added to the union by the second loop, so only count the overlap here.
            intersection += weight;
        } else {
            union += weight;
        }
    }

    for (LongPrimitiveIterator itemIt = yPrefs.iterator(); itemIt.hasNext(); ) {
        long itemID = itemIt.nextLong();
        union += numUsers / mItemPrefNum.get(itemID);
    }

    // Without this guard, disjoint users would yield log(0) / log(union), i.e. -Infinity.
    // NaN matches what doItemSimilarity returns for an empty intersection.
    if (intersection == 0.0) {
        return Double.NaN;
    }
    return Math.log(intersection) / Math.log(union);
}
@Override public double evaluate( RecommenderBuilder recommenderBuilder, DataModelBuilder dataModelBuilder, DataModel dataModel, double trainingPercentage, double evaluationPercentage) throws TasteException { Preconditions.checkNotNull(recommenderBuilder); Preconditions.checkNotNull(dataModel); Preconditions.checkArgument( trainingPercentage >= 0.0 && trainingPercentage <= 1.0, "Invalid trainingPercentage: " + trainingPercentage + ". Must be: 0.0 <= trainingPercentage <= 1.0"); Preconditions.checkArgument( evaluationPercentage >= 0.0 && evaluationPercentage <= 1.0, "Invalid evaluationPercentage: " + evaluationPercentage + ". Must be: 0.0 <= evaluationPercentage <= 1.0"); log.info("Beginning evaluation using {} of {}", trainingPercentage, dataModel); int numUsers = dataModel.getNumUsers(); FastByIDMap<PreferenceArray> trainingPrefs = new FastByIDMap<PreferenceArray>(1 + (int) (evaluationPercentage * numUsers)); FastByIDMap<PreferenceArray> testPrefs = new FastByIDMap<PreferenceArray>(1 + (int) (evaluationPercentage * numUsers)); totalOfTrainingRatingsFromSource = 0; totalOfTrainingRatingsFromTargetWithContext = 0; totalOfTrainingRatingsFromTargetWithoutContext = 0; totalOfTestRatings = 0; LongPrimitiveIterator it = dataModel.getUserIDs(); while (it.hasNext()) { long userID = it.nextLong(); if (random.nextDouble() < evaluationPercentage) { splitOneUsersPrefs(trainingPercentage, trainingPrefs, testPrefs, userID, dataModel); } } // System.out.println("Training (Source, TargetWithoutContext, TargetWithContext): // "+totalOfTrainingRatingsFromSource+"/"+totalOfTrainingRatingsFromTargetWithoutContext+"/"+totalOfTrainingRatingsFromTargetWithContext); // int totalTraining = // (totalOfTrainingRatingsFromSource+totalOfTrainingRatingsFromTargetWithContext+totalOfTrainingRatingsFromTargetWithoutContext); // System.out.println("Training/Test: "+totalTraining+"/"+totalOfTestRatings); DataModel newDataModel = dataModel instanceof ContextualDataModel ? 
new ContextualDataModel(trainingPrefs) : new GenericDataModel(trainingPrefs); DataModel trainingModel = dataModelBuilder == null ? newDataModel : dataModelBuilder.buildDataModel(trainingPrefs); Recommender recommender = recommenderBuilder.buildRecommender(trainingModel); double result = getEvaluation(testPrefs, recommender); log.info("Evaluation result: {}", result); return result; }