/**
 * Checks that the explicit user/item filter works: the job is restricted to the users listed in
 * the users file, and user/item pairs listed in the filter file must not be recommended.
 */
@Test
public void testCompleteJobWithFiltering() throws Exception {
  File prefsFile = getTestTempFile("prefs.txt");
  File usersFile = getTestTempFile("users.txt");
  File excludedPairsFile = getTestTempFile("filter.txt");
  File outputDir = getTestTempDir("output");
  // presumably the job has to create its output directory itself, so remove the empty one
  outputDir.delete();
  File tmpDir = getTestTempDir("tmp");

  writeLines(
      prefsFile,
      "1,1,5", "1,2,5", "1,3,2",
      "2,1,2", "2,3,3", "2,4,5",
      "3,2,5", "3,4,3",
      "4,1,3", "4,4,5");
  /* only compute recommendations for the donkey (user 4) */
  writeLines(usersFile, "4");
  /* do not recommend the hotdog (item 2) for the donkey */
  writeLines(excludedPairsFile, "4,2");

  Configuration conf = new Configuration();
  conf.set("mapred.input.dir", prefsFile.getAbsolutePath());
  conf.set("mapred.output.dir", outputDir.getAbsolutePath());
  conf.setBoolean("mapred.output.compress", false);

  RecommenderJob job = new RecommenderJob();
  job.setConf(conf);

  String[] jobArgs = {
    "--tempDir", tmpDir.getAbsolutePath(),
    "--similarityClassname", DistributedTanimotoCoefficientVectorSimilarity.class.getName(),
    "--numRecommendations", "1",
    "--usersFile", usersFile.getAbsolutePath(),
    "--filterFile", excludedPairsFile.getAbsolutePath()
  };
  job.run(jobArgs);

  Map<Long, List<RecommendedItem>> recommendations =
      readRecommendations(new File(outputDir, "part-r-00000"));

  // exactly one user (the donkey) with exactly one recommendation
  assertEquals(1, recommendations.size());
  assertTrue(recommendations.containsKey(4L));
  List<RecommendedItem> forDonkey = recommendations.get(4L);
  assertEquals(1, forDonkey.size());

  /* berries (item 3) should have been recommended to the donkey */
  RecommendedItem berries = forDonkey.get(0);
  assertEquals(3L, berries.getItemID());
  assertEquals(3.5, berries.getValue(), 0.05);
}
/**
 * Computes up to 1000 rescored item-based recommendations for one hard-coded user from the
 * MongoDB-backed rating model, then replaces that user's documents in the "recommendations"
 * collection with the new predictions and prints each predicted rating.
 *
 * @param args unused
 * @throws TasteException if the similarity or the recommender fails
 * @throws UnknownHostException if the MongoDB host cannot be resolved
 * @throws MongoException if a MongoDB operation fails
 */
public static void main(String[] args)
    throws TasteException, UnknownHostException, MongoException {
  model =
      new MongoDBDataModel(
          "127.0.0.1",
          27017,
          "right-channel",
          "ratings",
          false,
          false,
          null,
          "user_id",
          "movie_id",
          "rating",
          MongoDBDataModel.DEFAULT_MONGO_MAP_COLLECTION);

  // The single user this run produces recommendations for.
  ObjectId userId = new ObjectId("518ce9df4597b5adead4b61d");

  ItemSimilarity similarity = new PearsonCorrelationSimilarity(model);
  Recommender recommender = new BiasedItemBasedRecommender(model, similarity);
  IDRescorer rescorer = new OverallRescorer();
  // The recommender works on long IDs, so the Mongo id is translated through the model first.
  long userIdLong = Long.parseLong(model.fromIdToLong(userId.toStringMongod(), false));
  List<RecommendedItem> recommendations = recommender.recommend(userIdLong, 1000, rescorer);

  Mongo mongoDB = new Mongo("127.0.0.1", 27017);
  try {
    DB db = mongoDB.getDB("right-channel");
    DBCollection recommendationCollection = db.getCollection("recommendations");

    // Drop the user's previous recommendations before inserting the fresh ones.
    BasicDBObject document = new BasicDBObject();
    document.put("user_id", userId);
    recommendationCollection.remove(document);

    for (RecommendedItem recommendation : recommendations) {
      ObjectId movieIdObject = new ObjectId(model.fromLongToId(recommendation.getItemID()));
      double rating = recommendation.getValue();

      BasicDBObject prediction = new BasicDBObject();
      prediction.put("user_id", userId);
      prediction.put("movie_id", movieIdObject);
      prediction.put("rating", rating);
      recommendationCollection.insert(prediction);
      System.out.println(rating);
    }
  } finally {
    // Release the client's connections even when a remove/insert fails.
    mongoDB.close();
  }
}
/**
 * Loads preferences from {@code datafile/item.csv}, builds a user-based recommender on Euclidean
 * distance similarity with a nearest-N neighbourhood, and prints one line per user of the form
 * {@code uid:<id>(<item>,<value>)...}.
 *
 * @param args unused
 * @throws IOException if the data file cannot be read
 * @throws TasteException if the recommender fails
 */
public static void main(String[] args) throws IOException, TasteException {
  DataModel dataModel = new FileDataModel(new File("datafile/item.csv"));
  UserSimilarity similarity = new EuclideanDistanceSimilarity(dataModel);
  NearestNUserNeighborhood neighborhood =
      new NearestNUserNeighborhood(NEIGHBORHOOD_NUM, similarity, dataModel);
  Recommender recommender = new GenericUserBasedRecommender(dataModel, neighborhood, similarity);

  for (LongPrimitiveIterator userIds = dataModel.getUserIDs(); userIds.hasNext(); ) {
    long userId = userIds.nextLong();
    List<RecommendedItem> recommended = recommender.recommend(userId, RECOMMENDER_NUM);

    System.out.printf("uid:%s", userId);
    for (RecommendedItem entry : recommended) {
      System.out.printf("(%s,%f)", entry.getItemID(), entry.getValue());
    }
    // terminate this user's line
    System.out.println();
  }
}
/**
 * Feeds item IDs 0..99 to {@code TopItems.getTopItems} with an estimator that scores each item
 * by its own ID, and expects the ten highest IDs back in descending order (99 down to 90).
 */
@Test
public void testTopItems() throws Exception {
  long[] ids = new long[100];
  for (int i = 0; i < 100; i++) {
    ids[i] = i;
  }
  LongPrimitiveIterator possibleItemIds = new LongPrimitiveArrayIterator(ids);
  TopItems.Estimator<Long> estimator =
      new TopItems.Estimator<Long>() {
        @Override
        public double estimate(Long thing) {
          return thing;
        }
      };
  List<RecommendedItem> topItems = TopItems.getTopItems(10, possibleItemIds, null, estimator);

  // Without this check an empty result would skip the loop and pass vacuously.
  assertEquals(10, topItems.size());

  int gold = 99;
  for (RecommendedItem topItem : topItems) {
    assertEquals(gold, topItem.getItemID());
    assertEquals(gold, topItem.getValue(), 0.01);
    gold--;
  }
}
/**
 * Selects up to {@code recommendationsPerUser} of the non-zero entries of the input vector
 * (ranked by {@code BY_PREFERENCE_VALUE}) and writes them for {@code key} as a single text value
 * of the form {@code itemID<DELIMETER>value<DELIMETER>itemID<DELIMETER>value...}.
 * Nothing is written when the vector has no non-zero entries.
 *
 * @param key row identifier, passed through unchanged as the output key
 * @param value vector whose non-zero elements are (index, value) candidates
 * @param context Hadoop context that receives the output record
 * @throws IOException if writing to the context fails
 * @throws InterruptedException if the write is interrupted
 */
@Override
protected void map(IntWritable key, VectorWritable value, Context context)
    throws IOException, InterruptedException {
  TopK<RecommendedItem> topKItems =
      new TopK<RecommendedItem>(recommendationsPerUser, BY_PREFERENCE_VALUE);
  Iterator<Vector.Element> nonZeros = value.get().iterateNonZero();
  while (nonZeros.hasNext()) {
    Vector.Element e = nonZeros.next();
    topKItems.offer(new GenericRecommendedItem(e.index(), (float) e.get()));
  }

  // Serialize straight from the retrieved items; no intermediate copy is needed.
  StringBuilder sb = new StringBuilder();
  for (RecommendedItem item : topKItems.retrieve()) {
    if (sb.length() > 0) {
      sb.append(DELIMETER);
    }
    sb.append(item.getItemID()).append(DELIMETER).append(item.getValue());
  }

  // An empty buffer means there was nothing to recommend for this row.
  if (sb.length() > 0) {
    outValue.set(sb.toString());
    context.write(key, outValue);
  }
}
/**
 * Loads movie metadata and ratings from a hard-coded MovieLens folder, builds a biased item-based
 * recommender on Pearson correlation, and prints up to 1000 rescored recommendations for user
 * 72000 as tab-separated {@code value, year, rating, title(id)} lines.
 *
 * @param args unused
 * @throws TasteException if the recommender fails
 * @throws MongoException declared by the signature; not raised by code visible in this method
 * @throws IOException if one of the data files cannot be read
 */
public static void main(String[] args) throws TasteException, MongoException, IOException {
  String ml10mFolder = "/home/yapianyu/Desktop/movielens/ml-10M100K/refinement/";
  File movieFile = new File(ml10mFolder + "movies.dat");
  File ratingFile = new File(ml10mFolder + "ratings.dat");

  for (String line : new FileLineIterable(movieFile, false)) {
    String[] fields = line.split("::");
    long id = Long.parseLong(fields[0]);
    // the year is read as the four characters before the last character of fields[1]
    String yearSource = fields[1];
    int end = yearSource.length() - 1;
    long year = Long.parseLong(yearSource.substring(end - 4, end));
    String title = fields[3];

    double rating;
    try {
      rating = Double.parseDouble(fields[4]);
    } catch (Exception ignored) {
      // a missing or unparsable rating column is recorded as NaN
      rating = Double.NaN;
    }
    mid2rating.put(id, rating);
    mid2year.put(id, year);
    mid2title.put(id, title);
  }

  DataModel model = new FileDataModel(ratingFile);
  ItemSimilarity similarity = new PearsonCorrelationSimilarity(model);
  Recommender recommender = new BiasedItemBasedRecommender(model, similarity);
  IDRescorer rescorer = new OverallRescorer();

  List<RecommendedItem> recommendations = recommender.recommend(72000, 1000, rescorer);
  for (RecommendedItem recommendation : recommendations) {
    long mid = recommendation.getItemID();
    String row =
        String.format(
            "%.3f\t%d\t%.1f\t%s(%d)",
            recommendation.getValue(),
            mid2year.get(mid),
            mid2rating.get(mid),
            mid2title.get(mid),
            mid);
    System.out.println(row);
  }
}
/**
 * Feeds item IDs 0..99 to {@code TopItems.getTopItems} with an estimator that returns a random
 * score per item, and expects exactly ten items whose values are in non-increasing order.
 */
@Test
public void testTopItemsRandom() throws Exception {
  long[] ids = new long[100];
  for (int i = 0; i < 100; i++) {
    ids[i] = i;
  }
  LongPrimitiveIterator possibleItemIds = new LongPrimitiveArrayIterator(ids);
  final Random random = RandomUtils.getRandom();
  TopItems.Estimator<Long> estimator =
      new TopItems.Estimator<Long>() {
        @Override
        public double estimate(Long thing) {
          return random.nextDouble();
        }
      };
  List<RecommendedItem> topItems = TopItems.getTopItems(10, possibleItemIds, null, estimator);
  assertEquals(10, topItems.size());

  // Scores come from nextDouble() and are below 1.0, so 2.0 is a safe upper bound to start from.
  double last = 2.0;
  for (RecommendedItem topItem : topItems) {
    assertTrue(topItem.getValue() <= last);
    // Track the previous VALUE (not the item ID) so the ordering of scores is what gets checked.
    last = topItem.getValue();
  }
}
public static void main(String args[]) { try { // Loading the DATA; DataModel dm = new FileDataModel( new File( "C:\\Users\\bryce\\Course Work\\3. Full Summer\\Big Data\\Final Project\\Yelp\\FINAL CODE\\Mahout\\data\\busirec_new.csv")); // We use the below line to relate businesses. // ItemSimilarity sim = new LogLikelihoodSimilarity(dm); TanimotoCoefficientSimilarity sim = new TanimotoCoefficientSimilarity((dm)); // Using the below line get recommendations GenericItemBasedRecommender recommender = new GenericItemBasedRecommender(dm, sim); // Looping through every business. for (LongPrimitiveIterator items = dm.getItemIDs(); items.hasNext(); ) { long itemId = items.nextLong(); // For each business we recommend 3 businesses. List<RecommendedItem> recommendations = recommender.mostSimilarItems(itemId, 2); for (RecommendedItem recommendation : recommendations) { System.out.println( itemId + "," + recommendation.getItemID() + "," + recommendation.getValue()); } } } catch (IOException | TasteException e) { System.out.println(e); } }
/**
 * Loads ratings from {@code datasets/ratingsForMahout.dat} and a pipe-separated id-to-title map
 * from {@code datasets/moviesForMahout.dat}, then prints up to 10 user-based recommendations
 * (Pearson correlation, 3 nearest neighbours, cached) for every user in the model.
 *
 * @param args unused
 * @throws FileNotFoundException if the movie map file does not exist
 * @throws TasteException if the recommender fails
 * @throws IOException if the ratings file cannot be read
 * @throws OptionException declared by the signature; not raised by code visible in this method
 */
public static void main(String[] args)
    throws FileNotFoundException, TasteException, IOException, OptionException {
  DataModel model = new FileDataModel(new File("datasets/ratingsForMahout.dat"));

  File movieMapFile = new File("datasets/moviesForMahout.dat");
  HashMap<Long, String> movieMap = new HashMap<Long, String>();
  Scanner scan = new Scanner(movieMapFile);
  try {
    while (scan.hasNextLine()) {
      // each line is "<movieId>|<title>|..."
      String[] line = scan.nextLine().split("\\|");
      movieMap.put(Long.parseLong(line[0]), line[1]);
    }
  } finally {
    // Close the file even when a malformed line makes parsing throw.
    scan.close();
  }

  UserSimilarity userSimilarity = new PearsonCorrelationSimilarity(model);
  UserNeighborhood neighborhood = new NearestNUserNeighborhood(3, userSimilarity, model);
  Recommender recommender =
      new GenericUserBasedRecommender(model, neighborhood, userSimilarity);
  Recommender cachingRecommender = new CachingRecommender(recommender);

  for (LongPrimitiveIterator it = model.getUserIDs(); it.hasNext(); ) {
    long userId = it.nextLong();
    List<RecommendedItem> recommendations = cachingRecommender.recommend(userId, 10);
    if (recommendations.isEmpty()) {
      System.out.println("User " + userId + ": no recommendations");
    }
    for (RecommendedItem recommendedItem : recommendations) {
      System.out.println(
          "User "
              + userId
              + ": "
              + movieMap.get(recommendedItem.getItemID())
              + "; value="
              + recommendedItem.getValue());
    }
  }
}
/**
 * Small integration test for boolean (value-less) preference data: runs the full job with
 * co-occurrence similarity for user 3 only and checks the two resulting recommendations.
 */
@Test
public void testCompleteJobBoolean() throws Exception {
  File prefsFile = getTestTempFile("prefs.txt");
  File outputDir = getTestTempDir("output");
  // presumably the job has to create its output directory itself, so remove the empty one
  outputDir.delete();
  File tmpDir = getTestTempDir("tmp");
  File usersFile = getTestTempFile("users.txt");

  // restrict the job to the cow (user 3)
  writeLines(usersFile, "3");
  writeLines(
      prefsFile,
      "1,1", "1,2", "1,3",
      "2,1", "2,3", "2,4",
      "3,2", "3,4",
      "4,1", "4,4");

  Configuration conf = new Configuration();
  conf.set("mapred.input.dir", prefsFile.getAbsolutePath());
  conf.set("mapred.output.dir", outputDir.getAbsolutePath());
  conf.setBoolean("mapred.output.compress", false);

  RecommenderJob job = new RecommenderJob();
  job.setConf(conf);

  String[] jobArgs = {
    "--tempDir", tmpDir.getAbsolutePath(),
    "--similarityClassname", DistributedCooccurrenceVectorSimilarity.class.getName(),
    "--booleanData", "true",
    "--usersFile", usersFile.getAbsolutePath()
  };
  job.run(jobArgs);

  Map<Long, List<RecommendedItem>> recommendations =
      readRecommendations(new File(outputDir, "part-r-00000"));

  List<RecommendedItem> recommendedToCow = recommendations.get(3L);
  assertEquals(2, recommendedToCow.size());

  RecommendedItem burger = recommendedToCow.get(0);
  RecommendedItem berries = recommendedToCow.get(1);
  assertEquals(1L, burger.getItemID());
  assertEquals(3L, berries.getItemID());

  /* predicted pref must be the sum of similarities:
   *   item 1: coocc(burger, hotdog) + coocc(burger, icecream) = 3
   *   item 3: coocc(berries, hotdog) + coocc(berries, icecream) = 2
   */
  assertEquals(3, burger.getValue(), 0.05);
  assertEquals(2, berries.getValue(), 0.05);
}
/**
 * Small integration test that runs the full job.
 *
 * <p>As a tribute to http://www.slideshare.net/srowen/collaborative-filtering-at-scale, we
 * recommend people food to animals in this test :)
 *
 * <pre>
 *
 * user-item-matrix
 *
 *          burger  hotdog  berries  icecream
 * dog        5       5        2        -
 * rabbit     2       -        3        5
 * cow        -       5        -        3
 * donkey     3       -        -        5
 *
 *
 * item-item-similarity-matrix (tanimoto-coefficient of the item-vectors of the user-item-matrix)
 *
 *           burger  hotdog  berries  icecream
 * burger      -      0.25    0.66     0.5
 * hotdog     0.25     -      0.33     0.25
 * berries    0.66    0.33     -       0.25
 * icecream   0.5     0.25    0.25      -
 *
 *
 * Prediction(dog, icecream)   = (0.5 * 5 + 0.25 * 5 + 0.25 * 2 ) / (0.5 + 0.25 + 0.25)  ~ 4.3
 * Prediction(rabbit, hotdog)  = (0.25 * 2 + 0.33 * 3 + 0.25 * 5) / (0.25 + 0.33 + 0.25) ~ 3.3
 * Prediction(cow, burger)     = (0.25 * 5 + 0.5 * 3) / (0.25 + 0.5)                     ~ 3.7
 * Prediction(cow, berries)    = (0.33 * 5 + 0.25 * 3) / (0.33 + 0.25)                   ~ 4.1
 * Prediction(donkey, hotdog)  = (0.25 * 3 + 0.25 * 5) / (0.25 + 0.25)                   ~ 4
 * Prediction(donkey, berries) = (0.66 * 3 + 0.25 * 5) / (0.66 + 0.25)                   ~ 3.5
 *
 * </pre>
 */
@Test
public void testCompleteJob() throws Exception {
  File prefsFile = getTestTempFile("prefs.txt");
  File outputDir = getTestTempDir("output");
  // presumably the job has to create its output directory itself, so remove the empty one
  outputDir.delete();
  File tmpDir = getTestTempDir("tmp");

  writeLines(
      prefsFile,
      "1,1,5", "1,2,5", "1,3,2",
      "2,1,2", "2,3,3", "2,4,5",
      "3,2,5", "3,4,3",
      "4,1,3", "4,4,5");

  Configuration conf = new Configuration();
  conf.set("mapred.input.dir", prefsFile.getAbsolutePath());
  conf.set("mapred.output.dir", outputDir.getAbsolutePath());
  conf.setBoolean("mapred.output.compress", false);

  RecommenderJob job = new RecommenderJob();
  job.setConf(conf);

  String[] jobArgs = {
    "--tempDir", tmpDir.getAbsolutePath(),
    "--similarityClassname", DistributedTanimotoCoefficientVectorSimilarity.class.getName(),
    "--numRecommendations", "4"
  };
  job.run(jobArgs);

  Map<Long, List<RecommendedItem>> recommendations =
      readRecommendations(new File(outputDir, "part-r-00000"));

  // every one of the four animals gets at least one recommendation
  assertEquals(4, recommendations.size());

  for (Entry<Long, List<RecommendedItem>> perUser : recommendations.entrySet()) {
    long userID = perUser.getKey();
    List<RecommendedItem> items = perUser.getValue();
    assertNotNull(items);
    RecommendedItem first = items.get(0);

    if (userID == 1L) {
      // dog -> icecream
      assertEquals(1, items.size());
      assertEquals(4L, first.getItemID());
      assertEquals(4.3, first.getValue(), 0.05);
    } else if (userID == 2L) {
      // rabbit -> hotdog
      assertEquals(1, items.size());
      assertEquals(2L, first.getItemID());
      assertEquals(3.3, first.getValue(), 0.05);
    } else if (userID == 3L) {
      // cow -> berries, then burger
      assertEquals(2, items.size());
      assertEquals(3L, first.getItemID());
      assertEquals(4.1, first.getValue(), 0.05);
      RecommendedItem second = items.get(1);
      assertEquals(1L, second.getItemID());
      assertEquals(3.7, second.getValue(), 0.05);
    } else if (userID == 4L) {
      // donkey -> hotdog, then berries
      assertEquals(2, items.size());
      assertEquals(2L, first.getItemID());
      assertEquals(4.0, first.getValue(), 0.05);
      RecommendedItem second = items.get(1);
      assertEquals(3L, second.getItemID());
      assertEquals(3.5, second.getValue(), 0.05);
    }
  }
}
/**
 * Orders two recommended items by their preference value, ascending.
 *
 * @return a negative number, zero, or a positive number as the value of {@code one} is less
 *     than, equal to, or greater than the value of {@code two}
 */
@Override
public int compare(RecommendedItem one, RecommendedItem two) {
  float firstValue = one.getValue();
  float secondValue = two.getValue();
  return Float.compare(firstValue, secondValue);
}
/**
 * Merges the recommendations of every recommender registered for a criterium in
 * {@code criteriaList} into a single list.
 *
 * <p>For each item the method tracks how many recommenders proposed it and a blended value.
 * Items are returned ordered by number of proposing recommenders (most first); ties are ordered
 * by blended value, ascending.
 *
 * @param userID user to recommend for
 * @param howMany maximum number of items to return; also passed to every delegate recommender
 * @param rescorer rescorer passed through to every delegate recommender
 * @param includeKnownItems passed through to every delegate recommender
 * @return at most {@code howMany} merged items; empty when {@code howMany <= 0}, when no
 *     criteria or recommenders are configured, or when no delegate produced anything
 * @throws TasteException if a delegate recommender fails
 */
@Override
public List<RecommendedItem> recommend(
    long userID, int howMany, IDRescorer rescorer, boolean includeKnownItems)
    throws TasteException {
  // item ID -> (number of recommenders that proposed it, blended value)
  Map<Long, Pair<Long, Float>> recommendedItemOccurrence =
      new HashMap<Long, Pair<Long, Float>>();

  if (criteriaList != null && recommenderMap != null) {
    for (RecommenderCriteriumEntity criterium : criteriaList) {
      Recommender recommender = recommenderMap.get(criterium);
      if (recommender == null) {
        continue;
      }
      List<RecommendedItem> recommendedItems =
          recommender.recommend(userID, howMany, rescorer, includeKnownItems);
      if (recommendedItems == null) {
        continue;
      }
      for (RecommendedItem recommendedItem : recommendedItems) {
        Pair<Long, Float> pair = recommendedItemOccurrence.get(recommendedItem.getItemID());
        if (pair == null) {
          pair = new ImmutablePair<Long, Float>(1L, recommendedItem.getValue());
        } else {
          // NOTE(review): this halves the running value with each new one, so with three or
          // more recommenders it is a recency-weighted blend, not an arithmetic mean.
          // Kept as-is; confirm the intent.
          pair =
              new ImmutablePair<Long, Float>(
                  pair.getLeft() + 1, (pair.getRight() + recommendedItem.getValue()) / 2);
        }
        recommendedItemOccurrence.put(recommendedItem.getItemID(), pair);
      }
    }
  }

  PriorityQueue<ImmutablePair<RecommendedItem, Long>> queue =
      new PriorityQueue<ImmutablePair<RecommendedItem, Long>>(
          10,
          new Comparator<ImmutablePair<RecommendedItem, Long>>() {
            @Override
            public int compare(
                ImmutablePair<RecommendedItem, Long> o1,
                ImmutablePair<RecommendedItem, Long> o2) {
              // More occurrences first.
              int result = -o1.getRight().compareTo(o2.getRight());
              if (result == 0) {
                // NOTE(review): ties are broken by ASCENDING value, so the lower-valued item
                // is polled first. Kept as-is; confirm this is intended.
                result = Float.compare(o1.getLeft().getValue(), o2.getLeft().getValue());
              }
              return result;
            }
          });
  for (Map.Entry<Long, Pair<Long, Float>> entry : recommendedItemOccurrence.entrySet()) {
    RecommendedItem recommendedItem =
        new GenericRecommendedItem(entry.getKey(), entry.getValue().getRight());
    queue.add(
        new ImmutablePair<RecommendedItem, Long>(recommendedItem, entry.getValue().getLeft()));
  }

  // Check the limit BEFORE taking an item, so that howMany <= 0 yields an empty list.
  List<RecommendedItem> recommendedItems = new ArrayList<RecommendedItem>();
  while (recommendedItems.size() < howMany && !queue.isEmpty()) {
    recommendedItems.add(queue.poll().getLeft());
  }
  return recommendedItems;
}