Beispiel #1
0
  /**
   * Verifies that the explicit users file and item filter file are honoured by the job:
   * recommendations are requested for user 4 only, and item 2 is filtered out for that user.
   */
  @Test
  public void testCompleteJobWithFiltering() throws Exception {

    File prefsFile = getTestTempFile("prefs.txt");
    File usersFile = getTestTempFile("users.txt");
    File itemFilterFile = getTestTempFile("filter.txt");
    File resultDir = getTestTempDir("output");
    resultDir.delete();
    File workDir = getTestTempDir("tmp");

    /* preference triples in the form user,item,value */
    writeLines(
        prefsFile, "1,1,5", "1,2,5", "1,3,2", "2,1,2", "2,3,3", "2,4,5", "3,2,5", "3,4,3", "4,1,3",
        "4,4,5");

    /* restrict the computation to user 4 (the donkey) */
    writeLines(usersFile, "4");
    /* exclude item 2 (the hotdog) from what may be recommended to user 4 */
    writeLines(itemFilterFile, "4,2");

    Configuration jobConf = new Configuration();
    jobConf.set("mapred.input.dir", prefsFile.getAbsolutePath());
    jobConf.set("mapred.output.dir", resultDir.getAbsolutePath());
    jobConf.setBoolean("mapred.output.compress", false);

    RecommenderJob job = new RecommenderJob();
    job.setConf(jobConf);

    String[] jobArgs = {
      "--tempDir",
      workDir.getAbsolutePath(),
      "--similarityClassname",
      DistributedTanimotoCoefficientVectorSimilarity.class.getName(),
      "--numRecommendations",
      "1",
      "--usersFile",
      usersFile.getAbsolutePath(),
      "--filterFile",
      itemFilterFile.getAbsolutePath()
    };
    job.run(jobArgs);

    File resultFile = new File(resultDir, "part-r-00000");
    Map<Long, List<RecommendedItem>> byUser = readRecommendations(resultFile);

    /* exactly one user with exactly one recommendation is expected */
    assertEquals(1, byUser.size());
    assertTrue(byUser.containsKey(4L));
    List<RecommendedItem> forDonkey = byUser.get(4L);
    assertEquals(1, forDonkey.size());

    /* item 3 (berries) is the expected recommendation for the donkey */
    RecommendedItem onlyItem = forDonkey.get(0);
    assertEquals(3L, onlyItem.getItemID());
    assertEquals(3.5, onlyItem.getValue(), 0.05);
  }
  /**
   * Computes recommendations for one hard-coded user from ratings stored in MongoDB and replaces
   * that user's documents in the {@code recommendations} collection with the new predictions.
   *
   * <p>The data model is read from the {@code ratings} collection of the {@code right-channel}
   * database on {@code 127.0.0.1:27017}. Up to 1000 items are requested from a
   * {@code BiasedItemBasedRecommender} using an {@code OverallRescorer}; each predicted rating is
   * also printed to standard output.
   *
   * @param args ignored
   * @throws TasteException if building the model or computing recommendations fails
   * @throws UnknownHostException if the MongoDB host cannot be resolved
   * @throws MongoException if a MongoDB operation fails
   */
  public static void main(String[] args)
      throws TasteException, UnknownHostException, MongoException {
    model =
        new MongoDBDataModel(
            "127.0.0.1",
            27017,
            "right-channel",
            "ratings",
            false,
            false,
            null,
            "user_id",
            "movie_id",
            "rating",
            MongoDBDataModel.DEFAULT_MONGO_MAP_COLLECTION);

    // The single user this run produces recommendations for.
    ObjectId userId = new ObjectId("518ce9df4597b5adead4b61d");

    ItemSimilarity similarity = new PearsonCorrelationSimilarity(model);
    Recommender recommender = new BiasedItemBasedRecommender(model, similarity);
    IDRescorer rescorer = new OverallRescorer();
    // The model maps Mongo string ids to the long ids the recommender works with.
    long userIdLong = Long.parseLong(model.fromIdToLong(userId.toStringMongod(), false));
    List<RecommendedItem> recommendations = recommender.recommend(userIdLong, 1000, rescorer);

    Mongo mongoDB = new Mongo("127.0.0.1", 27017);
    try {
      DB db = mongoDB.getDB("right-channel");
      DBCollection recommendationCollection = db.getCollection("recommendations");

      // Drop the user's previous predictions before writing the fresh ones.
      BasicDBObject document = new BasicDBObject();
      document.put("user_id", userId);
      recommendationCollection.remove(document);

      for (RecommendedItem recommendation : recommendations) {
        ObjectId movieIdObject = new ObjectId(model.fromLongToId(recommendation.getItemID()));
        double rating = recommendation.getValue();

        BasicDBObject prediction = new BasicDBObject();
        prediction.put("user_id", userId);
        prediction.put("movie_id", movieIdObject);
        prediction.put("rating", rating);
        recommendationCollection.insert(prediction);

        System.out.println(rating);
      }
    } finally {
      // Release the connection even when a remove/insert fails.
      mongoDB.close();
    }
  }
  /**
   * Prints user-based recommendations for every user found in {@code datafile/item.csv}.
   *
   * <p>One line is written per user: {@code uid:<id>} followed by one {@code (itemID,value)} pair
   * per recommended item. Neighbourhood size and the number of recommendations come from
   * {@code NEIGHBORHOOD_NUM} and {@code RECOMMENDER_NUM}.
   *
   * @param args ignored
   * @throws IOException if the data file cannot be read
   * @throws TasteException if the recommender fails
   */
  public static void main(String[] args) throws IOException, TasteException {
    DataModel dataModel = new FileDataModel(new File("datafile/item.csv"));
    UserSimilarity similarity = new EuclideanDistanceSimilarity(dataModel);
    NearestNUserNeighborhood neighborhood =
        new NearestNUserNeighborhood(NEIGHBORHOOD_NUM, similarity, dataModel);
    Recommender recommender = new GenericUserBasedRecommender(dataModel, neighborhood, similarity);

    for (LongPrimitiveIterator userIds = dataModel.getUserIDs(); userIds.hasNext(); ) {
      long userId = userIds.nextLong();
      List<RecommendedItem> suggestions = recommender.recommend(userId, RECOMMENDER_NUM);

      System.out.printf("uid:%s", userId);
      for (RecommendedItem suggestion : suggestions) {
        System.out.printf("(%s,%f)", suggestion.getItemID(), suggestion.getValue());
      }
      // Terminate this user's output line.
      System.out.println();
    }
  }
Beispiel #4
0
 /**
  * Checks {@code TopItems.getTopItems} with an estimator that scores each item by its own ID:
  * for IDs 0..99 the returned items must start at 99 and decrease by one per position, with the
  * value equal to the ID.
  */
 @Test
 public void testTopItems() throws Exception {
   long[] candidateIds = new long[100];
   for (int idx = 0; idx < candidateIds.length; idx++) {
     candidateIds[idx] = idx;
   }
   LongPrimitiveIterator candidates = new LongPrimitiveArrayIterator(candidateIds);

   // Identity estimator: an item's estimated value is simply its ID.
   TopItems.Estimator<Long> identityEstimator =
       new TopItems.Estimator<Long>() {
         @Override
         public double estimate(Long itemId) {
           return itemId;
         }
       };

   List<RecommendedItem> best = TopItems.getTopItems(10, candidates, null, identityEstimator);

   int expected = 99;
   for (RecommendedItem item : best) {
     assertEquals(expected, item.getItemID());
     assertEquals(expected, item.getValue(), 0.01);
     expected--;
   }
 }
 /**
  * Writes the selected entries of one input vector as a single delimited string.
  *
  * <p>Every non-zero element of {@code value} is offered to a {@code TopK} bounded by
  * {@code recommendationsPerUser} and ordered by {@code BY_PREFERENCE_VALUE}. The retained
  * entries are serialised as {@code itemID DELIMETER value} pairs, themselves joined with
  * {@code DELIMETER}, and written under the unchanged {@code key}. Nothing is written when no
  * entry is retained.
  *
  * @param key row identifier, passed through as the output key
  * @param value vector whose non-zero elements are treated as (item index, preference value)
  * @param context Hadoop context receiving the output record
  * @throws IOException if writing to the context fails
  * @throws InterruptedException if writing to the context is interrupted
  */
 @Override
 protected void map(IntWritable key, VectorWritable value, Context context)
     throws IOException, InterruptedException {
   TopK<RecommendedItem> topKItems =
       new TopK<RecommendedItem>(recommendationsPerUser, BY_PREFERENCE_VALUE);
   Iterator<Vector.Element> nonZeros = value.get().iterateNonZero();
   while (nonZeros.hasNext()) {
     Vector.Element e = nonZeros.next();
     topKItems.offer(new GenericRecommendedItem(e.index(), (float) e.get()));
   }

   // StringBuilder suffices: the buffer never leaves this method, so no synchronisation is
   // needed. The retrieved items are serialised directly instead of being copied first.
   StringBuilder sb = new StringBuilder();
   for (RecommendedItem item : topKItems.retrieve()) {
     if (sb.length() > 0) {
       sb.append(DELIMETER);
     }
     sb.append(item.getItemID()).append(DELIMETER).append(item.getValue());
   }

   // An empty builder means no item was retained; emit nothing in that case.
   if (sb.length() > 0) {
     outValue.set(sb.toString());
     context.write(key, outValue);
   }
 }
  /**
   * Loads movie metadata and ratings from a hard-coded MovieLens folder and prints up to 1000
   * recommendations for user 72000.
   *
   * <p>Each line of {@code movies.dat} is split on {@code "::"}: column 0 is the movie id, the
   * four characters before the last character of column 1 are parsed as the year (presumably a
   * trailing {@code (YYYY)} - confirm against the data), column 3 is stored as the title and
   * column 4 as the rating ({@code NaN} when it is missing or unparsable). The results go into
   * {@code mid2year}, {@code mid2title} and {@code mid2rating}.
   *
   * @param args ignored
   * @throws TasteException if the recommender fails
   * @throws MongoException declared by the original signature
   * @throws IOException if a data file cannot be read
   */
  public static void main(String[] args) throws TasteException, MongoException, IOException {
    String ml10mFolder = "/home/yapianyu/Desktop/movielens/ml-10M100K/refinement/";
    File ratingsFile = new File(ml10mFolder + "ratings.dat");
    File moviesFile = new File(ml10mFolder + "movies.dat");

    for (String record : new FileLineIterable(moviesFile, false)) {
      String[] columns = record.split("::");
      long movieId = Long.parseLong(columns[0]);

      // The year is the 4-character run that ends just before the column's final character.
      String yearColumn = columns[1];
      int yearEnd = yearColumn.length() - 1;
      long releaseYear = Long.parseLong(yearColumn.substring(yearEnd - 4, yearEnd));

      String movieTitle = columns[3];
      try {
        mid2rating.put(movieId, Double.parseDouble(columns[4]));
      } catch (Exception e) {
        // Missing or malformed rating column: record the movie with an undefined rating.
        mid2rating.put(movieId, Double.NaN);
      }
      mid2year.put(movieId, releaseYear);
      mid2title.put(movieId, movieTitle);
    }

    DataModel dataModel = new FileDataModel(ratingsFile);
    ItemSimilarity itemSimilarity = new PearsonCorrelationSimilarity(dataModel);
    Recommender itemRecommender = new BiasedItemBasedRecommender(dataModel, itemSimilarity);
    IDRescorer overallRescorer = new OverallRescorer();

    List<RecommendedItem> suggestions = itemRecommender.recommend(72000, 1000, overallRescorer);
    for (RecommendedItem suggestion : suggestions) {
      long movieId = suggestion.getItemID();
      // Columns: predicted value, year, stored rating, title(movie id).
      String row =
          String.format(
              "%.3f\t%d\t%.1f\t%s(%d)",
              suggestion.getValue(),
              mid2year.get(movieId),
              mid2rating.get(movieId),
              mid2title.get(movieId),
              movieId);
      System.out.println(row);
    }
  }
Beispiel #7
0
 /**
  * Checks that {@code TopItems.getTopItems} returns exactly the requested number of items and
  * that their values are in non-increasing order when the estimates are random.
  */
 @Test
 public void testTopItemsRandom() throws Exception {
   long[] ids = new long[100];
   for (int i = 0; i < 100; i++) {
     ids[i] = i;
   }
   LongPrimitiveIterator possibleItemIds = new LongPrimitiveArrayIterator(ids);
   final Random random = RandomUtils.getRandom();
   // Each item gets an estimate drawn from random.nextDouble(), independent of its ID.
   TopItems.Estimator<Long> estimator =
       new TopItems.Estimator<Long>() {
         @Override
         public double estimate(Long thing) {
           return random.nextDouble();
         }
       };
   List<RecommendedItem> topItems = TopItems.getTopItems(10, possibleItemIds, null, estimator);
   assertEquals(10, topItems.size());

   // Start above every possible estimate so the first comparison always passes.
   double last = 2.0;
   for (RecommendedItem topItem : topItems) {
     assertTrue(topItem.getValue() <= last);
     // Compare against the previous item's VALUE; tracking the item ID here (as before) made
     // the ordering check vacuous for almost every element.
     last = topItem.getValue();
   }
 }
  /**
   * Prints, for every item in a hard-coded CSV data file, its most similar items according to
   * the Tanimoto coefficient.
   *
   * <p>Output is one {@code itemId,similarItemId,similarity} line per pair; up to 2 similar items
   * are requested per item. I/O and recommender failures are printed to standard output rather
   * than propagated.
   *
   * @param args ignored
   */
  public static void main(String args[]) {
    try {
      // Load the preference data.
      File dataFile =
          new File(
              "C:\\Users\\bryce\\Course Work\\3. Full Summer\\Big Data\\Final Project\\Yelp\\FINAL CODE\\Mahout\\data\\busirec_new.csv");
      DataModel dataModel = new FileDataModel(dataFile);

      // Item-to-item similarity (a LogLikelihoodSimilarity was an earlier alternative here).
      TanimotoCoefficientSimilarity similarity = new TanimotoCoefficientSimilarity(dataModel);

      // Item-based recommender used only for its most-similar-items lookup.
      GenericItemBasedRecommender itemRecommender =
          new GenericItemBasedRecommender(dataModel, similarity);

      // Walk over every item and list its 2 nearest items.
      LongPrimitiveIterator itemIds = dataModel.getItemIDs();
      while (itemIds.hasNext()) {
        long currentId = itemIds.nextLong();
        List<RecommendedItem> similarItems = itemRecommender.mostSimilarItems(currentId, 2);

        for (RecommendedItem similar : similarItems) {
          System.out.println(currentId + "," + similar.getItemID() + "," + similar.getValue());
        }
      }
    } catch (IOException | TasteException e) {
      System.out.println(e);
    }
  }
  /**
   * Prints up to 10 user-based recommendations, with movie titles, for every user in
   * {@code datasets/ratingsForMahout.dat}.
   *
   * <p>Titles are read from {@code datasets/moviesForMahout.dat}, where each line is split on
   * {@code '|'} into a numeric movie id and a title. Users without recommendations get a
   * "no recommendations" line.
   *
   * @param args ignored
   * @throws FileNotFoundException if the movie map file does not exist
   * @throws TasteException if the recommender fails
   * @throws IOException if the ratings file cannot be read
   * @throws OptionException declared by the original signature
   */
  public static void main(String[] args)
      throws FileNotFoundException, TasteException, IOException, OptionException {
    DataModel model = new FileDataModel(new File("datasets/ratingsForMahout.dat"));

    File movieMapFile = new File("datasets/moviesForMahout.dat");
    HashMap<Long, String> movieMap = new HashMap<Long, String>();
    Scanner scan = new Scanner(movieMapFile);
    try {
      while (scan.hasNextLine()) {
        String[] line = scan.nextLine().split("\\|");
        movieMap.put(Long.parseLong(line[0]), line[1]);
      }
    } finally {
      // Close the file handle even if a malformed line makes parsing throw.
      scan.close();
    }

    UserSimilarity userSimilarity = new PearsonCorrelationSimilarity(model);
    UserNeighborhood neighborhood = new NearestNUserNeighborhood(3, userSimilarity, model);
    Recommender recommender = new GenericUserBasedRecommender(model, neighborhood, userSimilarity);
    Recommender cachingRecommender = new CachingRecommender(recommender);

    for (LongPrimitiveIterator it = model.getUserIDs(); it.hasNext(); ) {
      long userId = it.nextLong();
      List<RecommendedItem> recommendations = cachingRecommender.recommend(userId, 10);
      if (recommendations.isEmpty()) {
        System.out.println("User " + userId + ": no recommendations");
      }
      for (RecommendedItem recommendedItem : recommendations) {
        System.out.println(
            "User "
                + userId
                + ": "
                + movieMap.get(recommendedItem.getItemID())
                + "; value="
                + recommendedItem.getValue());
      }
    }
  }
Beispiel #10
0
  /**
   * Small integration test for boolean (value-less) preference data: runs the job with the
   * co-occurrence similarity for user 3 only and checks both recommended items and their scores.
   */
  @Test
  public void testCompleteJobBoolean() throws Exception {

    File prefsFile = getTestTempFile("prefs.txt");
    File resultDir = getTestTempDir("output");
    resultDir.delete();
    File workDir = getTestTempDir("tmp");
    File onlyUsersFile = getTestTempFile("users.txt");

    /* compute recommendations for user 3 (the cow) only */
    writeLines(onlyUsersFile, "3");

    /* user,item pairs without preference values */
    writeLines(prefsFile, "1,1", "1,2", "1,3", "2,1", "2,3", "2,4", "3,2", "3,4", "4,1", "4,4");

    Configuration jobConf = new Configuration();
    jobConf.set("mapred.input.dir", prefsFile.getAbsolutePath());
    jobConf.set("mapred.output.dir", resultDir.getAbsolutePath());
    jobConf.setBoolean("mapred.output.compress", false);

    RecommenderJob job = new RecommenderJob();
    job.setConf(jobConf);

    String[] jobArgs = {
      "--tempDir",
      workDir.getAbsolutePath(),
      "--similarityClassname",
      DistributedCooccurrenceVectorSimilarity.class.getName(),
      "--booleanData",
      "true",
      "--usersFile",
      onlyUsersFile.getAbsolutePath()
    };
    job.run(jobArgs);

    File resultFile = new File(resultDir, "part-r-00000");
    Map<Long, List<RecommendedItem>> byUser = readRecommendations(resultFile);

    List<RecommendedItem> forCow = byUser.get(3L);
    assertEquals(2, forCow.size());

    RecommendedItem first = forCow.get(0);
    RecommendedItem second = forCow.get(1);

    assertEquals(1L, first.getItemID());
    assertEquals(3L, second.getItemID());

    /* the predicted preference must be the sum of similarities:
     *    first:  coocc(burger, hotdog) + coocc(burger, icecream) = 3
     *    second: coocc(berries, hotdog) + coocc(berries, icecream) = 2 */
    assertEquals(3, first.getValue(), 0.05);
    assertEquals(2, second.getValue(), 0.05);
  }
Beispiel #11
0
  /**
   * Small integration test that runs the complete job and checks every user's recommendations.
   *
   * <p>As a tribute to http://www.slideshare.net/srowen/collaborative-filtering-at-scale, this
   * test recommends human food to animals :)
   *
   * <pre>
   *
   *  user-item-matrix (users 1..4 are dog, rabbit, cow, donkey; items 1..4 are the columns)
   *
   *          burger  hotdog  berries  icecream
   *  dog       5       5        2        -
   *  rabbit    2       -        3        5
   *  cow       -       5        -        3
   *  donkey    3       -        -        5
   *
   *
   *  item-item-similarity-matrix (tanimoto-coefficient of the item-vectors of the user-item-matrix)
   *
   *          burger  hotdog  berries icecream
   *  burger    -      0.25    0.66    0.5
   *  hotdog   0.25     -      0.33    0.25
   *  berries  0.66    0.33     -      0.25
   *  icecream 0.5     0.25    0.25     -
   *
   *
   *  Prediction(dog, icecream)   = (0.5 * 5 + 0.25 * 5 + 0.25 * 2 ) / (0.5 + 0.25 + 0.25)  ~ 4.3
   *  Prediction(rabbit, hotdog)  = (0.25 * 2 + 0.33 * 3 + 0.25 * 5) / (0.25 + 0.33 + 0.25) ~ 3.3
   *  Prediction(cow, burger)     = (0.25 * 5 + 0.5 * 3) / (0.25 + 0.5)                     ~ 3.7
   *  Prediction(cow, berries)    = (0.33 * 5 + 0.25 * 3) / (0.33 + 0.25)                   ~ 4.1
   *  Prediction(donkey, hotdog)  = (0.25 * 3 + 0.25 * 5) / (0.25 + 0.25)                   ~ 4.0
   *  Prediction(donkey, berries) = (0.66 * 3 + 0.25 * 5) / (0.66 + 0.25)                   ~ 3.5
   *
   * </pre>
   */
  @Test
  public void testCompleteJob() throws Exception {

    File prefsFile = getTestTempFile("prefs.txt");
    File resultDir = getTestTempDir("output");
    resultDir.delete();
    File workDir = getTestTempDir("tmp");

    /* preference triples in the form user,item,value */
    writeLines(
        prefsFile, "1,1,5", "1,2,5", "1,3,2", "2,1,2", "2,3,3", "2,4,5", "3,2,5", "3,4,3", "4,1,3",
        "4,4,5");

    Configuration jobConf = new Configuration();
    jobConf.set("mapred.input.dir", prefsFile.getAbsolutePath());
    jobConf.set("mapred.output.dir", resultDir.getAbsolutePath());
    jobConf.setBoolean("mapred.output.compress", false);

    RecommenderJob job = new RecommenderJob();
    job.setConf(jobConf);

    String[] jobArgs = {
      "--tempDir",
      workDir.getAbsolutePath(),
      "--similarityClassname",
      DistributedTanimotoCoefficientVectorSimilarity.class.getName(),
      "--numRecommendations",
      "4"
    };
    job.run(jobArgs);

    File resultFile = new File(resultDir, "part-r-00000");
    Map<Long, List<RecommendedItem>> byUser = readRecommendations(resultFile);

    /* every one of the four users must have received recommendations */
    assertEquals(4, byUser.size());

    for (Entry<Long, List<RecommendedItem>> userEntry : byUser.entrySet()) {
      long user = userEntry.getKey();
      List<RecommendedItem> recommended = userEntry.getValue();
      assertNotNull(recommended);
      RecommendedItem best = recommended.get(0);

      if (user == 1L) {
        /* dog: icecream */
        assertEquals(1, recommended.size());
        assertEquals(4L, best.getItemID());
        assertEquals(4.3, best.getValue(), 0.05);
      } else if (user == 2L) {
        /* rabbit: hotdog */
        assertEquals(1, recommended.size());
        assertEquals(2L, best.getItemID());
        assertEquals(3.3, best.getValue(), 0.05);
      } else if (user == 3L) {
        /* cow: berries first, then burger */
        assertEquals(2, recommended.size());
        assertEquals(3L, best.getItemID());
        assertEquals(4.1, best.getValue(), 0.05);
        RecommendedItem runnerUp = recommended.get(1);
        assertEquals(1L, runnerUp.getItemID());
        assertEquals(3.7, runnerUp.getValue(), 0.05);
      } else if (user == 4L) {
        /* donkey: hotdog first, then berries */
        assertEquals(2, recommended.size());
        assertEquals(2L, best.getItemID());
        assertEquals(4.0, best.getValue(), 0.05);
        RecommendedItem runnerUp = recommended.get(1);
        assertEquals(3L, runnerUp.getItemID());
        assertEquals(3.5, runnerUp.getValue(), 0.05);
      }
    }
  }
 /**
  * Orders two recommended items by their preference value, lower value first.
  *
  * @return a negative number, zero, or a positive number as {@code one}'s value is less than,
  *     equal to, or greater than {@code two}'s
  */
 @Override
 public int compare(RecommendedItem one, RecommendedItem two) {
   float left = one.getValue();
   float right = two.getValue();
   return Float.compare(left, right);
 }
  /**
   * Merges the recommendations of all configured per-criterium recommenders into one ranked list.
   *
   * <p>For every entry of {@code criteriaList} the matching recommender in {@code recommenderMap}
   * (if any) is asked for up to {@code howMany} items. For each item the method tracks how many
   * recommenders suggested it and a combined value. The value is folded pairwise as
   * {@code (previous + new) / 2}, so with more than two recommenders later ones weigh more than
   * earlier ones.
   *
   * <p>The result is ordered by occurrence count, highest first; ties are broken by combined
   * value, highest first, so stronger recommendations always precede weaker ones.
   *
   * @param userID user to recommend for
   * @param howMany maximum number of items to return
   * @param rescorer passed through unchanged to every delegate recommender
   * @param includeKnownItems passed through unchanged to every delegate recommender
   * @return at most {@code howMany} items; empty when no delegate produced anything or when
   *     {@code howMany} is not positive
   * @throws TasteException if a delegate recommender fails
   */
  @Override
  public List<RecommendedItem> recommend(
      long userID, int howMany, IDRescorer rescorer, boolean includeKnownItems)
      throws TasteException {

    // item id -> (number of recommenders that suggested it, combined value)
    Map<Long, Pair<Long, Float>> recommendedItemOccurrence = new HashMap<Long, Pair<Long, Float>>();
    if (criteriaList != null && recommenderMap != null) {
      for (RecommenderCriteriumEntity criterium : criteriaList) {
        Recommender recommender = recommenderMap.get(criterium);
        if (recommender == null) {
          continue;
        }
        List<RecommendedItem> delegateItems =
            recommender.recommend(userID, howMany, rescorer, includeKnownItems);
        if (delegateItems == null) {
          continue;
        }
        for (RecommendedItem recommendedItem : delegateItems) {
          long itemID = recommendedItem.getItemID();
          Pair<Long, Float> seen = recommendedItemOccurrence.get(itemID);
          Pair<Long, Float> updated;
          if (seen == null) {
            updated = new ImmutablePair<Long, Float>(1L, recommendedItem.getValue());
          } else {
            updated =
                new ImmutablePair<Long, Float>(
                    seen.getLeft() + 1, (seen.getRight() + recommendedItem.getValue()) / 2);
          }
          recommendedItemOccurrence.put(itemID, updated);
        }
      }
    }

    PriorityQueue<ImmutablePair<RecommendedItem, Long>> queue =
        new PriorityQueue<ImmutablePair<RecommendedItem, Long>>(
            Math.max(1, recommendedItemOccurrence.size()),
            new Comparator<ImmutablePair<RecommendedItem, Long>>() {

              @Override
              public int compare(
                  ImmutablePair<RecommendedItem, Long> o1,
                  ImmutablePair<RecommendedItem, Long> o2) {
                // Most frequently recommended first.
                int result = o2.getRight().compareTo(o1.getRight());
                if (result == 0) {
                  // Same frequency: the higher combined value wins.
                  result = Float.compare(o2.getLeft().getValue(), o1.getLeft().getValue());
                }
                return result;
              }
            });

    for (Map.Entry<Long, Pair<Long, Float>> entry : recommendedItemOccurrence.entrySet()) {
      RecommendedItem recommendedItem =
          new GenericRecommendedItem(entry.getKey(), entry.getValue().getRight());
      queue.add(
          new ImmutablePair<RecommendedItem, Long>(recommendedItem, entry.getValue().getLeft()));
    }

    // Check the limit before taking an element so that howMany <= 0 yields an empty list, and
    // always poll so the loop is guaranteed to terminate.
    List<RecommendedItem> recommendedItems = new ArrayList<RecommendedItem>();
    while (recommendedItems.size() < howMany && !queue.isEmpty()) {
      RecommendedItem next = queue.poll().getLeft();
      if (next != null) {
        recommendedItems.add(next);
      }
    }

    return recommendedItems;
  }