/**
   * Computes a weighted, log-scaled overlap between the users of two items.
   *
   * <p>Every user contributes a weight of {@code numItems / mUserPrefNum.get(userID)}. The
   * "intersection" is the total weight of users found both in {@code preferring1} and in the
   * preferences for {@code itemID2}; the "union" is the total weight of users found in either.
   *
   * @param itemID1 ID of the first item; not read by this method, its users arrive through
   *     {@code preferring1}
   * @param itemID2 ID of the second item, whose preferences are fetched from the data model
   * @param preferring1 IDs of the users associated with the first item
   * @return {@code log(intersection) / log(union)}, or {@link Double#NaN} when the intersection
   *     weight is zero
   * @throws TasteException if the data model cannot be queried
   */
  private double doItemSimilarity(long itemID1, long itemID2, FastIDSet preferring1)
      throws TasteException {
    // Loop-invariant numerator of every user weight.
    double numItems = getDataModel().getNumItems();

    double intersection = 0.0;
    double union = 0.0;

    // Users of item 2: shared users count toward the intersection, the others go straight into
    // the union. Shared users reach the union through the second loop, so no subtract/add
    // round trip (and its rounding error) is needed here.
    for (Preference pref : getDataModel().getPreferencesForItem(itemID2)) {
      long userID = pref.getUserID();
      double weight = numItems / mUserPrefNum.get(userID);
      if (preferring1.contains(userID)) {
        intersection += weight;
      } else {
        union += weight;
      }
    }

    // Every user of item 1 (shared or not) belongs to the union exactly once.
    for (LongPrimitiveIterator userIterator = preferring1.iterator(); userIterator.hasNext(); ) {
      long userID = userIterator.nextLong();
      union += numItems / mUserPrefNum.get(userID);
    }

    if (intersection == 0) {
      // log(0) is undefined: report "no similarity information" instead.
      return Double.NaN;
    }

    return Math.log(intersection) / Math.log(union);
  }
예제 #2
0
 /**
  * Builds a map from each user ID known to the data model to the set of item IDs the model
  * reports for that user.
  *
  * @param dataModel model to read the user IDs and their item IDs from
  * @return a {@link FastByIDMap} mapping user IDs to the {@link FastIDSet} returned by
  *     {@code getItemIDsFromUser} for that user
  * @throws TasteException if the data model cannot be queried
  */
 public static FastByIDMap<FastIDSet> toDataMap(DataModel dataModel) throws TasteException {
   // Pre-size the map with the number of users the model reports.
   FastByIDMap<FastIDSet> itemsByUser = new FastByIDMap<>(dataModel.getNumUsers());
   for (LongPrimitiveIterator users = dataModel.getUserIDs(); users.hasNext(); ) {
     long currentUser = users.nextLong();
     FastIDSet itemsOfUser = dataModel.getItemIDsFromUser(currentUser);
     itemsByUser.put(currentUser, itemsOfUser);
   }
   return itemsByUser;
 }
 /**
  * Copies the keys of the given map into an array, in the order the key iterator yields them.
  *
  * @param input map whose keys are collected
  * @return array holding every key of {@code input}
  * @throws IllegalStateException (via {@code Preconditions.checkState}) if the iterator yields
  *     fewer keys than {@code input.size()} reported
  */
 private static long[] idsInOrder(FastByIDMap<?> input) {
   int expected = input.size();
   long[] ids = new long[expected];
   int filled = 0;
   for (LongPrimitiveIterator keys = input.keySetIterator(); keys.hasNext(); filled++) {
     ids[filled] = keys.nextLong();
   }
   // Sanity check: the iterator must have produced exactly size() keys.
   Preconditions.checkState(expected == filled);
   return ids;
 }
예제 #4
0
 /**
  * Returns one boolean preference per user recorded for the given item.
  *
  * @param itemID item whose preferences are requested
  * @return a {@link BooleanItemPreferenceArray} with one entry (user ID, item ID) per user
  * @throws NoSuchItemException if no users are recorded for {@code itemID}
  */
 @Override
 public PreferenceArray getPreferencesForItem(long itemID) throws NoSuchItemException {
   FastIDSet usersOfItem = preferenceForItems.get(itemID);
   if (usersOfItem == null) {
     throw new NoSuchItemException(itemID);
   }

   PreferenceArray result = new BooleanItemPreferenceArray(usersOfItem.size());
   LongPrimitiveIterator users = usersOfItem.iterator();
   for (int slot = 0; users.hasNext(); slot++) {
     result.setUserID(slot, users.nextLong());
     result.setItemID(slot, itemID);
   }
   return result;
 }
예제 #5
0
  /**
   * Loads {@code datafile/item.csv}, builds a user-based recommender on Euclidean-distance
   * similarity, and prints one line of recommendations for every user in the model.
   *
   * @param args ignored
   * @throws IOException if the data file cannot be read
   * @throws TasteException if the recommender fails
   */
  public static void main(String[] args) throws IOException, TasteException {
    DataModel model = new FileDataModel(new File("datafile/item.csv"));
    UserSimilarity similarity = new EuclideanDistanceSimilarity(model);
    NearestNUserNeighborhood neighborhood =
        new NearestNUserNeighborhood(NEIGHBORHOOD_NUM, similarity, model);
    Recommender recommender = new GenericUserBasedRecommender(model, neighborhood, similarity);

    for (LongPrimitiveIterator users = model.getUserIDs(); users.hasNext(); ) {
      long uid = users.nextLong();
      List<RecommendedItem> recommended = recommender.recommend(uid, RECOMMENDER_NUM);

      // Output format per user: uid:<id>(<item>,<score>)(<item>,<score>)...
      System.out.printf("uid:%s", uid);
      for (RecommendedItem item : recommended) {
        System.out.printf("(%s,%f)", item.getItemID(), item.getValue());
      }
      System.out.println();
    }
  }
  /**
   * Filters recommendations by user gender.
   *
   * <p>Collects the IDs of all books associated with users of the given gender (as listed in
   * {@code datafile/book/user.csv}), wraps them in a {@link FilterRescorer}, and shows the
   * recommendations produced for {@code uid} with that rescorer applied.
   *
   * @param uid user to recommend for
   * @param recommenderBuilder builds the recommender from {@code dataModel}
   * @param dataModel model holding the user/book preferences
   * @param gender gender value passed to {@code getByGender} to select users
   * @throws TasteException if the data model or recommender fails
   * @throws IOException if the user file cannot be read
   */
  public static void filterGender(
      long uid, RecommenderBuilder recommenderBuilder, DataModel dataModel, String gender)
      throws TasteException, IOException {
    Set<Long> userids = getByGender("datafile/book/user.csv", gender);

    // Collect the books rated by users of the requested gender.
    Set<Long> bookids = new HashSet<Long>();
    for (long genderUserId : userids) {
      LongPrimitiveIterator iter = dataModel.getItemIDsFromUser(genderUserId).iterator();
      while (iter.hasNext()) {
        // nextLong() avoids the Long box/unbox round trip of next().
        bookids.add(iter.nextLong());
      }
    }

    // NOTE(review): whether FilterRescorer keeps or excludes these books is not visible here.
    IDRescorer rescorer = new FilterRescorer(bookids);
    List<RecommendedItem> list =
        recommenderBuilder.buildRecommender(dataModel).recommend(uid, RECOMMENDER_NUM, rescorer);
    RecommendFactory.showItems(uid, list, false);
  }
 /**
  * Pairs each incoming vertex with every distinct vertex ID seen before it and writes one
  * record per pair: the key is a {@link JoinableUndirectedEdge} over the pair (flag
  * {@code false}), the value is a {@link VertexOrMarker} wrapping the reducer's key vertex.
  *
  * @param vertex the key vertex shared by all values
  * @param vertices the vertices grouped under {@code vertex}
  * @param ctx context the records are written to
  * @throws IOException if writing to the context fails
  * @throws InterruptedException if writing to the context is interrupted
  */
 @Override
 protected void reduce(Vertex vertex, Iterable<Vertex> vertices, Context ctx)
     throws IOException, InterruptedException {
   int emitted = 0;
   FastIDSet seenVertexIDs = new FastIDSet();

   for (Vertex current : vertices) {
     // Combine the current vertex with all IDs buffered from earlier iterations.
     for (LongPrimitiveIterator earlier = seenVertexIDs.iterator(); earlier.hasNext(); ) {
       Vertex partner = new Vertex(earlier.nextLong());
       JoinableUndirectedEdge key =
           new JoinableUndirectedEdge(new UndirectedEdge(current, partner), false);
       VertexOrMarker value = new VertexOrMarker(vertex);
       log.trace("{} -> {}", key, value);
       emitted++;
       ctx.write(key, value);
     }
     // Only now buffer the current ID, so a vertex is never paired with itself.
     seenVertexIDs.add(current.getId());
   }

   log.debug("{} triads on vertex {}", emitted, vertex);
 }
 /**
  * Feeds every preference value in the data model into the per-item, per-user and overall
  * average accumulators while holding the write lock of {@code buildAveragesLock}.
  *
  * @throws TasteException if the data model cannot be queried
  */
 private void buildAverageDiffs() throws TasteException {
   // Acquire the lock BEFORE entering try: if lock() itself failed inside the try block, the
   // finally clause would call unlock() on a lock this thread does not hold.
   buildAveragesLock.writeLock().lock();
   try {
     DataModel dataModel = getDataModel();
     LongPrimitiveIterator it = dataModel.getUserIDs();
     while (it.hasNext()) {
       long userID = it.nextLong();
       PreferenceArray prefs = dataModel.getPreferencesFromUser(userID);
       int size = prefs.length();
       for (int i = 0; i < size; i++) {
         long itemID = prefs.getItemID(i);
         float value = prefs.getValue(i);
         // Each rating contributes to its item's, its user's and the global accumulator.
         addDatumAndCreateIfNeeded(itemID, value, itemAverages);
         addDatumAndCreateIfNeeded(userID, value, userAverages);
         overallAveragePrefValue.addDatum(value);
       }
     }
   } finally {
     buildAveragesLock.writeLock().unlock();
   }
 }
  /**
   * Loads the business data file, and for every item prints its most similar items (by Tanimoto
   * coefficient) as CSV lines of the form {@code itemId,similarItemId,similarity} on standard
   * output. Failures are reported on standard error so they do not corrupt the CSV stream.
   *
   * @param args ignored
   */
  public static void main(String[] args) {

    try {

      // Load the data.
      DataModel dm =
          new FileDataModel(
              new File(
                  "C:\\Users\\bryce\\Course Work\\3. Full Summer\\Big Data\\Final Project\\Yelp\\FINAL CODE\\Mahout\\data\\busirec_new.csv"));

      // Item-to-item similarity used to relate businesses.
      // (LogLikelihoodSimilarity is an alternative that was tried here.)
      TanimotoCoefficientSimilarity sim = new TanimotoCoefficientSimilarity(dm);

      // Recommender that answers "most similar items" queries.
      GenericItemBasedRecommender recommender = new GenericItemBasedRecommender(dm, sim);

      // Loop through every business.
      for (LongPrimitiveIterator items = dm.getItemIDs(); items.hasNext(); ) {
        long itemId = items.nextLong();

        // For each business, ask for up to 2 similar businesses.
        List<RecommendedItem> recommendations = recommender.mostSimilarItems(itemId, 2);

        for (RecommendedItem recommendation : recommendations) {
          System.out.println(
              itemId + "," + recommendation.getItemID() + "," + recommendation.getValue());
        }
      }
    } catch (IOException | TasteException e) {
      // Report on stderr: stdout carries the CSV result and must stay machine-readable.
      System.err.println(e);
    }
  }
예제 #10
0
  /**
   * Loads ratings and a movie-ID-to-title map, builds a cached user-based recommender on Pearson
   * correlation with a 3-nearest-user neighborhood, and prints up to 10 recommendations per user.
   *
   * <p>The movie file is expected to hold one {@code id|title} record per line; only the first
   * two {@code |}-separated fields are read.
   *
   * @param args ignored
   * @throws FileNotFoundException if the movie file does not exist
   * @throws TasteException if the recommender fails
   * @throws IOException if the ratings file cannot be read
   * @throws OptionException declared for callers; not thrown by the visible code
   */
  public static void main(String[] args)
      throws FileNotFoundException, TasteException, IOException, OptionException {
    DataModel model = new FileDataModel(new File("datasets/ratingsForMahout.dat"));

    File movieMapFile = new File("datasets/moviesForMahout.dat");
    HashMap<Long, String> movieMap = new HashMap<Long, String>();
    // try-with-resources closes the scanner even when a malformed line makes parsing throw.
    try (Scanner scan = new Scanner(movieMapFile)) {
      while (scan.hasNextLine()) {
        String[] line = scan.nextLine().split("\\|");
        movieMap.put(Long.parseLong(line[0]), line[1]);
      }
    }

    UserSimilarity userSimilarity = new PearsonCorrelationSimilarity(model);
    UserNeighborhood neighborhood = new NearestNUserNeighborhood(3, userSimilarity, model);
    Recommender recommender = new GenericUserBasedRecommender(model, neighborhood, userSimilarity);
    Recommender cachingRecommender = new CachingRecommender(recommender);

    for (LongPrimitiveIterator it = model.getUserIDs(); it.hasNext(); ) {
      long userId = it.nextLong();
      List<RecommendedItem> recommendations = cachingRecommender.recommend(userId, 10);
      if (recommendations.isEmpty()) {
        System.out.println("User " + userId + ": no recommendations");
      }
      for (RecommendedItem recommendedItem : recommendations) {
        System.out.println(
            "User "
                + userId
                + ": "
                + movieMap.get(recommendedItem.getItemID())
                + "; value="
                + recommendedItem.getValue());
      }
    }
  }
 /**
  * Rebuilds the two count caches from the data model: {@code mUserPrefNum} (user ID to number
  * of preferences of that user) and {@code mItemPrefNum} (item ID to number of users with a
  * preference for that item).
  *
  * @throws TasteException if the data model cannot be queried
  */
 private void refreshUserAndItemPrefNum() throws TasteException {
   // Start from fresh maps so stale IDs do not survive a refresh.
   mUserPrefNum = new FastByIDMap<Integer>();
   mItemPrefNum = new FastByIDMap<Integer>();

   for (LongPrimitiveIterator users = getDataModel().getUserIDs(); users.hasNext(); ) {
     long userID = users.nextLong();
     int prefCount = getDataModel().getPreferencesFromUser(userID).length();
     mUserPrefNum.put(userID, prefCount);
   }

   for (LongPrimitiveIterator items = getDataModel().getItemIDs(); items.hasNext(); ) {
     long itemID = items.nextLong();
     int userCount = getDataModel().getNumUsersWithPreferenceFor(itemID);
     mItemPrefNum.put(itemID, userCount);
   }
 }
예제 #12
0
  /**
   * Creates a new {@link GenericBooleanPrefDataModel} from the given users and their associated
   * items. Besides keeping the user-to-items map, the constructor builds the inverse
   * item-to-users index and sorted arrays of all item IDs and user IDs.
   *
   * <p>{@code userData} and {@code timestamps} are stored by reference, not copied.
   *
   * @param userData users to include, mapped to the IDs of their items; must not be null
   * @param timestamps optionally, provided timestamps of preferences as milliseconds since the
   *     epoch. User IDs are mapped to maps of item IDs to Long timestamps.
   * @throws IllegalArgumentException if {@code userData} is null
   */
  public GenericBooleanPrefDataModel(
      FastByIDMap<FastIDSet> userData, FastByIDMap<FastByIDMap<Long>> timestamps) {
    Preconditions.checkArgument(userData != null, "userData is null");

    this.preferenceFromUsers = userData;
    this.preferenceForItems = new FastByIDMap<>();
    FastIDSet itemIDSet = new FastIDSet();
    for (Map.Entry<Long, FastIDSet> entry : preferenceFromUsers.entrySet()) {
      long userID = entry.getKey();
      FastIDSet itemIDs = entry.getValue();
      itemIDSet.addAll(itemIDs);
      // Invert the mapping: register this user under each of its items.
      LongPrimitiveIterator it = itemIDs.iterator();
      while (it.hasNext()) {
        long itemID = it.nextLong();
        FastIDSet userIDs = preferenceForItems.get(itemID);
        if (userIDs == null) {
          userIDs = new FastIDSet(2);
          preferenceForItems.put(itemID, userIDs);
        }
        userIDs.add(userID);
      }
    }

    this.itemIDs = itemIDSet.toArray();
    itemIDSet = null; // Might help GC -- this is big
    Arrays.sort(itemIDs);

    this.userIDs = new long[userData.size()];
    int i = 0;
    LongPrimitiveIterator it = userData.keySetIterator();
    while (it.hasNext()) {
      // nextLong() reads the primitive directly instead of boxing through next().
      userIDs[i++] = it.nextLong();
    }
    Arrays.sort(userIDs);

    this.timestamps = timestamps;
  }
  /**
   * Computes a weighted, log-scaled overlap between the item sets of two users.
   *
   * <p>Every item contributes a weight of {@code numUsers / mItemPrefNum.get(itemID)}. The
   * "intersection" is the total weight of items both users have; the "union" is the total weight
   * of items either user has.
   *
   * @param userID1 first user
   * @param userID2 second user
   * @return {@code log(intersection) / log(union)}; {@code 0.0} when exactly one user has no
   *     items; {@link Double#NaN} when both users have no items or when they share no item
   * @throws TasteException if the data model cannot be queried
   */
  @Override
  public double userSimilarity(long userID1, long userID2) throws TasteException {

    DataModel dataModel = getDataModel();

    FastIDSet xPrefs = dataModel.getItemIDsFromUser(userID1);
    FastIDSet yPrefs = dataModel.getItemIDsFromUser(userID2);

    int xPrefsSize = xPrefs.size();
    int yPrefsSize = yPrefs.size();
    if (xPrefsSize == 0 && yPrefsSize == 0) {
      return Double.NaN;
    }
    if (xPrefsSize == 0 || yPrefsSize == 0) {
      return 0.0;
    }

    // Loop-invariant numerator of every item weight.
    double numUsers = dataModel.getNumUsers();

    double intersection = 0.0;
    double union = 0.0;

    // Items of user 1: shared items count toward the intersection, the others go straight into
    // the union. Shared items reach the union through the second loop, so no subtract/add
    // round trip (and its rounding error) is needed here.
    for (LongPrimitiveIterator itemIterator = xPrefs.iterator(); itemIterator.hasNext(); ) {
      long itemID = itemIterator.nextLong();
      double weight = numUsers / mItemPrefNum.get(itemID);
      if (yPrefs.contains(itemID)) {
        intersection += weight;
      } else {
        union += weight;
      }
    }

    // Every item of user 2 (shared or not) belongs to the union exactly once.
    for (LongPrimitiveIterator itemIterator = yPrefs.iterator(); itemIterator.hasNext(); ) {
      long itemID = itemIterator.nextLong();
      union += numUsers / mItemPrefNum.get(itemID);
    }

    if (intersection == 0) {
      // Without this guard log(0) = -Infinity would leak out as the similarity value.
      return Double.NaN;
    }

    return Math.log(intersection) / Math.log(union);
  }
  /**
   * Evaluates a recommender on a random split of the given data model.
   *
   * <p>Each user is included with probability {@code evaluationPercentage}; an included user's
   * preferences are divided into training and test sets by {@code splitOneUsersPrefs}. A
   * recommender is built on the training model and scored against the test preferences by
   * {@code getEvaluation}. The per-run rating counters of this evaluator are reset first.
   *
   * @param recommenderBuilder builds the recommender under test; must not be null
   * @param dataModelBuilder builds the training model from the training preferences; if null, a
   *     {@link ContextualDataModel} is used when {@code dataModel} is one, otherwise a
   *     {@link GenericDataModel}
   * @param dataModel source of users and preferences; must not be null
   * @param trainingPercentage value in [0, 1] handed to {@code splitOneUsersPrefs}
   * @param evaluationPercentage probability in [0, 1] with which each user is included
   * @return the value computed by {@code getEvaluation} for the test preferences
   * @throws TasteException if the data model or recommender fails
   */
  @Override
  public double evaluate(
      RecommenderBuilder recommenderBuilder,
      DataModelBuilder dataModelBuilder,
      DataModel dataModel,
      double trainingPercentage,
      double evaluationPercentage)
      throws TasteException {
    Preconditions.checkNotNull(recommenderBuilder);
    Preconditions.checkNotNull(dataModel);
    Preconditions.checkArgument(
        trainingPercentage >= 0.0 && trainingPercentage <= 1.0,
        "Invalid trainingPercentage: "
            + trainingPercentage
            + ". Must be: 0.0 <= trainingPercentage <= 1.0");
    Preconditions.checkArgument(
        evaluationPercentage >= 0.0 && evaluationPercentage <= 1.0,
        "Invalid evaluationPercentage: "
            + evaluationPercentage
            + ". Must be: 0.0 <= evaluationPercentage <= 1.0");

    log.info("Beginning evaluation using {} of {}", trainingPercentage, dataModel);

    // Both maps are sized for the expected number of sampled users.
    int numUsers = dataModel.getNumUsers();
    int expectedUsers = 1 + (int) (evaluationPercentage * numUsers);
    FastByIDMap<PreferenceArray> trainingPrefs = new FastByIDMap<PreferenceArray>(expectedUsers);
    FastByIDMap<PreferenceArray> testPrefs = new FastByIDMap<PreferenceArray>(expectedUsers);

    totalOfTrainingRatingsFromSource = 0;
    totalOfTrainingRatingsFromTargetWithContext = 0;
    totalOfTrainingRatingsFromTargetWithoutContext = 0;
    totalOfTestRatings = 0;

    LongPrimitiveIterator it = dataModel.getUserIDs();
    while (it.hasNext()) {
      long userID = it.nextLong();
      if (random.nextDouble() < evaluationPercentage) {
        splitOneUsersPrefs(trainingPercentage, trainingPrefs, testPrefs, userID, dataModel);
      }
    }

    // Build the default model only when no builder is supplied; constructing it up front would
    // index all training preferences just to throw the result away.
    DataModel trainingModel;
    if (dataModelBuilder != null) {
      trainingModel = dataModelBuilder.buildDataModel(trainingPrefs);
    } else if (dataModel instanceof ContextualDataModel) {
      trainingModel = new ContextualDataModel(trainingPrefs);
    } else {
      trainingModel = new GenericDataModel(trainingPrefs);
    }

    Recommender recommender = recommenderBuilder.buildRecommender(trainingModel);

    double result = getEvaluation(testPrefs, recommender);
    log.info("Evaluation result: {}", result);
    return result;
  }