private double doItemSimilarity(long itemID1, long itemID2, FastIDSet preferring1)
      throws TasteException {
    double intersection = 0.0;
    double union = 0.0;

    for (Preference pref : getDataModel().getPreferencesForItem(itemID2)) {
      long userID = pref.getUserID();
      double weight = (double) getDataModel().getNumItems() / mUserPrefNum.get(userID);
      if (preferring1.contains(userID)) {
        intersection += weight;
        union -= weight;
      }
      union += weight;
    }
    for (LongPrimitiveIterator it_user = preferring1.iterator(); it_user.hasNext(); ) {
      long userID = (long) it_user.nextLong();
      double weight = (double) getDataModel().getNumItems() / mUserPrefNum.get(userID);
      union += weight;
    }

    if (intersection == 0) {
      return Double.NaN;
    }

    return Math.log(intersection) / Math.log(union);
  }
Ejemplo n.º 2
0
  /**
   * tests {@link UserVectorSplitterMapper} in the special case that some userIDs shall be excluded
   */
  @Test
  public void testUserVectorSplitterMapperUserExclusion() throws Exception {
    Mapper<VarLongWritable, VectorWritable, VarIntWritable, VectorOrPrefWritable>.Context context =
        EasyMock.createMock(Mapper.Context.class);

    context.write(
        EasyMock.eq(new VarIntWritable(34)), prefOfVectorOrPrefWritableMatches(123L, 0.5f));
    context.write(
        EasyMock.eq(new VarIntWritable(56)), prefOfVectorOrPrefWritableMatches(123L, 0.7f));

    EasyMock.replay(context);

    FastIDSet usersToRecommendFor = new FastIDSet();
    usersToRecommendFor.add(123L);

    UserVectorSplitterMapper mapper = new UserVectorSplitterMapper();
    setField(mapper, "maxPrefsPerUserConsidered", 10);
    setField(mapper, "usersToRecommendFor", usersToRecommendFor);

    RandomAccessSparseVector vector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
    vector.set(34, 0.5);
    vector.set(56, 0.7);

    mapper.map(new VarLongWritable(123L), new VectorWritable(vector), context);
    mapper.map(new VarLongWritable(456L), new VectorWritable(vector), context);

    EasyMock.verify(context);
  }
 private FastIDSet toUserFastIDSet(PreferenceArray array) {
   FastIDSet fastIDSet = new FastIDSet();
   for (Preference preference : array) {
     fastIDSet.add(preference.getUserID());
   }
   return fastIDSet;
 }
Ejemplo n.º 4
0
 @Override
 public Float getPreferenceValue(long userID, long itemID) throws NoSuchUserException {
   FastIDSet itemIDs = preferenceFromUsers.get(userID);
   if (itemIDs == null) {
     throw new NoSuchUserException(userID);
   }
   if (itemIDs.contains(itemID)) {
     return 1.0f;
   }
   return null;
 }
Ejemplo n.º 5
0
 public static FastByIDMap<FastIDSet> toDataMap(FastByIDMap<PreferenceArray> data) {
   for (Map.Entry<Long, Object> entry : ((FastByIDMap<Object>) (FastByIDMap<?>) data).entrySet()) {
     PreferenceArray prefArray = (PreferenceArray) entry.getValue();
     int size = prefArray.length();
     FastIDSet itemIDs = new FastIDSet(size);
     for (int i = 0; i < size; i++) {
       itemIDs.add(prefArray.getItemID(i));
     }
     entry.setValue(itemIDs);
   }
   return (FastByIDMap<FastIDSet>) (FastByIDMap<?>) data;
 }
Ejemplo n.º 6
0
 @Override
 public PreferenceArray getPreferencesForItem(long itemID) throws NoSuchItemException {
   FastIDSet userIDs = preferenceForItems.get(itemID);
   if (userIDs == null) {
     throw new NoSuchItemException(itemID);
   }
   PreferenceArray prefArray = new BooleanItemPreferenceArray(userIDs.size());
   int i = 0;
   LongPrimitiveIterator it = userIDs.iterator();
   while (it.hasNext()) {
     prefArray.setUserID(i, it.nextLong());
     prefArray.setItemID(i, itemID);
     i++;
   }
   return prefArray;
 }
  @Test
  public void testStrategy() throws TasteException {
    FastIDSet itemIDsFromUser123 = new FastIDSet();
    itemIDsFromUser123.add(1L);

    FastIDSet itemIDsFromUser456 = new FastIDSet();
    itemIDsFromUser456.add(1L);
    itemIDsFromUser456.add(2L);

    List<Preference> prefs = new ArrayList<Preference>();
    prefs.add(new GenericPreference(123L, 1L, 1.0f));
    prefs.add(new GenericPreference(456L, 1L, 1.0f));
    PreferenceArray preferencesForItem1 = new GenericItemPreferenceArray(prefs);

    DataModel dataModel = EasyMock.createMock(DataModel.class);
    EasyMock.expect(dataModel.getPreferencesForItem(1L)).andReturn(preferencesForItem1);
    EasyMock.expect(dataModel.getItemIDsFromUser(123L)).andReturn(itemIDsFromUser123);
    EasyMock.expect(dataModel.getItemIDsFromUser(456L)).andReturn(itemIDsFromUser456);

    PreferenceArray prefArrayOfUser123 =
        new GenericUserPreferenceArray(Arrays.asList(new GenericPreference(123L, 1L, 1.0f)));

    CandidateItemsStrategy strategy = new PreferredItemsNeighborhoodCandidateItemsStrategy();

    EasyMock.replay(dataModel);

    FastIDSet candidateItems = strategy.getCandidateItems(123L, prefArrayOfUser123, dataModel);
    assertEquals(1, candidateItems.size());
    assertTrue(candidateItems.contains(2L));

    EasyMock.verify(dataModel);
  }
  @Override
  public List<RecommendedItem> recommend(
      long userID, int howMany, IDRescorer rescorer, boolean includeKnownItems)
      throws TasteException {
    Preconditions.checkArgument(howMany >= 1, "howMany must be at least 1");
    log.debug("Recommending items for user ID '{}'", userID);

    PreferenceArray preferencesFromUser = getDataModel().getPreferencesFromUser(userID);
    FastIDSet possibleItemIDs = getAllOtherItems(userID, preferencesFromUser, includeKnownItems);

    TopItems.Estimator<Long> estimator = new Estimator(userID);

    List<RecommendedItem> topItems =
        TopItems.getTopItems(howMany, possibleItemIDs.iterator(), rescorer, estimator);

    log.debug("Recommendations are: {}", topItems);
    return topItems;
  }
 @Override
 protected void reduce(Vertex vertex, Iterable<Vertex> vertices, Context ctx)
     throws IOException, InterruptedException {
   int howmany = 0;
   FastIDSet bufferedVertexIDs = new FastIDSet();
   for (Vertex firstVertexOfMissingEdge : vertices) {
     LongPrimitiveIterator bufferedVertexIdsIterator = bufferedVertexIDs.iterator();
     while (bufferedVertexIdsIterator.hasNext()) {
       Vertex secondVertexOfMissingEdge = new Vertex(bufferedVertexIdsIterator.nextLong());
       UndirectedEdge missingEdge =
           new UndirectedEdge(firstVertexOfMissingEdge, secondVertexOfMissingEdge);
       JoinableUndirectedEdge key = new JoinableUndirectedEdge(missingEdge, false);
       VertexOrMarker value = new VertexOrMarker(vertex);
       log.trace("{} -> {}", key, value);
       howmany++;
       ctx.write(key, value);
     }
     bufferedVertexIDs.add(firstVertexOfMissingEdge.getId());
   }
   log.debug("{} triads on vertex {}", howmany, vertex);
 }
  @Override
  public double userSimilarity(long userID1, long userID2) throws TasteException {

    FastIDSet prefs1 = dataModel.getItemIDsFromUser(userID1);
    FastIDSet prefs2 = dataModel.getItemIDsFromUser(userID2);

    int prefs1Size = prefs1.size();
    int prefs2Size = prefs2.size();
    int intersectionSize =
        prefs1Size < prefs2Size ? prefs2.intersectionSize(prefs1) : prefs1.intersectionSize(prefs2);
    if (intersectionSize == 0) {
      return Double.NaN;
    }

    // int numItems = dataModel.getNumItems();
    // int numItems = Math.max(prefs1Size, prefs2Size);

    // int numItems = prefs1Size;

    //    double distance1 =  (double) (prefs1Size - intersectionSize)/(double) 2;
    //    double distance2 =  (double) (prefs2Size - intersectionSize)/(double) 2;
    //    double distance = (distance1 + distance2);

    double distance = (double) (prefs1Size - intersectionSize);

    double similarity = 1.0 / (1.0 + distance);
    ////// System.out.println( prefs1Size  + ", " + prefs2Size + ", " + intersectionSize + ", " +
    // similarity);
    return similarity;
  }
Ejemplo n.º 11
0
 @Override
 public int getNumUsersWithPreferenceFor(long itemID1, long itemID2) {
   FastIDSet userIDs1 = preferenceForItems.get(itemID1);
   if (userIDs1 == null) {
     return 0;
   }
   FastIDSet userIDs2 = preferenceForItems.get(itemID2);
   if (userIDs2 == null) {
     return 0;
   }
   return userIDs1.size() < userIDs2.size()
       ? userIDs2.intersectionSize(userIDs1)
       : userIDs1.intersectionSize(userIDs2);
 }
Ejemplo n.º 12
0
  @Override
  public double userSimilarity(long userID1, long userID2) throws TasteException {

    FastIDSet prefs1 = dataModel.getItemIDsFromUser(userID1);
    FastIDSet prefs2 = dataModel.getItemIDsFromUser(userID2);

    int prefs1Size = prefs1.size();
    int prefs2Size = prefs2.size();
    int intersectionSize =
        prefs1Size < prefs2Size ? prefs2.intersectionSize(prefs1) : prefs1.intersectionSize(prefs2);
    if (intersectionSize == 0) {
      return Double.NaN;
    }
    int numItems = dataModel.getNumItems();
    double logLikelihood =
        twoLogLambda(
            intersectionSize, prefs1Size - intersectionSize, prefs2Size, numItems - prefs2Size);
    return 1.0 - 1.0 / (1.0 + logLikelihood);
  }
Ejemplo n.º 13
0
  /**
   * Creates a new {@link GenericDataModel} from the given users (and their preferences). This
   * {@link DataModel} retains all this information in memory and is effectively immutable.
   *
   * @param userData users to include
   * @param timestamps optionally, provided timestamps of preferences as milliseconds since the
   *     epoch. User IDs are mapped to maps of item IDs to Long timestamps.
   */
  public GenericBooleanPrefDataModel(
      FastByIDMap<FastIDSet> userData, FastByIDMap<FastByIDMap<Long>> timestamps) {
    Preconditions.checkArgument(userData != null, "userData is null");

    this.preferenceFromUsers = userData;
    this.preferenceForItems = new FastByIDMap<>();
    FastIDSet itemIDSet = new FastIDSet();
    for (Map.Entry<Long, FastIDSet> entry : preferenceFromUsers.entrySet()) {
      long userID = entry.getKey();
      FastIDSet itemIDs = entry.getValue();
      itemIDSet.addAll(itemIDs);
      LongPrimitiveIterator it = itemIDs.iterator();
      while (it.hasNext()) {
        long itemID = it.nextLong();
        FastIDSet userIDs = preferenceForItems.get(itemID);
        if (userIDs == null) {
          userIDs = new FastIDSet(2);
          preferenceForItems.put(itemID, userIDs);
        }
        userIDs.add(userID);
      }
    }

    this.itemIDs = itemIDSet.toArray();
    itemIDSet = null; // Might help GC -- this is big
    Arrays.sort(itemIDs);

    this.userIDs = new long[userData.size()];
    int i = 0;
    LongPrimitiveIterator it = userData.keySetIterator();
    while (it.hasNext()) {
      userIDs[i++] = it.next();
    }
    Arrays.sort(userIDs);

    this.timestamps = timestamps;
  }
  @Override
  public double userSimilarity(long userID1, long userID2) throws TasteException {

    DataModel dataModel = getDataModel();

    FastIDSet xPrefs = dataModel.getItemIDsFromUser(userID1);
    FastIDSet yPrefs = dataModel.getItemIDsFromUser(userID2);

    int xPrefsSize = xPrefs.size();
    int yPrefsSize = yPrefs.size();
    if (xPrefsSize == 0 && yPrefsSize == 0) {
      return Double.NaN;
    }
    if (xPrefsSize == 0 || yPrefsSize == 0) {
      return 0.0;
    }

    double intersection = 0.0;
    double union = 0.0;

    for (LongPrimitiveIterator it_item = xPrefs.iterator(); it_item.hasNext(); ) {
      long itemID = (long) it_item.nextLong();
      double weight = (double) getDataModel().getNumUsers() / mItemPrefNum.get(itemID);
      if (yPrefs.contains(itemID)) {
        intersection += weight;
        union -= weight;
      }
      union += weight;
    }
    for (LongPrimitiveIterator it_item = yPrefs.iterator(); it_item.hasNext(); ) {
      long itemID = (long) it_item.nextLong();
      double weight = (double) getDataModel().getNumUsers() / mItemPrefNum.get(itemID);
      union += weight;
    }

    return Math.log(intersection) / Math.log(union);
  }
Ejemplo n.º 15
0
 @Override
 public int getNumUsersWithPreferenceFor(long itemID) {
   FastIDSet userIDs1 = preferenceForItems.get(itemID);
   return userIDs1 == null ? 0 : userIDs1.size();
 }