/**
 * Builds one candidate-resource array per test-set user, taken from the globally ranked
 * resource list and skipping resources the user already has in the training lines.
 *
 * <p>Resource indices {@code 0 .. reader.getResources().size() - 1} are mapped to
 * {@code reader.getResourceCounts().get(index)} and placed in a {@link TreeMap} whose key order
 * is defined by {@code IntMapComparator} (presumably descending count -- confirm against that
 * class). For each user, keys are walked in that order until {@code count} unseen resources
 * have been collected or the keys run out.
 *
 * @param reader source of resources, resource counts, user lines and test-set users
 * @param count maximum number of resource ids to collect per user
 * @param trainSize number of leading user lines treated as training data; also passed to
 *     {@code reader.getUniqueUserListFromTestSet}
 * @return one {@code int[]} per user, in the iteration order of
 *     {@code reader.getUniqueUserListFromTestSet(trainSize)}
 */
private static List<int[]> getPopularResources(BookmarkReader reader, int count, int trainSize) {
    // Resource index -> count, inserted in index order.
    Map<Integer, Integer> countMap = new LinkedHashMap<Integer, Integer>();
    for (int i = 0; i < reader.getResources().size(); i++) {
      countMap.put(i, reader.getResourceCounts().get(i));
    }
    // Key order is delegated entirely to IntMapComparator, which looks values up in countMap.
    Map<Integer, Integer> sortedCountMap =
        new TreeMap<Integer, Integer>(new IntMapComparator(countMap));
    sortedCountMap.putAll(countMap);

    List<int[]> resources = new ArrayList<int[]>();
    for (int userID : reader.getUniqueUserListFromTestSet(trainSize)) {
      List<Integer> userResources =
          UserData.getResourcesFromUser(reader.getUserLines().subList(0, trainSize), userID);
      // Copy into a hash set once so each membership test below is constant time instead of
      // a linear scan of the user's resource list for every ranked candidate.
      java.util.Set<Integer> knownResources = new java.util.HashSet<Integer>(userResources);

      List<Integer> resIDs = new ArrayList<Integer>();
      for (Integer key : sortedCountMap.keySet()) {
        if (resIDs.size() >= count) {
          break;
        }
        if (!knownResources.contains(key)) {
          resIDs.add(key);
        }
      }
      resources.add(Ints.toArray(resIDs));
    }
    return resources;
  }
  /**
   * Builds one candidate-resource array per test-set user by walking the sequence returned from
   * {@code Utilities.getRandomIndices(0, resourceCount - 1)} and skipping resources the user
   * already has in the training lines.
   *
   * <p>A fresh index sequence is requested for every user. Collection stops once {@code count}
   * ids have been gathered or the sequence is exhausted.
   *
   * @param reader source of resources, user lines and test-set users
   * @param count maximum number of resource ids to collect per user
   * @param trainSize number of leading user lines treated as training data; also passed to
   *     {@code reader.getUniqueUserListFromTestSet}
   * @return one {@code int[]} per user, in the iteration order of
   *     {@code reader.getUniqueUserListFromTestSet(trainSize)}
   */
  private static List<int[]> getRandomResources(BookmarkReader reader, int count, int trainSize) {
    List<int[]> resources = new ArrayList<int[]>();
    int resCount = reader.getResources().size();

    for (int userID : reader.getUniqueUserListFromTestSet(trainSize)) {
      List<Integer> userResources =
          UserData.getResourcesFromUser(reader.getUserLines().subList(0, trainSize), userID);
      // Copy into a hash set once so each membership test below is constant time instead of
      // a linear scan of the user's resource list for every candidate index.
      java.util.Set<Integer> knownResources = new java.util.HashSet<Integer>(userResources);

      List<Integer> resIDs = new ArrayList<Integer>();
      // Requested inside the loop on purpose: every user gets an independent sequence.
      for (Integer res : Utilities.getRandomIndices(0, resCount - 1)) {
        if (resIDs.size() >= count) {
          break;
        }
        if (!knownResources.contains(res)) {
          resIDs.add(res);
        }
      }
      resources.add(Ints.toArray(resIDs));
    }
    return resources;
  }
  // Example #3
  // 0
  /**
   * Reads a dataset through a {@code BookmarkReader} and prints summary counts and averages to
   * standard output. When {@code writeAll} is true, it additionally writes one line of per-user
   * statistics for each test-set user to {@code ./data/metrics/<dataset>_userStats.txt}.
   *
   * <p>If {@code TOPIC_NAME} is non-null, {@code "_" + TOPIC_NAME} is appended to the dataset
   * name before it is used for reading and for the output file name.
   *
   * <p>NOTE(review): two statements in this method are corrupted in this copy of the file (see
   * the inline notes); the method cannot compile until they are restored from the original
   * source.
   *
   * @param dataset dataset name passed to {@code reader.readFile} and used in the output path
   * @param writeAll whether to also compute and write the per-user statistics file
   */
  private static void getStatistics(String dataset, boolean writeAll) {
    if (TOPIC_NAME != null) {
      dataset += ("_" + TOPIC_NAME);
    }
    BookmarkReader reader = new BookmarkReader(0, false);
    reader.readFile(dataset);

    // Raw counts, each printed as soon as it is read.
    int bookmarks = reader.getBookmarks().size();
    System.out.println("Posts: " + bookmarks);
    int users = reader.getUsers().size();
    System.out.println("Users: " + users);
    int resources = reader.getResources().size();
    System.out.println("Resources: " + resources);
    int tags = reader.getTags().size();
    System.out.println("Tags: " + tags);
    int tagAssignments = reader.getTagAssignmentsCount();
    System.out.println("Tag-Assignments: " + tagAssignments);
    int categories = reader.getCategories().size();
    System.out.println("Topics: " + categories);
    // Cast to double before dividing so the averages are not truncated by integer division.
    double avgTASPerPost = (double) tagAssignments / bookmarks;
    System.out.println("Avg. TAS per post: " + avgTASPerPost);
    double avgBookmarksPerUser = (double) bookmarks / users;
    // NOTE(review): the next line is corrupted -- the text between the two string literals was
    // replaced by "******". avgBookmarksPerResource is never declared in the visible code;
    // presumably its declaration and the end of the first println were lost here. Restore from
    // the original source.
    System.out.println("Avg. resources/posts per user: "******"Avg. users/posts per resource: " + avgBookmarksPerResource);

    if (writeAll) {
      try {
        // Presumably initialises TRAIN_SIZE for the dataset -- confirm against getTrainTestSize.
        getTrainTestSize(dataset);
        FileWriter userWriter =
            new FileWriter(new File("./data/metrics/" + dataset + "_userStats.txt"));
        BufferedWriter userBW = new BufferedWriter(userWriter);
        // Header row; the columns match the fields written per user in the loop below.
        userBW.write("UserID| NoOfResources| NoOfTopics| Topic-Similarity\n");
        // Only the first TRAIN_SIZE bookmarks are used for the per-user topic statistics.
        List<Bookmark> trainList = reader.getBookmarks().subList(0, TRAIN_SIZE);
        List<Integer> testUsers = reader.getUniqueUserListFromTestSet(TRAIN_SIZE);
        System.out.println();

        // Running sums over the test users (despite the "avg" names, nothing is divided here).
        double avgTopicsPerUser = 0.0;
        double avgTopicDiversityPerUser = 0.0;
        // Both lists are indexed by user id in the loop below.
        List<Map<Integer, Double>> userTopics = Utilities.getRelativeTopicMaps(trainList, false);
        List<List<Bookmark>> userBookmarks = Utilities.getBookmarks(trainList, false);
        for (int userID : testUsers) {
          Map<Integer, Double> topicsOfUser = userTopics.get(userID);
          double topicDiversityOfUser = Bookmark.getBookmarkDiversity(userBookmarks.get(userID));
          userBW.write(
              userID
                  + "| "
                  + reader.getUserCounts().get(userID)
                  + "| "
                  + topicsOfUser.keySet().size()
                  + "| "
                  + topicDiversityOfUser
                  + "\n");
          avgTopicsPerUser += topicsOfUser.keySet().size();
          avgTopicDiversityPerUser += topicDiversityOfUser;
        }
        // NOTE(review): the next line is corrupted in the same way -- code between the string
        // literals was replaced by "******". avgTopicsPerResource is never declared in the
        // visible code, so the per-resource computation that produced it is missing. Restore
        // from the original source.
        System.out.println("Avg. topics per user: "******"Avg. topic-similarity per user: "******"Avg. topics per resource: " + avgTopicsPerResource);
        userBW.flush();
        // NOTE(review): the writer is closed only on this success path; an IOException thrown
        // earlier in the try block skips this call and leaves the file handle open.
        userBW.close();
      } catch (IOException e) {
        // Only the exception message is printed; the failure is not propagated to the caller.
        System.out.println(e.getMessage());
      }
    }

    System.out.println();
  }