private static List<int[]> getPopularResources(BookmarkReader reader, int count, int trainSize) { List<int[]> resources = new ArrayList<int[]>(); Map<Integer, Integer> countMap = new LinkedHashMap<Integer, Integer>(); for (int i = 0; i < reader.getResources().size(); i++) { countMap.put(i, reader.getResourceCounts().get(i)); } Map<Integer, Integer> sortedCountMap = new TreeMap<Integer, Integer>(new IntMapComparator(countMap)); sortedCountMap.putAll(countMap); for (int userID : reader.getUniqueUserListFromTestSet(trainSize)) { List<Integer> userResources = UserData.getResourcesFromUser(reader.getUserLines().subList(0, trainSize), userID); // System.out.println(userResources.size()); List<Integer> resIDs = new ArrayList<Integer>(); int i = 0; for (Integer key : sortedCountMap.keySet()) { if (i < count) { if (!userResources.contains(key)) { resIDs.add(key); i++; } } else { break; } } resources.add(Ints.toArray(resIDs)); } return resources; }
/**
 * Collects, for every user returned by
 * {@code reader.getUniqueUserListFromTestSet(trainSize)}, up to {@code count}
 * resource IDs that the user does not already have in the first
 * {@code trainSize} user lines.
 *
 * <p>Candidates are taken in the order produced by
 * {@code Utilities.getRandomIndices(0, resCount - 1)}, which is requested anew
 * for each user. NOTE(review): that helper is not visible here; presumably it
 * returns the indices of the given range in random order - confirm.
 *
 * @param reader    source of resources, user lines and test users
 * @param count     maximum number of resource IDs collected per user
 * @param trainSize number of leading user lines treated as training data
 * @return one {@code int[]} per test user, in the iteration order of the test-user list;
 *         each array holds at most {@code count} resource IDs
 */
private static List<int[]> getRandomResources(BookmarkReader reader, int count, int trainSize) {
    List<int[]> resources = new ArrayList<int[]>();
    int resCount = reader.getResources().size();

    for (int userID : reader.getUniqueUserListFromTestSet(trainSize)) {
        List<Integer> userResources =
                UserData.getResourcesFromUser(reader.getUserLines().subList(0, trainSize), userID);
        // Hash-based membership test instead of a linear List.contains scan per candidate.
        // Fully qualified so the block does not depend on imports that are not visible here.
        java.util.Set<Integer> knownResources = new java.util.HashSet<Integer>(userResources);

        List<Integer> resIDs = new ArrayList<Integer>();
        for (Integer res : Utilities.getRandomIndices(0, resCount - 1)) {
            if (resIDs.size() >= count) {
                break; // quota for this user reached
            }
            if (!knownResources.contains(res)) {
                resIDs.add(res);
            }
        }
        resources.add(Ints.toArray(resIDs));
    }
    return resources;
}
/**
 * Prints summary statistics of a dataset to standard output and, optionally,
 * writes per-user statistics to a text file.
 *
 * <p>If {@code TOPIC_NAME} is non-null, {@code "_" + TOPIC_NAME} is appended to
 * {@code dataset} before it is read with a {@code new BookmarkReader(0, false)}.
 * The method then prints the number of posts, users, resources, tags,
 * tag assignments and topics, followed by several averages derived from them
 * (tag assignments per post, posts per user, posts per resource).
 *
 * <p>When {@code writeAll} is true it calls {@code getTrainTestSize(dataset)},
 * then writes one line per test user to
 * {@code ./data/metrics/<dataset>_userStats.txt} containing the user ID, the
 * value of {@code reader.getUserCounts().get(userID)}, the number of topics of
 * the user and the value of {@code Bookmark.getBookmarkDiversity(...)} for the
 * user's bookmarks, and prints further averages. An {@code IOException} is
 * caught and only its message is printed.
 *
 * <p>NOTE(review): {@code getTrainTestSize} presumably sets {@code TRAIN_SIZE},
 * which is used right after the call - confirm.
 * <p>NOTE(review): the writer is flushed and closed only at the end of the
 * {@code try} body; if a write throws, it is never closed. Consider
 * try-with-resources or a {@code finally} block.
 * <p>NOTE(review): in this view of the file, some spans between string literals
 * are replaced by runs of asterisks; the statements hidden there (including the
 * definitions of {@code avgBookmarksPerResource} and {@code avgTopicsPerResource})
 * are not visible and are therefore not described here.
 *
 * @param dataset  name of the dataset to read; also used in the output file name
 * @param writeAll whether to additionally compute and write the per-user statistics file
 */
private static void getStatistics(String dataset, boolean writeAll) { if (TOPIC_NAME != null) { dataset += ("_" + TOPIC_NAME); } BookmarkReader reader = new BookmarkReader(0, false); reader.readFile(dataset); int bookmarks = reader.getBookmarks().size(); System.out.println("Posts: " + bookmarks); int users = reader.getUsers().size(); System.out.println("Users: " + users); int resources = reader.getResources().size(); System.out.println("Resources: " + resources); int tags = reader.getTags().size(); System.out.println("Tags: " + tags); int tagAssignments = reader.getTagAssignmentsCount(); System.out.println("Tag-Assignments: " + tagAssignments); int categories = reader.getCategories().size(); System.out.println("Topics: " + categories); double avgTASPerPost = (double) tagAssignments / bookmarks; System.out.println("Avg. TAS per post: " + avgTASPerPost); double avgBookmarksPerUser = (double) bookmarks / users; System.out.println("Avg. resources/posts per user: "******"Avg. users/posts per resource: " + avgBookmarksPerResource); if (writeAll) { try { getTrainTestSize(dataset); FileWriter userWriter = new FileWriter(new File("./data/metrics/" + dataset + "_userStats.txt")); BufferedWriter userBW = new BufferedWriter(userWriter); userBW.write("UserID| NoOfResources| NoOfTopics| Topic-Similarity\n"); List<Bookmark> trainList = reader.getBookmarks().subList(0, TRAIN_SIZE); List<Integer> testUsers = reader.getUniqueUserListFromTestSet(TRAIN_SIZE); System.out.println(); double avgTopicsPerUser = 0.0; double avgTopicDiversityPerUser = 0.0; List<Map<Integer, Double>> userTopics = Utilities.getRelativeTopicMaps(trainList, false); List<List<Bookmark>> userBookmarks = Utilities.getBookmarks(trainList, false); for (int userID : testUsers) { Map<Integer, Double> topicsOfUser = userTopics.get(userID); double topicDiversityOfUser = Bookmark.getBookmarkDiversity(userBookmarks.get(userID)); userBW.write( userID + "| " + reader.getUserCounts().get(userID) + "| " + 
topicsOfUser.keySet().size() + "| " + topicDiversityOfUser + "\n"); avgTopicsPerUser += topicsOfUser.keySet().size(); avgTopicDiversityPerUser += topicDiversityOfUser; } System.out.println("Avg. topics per user: "******"Avg. topic-similarity per user: "******"Avg. topics per resource: " + avgTopicsPerResource); userBW.flush(); userBW.close(); } catch (IOException e) { System.out.println(e.getMessage()); } } System.out.println(); }