private static List<int[]> getPopularResources(BookmarkReader reader, int count, int trainSize) { List<int[]> resources = new ArrayList<int[]>(); Map<Integer, Integer> countMap = new LinkedHashMap<Integer, Integer>(); for (int i = 0; i < reader.getResources().size(); i++) { countMap.put(i, reader.getResourceCounts().get(i)); } Map<Integer, Integer> sortedCountMap = new TreeMap<Integer, Integer>(new IntMapComparator(countMap)); sortedCountMap.putAll(countMap); for (int userID : reader.getUniqueUserListFromTestSet(trainSize)) { List<Integer> userResources = UserData.getResourcesFromUser(reader.getUserLines().subList(0, trainSize), userID); // System.out.println(userResources.size()); List<Integer> resIDs = new ArrayList<Integer>(); int i = 0; for (Integer key : sortedCountMap.keySet()) { if (i < count) { if (!userResources.contains(key)) { resIDs.add(key); i++; } } else { break; } } resources.add(Ints.toArray(resIDs)); } return resources; }
/**
 * Runs {@code evaluate} once per known tag-recommender result prefix for the given sample.
 *
 * <p>The train/test sizes are determined first via {@code getTrainTestSize(samplePath)}; a
 * single {@code BookmarkReader} is then created with {@code TRAIN_SIZE}, loaded from
 * {@code samplePath}, and shared by all evaluations. Every evaluation is invoked with a
 * {@code null} postfix, {@code calcTags = true} and {@code tensor = false}.
 *
 * @param sampleDir  directory argument forwarded unchanged to {@code evaluate}
 * @param samplePath sample name/path used for size detection, reading and evaluation
 */
private static void evaluateAllTagRecommenderApproaches(String sampleDir, String samplePath) {
    getTrainTestSize(samplePath);
    BookmarkReader reader = new BookmarkReader(TRAIN_SIZE, false);
    reader.readFile(samplePath);

    // Evaluated strictly in this order.
    String[] approachPrefixes = {
        "apr",
        "bll_5_5",
        "bll_ac_5_5",
        "bll_c_5_5",
        "bll_c_ac_5_5",
        "cf_5",
        "fr",
        "girp",
        "girptm",
        "layers_5_5",
        "layerstagbll_5_5",
        "layerstopicbll_5_5",
        "lda_1000",
        "mp",
        "mp_r_5",
        "mp_u_5",
        "mp_ur_5",
        "rescf_5",
        "usercf_5",
        "userlayers_5_5",
        "userlayerstagbll_5_5",
        "userlayerstopicbll_5_5"
    };
    for (String approachPrefix : approachPrefixes) {
        evaluate(sampleDir, samplePath, approachPrefix, null, true, false, reader);
    }
}
public static void predictRandomResources(String filename, int trainSize) { // filename += "_res"; // TODO: do not use complete size BookmarkReader reader = new BookmarkReader(0, false); reader.readFile(filename); List<int[]> values = getRandomResources(reader, 10, trainSize); PredictionFileWriter writer = new PredictionFileWriter(reader, values); writer.writeResourcePredictionsToFile(filename + "_rand", trainSize, 0); }
/**
 * Returns, for each of the last {@code sampleSize} user lines of the reader, the tags of that
 * line as an {@code int[]}, padded with {@code -1} until it holds at least {@code limit}
 * entries. Lines that already carry {@code limit} or more tags are returned unpadded and
 * untruncated.
 *
 * @param reader     supplies the user lines
 * @param sampleSize number of trailing user lines to process (the test portion)
 * @param limit      minimum length of every returned array
 * @return one tag-ID array per processed user line, in line order
 */
private static List<int[]> getPerfectTags(BookmarkReader reader, int sampleSize, int limit) {
    List<int[]> tags = new ArrayList<int[]>();
    int trainSize = reader.getUserLines().size() - sampleSize;
    for (UserData data : reader.getUserLines().subList(trainSize, trainSize + sampleSize)) {
        // Pad a private copy: appending -1 to the list handed out by getTags() would leak
        // the padding into the UserData object (if that list is its internal state) and
        // corrupt it for every later consumer of the reader.
        List<Integer> padded = new ArrayList<Integer>(data.getTags());
        while (padded.size() < limit) {
            padded.add(-1);
        }
        tags.add(Ints.toArray(padded));
    }
    return tags;
}
public static void predictPopularTags(String filename, int trainSize, int sampleSize) { // filename += "_res"; BookmarkReader reader = new BookmarkReader(trainSize, false); reader.readFile(filename); List<int[]> values = getPopularTags(reader, sampleSize, 10); // List<int[]> values = getPerfectTags(reader, sampleSize, 10); reader.setUserLines(reader.getUserLines().subList(trainSize, reader.getUserLines().size())); PredictionFileWriter writer = new PredictionFileWriter(reader, values); writer.writeFile(filename + "_mp"); Utilities.writeStringToFile("./data/metrics/" + filename + "_mp" + "_TIME.txt", timeString); }
/**
 * Returns the first {@code size} tag IDs in the order defined by {@code IntMapComparator}
 * over the reader's tag counts (presumably most frequent first — confirm against
 * {@code IntMapComparator}).
 *
 * <p>The returned array always has length {@code size}; when fewer tag IDs are available the
 * remaining slots keep the default value {@code 0}.
 *
 * @param reader supplies the tag counts
 * @param size   length of the returned array
 * @return array of tag IDs of length {@code size}
 */
public static int[] getPopularTagList(BookmarkReader reader, int size) {
    // Tag index -> count, inserted in index order.
    Map<Integer, Integer> tagCounts = new LinkedHashMap<Integer, Integer>();
    for (int tagId = 0; tagId < reader.getTagCounts().size(); tagId++) {
        tagCounts.put(tagId, reader.getTagCounts().get(tagId));
    }

    Map<Integer, Integer> ranking = new TreeMap<Integer, Integer>(new IntMapComparator(tagCounts));
    ranking.putAll(tagCounts);

    int[] topTags = new int[size];
    int filled = 0;
    for (Integer tagId : ranking.keySet()) {
        if (filled >= size) {
            break;
        }
        topTags[filled] = tagId;
        filled++;
    }
    return topTags;
}
/**
 * Builds, for every user returned by {@code reader.getUniqueUserListFromTestSet(trainSize)},
 * a list of up to {@code count} resource IDs that the user does not already have in the first
 * {@code trainSize} user lines.
 *
 * <p>Candidates are taken in the order delivered by
 * {@code Utilities.getRandomIndices(0, resCount - 1)}, which is requested anew for each user.
 *
 * @param reader    supplies the resources, the user lines and the test users
 * @param count     maximum number of resource IDs collected per user
 * @param trainSize number of leading user lines that form the training portion
 * @return one {@code int[]} of resource IDs per test user, in test-user iteration order
 */
private static List<int[]> getRandomResources(BookmarkReader reader, int count, int trainSize) {
    int resCount = reader.getResources().size();
    List<int[]> recommendations = new ArrayList<int[]>();

    for (int userID : reader.getUniqueUserListFromTestSet(trainSize)) {
        List<Integer> alreadySeen =
                UserData.getResourcesFromUser(reader.getUserLines().subList(0, trainSize), userID);

        List<Integer> picked = new ArrayList<Integer>();
        for (Integer candidate : Utilities.getRandomIndices(0, resCount - 1)) {
            if (picked.size() >= count) {
                break;
            }
            if (alreadySeen.contains(candidate)) {
                continue;
            }
            picked.add(candidate);
        }
        recommendations.add(Ints.toArray(picked));
    }
    return recommendations;
}
/**
 * Determines the number of bookmarks in the train and test files of a sample and stores them
 * in {@code TRAIN_SIZE} and {@code TEST_SIZE}, printing both values to standard output.
 *
 * <p>When {@code TOPIC_NAME} is non-null, {@code "_" + TOPIC_NAME} is appended to the sample
 * name before the {@code "_train"} / {@code "_test"} suffixes are added.
 *
 * @param sample base name of the sample, without the train/test suffix
 */
private static void getTrainTestSize(String sample) {
    String baseName = (TOPIC_NAME != null) ? sample + ("_" + TOPIC_NAME) : sample;

    BookmarkReader trainData = new BookmarkReader(-1, false);
    trainData.readFile(baseName + "_train");
    TRAIN_SIZE = trainData.getBookmarks().size();
    System.out.println("Train-size: " + TRAIN_SIZE);

    BookmarkReader testData = new BookmarkReader(-1, false);
    testData.readFile(baseName + "_test");
    TEST_SIZE = testData.getBookmarks().size();
    System.out.println("Test-size: " + TEST_SIZE);
}
// passing the trainSize means that MyMediaLite files will be evaluated private static void evaluate( String sampleDir, String sampleName, String prefix, String postfix, boolean calcTags, boolean tensor, BookmarkReader reader) { if (reader == null) { getTrainTestSize(sampleName + (postfix != null ? "_" + postfix : "")); reader = new BookmarkReader(TRAIN_SIZE, false); reader.readFile(sampleName + (postfix != null ? "_" + postfix : "")); } if (calcTags) { writeMetrics( sampleDir, sampleName, prefix, 1, 10, postfix, reader, tensor ? TRAIN_SIZE : null); } else { writeMetricsForResources( sampleDir, sampleName, prefix, 1, 20, postfix, reader, tensor ? TRAIN_SIZE : null); } }
/**
 * Prints summary statistics of a dataset to standard output and, on request, writes a
 * per-user statistics file.
 *
 * <p>If {@code TOPIC_NAME} is non-null, {@code "_" + TOPIC_NAME} is appended to the dataset
 * name before the file is read. The printed values include the numbers of posts, users,
 * resources, tags, tag assignments and topics, plus averages derived from them.
 *
 * <p>When {@code writeAll} is true, {@code getTrainTestSize(dataset)} is called and one line
 * ({@code userID| resource count| topic count| topic diversity}) is written to
 * {@code ./data/metrics/<dataset>_userStats.txt} for every user returned by
 * {@code reader.getUniqueUserListFromTestSet(TRAIN_SIZE)}. An {@code IOException} is caught
 * and only its message is printed.
 *
 * <p>NOTE(review): at that point {@code dataset} already carries the topic suffix, and
 * {@code getTrainTestSize} appends {@code "_" + TOPIC_NAME} again when {@code TOPIC_NAME} is
 * set, so the train/test file names get the suffix twice — confirm whether this is intended.
 *
 * <p>NOTE(review): the writers are closed only at the end of the {@code try} body; there is
 * no {@code finally} or try-with-resources, so they stay open if an exception is thrown
 * before that point.
 *
 * <p>NOTE(review): the runs of asterisks between some string literals below look like elided
 * source text (the declarations of several printed averages are not visible) — restore them
 * from the original file before changing this method.
 *
 * @param dataset  base name of the dataset file
 * @param writeAll whether the per-user statistics file is written as well
 */
private static void getStatistics(String dataset, boolean writeAll) { if (TOPIC_NAME != null) { dataset += ("_" + TOPIC_NAME); } BookmarkReader reader = new BookmarkReader(0, false); reader.readFile(dataset); int bookmarks = reader.getBookmarks().size(); System.out.println("Posts: " + bookmarks); int users = reader.getUsers().size(); System.out.println("Users: " + users); int resources = reader.getResources().size(); System.out.println("Resources: " + resources); int tags = reader.getTags().size(); System.out.println("Tags: " + tags); int tagAssignments = reader.getTagAssignmentsCount(); System.out.println("Tag-Assignments: " + tagAssignments); int categories = reader.getCategories().size(); System.out.println("Topics: " + categories); double avgTASPerPost = (double) tagAssignments / bookmarks; System.out.println("Avg. TAS per post: " + avgTASPerPost); double avgBookmarksPerUser = (double) bookmarks / users; System.out.println("Avg. resources/posts per user: "******"Avg. users/posts per resource: " + avgBookmarksPerResource); if (writeAll) { try { getTrainTestSize(dataset); FileWriter userWriter = new FileWriter(new File("./data/metrics/" + dataset + "_userStats.txt")); BufferedWriter userBW = new BufferedWriter(userWriter); userBW.write("UserID| NoOfResources| NoOfTopics| Topic-Similarity\n"); List<Bookmark> trainList = reader.getBookmarks().subList(0, TRAIN_SIZE); List<Integer> testUsers = reader.getUniqueUserListFromTestSet(TRAIN_SIZE); System.out.println(); double avgTopicsPerUser = 0.0; double avgTopicDiversityPerUser = 0.0; List<Map<Integer, Double>> userTopics = Utilities.getRelativeTopicMaps(trainList, false); List<List<Bookmark>> userBookmarks = Utilities.getBookmarks(trainList, false); for (int userID : testUsers) { Map<Integer, Double> topicsOfUser = userTopics.get(userID); double topicDiversityOfUser = Bookmark.getBookmarkDiversity(userBookmarks.get(userID)); userBW.write( userID + "| " + reader.getUserCounts().get(userID) + "| " + 
topicsOfUser.keySet().size() + "| " + topicDiversityOfUser + "\n"); avgTopicsPerUser += topicsOfUser.keySet().size(); avgTopicDiversityPerUser += topicDiversityOfUser; } System.out.println("Avg. topics per user: "******"Avg. topic-similarity per user: "******"Avg. topics per resource: " + avgTopicsPerResource); userBW.flush(); userBW.close(); } catch (IOException e) { System.out.println(e.getMessage()); } } System.out.println(); }