/**
 * Prints summary statistics for a bookmark dataset to standard output and, optionally, writes
 * per-user statistics to a text file.
 *
 * <p>If {@code TOPIC_NAME} is non-null, {@code "_" + TOPIC_NAME} is appended to {@code dataset}
 * before it is used. The dataset is loaded through {@code new BookmarkReader(0, false)} and
 * {@code readFile(dataset)}; the sizes of the reader's bookmark, user, resource, tag and category
 * collections and its tag-assignment count are printed, together with averages derived from
 * them (e.g. tag assignments per post, bookmarks per user).
 *
 * <p>When {@code writeAll} is true, {@code getTrainTestSize(dataset)} is called and the file
 * {@code "./data/metrics/" + dataset + "_userStats.txt"} is written: a header line followed by one
 * line per user returned by {@code reader.getUniqueUserListFromTestSet(TRAIN_SIZE)}, holding the
 * user id, {@code reader.getUserCounts().get(userID)}, the number of topics of that user and the
 * value of {@code Bookmark.getBookmarkDiversity(...)} for that user's bookmarks. Topic maps and
 * per-user bookmark lists are built from the first {@code TRAIN_SIZE} bookmarks only.
 * An {@code IOException} is caught and only its message is printed.
 *
 * <p>NOTE(review): the three spans rendered as "******" inside the print statements look like
 * masked or lost source text; the declarations of {@code avgBookmarksPerResource} and
 * {@code avgTopicsPerResource} are not visible here, so the code as shown is incomplete.
 * Restore the missing statements from version control before changing this method.
 *
 * <p>NOTE(review): the visible averages are plain {@code double} divisions, so a zero divisor
 * (an empty dataset) gives NaN or Infinity rather than an exception.
 *
 * <p>NOTE(review): {@code userBW} is flushed and closed only at the end of the {@code try}
 * body; if an exception is thrown earlier the writer is left open.
 *
 * @param dataset name of the dataset; passed to {@code readFile} and used in the output file name
 * @param writeAll whether to additionally write the per-user statistics file
 */
private static void getStatistics(String dataset, boolean writeAll) { if (TOPIC_NAME != null) { dataset += ("_" + TOPIC_NAME); } BookmarkReader reader = new BookmarkReader(0, false); reader.readFile(dataset); int bookmarks = reader.getBookmarks().size(); System.out.println("Posts: " + bookmarks); int users = reader.getUsers().size(); System.out.println("Users: " + users); int resources = reader.getResources().size(); System.out.println("Resources: " + resources); int tags = reader.getTags().size(); System.out.println("Tags: " + tags); int tagAssignments = reader.getTagAssignmentsCount(); System.out.println("Tag-Assignments: " + tagAssignments); int categories = reader.getCategories().size(); System.out.println("Topics: " + categories); double avgTASPerPost = (double) tagAssignments / bookmarks; System.out.println("Avg. TAS per post: " + avgTASPerPost); double avgBookmarksPerUser = (double) bookmarks / users; System.out.println("Avg. resources/posts per user: "******"Avg. users/posts per resource: " + avgBookmarksPerResource); if (writeAll) { try { getTrainTestSize(dataset); FileWriter userWriter = new FileWriter(new File("./data/metrics/" + dataset + "_userStats.txt")); BufferedWriter userBW = new BufferedWriter(userWriter); userBW.write("UserID| NoOfResources| NoOfTopics| Topic-Similarity\n"); List<Bookmark> trainList = reader.getBookmarks().subList(0, TRAIN_SIZE); List<Integer> testUsers = reader.getUniqueUserListFromTestSet(TRAIN_SIZE); System.out.println(); double avgTopicsPerUser = 0.0; double avgTopicDiversityPerUser = 0.0; List<Map<Integer, Double>> userTopics = Utilities.getRelativeTopicMaps(trainList, false); List<List<Bookmark>> userBookmarks = Utilities.getBookmarks(trainList, false); for (int userID : testUsers) { Map<Integer, Double> topicsOfUser = userTopics.get(userID); double topicDiversityOfUser = Bookmark.getBookmarkDiversity(userBookmarks.get(userID)); userBW.write( userID + "| " + reader.getUserCounts().get(userID) + "| " + 
topicsOfUser.keySet().size() + "| " + topicDiversityOfUser + "\n"); avgTopicsPerUser += topicsOfUser.keySet().size(); avgTopicDiversityPerUser += topicDiversityOfUser; } System.out.println("Avg. topics per user: "******"Avg. topic-similarity per user: "******"Avg. topics per resource: " + avgTopicsPerResource); userBW.flush(); userBW.close(); } catch (IOException e) { System.out.println(e.getMessage()); } } System.out.println(); }
public MetricsCalculator( PredictionFileReader reader, String outputFile, int k, BookmarkReader bookmarkReader, boolean recommTags) { this.reader = reader; if (recommTags) { // TODO: check this.bookmarkReader = bookmarkReader; } BufferedWriter bw = null; // TODO: Enable if you need data for statistical tests if ((recommTags && (k == 5 || k == 10)) || (!recommTags && k == 20)) { try { FileWriter writer = new FileWriter(new File(outputFile + "_" + k + ".txt"), true); bw = new BufferedWriter(writer); } catch (Exception e) { e.printStackTrace(); } } // double count = this.reader.getPredictionCount(); // only user where there are recommendations double count = this.reader.getPredictionData().size(); // all users double recall = 0.0, precision = 0.0, mrr = 0.0, fMeasure = 0.0, map = 0.0, nDCG = 0.0, diversity = 0.0, serendipity = 0.0; List<Map<Integer, Double>> entityFeatures = null; List<Map<Integer, Integer>> tagCountMaps = null; List<Bookmark> trainList = null; if (this.bookmarkReader != null) { trainList = this.bookmarkReader.getBookmarks().subList(0, this.bookmarkReader.getCountLimit()); if (recommTags) { tagCountMaps = Utilities.getResMaps(trainList); entityFeatures = Utilities.getResourceMapsForTags(trainList); } else { entityFeatures = Utilities.getUniqueTopicMaps(trainList, true); // TODO: check regarding unique! 
} } // process each predicted line for (PredictionData data : this.reader.getPredictionData()) { if (data == null) { if (bw != null) { try { bw.write("0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0\n"); } catch (Exception e) { e.printStackTrace(); } } continue; } double cRecall = data.getRecall(); recall += cRecall; double cPrecision = data.getPrecision(recommTags); precision += cPrecision; double cFMeasure = data.getFMeasure(recommTags); fMeasure += cFMeasure; double cMRR = data.getMRR(); mrr += cMRR; double cMAP = data.getMAP(); map += cMAP; double cNDCG = data.getNDCG(); nDCG += cNDCG; double cDiversity = 0.0, cSerendipity = 0.0; if (this.bookmarkReader != null) { if (recommTags) { cDiversity = data.getTagDiversity(entityFeatures); if (data.getResID() < tagCountMaps.size()) { Map<Integer, Integer> tagCountMap = tagCountMaps.get(data.getResID()); cSerendipity = data.getTagSerendipity(tagCountMap, false); } else { cSerendipity = 1.0; } } else { List<Integer> knownEntities = Bookmark.getResourcesFromUser(trainList, data.getUserID()); cDiversity = data.getDiversity(entityFeatures, true); cSerendipity = data.getSerendipity(entityFeatures, knownEntities); } diversity += cDiversity; serendipity += cSerendipity; } if (bw != null) { try { bw.write(Double.toString(cRecall).replace(',', '.') + ";"); bw.write(Double.toString(cPrecision).replace(',', '.') + ";"); bw.write(Double.toString(cFMeasure).replace(',', '.') + ";"); bw.write(Double.toString(cMRR).replace(',', '.') + ";"); bw.write(Double.toString(cMAP).replace(',', '.') + ";"); bw.write(Double.toString(cNDCG).replace(',', '.') + ";"); bw.write(Double.toString(data.getCoverage()).replace(',', '.') + ";"); bw.write(Double.toString(cDiversity).replace('.', ',') + ";"); bw.write(Double.toString(cSerendipity).replace('.', ',')); bw.write("\n"); } catch (Exception e) { e.printStackTrace(); } } } this.recall = recall / count; this.precision = precision / count; this.fMeasure = fMeasure / count; this.mrr = mrr / count; this.map = map / 
count; this.nDCG = nDCG / count; this.userCoverage = (double) this.reader.getPredictionCount() / (double) this.reader.getPredictionData().size(); this.diversity = diversity / count; this.serendipity = serendipity / count; // TODO: enable in case statistics are needed if (bw != null) { try { // bw.write("\n"); bw.flush(); bw.close(); } catch (Exception e) { e.printStackTrace(); } } }