コード例 #1
0
ファイル: Pipeline.java プロジェクト: mindis/TagRec
  /**
   * Prints summary statistics for a dataset to standard output and, optionally, writes per-user
   * statistics to {@code ./data/metrics/<dataset>_userStats.txt}.
   *
   * <p>When {@code TOPIC_NAME} is non-null, {@code "_" + TOPIC_NAME} is appended to the dataset
   * name before the dataset is read and before the name is used for the output file.
   *
   * <p>NOTE(review): two statements in this listing (marked below) contain a literal
   * {@code "******"} sequence and reference variables that are never declared in the visible
   * code, so parts of the original method appear to be missing. Restore them from the upstream
   * source before relying on this method.
   *
   * @param dataset name of the dataset handed to {@code BookmarkReader.readFile}
   * @param writeAll if {@code true}, additionally write the per-user statistics file
   */
  private static void getStatistics(String dataset, boolean writeAll) {
    if (TOPIC_NAME != null) {
      dataset += ("_" + TOPIC_NAME);
    }
    BookmarkReader reader = new BookmarkReader(0, false);
    reader.readFile(dataset);

    // Basic counts, each echoed to stdout as soon as it is read.
    int bookmarks = reader.getBookmarks().size();
    System.out.println("Posts: " + bookmarks);
    int users = reader.getUsers().size();
    System.out.println("Users: " + users);
    int resources = reader.getResources().size();
    System.out.println("Resources: " + resources);
    int tags = reader.getTags().size();
    System.out.println("Tags: " + tags);
    int tagAssignments = reader.getTagAssignmentsCount();
    System.out.println("Tag-Assignments: " + tagAssignments);
    int categories = reader.getCategories().size();
    System.out.println("Topics: " + categories);
    // Ratios use floating-point division, so a zero divisor yields NaN/Infinity, not an exception.
    double avgTASPerPost = (double) tagAssignments / bookmarks;
    System.out.println("Avg. TAS per post: " + avgTASPerPost);
    double avgBookmarksPerUser = (double) bookmarks / users;
    // NOTE(review): the next statement is truncated in this listing ("******"); avgBookmarksPerUser
    // is never printed and avgBookmarksPerResource is never declared in the visible code.
    System.out.println("Avg. resources/posts per user: "******"Avg. users/posts per resource: " + avgBookmarksPerResource);

    if (writeAll) {
      try {
        // presumably populates TRAIN_SIZE, which is read below -- confirm against getTrainTestSize
        getTrainTestSize(dataset);
        FileWriter userWriter =
            new FileWriter(new File("./data/metrics/" + dataset + "_userStats.txt"));
        BufferedWriter userBW = new BufferedWriter(userWriter);
        userBW.write("UserID| NoOfResources| NoOfTopics| Topic-Similarity\n");
        // The first TRAIN_SIZE bookmarks are treated as the training portion.
        List<Bookmark> trainList = reader.getBookmarks().subList(0, TRAIN_SIZE);
        List<Integer> testUsers = reader.getUniqueUserListFromTestSet(TRAIN_SIZE);
        System.out.println();

        // Despite their names, these two variables accumulate sums inside the loop; no division
        // by the number of users is visible in this listing.
        double avgTopicsPerUser = 0.0;
        double avgTopicDiversityPerUser = 0.0;
        List<Map<Integer, Double>> userTopics = Utilities.getRelativeTopicMaps(trainList, false);
        List<List<Bookmark>> userBookmarks = Utilities.getBookmarks(trainList, false);
        // Each test user's id is used as a positional index into both per-user lists.
        for (int userID : testUsers) {
          Map<Integer, Double> topicsOfUser = userTopics.get(userID);
          double topicDiversityOfUser = Bookmark.getBookmarkDiversity(userBookmarks.get(userID));
          // One '|'-separated row per user, in the column order of the header written above.
          userBW.write(
              userID
                  + "| "
                  + reader.getUserCounts().get(userID)
                  + "| "
                  + topicsOfUser.keySet().size()
                  + "| "
                  + topicDiversityOfUser
                  + "\n");
          avgTopicsPerUser += topicsOfUser.keySet().size();
          avgTopicDiversityPerUser += topicDiversityOfUser;
        }
        // NOTE(review): the next statement is truncated in this listing ("******");
        // avgTopicsPerResource is never declared in the visible code.
        System.out.println("Avg. topics per user: "******"Avg. topic-similarity per user: "******"Avg. topics per resource: " + avgTopicsPerResource);
        userBW.flush();
        // NOTE(review): close() is only reached on the normal path; an exception thrown above
        // leaves the writer open.
        userBW.close();
      } catch (IOException e) {
        // Only the exception message is printed; the failure is not propagated to the caller.
        System.out.println(e.getMessage());
      }
    }

    System.out.println();
  }
コード例 #2
0
ファイル: MetricsCalculator.java プロジェクト: mindis/TagRec
  /**
   * Computes evaluation metrics averaged over all prediction entries of {@code reader} and stores
   * them in this instance's fields (recall, precision, F-measure, MRR, MAP, nDCG, user coverage,
   * diversity and serendipity).
   *
   * <p>For selected cut-offs ({@code k == 5 || k == 10} when recommending tags, {@code k == 20}
   * otherwise) one semicolon-separated row per prediction entry is appended to
   * {@code outputFile + "_" + k + ".txt"}. Every column uses {@code '.'} as decimal separator;
   * {@code null} entries are written as an all-zero row. Failures while opening or writing the
   * file are reported via {@code printStackTrace()} and do not abort the metric computation.
   *
   * <p>The sums are divided by the total number of prediction entries (including {@code null}
   * ones), so an empty prediction list yields {@code NaN} averages.
   *
   * @param reader source of the per-entry prediction data
   * @param outputFile path prefix of the optional per-entry statistics file
   * @param k cut-off; used for the file name and to decide whether the file is written
   * @param bookmarkReader bookmark data used for diversity/serendipity; only stored in this
   *     instance when {@code recommTags} is {@code true}
   * @param recommTags {@code true} when tag recommendations are evaluated
   */
  public MetricsCalculator(
      PredictionFileReader reader,
      String outputFile,
      int k,
      BookmarkReader bookmarkReader,
      boolean recommTags) {
    this.reader = reader;
    // NOTE(review): when recommTags is false the field is not assigned here, so unless it is
    // initialized elsewhere the non-tag diversity/serendipity branches below are never taken.
    if (recommTags) { // TODO: check
      this.bookmarkReader = bookmarkReader;
    }

    // TODO: Enable if you need data for statistical tests
    BufferedWriter bw = null;
    if ((recommTags && (k == 5 || k == 10)) || (!recommTags && k == 20)) {
      try {
        // Append mode: rows of successive runs accumulate in the same file.
        bw = new BufferedWriter(new FileWriter(new File(outputFile + "_" + k + ".txt"), true));
      } catch (Exception e) {
        e.printStackTrace();
      }
    }

    // double count = this.reader.getPredictionCount(); // only user where there are recommendations
    double count = this.reader.getPredictionData().size(); // all users
    double recall = 0.0,
        precision = 0.0,
        mrr = 0.0,
        fMeasure = 0.0,
        map = 0.0,
        nDCG = 0.0,
        diversity = 0.0,
        serendipity = 0.0;

    try {
      List<Map<Integer, Double>> entityFeatures = null;
      List<Map<Integer, Integer>> tagCountMaps = null;
      List<Bookmark> trainList = null;
      if (this.bookmarkReader != null) {
        trainList =
            this.bookmarkReader.getBookmarks().subList(0, this.bookmarkReader.getCountLimit());
        if (recommTags) {
          tagCountMaps = Utilities.getResMaps(trainList);
          entityFeatures = Utilities.getResourceMapsForTags(trainList);
        } else {
          entityFeatures =
              Utilities.getUniqueTopicMaps(trainList, true); // TODO: check regarding unique!
        }
      }

      // process each predicted line
      for (PredictionData data : this.reader.getPredictionData()) {
        if (data == null) {
          // Entries without prediction data contribute zeros but still count in the averages.
          if (bw != null) {
            try {
              bw.write("0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0\n");
            } catch (Exception e) {
              e.printStackTrace();
            }
          }
          continue;
        }

        double cRecall = data.getRecall();
        double cPrecision = data.getPrecision(recommTags);
        double cFMeasure = data.getFMeasure(recommTags);
        double cMRR = data.getMRR();
        double cMAP = data.getMAP();
        double cNDCG = data.getNDCG();
        recall += cRecall;
        precision += cPrecision;
        fMeasure += cFMeasure;
        mrr += cMRR;
        map += cMAP;
        nDCG += cNDCG;

        double cDiversity = 0.0, cSerendipity = 0.0;
        if (this.bookmarkReader != null) {
          if (recommTags) {
            cDiversity = data.getTagDiversity(entityFeatures);
            if (data.getResID() < tagCountMaps.size()) {
              Map<Integer, Integer> tagCountMap = tagCountMaps.get(data.getResID());
              cSerendipity = data.getTagSerendipity(tagCountMap, false);
            } else {
              // No tag counts exist for this resource index; serendipity is set to 1.0.
              cSerendipity = 1.0;
            }
          } else {
            List<Integer> knownEntities =
                Bookmark.getResourcesFromUser(trainList, data.getUserID());
            cDiversity = data.getDiversity(entityFeatures, true);
            cSerendipity = data.getSerendipity(entityFeatures, knownEntities);
          }
          diversity += cDiversity;
          serendipity += cSerendipity;
        }

        if (bw != null) {
          try {
            // Double.toString always uses '.' as decimal separator, so all nine columns are
            // written uniformly and match the all-zero placeholder row above.
            bw.write(
                Double.toString(cRecall)
                    + ";"
                    + Double.toString(cPrecision)
                    + ";"
                    + Double.toString(cFMeasure)
                    + ";"
                    + Double.toString(cMRR)
                    + ";"
                    + Double.toString(cMAP)
                    + ";"
                    + Double.toString(cNDCG)
                    + ";"
                    + Double.toString(data.getCoverage())
                    + ";"
                    + Double.toString(cDiversity)
                    + ";"
                    + Double.toString(cSerendipity)
                    + "\n");
          } catch (Exception e) {
            e.printStackTrace();
          }
        }
      }
    } finally {
      // Close (and thereby flush) the statistics file even if metric computation fails.
      if (bw != null) {
        try {
          bw.close();
        } catch (Exception e) {
          e.printStackTrace();
        }
      }
    }

    this.recall = recall / count;
    this.precision = precision / count;
    this.fMeasure = fMeasure / count;
    this.mrr = mrr / count;
    this.map = map / count;
    this.nDCG = nDCG / count;
    this.userCoverage =
        (double) this.reader.getPredictionCount() / (double) this.reader.getPredictionData().size();
    this.diversity = diversity / count;
    this.serendipity = serendipity / count;
  }