public static void main(String[] args) { Dataset dataset = new Dataset(new RealDataset(JoeConfig.TRAIN_DIR)); Set<String> restIdSet = new HashSet<String>(); Set<String> userIdSet = new HashSet<String>(); Map<Rating, Integer> globalRatingTally = new HashMap<Rating, Integer>(); // key = rest ID, value = num visitors Map<String, Integer> restTally = new HashMap<String, Integer>(); // key = rest ID, value = num visitors Map<String, Integer> userTally = new HashMap<String, Integer>(); while (dataset.hasNext()) { Sample s = dataset.next(); String userId = s.getFeatureValues().getUserId(); String restId = s.getFeatureValues().getRestaurantId(); double rating = s.getLabel().getRating(); Rating keyRating = Rating.valueOf(rating); incrementTally(globalRatingTally, keyRating); incrementTally(restTally, restId); incrementTally(userTally, userId); restIdSet.add(restId); userIdSet.add(userId); } System.out.println("Total number of users = " + userIdSet.size()); System.out.println("Total number of restaurants = " + restIdSet.size()); System.out.println("Global rating distribution = "); for (Rating rating : globalRatingTally.keySet()) { System.out.println(rating + ", " + globalRatingTally.get(rating)); } // key = # visitors, val = # rest with that #visitors Map<Integer, Integer> visitorTally = new HashMap<Integer, Integer>(); for (String restId : restTally.keySet()) { incrementTally(visitorTally, restTally.get(restId)); } System.out.println("#visitors, #restaurants with that exact number of visitors"); for (Integer viscount : visitorTally.keySet()) { System.out.println(viscount + ", " + visitorTally.get(viscount)); } // key = # ratings, val = # users with that number of ratings Map<Integer, Integer> ratingUserTally = new HashMap<Integer, Integer>(); for (String userId : userTally.keySet()) { incrementTally(ratingUserTally, userTally.get(userId)); } System.out.println("#ratings, #users with that exact number of ratings"); for (Integer ratingCount : ratingUserTally.keySet()) { System.out.println(ratingCount + ", " + ratingUserTally.get(ratingCount)); } }