/**
 * \brief Saves the buffer content into a zip file
 *
 * <p>Every dataset currently held in the buffer is removed from it (in polling order) and written
 * to the archive by delegating to {@code Dataset.saveZip}. The archive stream is layered as
 * zip -> buffered -> Adler-32 checked -> file. After all datasets have been written and the stream
 * has been closed, the attribute counter is reset to zero.
 *
 * <p>The archive stream is always closed, even when writing a dataset fails, so the underlying
 * file handle is not leaked. Datasets that were already polled before a failure are no longer in
 * the buffer, and the attribute counter is left untouched in that case.
 *
 * @param zipfilename name of the file in which the zip representation of the datasets held in
 *     the buffer will be created
 * @throws FileNotFoundException if the destination file cannot be opened for writing
 * @throws IOException if an I/O error occurs while writing or closing the archive
 */
public synchronized void saveZip(String zipfilename) throws FileNotFoundException, IOException {
    FileOutputStream dest = new FileOutputStream(zipfilename);
    CheckedOutputStream checksum = new CheckedOutputStream(dest, new Adler32());
    ZipOutputStream out = new ZipOutputStream(new BufferedOutputStream(checksum));

    boolean written = false;
    try {
        while (!this.data.isEmpty()) {
            Dataset d = this.data.poll();
            d.saveZip(out);
        }
        written = true;
    } finally {
        if (written) {
            // Normal path: a failure to close must be reported to the caller.
            out.close();
        } else {
            // Failure path: release the file handle, but do not let a secondary
            // close error hide the exception that is already propagating.
            try {
                out.close();
            } catch (IOException ignored) {
                // intentionally ignored: the original exception is more informative
            }
        }
    }

    this.numAtts = 0;
}
public static void main(String[] args) { Dataset dataset = new Dataset(new RealDataset(JoeConfig.TRAIN_DIR)); Set<String> restIdSet = new HashSet<String>(); Set<String> userIdSet = new HashSet<String>(); Map<Rating, Integer> globalRatingTally = new HashMap<Rating, Integer>(); // key = rest ID, value = num visitors Map<String, Integer> restTally = new HashMap<String, Integer>(); // key = rest ID, value = num visitors Map<String, Integer> userTally = new HashMap<String, Integer>(); while (dataset.hasNext()) { Sample s = dataset.next(); String userId = s.getFeatureValues().getUserId(); String restId = s.getFeatureValues().getRestaurantId(); double rating = s.getLabel().getRating(); Rating keyRating = Rating.valueOf(rating); incrementTally(globalRatingTally, keyRating); incrementTally(restTally, restId); incrementTally(userTally, userId); restIdSet.add(restId); userIdSet.add(userId); } System.out.println("Total number of users = " + userIdSet.size()); System.out.println("Total number of restaurants = " + restIdSet.size()); System.out.println("Global rating distribution = "); for (Rating rating : globalRatingTally.keySet()) { System.out.println(rating + ", " + globalRatingTally.get(rating)); } // key = # visitors, val = # rest with that #visitors Map<Integer, Integer> visitorTally = new HashMap<Integer, Integer>(); for (String restId : restTally.keySet()) { incrementTally(visitorTally, restTally.get(restId)); } System.out.println("#visitors, #restaurants with that exact number of visitors"); for (Integer viscount : visitorTally.keySet()) { System.out.println(viscount + ", " + visitorTally.get(viscount)); } // key = # ratings, val = # users with that number of ratings Map<Integer, Integer> ratingUserTally = new HashMap<Integer, Integer>(); for (String userId : userTally.keySet()) { incrementTally(ratingUserTally, userTally.get(userId)); } System.out.println("#ratings, #users with that exact number of ratings"); for (Integer ratingCount : ratingUserTally.keySet()) { 
System.out.println(ratingCount + ", " + ratingUserTally.get(ratingCount)); } }
/**
 * \brief Adds a dataset to buffer
 *
 * <p>The dataset is rejected when the buffer reports itself full. The running attribute count is
 * increased by the dataset's attribute count only after the underlying collection has actually
 * accepted the dataset, so the counter stays consistent with the buffer content even if the
 * insertion is refused.
 *
 * @param d dataset to be inserted
 * @return \c true if the dataset was inserted into buffer successfully, or \c false otherwise.
 */
public synchronized boolean add(Dataset d) {
    if (this.isFull()) {
        return false;
    }
    // Read the attribute count before inserting so a failure here leaves the buffer untouched.
    int atts = d.getNumAtts();
    if (!this.data.add(d)) {
        // The underlying collection did not change; do not count the dataset.
        return false;
    }
    this.numAtts += atts;
    return true;
}