/**
   * \brief Saves the buffer content into a zip file
   *
   * <p>Drains the buffer: each dataset is removed from the buffer and written to the archive by
   * calling its Dataset.saveZip method. The output streams are closed on every exit path, including
   * when writing a dataset fails. After a successful save the attribute counter is reset to zero;
   * if an exception is thrown, the datasets removed so far are not put back and the attribute
   * counter is left unchanged.
   *
   * @param zipfilename name of the file in which the zip representation of the datasets held in
   *     the buffer will be created
   * @throws FileNotFoundException if the file cannot be opened for writing
   * @throws IOException if writing to or closing the zip file fails
   */
  public synchronized void saveZip(String zipfilename) throws FileNotFoundException, IOException {
    // try-with-resources guarantees the file handle is released even if a dataset fails to save.
    // Closing the zip stream also closes the underlying file stream; the second close is a no-op.
    try (FileOutputStream dest = new FileOutputStream(zipfilename);
        ZipOutputStream out =
            new ZipOutputStream(
                new BufferedOutputStream(new CheckedOutputStream(dest, new Adler32())))) {
      while (!this.data.isEmpty()) {
        Dataset d = this.data.poll();
        d.saveZip(out);
      }
    }
    // Only reached when every dataset was written and the archive was closed without error.
    this.numAtts = 0;
  }
// Example #2
// 0
  /**
   * Prints summary statistics for the training dataset located at JoeConfig.TRAIN_DIR.
   *
   * <p>Iterates over every sample once, updating a tally per rating, per restaurant ID and per
   * user ID through incrementTally (presumed to add one to the count stored under the given key --
   * confirm against its definition). It then prints the number of distinct users, the number of
   * distinct restaurants, the global rating distribution, a histogram of restaurants by visitor
   * count and a histogram of users by rating count.
   *
   * @param args command-line arguments; not used
   */
  public static void main(String[] args) {
    Dataset dataset = new Dataset(new RealDataset(JoeConfig.TRAIN_DIR));
    Set<String> restIdSet = new HashSet<String>();
    Set<String> userIdSet = new HashSet<String>();
    // key = rating, value = num samples with that rating
    Map<Rating, Integer> globalRatingTally = new HashMap<Rating, Integer>();
    // key = rest ID, value = num visitors
    Map<String, Integer> restTally = new HashMap<String, Integer>();
    // key = user ID, value = num ratings
    Map<String, Integer> userTally = new HashMap<String, Integer>();
    while (dataset.hasNext()) {
      Sample s = dataset.next();
      String userId = s.getFeatureValues().getUserId();
      String restId = s.getFeatureValues().getRestaurantId();
      double rating = s.getLabel().getRating();

      Rating keyRating = Rating.valueOf(rating);
      incrementTally(globalRatingTally, keyRating);
      incrementTally(restTally, restId);
      incrementTally(userTally, userId);
      restIdSet.add(restId);
      userIdSet.add(userId);
    }
    System.out.println("Total number of users = " + userIdSet.size());
    System.out.println("Total number of restaurants = " + restIdSet.size());

    System.out.println("Global rating distribution = ");
    // entrySet avoids a second hash lookup per key; iteration order matches keySet.
    for (Map.Entry<Rating, Integer> entry : globalRatingTally.entrySet()) {
      System.out.println(entry.getKey() + ", " + entry.getValue());
    }

    // key = # visitors, val = # rest with that #visitors
    Map<Integer, Integer> visitorTally = new HashMap<Integer, Integer>();
    // Only the per-restaurant counts matter here, so iterate the values directly.
    for (Integer visitorCount : restTally.values()) {
      incrementTally(visitorTally, visitorCount);
    }
    System.out.println("#visitors, #restaurants with that exact number of visitors");
    for (Map.Entry<Integer, Integer> entry : visitorTally.entrySet()) {
      System.out.println(entry.getKey() + ", " + entry.getValue());
    }

    // key = # ratings, val = # users with that number of ratings
    Map<Integer, Integer> ratingUserTally = new HashMap<Integer, Integer>();
    for (Integer ratingCount : userTally.values()) {
      incrementTally(ratingUserTally, ratingCount);
    }
    System.out.println("#ratings, #users with that exact number of ratings");

    for (Map.Entry<Integer, Integer> entry : ratingUserTally.entrySet()) {
      System.out.println(entry.getKey() + ", " + entry.getValue());
    }
  }
 /**
  * \brief Inserts a dataset into the buffer
  *
  * <p>When the buffer reports itself full, the dataset is rejected and nothing changes. Otherwise
  * the buffer's attribute counter grows by the dataset's attribute count and the dataset is
  * stored.
  *
  * @param d dataset to insert
  * @return \c true when the dataset was stored, or \c false when the buffer was already full.
  */
 public synchronized boolean add(Dataset d) {
   final boolean hasRoom = !this.isFull();
   if (hasRoom) {
     // Update the attribute counter before storing, same order as always.
     this.numAtts += d.getNumAtts();
     this.data.add(d);
   }
   return hasRoom;
 }