/**
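   * Generates smoothed unigram probabilities for the data in the given file.
   *
   * @param f the file whose contents are tokenized
   * @return the grams produced by the smoothing step
   * @throws IOException if an I/O error occurs while processing the file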
   */
  public List<Gram> generateSmoothedProbabilities(File f) throws IOException {
    // Assemble the smoothing input for the file, then delegate to the
    // bean-based overload of this method.
    final Tokenizer corpusTokenizer = new Tokenizer();
    final SmoothingInputBean input = new SmoothingInputBean();

    // Tokenize the file once; the following steps read the tokens back
    // from the bean.
    input.setTokens(corpusTokenizer.getTokens(f));

    // Corpus frequency data computed from the tokens stored above.
    input.setWordFrequencyMap(corpusTokenizer.extractCorpusFrequency(input.getTokens()));

    // The denominator is the size of the collection held by the token list.
    input.setDenominiator(input.getTokens().getTokens().size());

    return generateSmoothedProbabilities(input);
  }
  /*
   * (non-Javadoc)
   *
   * @see hw1.service.NGramGenerator#generateProbabilities(java.io.File)
   */
  public List<Gram> generateProbabilities(File f) throws IOException {
    // Unsmoothed estimate: each entry's count divided by the sum of all counts.
    final Tokenizer corpusTokenizer = new Tokenizer();
    final TokenList corpusTokens = corpusTokenizer.getTokens(f);
    final Map<String, Integer> frequencies = corpusTokenizer.extractCorpusFrequency(corpusTokens);

    // The sum of every count is the shared denominator for all entries.
    int total = 0;
    for (Integer occurrences : frequencies.values()) {
      total += occurrences;
    }

    final List<Gram> result = new LinkedList<Gram>();
    for (Map.Entry<String, Integer> wordCount : frequencies.entrySet()) {
      // Cast before dividing so the ratio is not truncated by integer division.
      final double ratio = (double) wordCount.getValue() / total;
      // NOTE(review): the 5 is presumably the number of decimal places kept
      // by Utils.round - confirm against Utils.
      result.add(new Gram(wordCount.getKey(), Utils.round(ratio, 5)));
    }

    // Single-argument sort: orders the grams by Gram's natural ordering.
    Collections.sort(result);
    return result;
  }