public static Dictionary buildNGramDictionary(ObjectStream<POSSample> samples, int cutoff)
      throws IOException {

    NGramModel ngramModel = new NGramModel();

    POSSample sample;
    while ((sample = samples.read()) != null) {
      String[] words = sample.getSentence();

      if (words.length > 0) ngramModel.add(new StringList(words), 1, 1);
    }

    ngramModel.cutoff(cutoff, Integer.MAX_VALUE);

    return ngramModel.toDictionary(true);
  }
  public static void populatePOSDictionary(
      ObjectStream<POSSample> samples, MutableTagDictionary dict, int cutoff) throws IOException {
    System.out.println("Expanding POS Dictionary ...");
    long start = System.nanoTime();

    // the data structure will store the word, the tag, and the number of
    // occurrences
    Map<String, Map<String, AtomicInteger>> newEntries =
        new HashMap<String, Map<String, AtomicInteger>>();
    POSSample sample;
    while ((sample = samples.read()) != null) {
      String[] words = sample.getSentence();
      String[] tags = sample.getTags();

      for (int i = 0; i < words.length; i++) {
        // only store words
        if (!StringPattern.recognize(words[i]).containsDigit()) {
          String word;
          if (dict.isCaseSensitive()) {
            word = words[i];
          } else {
            word = StringUtil.toLowerCase(words[i]);
          }

          if (!newEntries.containsKey(word)) {
            newEntries.put(word, new HashMap<String, AtomicInteger>());
          }

          String[] dictTags = dict.getTags(word);
          if (dictTags != null) {
            for (String tag : dictTags) {
              // for this tags we start with the cutoff
              Map<String, AtomicInteger> value = newEntries.get(word);
              if (!value.containsKey(tag)) {
                value.put(tag, new AtomicInteger(cutoff));
              }
            }
          }

          if (!newEntries.get(word).containsKey(tags[i])) {
            newEntries.get(word).put(tags[i], new AtomicInteger(1));
          } else {
            newEntries.get(word).get(tags[i]).incrementAndGet();
          }
        }
      }
    }

    // now we check if the word + tag pairs have enough occurrences, if yes we
    // add it to the dictionary
    for (Entry<String, Map<String, AtomicInteger>> wordEntry : newEntries.entrySet()) {
      List<String> tagsForWord = new ArrayList<String>();
      for (Entry<String, AtomicInteger> entry : wordEntry.getValue().entrySet()) {
        if (entry.getValue().get() >= cutoff) {
          tagsForWord.add(entry.getKey());
        }
      }
      if (tagsForWord.size() > 0) {
        dict.put(wordEntry.getKey(), tagsForWord.toArray(new String[tagsForWord.size()]));
      }
    }

    System.out.println(
        "... finished expanding POS Dictionary. [" + (System.nanoTime() - start) / 1000000 + "ms]");
  }