public HMM(int n, List<Pair> pairs) { long time = System.currentTimeMillis(); this.n = n; this.pairs = pairs; System.out.println("Counting states"); // Find States and Labels for (Pair pair : pairs) { words.add(pair.word); tags.add(pair.tag); } System.out.println("Finding transition probabilities"); System.out.println(pairs.size()); // Find tag transition probabilities System.out.println("Counting ngrams"); countGrams(n, nGramCount, false); if (n > 1) { System.out.println("Counting n-1 grams"); countGrams(n - 1, nMinusOneGramCount, false); } // Find emission probabilities System.out.println("Finding emission probabilities"); for (Pair pair : pairs) { Util.tagWordCount(tagsMap, pair.word, pair.tag); Util.wordTagCount(wordsMap, pair.word, pair.tag); Util.incrementMap(wordCount, pair.word); Util.incrementMap(tagCount, pair.tag); } Util.mostCommonTagMap(wordsMap, mostCommonTagMap); mostCommonTag = Util.mostCommonTag(tagCount); System.out.println((System.currentTimeMillis() - time) / 1000.0); }
/**
 * Counts every run of {@code n} consecutive entries of {@code pairs} and adds
 * the counts to {@code count}.
 *
 * <p>A sliding window of {@code n} content strings is moved across
 * {@code pairs} one position at a time. At each position the window's
 * {@code List.toString()} form (for example {@code "[A, B]"}) is used as the
 * key passed to {@code Util.incrementMap}.
 *
 * <p>If {@code pairs} holds fewer than {@code n} entries there is no complete
 * n-gram, so the method returns without touching {@code count}. For
 * {@code n <= 0} the window stays empty and the key {@code "[]"} is
 * incremented once per position plus once more.
 *
 * @param n     number of consecutive entries that make up one n-gram
 * @param count map that receives the n-gram counts; updated in place
 * @param word  forwarded to {@code Pair.getContent}; presumably selects the
 *              word ({@code true}) or the tag ({@code false}) -- confirm in Pair
 */
public void countGrams(int n, HashMap<String, Integer> count, boolean word) {
    // Without this guard the priming loop below would call pairs.get(i) past
    // the end of the list and throw IndexOutOfBoundsException.
    if (n > pairs.size()) {
        return;
    }

    List<String> window = new ArrayList<String>();

    // Prime the window with the first n entries.
    int i;
    for (i = 0; i < n; i++) {
        window.add(pairs.get(i).getContent(word));
    }
    Util.incrementMap(count, window.toString());

    // Slide one position at a time: append the next entry, drop the oldest,
    // then record the new window. Appending before removing keeps the
    // n <= 0 case from calling remove(0) on an empty list.
    for (; i < pairs.size(); i++) {
        window.add(pairs.get(i).getContent(word));
        window.remove(0);
        Util.incrementMap(count, window.toString());
    }
}