Ejemplo n.º 1
0
  public HMM(int n, List<Pair> pairs) {
    long time = System.currentTimeMillis();
    this.n = n;
    this.pairs = pairs;

    System.out.println("Counting states");
    // Find States and Labels
    for (Pair pair : pairs) {
      words.add(pair.word);
      tags.add(pair.tag);
    }

    System.out.println("Finding transition probabilities");
    System.out.println(pairs.size());
    // Find tag transition probabilities
    System.out.println("Counting ngrams");
    countGrams(n, nGramCount, false);
    if (n > 1) {
      System.out.println("Counting n-1 grams");
      countGrams(n - 1, nMinusOneGramCount, false);
    }

    // Find emission probabilities
    System.out.println("Finding emission probabilities");
    for (Pair pair : pairs) {
      Util.tagWordCount(tagsMap, pair.word, pair.tag);
      Util.wordTagCount(wordsMap, pair.word, pair.tag);
      Util.incrementMap(wordCount, pair.word);
      Util.incrementMap(tagCount, pair.tag);
    }
    Util.mostCommonTagMap(wordsMap, mostCommonTagMap);
    mostCommonTag = Util.mostCommonTag(tagCount);
    System.out.println((System.currentTimeMillis() - time) / 1000.0);
  }
Ejemplo n.º 2
0
 /**
  * Counts every contiguous run of {@code n} items in {@code pairs} using a sliding window.
  *
  * <p>Each window is turned into a key with {@code List.toString()} (for example
  * {@code "[a, b]"}) and passed to {@code Util.incrementMap}, which presumably bumps the
  * counter stored under that key in {@code count}.
  *
  * <p>If {@code pairs} holds fewer than {@code n} elements, no complete n-gram exists and
  * the method returns without touching {@code count}. For {@code n <= 0} the window is
  * always empty, so the key {@code "[]"} is counted {@code pairs.size() + 1} times.
  *
  * @param n     length of the grams to count
  * @param count map that receives the per-gram counts; modified in place
  * @param word  forwarded to {@code Pair.getContent} to select which part of each pair
  *              forms the gram (presumably {@code true} for the word, {@code false} for
  *              the tag — confirm against {@code Pair})
  */
 public void countGrams(int n, HashMap<String, Integer> count, boolean word) {
   // With fewer than n pairs, seeding the first window would read past the end of the
   // list and throw IndexOutOfBoundsException; there is nothing to count in that case.
   if (pairs.size() < n) {
     return;
   }

   List<String> nGram = new ArrayList<String>();
   int i;
   // Seed the window with the first n items.
   for (i = 0; i < n; i++) {
     nGram.add(pairs.get(i).getContent(word));
   }
   // Record the current window, then slide it forward by one item.
   for (; i < pairs.size(); i++) {
     Util.incrementMap(count, nGram.toString());
     nGram.add(pairs.get(i).getContent(word));
     nGram.remove(0);
   }
   // The final window is still pending once the loop ends.
   Util.incrementMap(count, nGram.toString());
 }