Пример #1
0
  public HMM(int n, List<Pair> pairs) {
    long time = System.currentTimeMillis();
    this.n = n;
    this.pairs = pairs;

    System.out.println("Counting states");
    // Find States and Labels
    for (Pair pair : pairs) {
      words.add(pair.word);
      tags.add(pair.tag);
    }

    System.out.println("Finding transition probabilities");
    System.out.println(pairs.size());
    // Find tag transition probabilities
    System.out.println("Counting ngrams");
    countGrams(n, nGramCount, false);
    if (n > 1) {
      System.out.println("Counting n-1 grams");
      countGrams(n - 1, nMinusOneGramCount, false);
    }

    // Find emission probabilities
    System.out.println("Finding emission probabilities");
    for (Pair pair : pairs) {
      Util.tagWordCount(tagsMap, pair.word, pair.tag);
      Util.wordTagCount(wordsMap, pair.word, pair.tag);
      Util.incrementMap(wordCount, pair.word);
      Util.incrementMap(tagCount, pair.tag);
    }
    Util.mostCommonTagMap(wordsMap, mostCommonTagMap);
    mostCommonTag = Util.mostCommonTag(tagCount);
    System.out.println((System.currentTimeMillis() - time) / 1000.0);
  }