Пример #1
0
 /**
  * Computes the log score of a tagged sentence as the sum of per-position log scores.
  *
  * <p>For every labeled local trigram context extracted from the sentence, the local trigram
  * scorer supplies a counter of log scores keyed by tag. The entry for the context's current
  * tag is added to the running total. If the counter has no entry for that tag, the position
  * contributes Double.NEGATIVE_INFINITY, so a tag sequence not accepted by the markov process
  * receives a log score of Double.NEGATIVE_INFINITY.
  *
  * @param taggedSentence the sentence, together with its tags, to score
  * @return the accumulated log score over all extracted contexts (0.0 when there are none)
  */
 public double scoreTagging(TaggedSentence taggedSentence) {
   double total = 0.0;
   for (LabeledLocalTrigramContext context : extractLabeledLocalTrigramContexts(taggedSentence)) {
     Counter<String> tagScores = localTrigramScorer.getLogScoreCounter(context);
     String tag = context.getCurrentTag();
     // A tag the scorer did not propose is treated as impossible at this position.
     total += tagScores.containsKey(tag) ? tagScores.getCount(tag) : Double.NEGATIVE_INFINITY;
   }
   return total;
 }
Пример #2
0
 /**
  * Collects word/tag statistics from the given labeled contexts and normalizes them.
  *
  * <p>For each context, in order:
  * <ul>
  *   <li>if the current word is not yet a key of {@code wordsToTags}, its tag is tallied in
  *       {@code unknownWordTags};
  *   <li>the (word, tag) pair is tallied in {@code wordsToTags};
  *   <li>the trigram string built from the previous-previous, previous and current tags is
  *       added to {@code seenTagTrigrams}.
  * </ul>
  * After the pass, {@code wordsToTags} is replaced by its conditionally normalized form and
  * {@code unknownWordTags} by its normalized form.
  *
  * @param labeledLocalTrigramContexts the training contexts to accumulate statistics from
  */
 public void train(List<LabeledLocalTrigramContext> labeledLocalTrigramContexts) {
   for (LabeledLocalTrigramContext context : labeledLocalTrigramContexts) {
     String word = context.getCurrentWord();
     String tag = context.getCurrentTag();

     // The check must precede the word/tag tally below, which registers the word as a key.
     boolean wordAlreadySeen = wordsToTags.keySet().contains(word);
     if (!wordAlreadySeen) {
       // Not seen so far: count its tag toward the unknown-word tag counter.
       unknownWordTags.incrementCount(tag, 1.0);
     }
     wordsToTags.incrementCount(word, tag, 1.0);

     // Record the tag trigram ending at this position.
     seenTagTrigrams.add(
         makeTrigramString(
             context.getPreviousPreviousTag(),
             context.getPreviousTag(),
             context.getCurrentTag()));
   }

   // Replace the accumulated tallies with their normalized forms.
   wordsToTags = Counters.conditionalNormalize(wordsToTags);
   unknownWordTags = Counters.normalize(unknownWordTags);
 }