/**
 * Computes the log score of a tagged sentence by summing one log score per local trigram
 * context extracted from it.
 *
 * <p>For each context, the local trigram scorer supplies a counter of log scores keyed by tag.
 * If the context's current tag is a key of that counter, its score is added to the total;
 * otherwise {@code Double.NEGATIVE_INFINITY} is added. A tag sequence not accepted by the markov
 * process should therefore receive a log score of {@code Double.NEGATIVE_INFINITY}.
 *
 * @param taggedSentence the sentence together with its proposed tags
 * @return the accumulated log score over all extracted contexts
 */
public double scoreTagging(TaggedSentence taggedSentence) {
  double total = 0.0;
  for (LabeledLocalTrigramContext context : extractLabeledLocalTrigramContexts(taggedSentence)) {
    Counter<String> tagScores = localTrigramScorer.getLogScoreCounter(context);
    String tag = context.getCurrentTag();
    // Every context is still scored after a rejected tag; the loop does not return early.
    total += tagScores.containsKey(tag) ? tagScores.getCount(tag) : Double.NEGATIVE_INFINITY;
  }
  return total;
}
public void train(List<LabeledLocalTrigramContext> labeledLocalTrigramContexts) { // collect word-tag counts for (LabeledLocalTrigramContext labeledLocalTrigramContext : labeledLocalTrigramContexts) { String word = labeledLocalTrigramContext.getCurrentWord(); String tag = labeledLocalTrigramContext.getCurrentTag(); if (!wordsToTags.keySet().contains(word)) { // word is currently unknown, so tally its tag in the unknown tag counter unknownWordTags.incrementCount(tag, 1.0); } wordsToTags.incrementCount(word, tag, 1.0); seenTagTrigrams.add( makeTrigramString( labeledLocalTrigramContext.getPreviousPreviousTag(), labeledLocalTrigramContext.getPreviousTag(), labeledLocalTrigramContext.getCurrentTag())); } wordsToTags = Counters.conditionalNormalize(wordsToTags); unknownWordTags = Counters.normalize(unknownWordTags); }