/**
 * Selects the candidate tags that complete a previously recorded tag trigram.
 *
 * <p>For each candidate, a trigram string is built from the two preceding tags and the candidate
 * via {@code makeTrigramString}; the candidate is kept only if that string is a member of
 * {@code seenTagTrigrams}.
 *
 * @param tags candidate tags to filter
 * @param previousPreviousTag tag two positions before the candidate
 * @param previousTag tag immediately before the candidate
 * @return a new set holding the candidates whose trigram string is in {@code seenTagTrigrams};
 *     empty when none qualify
 */
private Set<String> allowedFollowingTags(
    Set<String> tags, String previousPreviousTag, String previousTag) {
  Set<String> permitted = new HashSet<String>();
  for (String candidate : tags) {
    String trigramKey = makeTrigramString(previousPreviousTag, previousTag, candidate);
    if (!seenTagTrigrams.contains(trigramKey)) {
      continue;
    }
    permitted.add(candidate);
  }
  return permitted;
}
/**
 * Runs the tagger over every sentence, compares its output with the gold tags, and prints a
 * one-line summary to standard output.
 *
 * <p>The summary reports overall tag accuracy, accuracy restricted to words that are not in
 * {@code trainingVocabulary}, and the number of sentences for which the tagger's own score of the
 * gold tagging was strictly higher than its score of the tagging it produced. Either accuracy
 * prints as {@code NaN} when its denominator is zero (floating-point 0.0 / 0.0).
 *
 * <p>NOTE(review): the per-token loop stops at {@code words.size() - 1}, so the final token of
 * each sentence is never counted. Confirm whether that is intentional before changing it, since
 * it affects the reported accuracies.
 *
 * @param posTagger tagger under evaluation
 * @param taggedSentences gold-standard sentences to evaluate on
 * @param trainingVocabulary words treated as "known"; any word outside it counts as unknown
 * @param verbose when true, also prints a warning for each detected decoder suboptimality and
 *     the aligned gold/guessed taggings of every sentence
 */
private static void evaluateTagger(
    POSTagger posTagger,
    List<TaggedSentence> taggedSentences,
    Set<String> trainingVocabulary,
    boolean verbose) {
  double scoredTokens = 0.0;
  double correctTokens = 0.0;
  double scoredUnknown = 0.0;
  double correctUnknown = 0.0;
  int inversions = 0;

  for (TaggedSentence goldSentence : taggedSentences) {
    List<String> words = goldSentence.getWords();
    List<String> goldTags = goldSentence.getTags();
    List<String> guessedTags = posTagger.tag(words);

    // Upper bound deliberately mirrors the existing behaviour: the last token is not scored.
    int scoredPositions = words.size() - 1;
    for (int i = 0; i < scoredPositions; i++) {
      String word = words.get(i);
      String goldTag = goldTags.get(i);
      String guessedTag = guessedTags.get(i);
      boolean hit = guessedTag.equals(goldTag);

      if (hit) {
        correctTokens += 1.0;
      }
      scoredTokens += 1.0;

      boolean known = trainingVocabulary.contains(word);
      if (!known) {
        if (hit) {
          correctUnknown += 1.0;
        }
        scoredUnknown += 1.0;
      }
    }

    // If the model prefers the gold tagging over what the decoder returned, the decoder
    // failed to find the model's best-scoring sequence.
    double goldScore = posTagger.scoreTagging(goldSentence);
    double guessedScore = posTagger.scoreTagging(new TaggedSentence(words, guessedTags));
    boolean inversion = goldScore > guessedScore;
    if (inversion) {
      inversions++;
    }

    if (verbose) {
      if (inversion) {
        System.out.println(
            "WARNING: Decoder suboptimality detected. Gold tagging has higher score than guessed tagging.");
      }
      System.out.println(alignedTaggings(words, goldTags, guessedTags, true) + "\n");
    }
  }

  double tagAccuracy = correctTokens / scoredTokens;
  double unknownAccuracy = correctUnknown / scoredUnknown;
  System.out.println(
      "Tag Accuracy: "
          + tagAccuracy
          + " (Unknown Accuracy: "
          + unknownAccuracy
          + ") Decoder Suboptimalities Detected: "
          + inversions);
}
/**
 * Produces log-scores for the candidate tags of the word at the context's current position.
 *
 * <p>The candidate tags and their counts come from the word's counter in {@code wordsToTags} when
 * the word is one of its keys, and from {@code unknownWordTags} otherwise. Each retained tag is
 * scored as the natural log of its count. When {@code restrictTrigrams} is set and at least one
 * candidate forms a seen trigram with the two previous tags, only those candidates are retained;
 * if no candidate forms a seen trigram, the restriction is dropped and all candidates are kept.
 *
 * @param localTrigramContext supplies the word list, the current position, and the two previous
 *     tags
 * @return a new counter mapping each retained tag to the log of its count
 */
public Counter<String> getLogScoreCounter(LocalTrigramContext localTrigramContext) {
  int index = localTrigramContext.getPosition();
  String word = localTrigramContext.getWords().get(index);

  Counter<String> tagCounts =
      wordsToTags.keySet().contains(word) ? wordsToTags.getCounter(word) : unknownWordTags;
  Set<String> candidates = tagCounts.keySet();

  Set<String> permitted =
      allowedFollowingTags(
          candidates,
          localTrigramContext.getPreviousPreviousTag(),
          localTrigramContext.getPreviousTag());

  Counter<String> logScores = new Counter<String>();
  for (String candidate : candidates) {
    double logScore = Math.log(tagCounts.getCount(candidate));
    // Drop a candidate only when filtering is on, the filter is non-empty, and it excludes it.
    boolean excluded =
        restrictTrigrams && !permitted.isEmpty() && !permitted.contains(candidate);
    if (excluded) {
      continue;
    }
    logScores.setCount(candidate, logScore);
  }
  return logScores;
}