예제 #1
0
 private Set<String> allowedFollowingTags(
     Set<String> tags, String previousPreviousTag, String previousTag) {
   Set<String> allowedTags = new HashSet<String>();
   for (String tag : tags) {
     String trigramString = makeTrigramString(previousPreviousTag, previousTag, tag);
     if (seenTagTrigrams.contains((trigramString))) {
       allowedTags.add(tag);
     }
   }
   return allowedTags;
 }
예제 #2
0
 private static void evaluateTagger(
     POSTagger posTagger,
     List<TaggedSentence> taggedSentences,
     Set<String> trainingVocabulary,
     boolean verbose) {
   double numTags = 0.0;
   double numTagsCorrect = 0.0;
   double numUnknownWords = 0.0;
   double numUnknownWordsCorrect = 0.0;
   int numDecodingInversions = 0;
   for (TaggedSentence taggedSentence : taggedSentences) {
     List<String> words = taggedSentence.getWords();
     List<String> goldTags = taggedSentence.getTags();
     List<String> guessedTags = posTagger.tag(words);
     for (int position = 0; position < words.size() - 1; position++) {
       String word = words.get(position);
       String goldTag = goldTags.get(position);
       String guessedTag = guessedTags.get(position);
       if (guessedTag.equals(goldTag)) numTagsCorrect += 1.0;
       numTags += 1.0;
       if (!trainingVocabulary.contains(word)) {
         if (guessedTag.equals(goldTag)) numUnknownWordsCorrect += 1.0;
         numUnknownWords += 1.0;
       }
     }
     double scoreOfGoldTagging = posTagger.scoreTagging(taggedSentence);
     double scoreOfGuessedTagging = posTagger.scoreTagging(new TaggedSentence(words, guessedTags));
     if (scoreOfGoldTagging > scoreOfGuessedTagging) {
       numDecodingInversions++;
       if (verbose)
         System.out.println(
             "WARNING: Decoder suboptimality detected.  Gold tagging has higher score than guessed tagging.");
     }
     if (verbose) System.out.println(alignedTaggings(words, goldTags, guessedTags, true) + "\n");
   }
   System.out.println(
       "Tag Accuracy: "
           + (numTagsCorrect / numTags)
           + " (Unknown Accuracy: "
           + (numUnknownWordsCorrect / numUnknownWords)
           + ")  Decoder Suboptimalities Detected: "
           + numDecodingInversions);
 }
예제 #3
0
 public Counter<String> getLogScoreCounter(LocalTrigramContext localTrigramContext) {
   int position = localTrigramContext.getPosition();
   String word = localTrigramContext.getWords().get(position);
   Counter<String> tagCounter = unknownWordTags;
   if (wordsToTags.keySet().contains(word)) {
     tagCounter = wordsToTags.getCounter(word);
   }
   Set<String> allowedFollowingTags =
       allowedFollowingTags(
           tagCounter.keySet(),
           localTrigramContext.getPreviousPreviousTag(),
           localTrigramContext.getPreviousTag());
   Counter<String> logScoreCounter = new Counter<String>();
   for (String tag : tagCounter.keySet()) {
     double logScore = Math.log(tagCounter.getCount(tag));
     if (!restrictTrigrams
         || allowedFollowingTags.isEmpty()
         || allowedFollowingTags.contains(tag)) logScoreCounter.setCount(tag, logScore);
   }
   return logScoreCounter;
 }