Пример #1
0
 /**
  * Tags every sentence in {@code testSentences} with {@code posTagger} and writes the result to
  * the file at {@code path}.
  *
  * <p>Output format: one {@code word<TAB>tag} line per token, followed by an empty line after
  * each sentence.
  *
  * @param posTagger tagger used to predict a tag for each word
  * @param testSentences sentences to label; only their words are read
  * @param path destination file; it is created or overwritten
  * @throws Exception if the file cannot be opened or written, or if tagging fails
  */
 private static void labelTestSet(
     POSTagger posTagger, List<TaggedSentence> testSentences, String path) throws Exception {
   // try-with-resources guarantees the writer is flushed and closed even when tagging or
   // writing throws part-way through, so the file handle is never leaked.
   try (BufferedWriter writer = new BufferedWriter(new FileWriter(path))) {
     for (TaggedSentence sentence : testSentences) {
       List<String> words = sentence.getWords();
       List<String> guessedTags = posTagger.tag(words);
       for (int i = 0; i < words.size(); i++) {
         writer.write(words.get(i) + "\t" + guessedTags.get(i) + "\n");
       }
       // A blank line separates consecutive sentences.
       writer.write("\n");
     }
   }
 }
Пример #2
0
 /**
  * Runs {@code posTagger} over {@code taggedSentences}, compares its output with the gold tags,
  * and prints a one-line summary to standard output.
  *
  * <p>The summary reports overall tag accuracy, accuracy on words that are not contained in
  * {@code trainingVocabulary}, and the number of sentences for which the tagger scored the gold
  * tagging strictly higher than its own guess. An accuracy prints as {@code NaN} when nothing
  * was counted for it.
  *
  * @param posTagger tagger under evaluation
  * @param taggedSentences gold-standard sentences
  * @param trainingVocabulary words considered "known"; any other word counts as unknown
  * @param verbose when true, also prints a warning per suboptimal decode and the aligned
  *     gold/guessed taggings of every sentence
  */
 private static void evaluateTagger(
     POSTagger posTagger,
     List<TaggedSentence> taggedSentences,
     Set<String> trainingVocabulary,
     boolean verbose) {
   long totalTags = 0;
   long correctTags = 0;
   long totalUnknown = 0;
   long correctUnknown = 0;
   int suboptimalDecodes = 0;

   for (TaggedSentence goldSentence : taggedSentences) {
     List<String> words = goldSentence.getWords();
     List<String> goldTags = goldSentence.getTags();
     List<String> guessedTags = posTagger.tag(words);

     // NOTE(review): the bound is size() - 1, so the last token of every sentence is never
     // scored. Confirm whether this is intentional before changing it.
     int scoredTokens = words.size() - 1;
     for (int i = 0; i < scoredTokens; i++) {
       String word = words.get(i);
       String goldTag = goldTags.get(i);
       String guessedTag = guessedTags.get(i);

       boolean isCorrect = guessedTag.equals(goldTag);
       boolean isUnknown = !trainingVocabulary.contains(word);

       totalTags++;
       if (isCorrect) {
         correctTags++;
       }
       if (isUnknown) {
         totalUnknown++;
         if (isCorrect) {
           correctUnknown++;
         }
       }
     }

     // If the model prefers the gold tagging over its own output, the decoder failed to find
     // the highest-scoring sequence.
     double goldScore = posTagger.scoreTagging(goldSentence);
     double guessedScore = posTagger.scoreTagging(new TaggedSentence(words, guessedTags));
     boolean goldScoresHigher = goldScore > guessedScore;
     if (goldScoresHigher) {
       suboptimalDecodes++;
     }

     if (verbose) {
       if (goldScoresHigher) {
         System.out.println(
             "WARNING: Decoder suboptimality detected.  Gold tagging has higher score than guessed tagging.");
       }
       System.out.println(alignedTaggings(words, goldTags, guessedTags, true) + "\n");
     }
   }

   // Floating-point division: a zero denominator yields NaN instead of throwing.
   double tagAccuracy = (double) correctTags / totalTags;
   double unknownAccuracy = (double) correctUnknown / totalUnknown;
   System.out.println(
       "Tag Accuracy: "
           + tagAccuracy
           + " (Unknown Accuracy: "
           + unknownAccuracy
           + ")  Decoder Suboptimalities Detected: "
           + suboptimalDecodes);
 }