/**
 * Tags every sentence in {@code testSentences} with {@code posTagger} and writes the predicted
 * tags to the file at {@code path}.
 *
 * <p>Output format: one {@code word<TAB>tag} pair per line, followed by an empty line after each
 * sentence. Only the words of each sentence are read; any tags already stored on the sentence
 * are not consulted.
 *
 * @param posTagger tagger used to predict a tag for each word
 * @param testSentences sentences whose words are to be tagged
 * @param path location of the output file, opened with {@link FileWriter}
 * @throws Exception if the file cannot be opened or written, or if tagging fails
 */
private static void labelTestSet(
    POSTagger posTagger, List<TaggedSentence> testSentences, String path) throws Exception {
  // try-with-resources closes (and thereby flushes) the writer even when tagging or writing
  // throws part-way through, so the file handle is never leaked.
  try (BufferedWriter writer = new BufferedWriter(new FileWriter(path))) {
    for (TaggedSentence sentence : testSentences) {
      List<String> words = sentence.getWords();
      List<String> guessedTags = posTagger.tag(words);
      for (int i = 0; i < words.size(); i++) {
        writer.write(words.get(i) + "\t" + guessedTags.get(i) + "\n");
      }
      // Blank line marks the sentence boundary.
      writer.write("\n");
    }
  }
}
/**
 * Runs {@code posTagger} over {@code taggedSentences} and prints a one-line summary to standard
 * output: overall tag accuracy, accuracy on words absent from {@code trainingVocabulary}, and the
 * number of sentences where the gold tagging scored strictly higher than the tagger's own output
 * under {@code posTagger.scoreTagging}.
 *
 * <p>If no positions (or no unknown words) are scored, the corresponding ratio is {@code 0.0/0.0}
 * and is printed as {@code NaN}.
 *
 * @param posTagger tagger under evaluation; also used to score both taggings
 * @param taggedSentences sentences carrying the gold tags to compare against
 * @param trainingVocabulary words treated as known; any other word counts as unknown
 * @param verbose when true, prints a warning for each decoder suboptimality and the aligned
 *     gold/guessed taggings for every sentence
 */
private static void evaluateTagger(
    POSTagger posTagger,
    List<TaggedSentence> taggedSentences,
    Set<String> trainingVocabulary,
    boolean verbose) {
  double total = 0.0;
  double totalCorrect = 0.0;
  double unknownTotal = 0.0;
  double unknownCorrect = 0.0;
  int inversions = 0;

  for (TaggedSentence gold : taggedSentences) {
    List<String> words = gold.getWords();
    List<String> goldTags = gold.getTags();
    List<String> guessedTags = posTagger.tag(words);

    // NOTE(review): the bound is size() - 1, so the final token of every sentence is left out
    // of the accuracy counts. Unclear from here whether that is intentional - confirm.
    int scoredPositions = words.size() - 1;
    for (int idx = 0; idx < scoredPositions; idx++) {
      boolean correct = guessedTags.get(idx).equals(goldTags.get(idx));
      if (correct) {
        totalCorrect += 1.0;
      }
      total += 1.0;

      boolean known = trainingVocabulary.contains(words.get(idx));
      if (known) {
        continue;
      }
      if (correct) {
        unknownCorrect += 1.0;
      }
      unknownTotal += 1.0;
    }

    // A gold tagging that outscores the tagger's output means the decoder did not return the
    // highest-scoring tagging under its own scoring function.
    double goldScore = posTagger.scoreTagging(gold);
    double guessedScore = posTagger.scoreTagging(new TaggedSentence(words, guessedTags));
    if (goldScore > guessedScore) {
      inversions++;
      if (verbose) {
        System.out.println(
            "WARNING: Decoder suboptimality detected. Gold tagging has higher score than guessed tagging.");
      }
    }

    if (verbose) {
      System.out.println(alignedTaggings(words, goldTags, guessedTags, true) + "\n");
    }
  }

  double tagAccuracy = totalCorrect / total;
  double unknownAccuracy = unknownCorrect / unknownTotal;
  System.out.println(
      "Tag Accuracy: "
          + tagAccuracy
          + " (Unknown Accuracy: "
          + unknownAccuracy
          + ") Decoder Suboptimalities Detected: "
          + inversions);
}