private void addMissingAnnotation(Annotation anno) { boolean useConstituency = CorefProperties.useConstituencyTree(props); final boolean LEMMATIZE = true; List<CoreMap> sentences = anno.get(CoreAnnotations.SentencesAnnotation.class); for (CoreMap sentence : sentences) { boolean hasTree = sentence.containsKey(TreeCoreAnnotations.TreeAnnotation.class); Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class); if (!useConstituency) { // TODO: temp for dev: make sure we don't use constituency tree sentence.remove(TreeCoreAnnotations.TreeAnnotation.class); } if (LEMMATIZE && hasTree && useConstituency) treeLemmatizer.transformTree(tree); // TODO don't need? } corenlp.annotate(anno); }
/**
 * Reads the next CoNLL document from the underlying reader and converts it into
 * a coref {@code Document}: sets up parses/dependencies, utterance indices,
 * runs the CoreNLP pipeline, and extracts gold and predicted mentions.
 *
 * @return the next assembled {@code Document}, or {@code null} when the input is exhausted
 * @throws Exception if document reading or processing fails
 */
@Override
public Document nextDoc() throws Exception {
  // Per-sentence tokens and trees, collected after pipeline annotation and passed to arrange().
  List<List<CoreLabel>> allWords = new ArrayList<List<CoreLabel>>();
  List<Tree> allTrees = new ArrayList<Tree>();
  CoNLL2011DocumentReader.Document conllDoc = reader.getNextDocument();
  if (conllDoc == null) {
    // End of corpus.
    return null;
  }
  Annotation anno = conllDoc.getAnnotation();
  List<CoreMap> sentences = anno.get(CoreAnnotations.SentencesAnnotation.class);
  for (CoreMap sentence : sentences) {
    if (!Constants.USE_GOLD_PARSES && !replicateCoNLL) {
      // Remove tree from annotation and replace with parse using stanford parser
      sentence.remove(TreeCoreAnnotations.TreeAnnotation.class);
    } else {
      // Keep the provided (gold) tree and derive dependency graphs from it.
      Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
      // generate the dependency graph
      try {
        SemanticGraph deps =
            SemanticGraphFactory.makeFromTree(
                tree, SemanticGraphFactory.Mode.COLLAPSED, includeExtras, lemmatize, threadSafe);
        SemanticGraph basicDeps =
            SemanticGraphFactory.makeFromTree(
                tree, SemanticGraphFactory.Mode.BASIC, includeExtras, lemmatize, threadSafe);
        sentence.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, basicDeps);
        sentence.set(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class, deps);
      } catch (Exception e) {
        // Best-effort: a tree that cannot be converted should not abort reading the document.
        logger.log(
            Level.WARNING,
            "Exception caught during extraction of Stanford dependencies. Will ignore and continue...",
            e);
      }
    }
  }
  // Assign utterance indices: the counter is bumped every time the speaker label changes
  // between consecutive tokens (missing speakers are normalized to the empty string first).
  String preSpeaker = null;
  int utterance = -1;
  for (CoreLabel token : anno.get(CoreAnnotations.TokensAnnotation.class)) {
    if (!token.containsKey(CoreAnnotations.SpeakerAnnotation.class)) {
      token.set(CoreAnnotations.SpeakerAnnotation.class, "");
    }
    String curSpeaker = token.get(CoreAnnotations.SpeakerAnnotation.class);
    if (!curSpeaker.equals(preSpeaker)) {
      utterance++;
      preSpeaker = curSpeaker;
    }
    token.set(CoreAnnotations.UtteranceAnnotation.class, utterance);
  }
  // Run pipeline
  stanfordProcessor.annotate(anno);
  // Collect tokens and trees per sentence after annotation.
  for (CoreMap sentence : anno.get(CoreAnnotations.SentencesAnnotation.class)) {
    allWords.add(sentence.get(CoreAnnotations.TokensAnnotation.class));
    allTrees.add(sentence.get(TreeCoreAnnotations.TreeAnnotation.class));
  }
  // Initialize gold mentions
  List<List<Mention>> allGoldMentions = extractGoldMentions(conllDoc);
  // Choose the predicted-mention source based on the configured evaluation mode.
  List<List<Mention>> allPredictedMentions;
  if (Constants.USE_GOLD_MENTIONS) {
    // allPredictedMentions = allGoldMentions;
    // Make copy of gold mentions since mentions may be later merged, mentionID's changed and
    // stuff
    allPredictedMentions = makeCopy(allGoldMentions);
  } else if (Constants.USE_GOLD_MENTION_BOUNDARIES) {
    // NOTE(review): assumes mentionFinder is a RuleBasedCorefMentionFinder in this mode —
    // the cast will throw ClassCastException otherwise; confirm configuration guarantees this.
    allPredictedMentions =
        ((RuleBasedCorefMentionFinder) mentionFinder)
            .filterPredictedMentions(allGoldMentions, anno, dictionaries);
  } else {
    allPredictedMentions = mentionFinder.extractPredictedMentions(anno, maxID, dictionaries);
  }
  try {
    recallErrors(allGoldMentions, allPredictedMentions, anno);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  // Assemble the final Document and keep a handle to the raw CoNLL document.
  Document doc = arrange(anno, allWords, allTrees, allPredictedMentions, allGoldMentions, true);
  doc.conllDoc = conllDoc;
  return doc;
}