public List<List<Mention>> extractGoldMentions(CoNLL2011DocumentReader.Document conllDoc) { List<CoreMap> sentences = conllDoc.getAnnotation().get(CoreAnnotations.SentencesAnnotation.class); List<List<Mention>> allGoldMentions = new ArrayList<List<Mention>>(); CollectionValuedMap<String, CoreMap> corefChainMap = conllDoc.getCorefChainMap(); for (int i = 0; i < sentences.size(); i++) { allGoldMentions.add(new ArrayList<Mention>()); } int maxCorefClusterId = -1; for (String corefIdStr : corefChainMap.keySet()) { int id = Integer.parseInt(corefIdStr); if (id > maxCorefClusterId) { maxCorefClusterId = id; } } int newMentionID = maxCorefClusterId + 1; for (String corefIdStr : corefChainMap.keySet()) { int id = Integer.parseInt(corefIdStr); int clusterMentionCnt = 0; for (CoreMap m : corefChainMap.get(corefIdStr)) { clusterMentionCnt++; Mention mention = new Mention(); mention.goldCorefClusterID = id; if (clusterMentionCnt == 1) { // First mention in cluster mention.mentionID = id; mention.originalRef = -1; } else { mention.mentionID = newMentionID; mention.originalRef = id; newMentionID++; } if (maxID < mention.mentionID) maxID = mention.mentionID; int sentIndex = m.get(CoreAnnotations.SentenceIndexAnnotation.class); CoreMap sent = sentences.get(sentIndex); mention.startIndex = m.get(CoreAnnotations.TokenBeginAnnotation.class) - sent.get(CoreAnnotations.TokenBeginAnnotation.class); mention.endIndex = m.get(CoreAnnotations.TokenEndAnnotation.class) - sent.get(CoreAnnotations.TokenBeginAnnotation.class); // will be set by arrange mention.originalSpan = m.get(CoreAnnotations.TokensAnnotation.class); // Mention dependency is collapsed dependency for sentence mention.dependency = sentences .get(sentIndex) .get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class); allGoldMentions.get(sentIndex).add(mention); } } return allGoldMentions; }
/**
 * Creates a copy of a per-sentence mention structure with new lists and new {@code Mention}
 * objects.
 *
 * <p>For each mention only {@code goldCorefClusterID}, {@code mentionID}, {@code startIndex},
 * {@code endIndex}, {@code originalSpan} and {@code dependency} are transferred. The last two are
 * copied by reference, so the copy shares those objects with the source mention. Any other field
 * keeps whatever value a newly constructed {@code Mention} has.
 *
 * <p>NOTE(review): {@code originalRef}, which {@code extractGoldMentions} assigns, is not copied
 * here -- confirm whether that is intentional.
 *
 * @param mentions mentions grouped by sentence; neither the outer list nor its elements may be
 *     null
 * @return a new list of new lists, mirroring the order of {@code mentions}
 */
public static List<List<Mention>> makeCopy(List<List<Mention>> mentions) {
  List<List<Mention>> duplicate = new ArrayList<List<Mention>>(mentions.size());
  for (List<Mention> sentenceMentions : mentions) {
    List<Mention> sentenceDuplicate = new ArrayList<Mention>(sentenceMentions.size());
    for (Mention source : sentenceMentions) {
      Mention replica = new Mention();
      // Identity of the mention.
      replica.mentionID = source.mentionID;
      replica.goldCorefClusterID = source.goldCorefClusterID;
      // Position within the sentence.
      replica.startIndex = source.startIndex;
      replica.endIndex = source.endIndex;
      // Shared (not cloned) references.
      replica.originalSpan = source.originalSpan;
      replica.dependency = source.dependency;
      sentenceDuplicate.add(replica);
    }
    duplicate.add(sentenceDuplicate);
  }
  return duplicate;
}