public Communication convertToConcrete() { if (comm != null) return comm; AnalyticUUIDGeneratorFactory f = new AnalyticUUIDGeneratorFactory(); AnalyticUUIDGenerator g = f.create(); comm = new Communication(); comm.setId(id); comm.setUuid(g.next()); comm.setType(communicationType); comm.setMetadata(Conll2011.META_GENERAL); // Tokenization for the words // TokenTagging for the POS tags // Parse for the constituency parse // TokenTagging for NER labels String sectionNum = null; Section section = null; for (Conll2011Sentence sent : sentences) { if (sectionNum == null || !sent.getPart().equals(sectionNum)) { if (section != null) comm.addToSectionList(section); section = new Section(); section.setUuid(g.next()); section.setKind(Conll2011.SECTION_TYPE); sectionNum = sent.getPart(); } section.addToSentenceList(sent.convertToConcrete(g)); } assert section != null; comm.addToSectionList(section); // SituationMentionSet for the SRL labels propBankSrlSituationMentions = new SituationMentionSet(); propBankSrlSituationMentions.setUuid(g.next()); propBankSrlSituationMentions.setMetadata(Conll2011.META_SRL); propBankSrlSituationMentions.setMentionList(new ArrayList<>()); for (Conll2011Sentence s : sentences) { for (int pai = 0; pai < s.getNumPredicates(); pai++) { SituationMention sm = s.getPredArg(pai, g); assert sm.getTokens() != null || sm.getConstituent() != null; propBankSrlSituationMentions.addToMentionList(sm); } } comm.addToSituationMentionSetList(propBankSrlSituationMentions); // EntitySet and EntityMentionSet for the coref labels addCoref(comm); // EntityMentionSet for the NER labels if (this.conll2011.addNerAsEntityMentionSet) { nerEms = new EntityMentionSet(); nerEms.setUuid(g.next()); nerEms.setMetadata(Conll2011.META_NER); nerEms.setMentionList(new ArrayList<>()); for (Conll2011Sentence s : sentences) for (EntityMention em : s.getNerEntityMentions()) nerEms.addToMentionList(em); comm.addToEntityMentionSetList(nerEms); } return comm; }
private void addCoref(Communication comm) { AnalyticUUIDGeneratorFactory f = new AnalyticUUIDGeneratorFactory(comm); AnalyticUUIDGenerator g = f.create(); corefMentions = new EntityMentionSet(); corefMentions.setUuid(g.next()); corefMentions.setMetadata(Conll2011.META_COREF); corefMentions.setMentionList(new ArrayList<>()); Map<String, List<EntityMention>> clusters = new HashMap<>(); int addedMentions = 0; for (Conll2011Sentence s : sentences) { // Get the entity mentions in this sentence Map<String, List<EntityMention>> c = s.getCoref(g); // Add these mentions to the EntityMentionSet for (List<EntityMention> ems : c.values()) { for (EntityMention em : ems) { corefMentions.addToMentionList(em); addedMentions++; } } // Merge into the document-level view of the entities for (Map.Entry<String, List<EntityMention>> se : c.entrySet()) { String clustId = se.getKey(); List<EntityMention> existingMentions = clusters.get(clustId); if (existingMentions == null) { existingMentions = new ArrayList<>(); clusters.put(clustId, existingMentions); } existingMentions.addAll(se.getValue()); } } int addedEntities = 0; corefClusters = new EntitySet(); corefClusters.setUuid(g.next()); corefClusters.setMetadata(Conll2011.META_COREF); corefClusters.setMentionSetId(corefMentions.getUuid()); corefClusters.setEntityList(new ArrayList<>()); for (Map.Entry<String, List<EntityMention>> cluster : clusters.entrySet()) { addedEntities++; Entity ent = new Entity(); ent.setUuid(g.next()); ent.setConfidence(1); for (EntityMention em : cluster.getValue()) ent.addToMentionIdList(em.getUuid()); corefClusters.addToEntityList(ent); } comm.addToEntitySetList(corefClusters); comm.addToEntityMentionSetList(corefMentions); if (conll2011.warnOnEmptyCoref && (addedMentions == 0 || addedEntities == 0)) { LOGGER.warn( "addedMentions=" + addedMentions + " addedEntities=" + addedEntities + " communication=" + comm.getId()); } }