예제 #1
0
  /**
   * Builds the Concrete {@link Communication} for this document and caches it in {@code comm}.
   *
   * <p>The first call creates the communication, groups the sentences into sections (a new section
   * starts whenever a sentence's {@code getPart()} differs from the previous one), attaches the SRL
   * {@link SituationMentionSet}, delegates coreference to {@code addCoref}, and, when {@code
   * conll2011.addNerAsEntityMentionSet} is set, attaches an NER {@link EntityMentionSet}. Any later
   * call returns the cached instance without rebuilding it.
   *
   * <p>As a side effect this assigns the fields {@code comm}, {@code
   * propBankSrlSituationMentions} and (conditionally) {@code nerEms}.
   *
   * @return the communication for this document; the same instance on every call
   */
  public Communication convertToConcrete() {
    if (comm != null) {
      return comm;
    }
    AnalyticUUIDGeneratorFactory f = new AnalyticUUIDGeneratorFactory();
    AnalyticUUIDGenerator g = f.create();
    comm = new Communication();
    comm.setId(id);
    comm.setUuid(g.next());
    comm.setType(communicationType);
    comm.setMetadata(Conll2011.META_GENERAL);

    // Per sentence (built by Conll2011Sentence.convertToConcrete):
    //   Tokenization for the words
    //   TokenTagging for the POS tags
    //   Parse for the constituency parse
    //   TokenTagging for NER labels
    String sectionNum = null;
    Section section = null;
    for (Conll2011Sentence sent : sentences) {
      boolean startsNewSection = sectionNum == null || !sent.getPart().equals(sectionNum);
      if (startsNewSection) {
        // Flush the section that just ended before opening the next one.
        if (section != null) {
          comm.addToSectionList(section);
        }
        section = new Section();
        section.setUuid(g.next());
        section.setKind(Conll2011.SECTION_TYPE);
        sectionNum = sent.getPart();
      }
      section.addToSentenceList(sent.convertToConcrete(g));
    }
    // With no sentences, no section was ever opened. Assertions are disabled by default, so the
    // explicit guard is what keeps a null element out of the section list in normal runs.
    assert section != null : "document has no sentences";
    if (section != null) {
      comm.addToSectionList(section);
    }

    // SituationMentionSet for the SRL labels
    propBankSrlSituationMentions = new SituationMentionSet();
    propBankSrlSituationMentions.setUuid(g.next());
    propBankSrlSituationMentions.setMetadata(Conll2011.META_SRL);
    propBankSrlSituationMentions.setMentionList(new ArrayList<>());
    for (Conll2011Sentence s : sentences) {
      for (int pai = 0; pai < s.getNumPredicates(); pai++) {
        SituationMention sm = s.getPredArg(pai, g);
        assert sm.getTokens() != null || sm.getConstituent() != null;
        propBankSrlSituationMentions.addToMentionList(sm);
      }
    }
    comm.addToSituationMentionSetList(propBankSrlSituationMentions);

    // EntitySet and EntityMentionSet for the coref labels
    addCoref(comm);

    // EntityMentionSet for the NER labels (optional)
    if (this.conll2011.addNerAsEntityMentionSet) {
      nerEms = new EntityMentionSet();
      nerEms.setUuid(g.next());
      nerEms.setMetadata(Conll2011.META_NER);
      nerEms.setMentionList(new ArrayList<>());
      for (Conll2011Sentence s : sentences) {
        for (EntityMention em : s.getNerEntityMentions()) {
          nerEms.addToMentionList(em);
        }
      }
      comm.addToEntityMentionSetList(nerEms);
    }

    return comm;
  }
예제 #2
0
 /**
  * Creates the coreference {@link EntityMentionSet} and {@link EntitySet} for this document, stores
  * them in {@code corefMentions} and {@code corefClusters}, and attaches both to {@code comm}.
  *
  * <p>Mentions returned by each sentence's {@code getCoref} are grouped by cluster id across all
  * sentences; every group becomes one {@link Entity} with confidence 1 that lists its mentions'
  * UUIDs. A warning is logged when {@code conll2011.warnOnEmptyCoref} is set and either no
  * mentions or no entities were produced.
  *
  * @param comm the communication that receives the entity set and entity mention set
  */
 private void addCoref(Communication comm) {
   AnalyticUUIDGenerator uuids = new AnalyticUUIDGeneratorFactory(comm).create();

   corefMentions = new EntityMentionSet();
   corefMentions.setUuid(uuids.next());
   corefMentions.setMetadata(Conll2011.META_COREF);
   corefMentions.setMentionList(new ArrayList<>());

   // Document-level view: cluster id -> every mention of that cluster, in sentence order.
   Map<String, List<EntityMention>> mentionsByCluster = new HashMap<>();
   int mentionCount = 0;
   for (Conll2011Sentence sentence : sentences) {
     Map<String, List<EntityMention>> sentenceClusters = sentence.getCoref(uuids);
     for (Map.Entry<String, List<EntityMention>> entry : sentenceClusters.entrySet()) {
       List<EntityMention> found = entry.getValue();
       // Register each mention in the flat mention set ...
       for (EntityMention mention : found) {
         corefMentions.addToMentionList(mention);
         mentionCount++;
       }
       // ... and fold the sentence-level cluster into the document-level one.
       mentionsByCluster.computeIfAbsent(entry.getKey(), key -> new ArrayList<>()).addAll(found);
     }
   }

   corefClusters = new EntitySet();
   corefClusters.setUuid(uuids.next());
   corefClusters.setMetadata(Conll2011.META_COREF);
   corefClusters.setMentionSetId(corefMentions.getUuid());
   corefClusters.setEntityList(new ArrayList<>());

   int entityCount = 0;
   for (List<EntityMention> members : mentionsByCluster.values()) {
     Entity entity = new Entity();
     entity.setUuid(uuids.next());
     entity.setConfidence(1);
     for (EntityMention member : members) {
       entity.addToMentionIdList(member.getUuid());
     }
     corefClusters.addToEntityList(entity);
     entityCount++;
   }

   comm.addToEntitySetList(corefClusters);
   comm.addToEntityMentionSetList(corefMentions);

   boolean nothingAdded = mentionCount == 0 || entityCount == 0;
   if (conll2011.warnOnEmptyCoref && nothingAdded) {
     String message =
         "addedMentions="
             + mentionCount
             + " addedEntities="
             + entityCount
             + " communication="
             + comm.getId();
     LOGGER.warn(message);
   }
 }