/**
 * Builds the coreference annotations for this document and attaches them to {@code comm}.
 *
 * <p>Every sentence contributes its mentions, keyed by cluster id. All mentions are collected into
 * a single {@code EntityMentionSet} (stored in {@code corefMentions}); mentions sharing a cluster
 * id across sentences become one {@code Entity} in an {@code EntitySet} (stored in {@code
 * corefClusters}) that points back at the mention set. Both sets are appended to {@code comm}.
 *
 * @param comm the communication that seeds the UUID generator and receives the two new sets
 */
private void addCoref(Communication comm) {
   AnalyticUUIDGenerator uuids = new AnalyticUUIDGeneratorFactory(comm).create();

   corefMentions = new EntityMentionSet();
   corefMentions.setUuid(uuids.next());
   corefMentions.setMetadata(Conll2011.META_COREF);
   corefMentions.setMentionList(new ArrayList<>());

   // Document-level view: cluster id -> every mention of that cluster, in sentence order.
   Map<String, List<EntityMention>> mentionsByCluster = new HashMap<>();
   int mentionCount = 0;
   for (Conll2011Sentence sentence : sentences) {
     Map<String, List<EntityMention>> sentenceClusters = sentence.getCoref(uuids);
     for (Map.Entry<String, List<EntityMention>> entry : sentenceClusters.entrySet()) {
       List<EntityMention> sentenceMentions = entry.getValue();
       // Register each mention in the flat mention set...
       for (EntityMention mention : sentenceMentions) {
         corefMentions.addToMentionList(mention);
         mentionCount++;
       }
       // ...and fold the sentence-local cluster into the document-level one.
       mentionsByCluster
           .computeIfAbsent(entry.getKey(), unused -> new ArrayList<>())
           .addAll(sentenceMentions);
     }
   }

   corefClusters = new EntitySet();
   corefClusters.setUuid(uuids.next());
   corefClusters.setMetadata(Conll2011.META_COREF);
   corefClusters.setMentionSetId(corefMentions.getUuid());
   corefClusters.setEntityList(new ArrayList<>());

   // One Entity per cluster id, referencing its mentions by UUID.
   int entityCount = 0;
   for (List<EntityMention> members : mentionsByCluster.values()) {
     entityCount++;
     Entity entity = new Entity();
     entity.setUuid(uuids.next());
     entity.setConfidence(1);
     for (EntityMention member : members) {
       entity.addToMentionIdList(member.getUuid());
     }
     corefClusters.addToEntityList(entity);
   }

   comm.addToEntitySetList(corefClusters);
   comm.addToEntityMentionSetList(corefMentions);

   // Optionally report documents that ended up with no coref annotations at all.
   if (conll2011.warnOnEmptyCoref) {
     boolean nothingAdded = mentionCount == 0 || entityCount == 0;
     if (nothingAdded) {
       LOGGER.warn(
           "addedMentions="
               + mentionCount
               + " addedEntities="
               + entityCount
               + " communication="
               + comm.getId());
     }
   }
 }
Beispiel #2
0
 /**
  * Ingests a mock document that already carries a section segmentation; the test passes only if a
  * {@link RebarException} is thrown somewhere along the way.
  */
 @Test(expected = RebarException.class)
 public void exWithAnnotations()
     throws RebarException, TableNotFoundException, MutationsRejectedException,
         InvalidInputException {
   // Sanity check: an id that was never ingested must not be reported as present.
   assertFalse(cr.exists("bar"));

   Communication annotated = generateMockDocument();
   annotated.addToSectionSegmentations(new SingleSectionSegmenter().annotateDiff(annotated));

   // NOTE(review): presumably this ingest is what rejects the pre-annotated document - confirm.
   ci.ingest(annotated);
   assertTrue(cr.exists(annotated));
 }
  /**
   * Converts this document into a {@link Communication}.
   *
   * <p>The communication is built on the first call and stored in {@code comm}; later calls return
   * that stored instance. The result contains one section per run of consecutive sentences with
   * the same part, a situation mention set for the SRL predicates, the coreference sets added by
   * {@code addCoref}, and (when enabled) an entity mention set for the NER labels.
   *
   * @return the converted communication
   */
  public Communication convertToConcrete() {
    if (comm != null) {
      return comm;
    }

    AnalyticUUIDGenerator uuids = new AnalyticUUIDGeneratorFactory().create();
    comm = new Communication();
    comm.setId(id);
    comm.setUuid(uuids.next());
    comm.setType(communicationType);
    comm.setMetadata(Conll2011.META_GENERAL);

    // Per-sentence annotations (as listed by the original author):
    //   Tokenization for the words
    //   TokenTagging for the POS tags
    //   Parse for the constituency parse
    //   TokenTagging for NER labels
    // A new Section is opened whenever the sentence's part differs from the previous one.
    String currentPart = null;
    Section currentSection = null;
    for (Conll2011Sentence sentence : sentences) {
      boolean continuesPart = currentPart != null && sentence.getPart().equals(currentPart);
      if (!continuesPart) {
        // Flush the section we were filling before starting the next one.
        if (currentSection != null) {
          comm.addToSectionList(currentSection);
        }
        currentSection = new Section();
        currentSection.setUuid(uuids.next());
        currentSection.setKind(Conll2011.SECTION_TYPE);
        currentPart = sentence.getPart();
      }
      currentSection.addToSentenceList(sentence.convertToConcrete(uuids));
    }
    // NOTE(review): with assertions disabled and no sentences, a null section is added here.
    assert currentSection != null;
    comm.addToSectionList(currentSection);

    // SituationMentionSet for the SRL labels
    propBankSrlSituationMentions = new SituationMentionSet();
    propBankSrlSituationMentions.setUuid(uuids.next());
    propBankSrlSituationMentions.setMetadata(Conll2011.META_SRL);
    propBankSrlSituationMentions.setMentionList(new ArrayList<>());
    for (Conll2011Sentence sentence : sentences) {
      for (int predIdx = 0; predIdx < sentence.getNumPredicates(); predIdx++) {
        SituationMention predicate = sentence.getPredArg(predIdx, uuids);
        // Every mention must be anchored either to tokens or to a constituent.
        assert !(predicate.getTokens() == null && predicate.getConstituent() == null);
        propBankSrlSituationMentions.addToMentionList(predicate);
      }
    }
    comm.addToSituationMentionSetList(propBankSrlSituationMentions);

    // EntitySet and EntityMentionSet for the coref labels
    addCoref(comm);

    // EntityMentionSet for the NER labels (optional)
    if (this.conll2011.addNerAsEntityMentionSet) {
      nerEms = new EntityMentionSet();
      nerEms.setUuid(uuids.next());
      nerEms.setMetadata(Conll2011.META_NER);
      nerEms.setMentionList(new ArrayList<>());
      for (Conll2011Sentence sentence : sentences) {
        for (EntityMention nerMention : sentence.getNerEntityMentions()) {
          nerEms.addToMentionList(nerMention);
        }
      }
      comm.addToEntityMentionSetList(nerEms);
    }

    return comm;
  }
Beispiel #4
0
 /**
  * Test method for {@link
  * edu.jhu.hlt.rebar.accumulo.CleanIngester#isDocumentIngested(edu.jhu.hlt.concrete.Communication)}.
  *
  * <p>Checks that a document is reported as existing only after it has been ingested, that
  * ingesting the same document a second time is accepted, and that a second mock document with a
  * different start time can be ingested alongside it.
  *
  * @throws RebarException propagated from the calls made in this test
  * @throws TableNotFoundException propagated from the calls made in this test
  * @throws MutationsRejectedException propagated from the calls made in this test
  */
 @Test
 public void testIsDocumentIngested()
     throws RebarException, TableNotFoundException, MutationsRejectedException {
   assertFalse(cr.exists("bar"));

   Communication first = generateMockDocument();
   Communication second = generateMockDocument();
   first.startTime = 39595830;
   second.startTime = 395958301;

   // Nothing has been ingested yet.
   assertFalse(cr.exists(first));

   ci.ingest(first);
   assertTrue(cr.exists(first));
   // An unrelated id must still be absent.
   assertFalse(cr.exists("bar"));

   // Ingest the first document again, then the second one.
   ci.ingest(first);
   ci.ingest(second);
   assertTrue(cr.exists(first));
   assertTrue(cr.exists(second));
 }
  /**
   * Merges two communications section by section.
   *
   * <p>The result starts out as a deep copy of {@code left} (so it inherits things like left's id
   * and the metadata on things which can't be unioned, such as sections and sentences). Each of
   * its sections is then replaced by the union of the corresponding left and right sections, which
   * brings in the annotations from {@code right}.
   *
   * @param left the communication the result is copied from
   * @param right the communication whose annotations are added
   * @throws IllegalArgumentException if the two communications have different section counts, or
   *     if merging a pair of sections fails (the message then names the section index and the
   *     original exception is attached as the cause)
   */
  public MergeTokenAlignedCommunications(Communication left, Communication right) {
    this.union = new Communication(left);

    int leftCount = left.getSectionListSize();
    int rightCount = right.getSectionListSize();
    if (leftCount != rightCount) {
      throw new IllegalArgumentException(
          "left has " + leftCount + " sections but right has " + rightCount);
    }

    for (int idx = 0; idx < leftCount; idx++) {
      Section leftSection = left.getSectionList().get(idx);
      Section rightSection = right.getSectionList().get(idx);
      MergedSection merged;
      try {
        merged = new MergedSection(leftSection, rightSection);
      } catch (IllegalArgumentException cause) {
        // Re-throw with the section index so the failing pair can be located.
        throw new IllegalArgumentException("in section " + idx, cause);
      }
      union.getSectionList().set(idx, merged.getUnion());
    }
  }