Пример #1
0
 /**
  * Tests whether the syntactic head of {@code otherEnt} lies inside this mention's head span and
  * whether both mentions carry a matching type (and, on request, a matching subtype).
  *
  * <p>Two types (or subtypes) match when both are {@code null}, or when both are non-null and
  * {@code equals} each other.
  *
  * @param otherEnt the mention whose syntactic head position is tested against this head span
  * @param useSubType when {@code true}, the subtypes must match in addition to the types
  * @return {@code true} if the other head position is at or after {@code getHeadTokenStart()},
  *     before {@code getHeadTokenEnd()}, and the type (plus subtype, if requested) matches;
  *     {@code false} otherwise
  */
 public boolean headIncludes(EntityMention otherEnt, boolean useSubType) {
   // Reject heads that fall before the start of this mention's head span ...
   if (otherEnt.getSyntacticHeadTokenPosition() < getHeadTokenStart()) {
     return false;
   }
   // ... or at/after its (exclusive) end.
   if (otherEnt.getSyntacticHeadTokenPosition() >= getHeadTokenEnd()) {
     return false;
   }

   // Types agree when both are absent, or both are present and equal.
   boolean sameType =
       (type == null)
           ? otherEnt.type == null
           : (otherEnt.type != null && type.equals(otherEnt.type));
   if (!sameType) {
     return false;
   }

   // Subtypes only matter when the caller asked for them.
   if (!useSubType) {
     return true;
   }
   return (subType == null)
       ? otherEnt.subType == null
       : (otherEnt.subType != null && subType.equals(otherEnt.subType));
 }
Пример #2
0
 /**
  * Builds an {@link EntityMention} covering tokens {@code [start, end)} of {@code sentence}.
  *
  * <p>If {@code label} starts with {@code "B-"} or {@code "I-"}, the two-character prefix is
  * dropped and the remainder is used as the entity type; otherwise the label is used as is. The
  * subtype is always {@code null}. The created mention gets a type-probability counter that
  * assigns 1.0 to its own type.
  *
  * @param sentence the sentence the mention belongs to
  * @param start index used as the start of the mention span
  * @param end index used as the end of the mention span
  * @param label the label of the mention, optionally carrying a "B-"/"I-" prefix
  * @param identifier the identifier handed to the factory for the new mention
  * @return the newly constructed mention
  */
 public EntityMention makeEntityMention(
     CoreMap sentence, int start, int end, String label, String identifier) {
   Span span = new Span(start, end);

   // Strip a leading "B-" / "I-" marker when present; otherwise the label is the type itself.
   boolean hasPrefix = label.startsWith("B-") || label.startsWith("I-");
   String type = hasPrefix ? label.substring(2) : label;
   // TODO: add support for subtypes! (needed at least in ACE)
   String subtype = null;

   // The same span is passed twice (presumably as extent and head span -- confirm against factory).
   EntityMention entity =
       EntityMentionFactory.constructEntityMention(
           identifier, sentence, span, span, type, subtype, null);

   // All probability mass goes to the mention's own type.
   Counter<String> probs = new DefaultCounter<>();
   probs.setCount(entity.getType(), 1.0);
   entity.setTypeProbabilities(probs);
   return entity;
 }
Пример #3
0
 /**
  * Orders two mentions by the start of their head span, breaking ties by the end of the head
  * span.
  *
  * @param o1 the first mention
  * @param o2 the second mention
  * @return a negative value if {@code o1} sorts first, a positive value if {@code o2} sorts first,
  *     and 0 when both head spans have the same start and end
  */
 public int compare(EntityMention o1, EntityMention o2) {
   // Primary key: head start.
   int byStart = Integer.compare(o1.getHeadTokenStart(), o2.getHeadTokenStart());
   if (byStart != 0) {
     return byStart;
   }
   // Secondary key: head end.
   return Integer.compare(o1.getHeadTokenEnd(), o2.getHeadTokenEnd());
 }
Пример #4
0
  /**
   * Converts NamedEntityTagAnnotation tags into {@link EntityMention}s. Each maximal run of
   * consecutive tokens sharing the same non-background tag becomes one mention whose entity type
   * is obtained from {@code getEntityTypeForTag} applied to that tag.
   *
   * <p>Tokens that carry no NamedEntityTagAnnotation are treated like background tokens: they end
   * the current run and never start one. If the sentence has no mention list yet, a new one is
   * created. The (possibly extended) list is stored back on the sentence.
   *
   * @param sentence A sentence annotated with NamedEntityTagAnnotation
   */
  public void makeAnnotationFromAllNERTags(CoreMap sentence) {
    List<CoreLabel> words = sentence.get(CoreAnnotations.TokensAnnotation.class);
    List<EntityMention> mentions =
        sentence.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
    assert words != null;
    if (mentions == null) {
      logger.info("mentions are null");
      mentions = new ArrayList<>();
    }

    for (int start = 0; start < words.size(); start++) {

      int end;
      // Advance `end` to the first token that is untagged, background, or tagged differently
      // from the run that began at `start`.
      String lastneTag = null;
      for (end = start; end < words.size(); end++) {
        String ne = words.get(end).get(NamedEntityTagAnnotation.class);
        // A missing tag (null) is handled like the background symbol instead of throwing.
        if (ne == null
            || ne.equals(SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL)
            || (lastneTag != null && !ne.equals(lastneTag))) {
          break;
        }
        lastneTag = ne;
      }

      if (end > start) {

        // found a match!
        String entityType = this.getEntityTypeForTag(lastneTag);
        EntityMention m =
            EntityMentionFactory.constructEntityMention(
                EntityMention.makeUniqueId(),
                sentence,
                new Span(start, end),
                new Span(start, end),
                entityType,
                null,
                null);
        // TODO: changed entityType in the above sentence to nerTag - Sonal
        logger.info("Created " + entityType + " entity mention: " + m);
        // The loop's start++ makes the next iteration resume at `end`.
        start = end - 1;
        mentions.add(m);
      }
    }

    sentence.set(MachineReadingAnnotations.EntityMentionsAnnotation.class, mentions);
  }
Пример #5
0
  /**
   * Converts NamedEntityTagAnnotation tags into {@link EntityMention}s. Each maximal run of
   * consecutive tokens tagged exactly {@code nerTag} becomes one mention of type
   * {@code entityType}, which is appended to the sentence's existing mention list.
   *
   * <p>Tokens that carry no NamedEntityTagAnnotation are treated as not matching {@code nerTag},
   * so a partially annotated or unannotated sentence simply yields fewer (or no) mentions.
   *
   * @param sentence A sentence, ideally annotated with NamedEntityTagAnnotation
   * @param nerTag The name of the NER tag to copy, e.g. "DATE".
   * @param entityType The type of the {@link EntityMention} objects created
   */
  public static void makeAnnotationFromGivenNERTag(
      CoreMap sentence, String nerTag, String entityType) {
    List<CoreLabel> words = sentence.get(CoreAnnotations.TokensAnnotation.class);
    List<EntityMention> mentions =
        sentence.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
    assert words != null;
    assert mentions != null;

    for (int start = 0; start < words.size(); start++) {
      int end;
      // find the first token at or after start that isn't tagged nerTag
      for (end = start; end < words.size(); end++) {
        String ne = words.get(end).get(NamedEntityTagAnnotation.class);
        // A missing tag (null) counts as a non-match instead of throwing.
        if (ne == null || !ne.equals(nerTag)) {
          break;
        }
      }

      if (end > start) {

        // found a match!
        EntityMention m =
            EntityMentionFactory.constructEntityMention(
                EntityMention.makeUniqueId(),
                sentence,
                new Span(start, end),
                new Span(start, end),
                entityType,
                null,
                null);
        logger.info("Created " + entityType + " entity mention: " + m);
        // The loop's start++ makes the next iteration resume at `end`.
        start = end - 1;
        mentions.add(m);
      }
    }

    sentence.set(MachineReadingAnnotations.EntityMentionsAnnotation.class, mentions);
  }