/** * Converts NamedEntityTagAnnotation tags into {@link EntityMention}s. This finds the longest * sequence of NamedEntityTagAnnotation tags of the matching type. * * @param sentence A sentence annotated with NamedEntityTagAnnotation */ public void makeAnnotationFromAllNERTags(CoreMap sentence) { List<CoreLabel> words = sentence.get(CoreAnnotations.TokensAnnotation.class); List<EntityMention> mentions = sentence.get(MachineReadingAnnotations.EntityMentionsAnnotation.class); assert words != null; if (mentions == null) { this.logger.info("mentions are null"); mentions = new ArrayList<>(); } for (int start = 0; start < words.size(); start++) { int end; // find the first token after start that isn't of nerType String lastneTag = null; String ne = null; for (end = start; end < words.size(); end++) { ne = words.get(end).get(NamedEntityTagAnnotation.class); if (ne.equals(SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL) || (lastneTag != null && !ne.equals(lastneTag))) { break; } lastneTag = ne; } if (end > start) { // found a match! String entityType = this.getEntityTypeForTag(lastneTag); EntityMention m = EntityMentionFactory.constructEntityMention( EntityMention.makeUniqueId(), sentence, new Span(start, end), new Span(start, end), entityType, null, null); // TODO: changed entityType in the above sentence to nerTag - Sonal logger.info("Created " + entityType + " entity mention: " + m); start = end - 1; mentions.add(m); } } sentence.set(MachineReadingAnnotations.EntityMentionsAnnotation.class, mentions); }
public EntityMention makeEntityMention( CoreMap sentence, int start, int end, String label, String identifier) { Span span = new Span(start, end); String type = null, subtype = null; if (!label.startsWith("B-") && !label.startsWith("I-")) { type = label; subtype = null; // TODO: add support for subtypes! (needed at least in ACE) } else { type = label.substring(2); subtype = null; // TODO: add support for subtypes! (needed at least in ACE) } EntityMention entity = EntityMentionFactory.constructEntityMention( identifier, sentence, span, span, type, subtype, null); Counter<String> probs = new DefaultCounter<>(); probs.setCount(entity.getType(), 1.0); entity.setTypeProbabilities(probs); return entity; }
/** * Converts NamedEntityTagAnnotation tags into {@link EntityMention}s. This finds the longest * sequence of NamedEntityTagAnnotation tags of the matching type. * * @param sentence A sentence, ideally annotated with NamedEntityTagAnnotation * @param nerTag The name of the NER tag to copy, e.g. "DATE". * @param entityType The type of the {@link EntityMention} objects created */ public static void makeAnnotationFromGivenNERTag( CoreMap sentence, String nerTag, String entityType) { List<CoreLabel> words = sentence.get(CoreAnnotations.TokensAnnotation.class); List<EntityMention> mentions = sentence.get(MachineReadingAnnotations.EntityMentionsAnnotation.class); assert words != null; assert mentions != null; for (int start = 0; start < words.size(); start++) { int end; // find the first token after start that isn't of nerType for (end = start; end < words.size(); end++) { String ne = words.get(end).get(NamedEntityTagAnnotation.class); if (!ne.equals(nerTag)) { break; } } if (end > start) { // found a match! EntityMention m = EntityMentionFactory.constructEntityMention( EntityMention.makeUniqueId(), sentence, new Span(start, end), new Span(start, end), entityType, null, null); logger.info("Created " + entityType + " entity mention: " + m); start = end - 1; mentions.add(m); } } sentence.set(MachineReadingAnnotations.EntityMentionsAnnotation.class, mentions); }