/** generate mention annotations (with entity numbers) based on the ACE entities and mentions. */ static void addMentionTags(Document doc, AceDocument aceDoc) { ArrayList<AceEntity> entities = aceDoc.entities; for (int i = 0; i < entities.size(); i++) { AceEntity entity = entities.get(i); ArrayList<AceEntityMention> mentions = entity.mentions; for (int j = 0; j < mentions.size(); j++) { AceEntityMention mention = (AceEntityMention) mentions.get(j); // we compute a jetSpan not including trailing whitespace Span aceSpan = mention.head; // skip mentions in ChEnglish APF not aligned to any English text if (aceSpan.start() < 0) continue; Span jetSpan = new Span(aceSpan.start(), aceSpan.end() + 1); FeatureSet features = new FeatureSet("entity", new Integer(i)); if (flags.contains("types")) { features.put("type", entity.type.substring(0, 3)); if (entity.subtype != null) features.put("subtype", entity.subtype); } if (flags.contains("extents")) { String cleanExtent = mention.text.replaceAll("\n", " "); features.put("extent", AceEntityMention.addXmlEscapes(cleanExtent)); } doc.annotate("mention", jetSpan, features); } } }
/**
 * Adds to {@code doc} every annotation layer selected by the currently set flags.
 *
 * <p>First determines whether the document is all lower case and, when the configured year
 * equals "2004", passes that result to the gazetteer. Then, in this order, the "sentences",
 * "timex", "mentions" and "names" flags each trigger the corresponding tagging step.
 *
 * @param doc the document to annotate
 * @param aceDoc the ACE document supplying the timex, mention and name information
 */
public static void addAnnotations(Document doc, AceDocument aceDoc) {
    boolean lowerCaseOnly = Ace.allLowerCase(doc);
    if (year.equals("2004")) {
        gazetteer.setMonocase(lowerCaseOnly);
    }

    if (flags.contains("sentences")) {
        addSentences(doc);
    }
    if (flags.contains("timex")) {
        addTimexTags(doc, aceDoc);
    }
    if (flags.contains("mentions")) {
        addMentionTags(doc, aceDoc);
    }
    if (flags.contains("names")) {
        addENAMEXtags(doc, aceDoc);
    }
}
/**
 * Enables one annotation flag.
 *
 * <p>Accepted values are "sentences", "timex", "mentions", "extents", "types" and "names".
 * Any other value prints an error message to standard error and terminates the JVM with
 * exit status 1.
 *
 * @param flag the name of the flag to enable
 */
public static void setFlag(String flag) {
    boolean recognized =
        flag.equals("sentences")
            || flag.equals("timex")
            || flag.equals("mentions")
            || flag.equals("extents")
            || flag.equals("types")
            || flag.equals("names");

    if (!recognized) {
        System.err.println("APFtoXML: invalid flag");
        System.err.println("possible flags: sentences timex mentions extents types names");
        System.exit(1);
    } else {
        flags.add(flag);
    }
}
/** Removes every flag that has been set, so that no annotation flag remains enabled. */
public static void clearFlags() {
    flags.clear();
}