/** splits document 'doc' into sentences, adding 'sentence' annotations */ static void addSentences(Document doc) { SpecialZoner.findSpecialZones(doc); Vector<Annotation> textSegments = doc.annotationsOfType("TEXT"); if (textSegments == null) { System.out.println("No <TEXT> in document"); return; } for (Annotation ann : textSegments) { Span textSpan = ann.span(); // check document case Ace.monocase = Ace.allLowerCase(doc); // split into sentences SentenceSplitter.split(doc, textSpan); } Vector<Annotation> sentences = doc.annotationsOfType("sentence"); if (sentences != null) { int sentNo = 0; for (Annotation sentence : sentences) { sentNo++; sentence.put("ID", "SENT-" + sentNo); } } doc.removeAnnotationsOfType("dateline"); doc.removeAnnotationsOfType("textBreak"); doc.shrink("sentence"); }
/**
 * Adds annotations to {@code doc}, as selected by the entries of {@code flags}.
 *
 * <p>The result of {@code Ace.allLowerCase(doc)} is always computed first; when
 * {@code year} equals "2004" it is passed to {@code gazetteer.setMonocase}.
 * After that, each of the following flags, if present, triggers the
 * corresponding step, in this order:
 * <ul>
 *   <li>"sentences" - {@code addSentences(doc)}</li>
 *   <li>"timex" - {@code addTimexTags(doc, aceDoc)}</li>
 *   <li>"mentions" - {@code addMentionTags(doc, aceDoc)}</li>
 *   <li>"names" - {@code addENAMEXtags(doc, aceDoc)}</li>
 * </ul>
 *
 * @param doc    the document to receive the annotations
 * @param aceDoc the ACE document handed to the timex, mention and name taggers
 */
public static void addAnnotations(Document doc, AceDocument aceDoc) {
    boolean allLowerCase = Ace.allLowerCase(doc);
    if (year.equals("2004")) {
        gazetteer.setMonocase(allLowerCase);
    }

    if (flags.contains("sentences")) {
        addSentences(doc);
    }
    if (flags.contains("timex")) {
        addTimexTags(doc, aceDoc);
    }
    if (flags.contains("mentions")) {
        addMentionTags(doc, aceDoc);
    }
    if (flags.contains("names")) {
        addENAMEXtags(doc, aceDoc);
    }
}