Пример #1
0
 /**
  * Splits document {@code doc} into sentences, adding {@code sentence} annotations.
  *
  * <p>Steps, in order: special zones are located via {@code SpecialZoner}; every
  * {@code TEXT} annotation's span is handed to {@code SentenceSplitter}; each resulting
  * {@code sentence} annotation receives an {@code ID} feature of the form {@code SENT-n},
  * numbered from 1 in the order the document returns them; finally the {@code dateline}
  * and {@code textBreak} annotations are removed and {@code doc.shrink("sentence")} is
  * called.
  *
  * <p>If the document has no {@code TEXT} annotations (the lookup returns {@code null}),
  * a message is printed to standard output and the method returns without doing any of
  * the later steps.
  *
  * @param doc the document to split; it is modified in place
  */
 static void addSentences(Document doc) {
   SpecialZoner.findSpecialZones(doc);

   Vector<Annotation> textZones = doc.annotationsOfType("TEXT");
   if (textZones == null) {
     System.out.println("No <TEXT> in document");
     return;
   }

   for (Annotation zone : textZones) {
     Span zoneSpan = zone.span();
     // Record the document's case in the shared Ace.monocase flag before splitting.
     // NOTE(review): this call does not take the zone as input and looks hoistable out
     // of the loop -- confirm that SentenceSplitter.split cannot change its result.
     Ace.monocase = Ace.allLowerCase(doc);
     SentenceSplitter.split(doc, zoneSpan);
   }

   // Give every sentence a sequential identifier: SENT-1, SENT-2, ...
   Vector<Annotation> sentenceAnns = doc.annotationsOfType("sentence");
   if (sentenceAnns != null) {
     int nextId = 1;
     for (Annotation sentenceAnn : sentenceAnns) {
       sentenceAnn.put("ID", "SENT-" + nextId);
       nextId++;
     }
   }

   // Drop the dateline and textBreak annotations, then shrink the sentence annotations.
   doc.removeAnnotationsOfType("dateline");
   doc.removeAnnotationsOfType("textBreak");
   doc.shrink("sentence");
 }
Пример #2
0
 /**
  * Adds to {@code doc} the annotation layers selected by {@code flags}.
  *
  * <p>The document's case is checked first with {@code Ace.allLowerCase}; when
  * {@code year} equals {@code "2004"} the result is passed to the gazetteer. Then, for
  * each of the flags {@code "sentences"}, {@code "timex"}, {@code "mentions"} and
  * {@code "names"} that is present, the corresponding tagging step runs, in that order.
  *
  * @param doc the document to annotate
  * @param aceDoc the ACE document passed to the timex, mention and name tagging steps
  */
 public static void addAnnotations(Document doc, AceDocument aceDoc) {
   // Evaluated unconditionally, exactly once, before any annotation is added.
   boolean lowerCaseOnly = Ace.allLowerCase(doc);
   if (year.equals("2004")) {
     gazetteer.setMonocase(lowerCaseOnly);
   }

   if (flags.contains("sentences")) {
     addSentences(doc);
   }
   if (flags.contains("timex")) {
     addTimexTags(doc, aceDoc);
   }
   if (flags.contains("mentions")) {
     addMentionTags(doc, aceDoc);
   }
   if (flags.contains("names")) {
     addENAMEXtags(doc, aceDoc);
   }
 }