public void tokenize() { AnnotationSet tokenizationAs = gateDocument.getAnnotations("Tokenization"); AnnotationSet defaultAs = gateDocument.getAnnotations(""); for (Iterator<Annotation> it = tokenizationAs.iterator(); it.hasNext(); ) { Annotation currentTokenAnnotation = it.next(); FeatureMap tokenFeaturesMap = currentTokenAnnotation.getFeatures(); FeatureMap curFeaturesMap = Factory.newFeatureMap(); if ("Token".compareToIgnoreCase(currentTokenAnnotation.getType()) == 0) { curFeaturesMap.put("string", tokenFeaturesMap.get("string")); curFeaturesMap.put("root", tokenFeaturesMap.get("lemma")); curFeaturesMap.put("category", tokenFeaturesMap.get("POS")); // Add the new Token to the Annotation Set defaultAs.add( currentTokenAnnotation.getStartNode(), currentTokenAnnotation.getEndNode(), currentTokenAnnotation.getType(), curFeaturesMap); } } gateDocument.removeAnnotationSet("Tokenization"); }
public void splitter() { AnnotationSet sDetectionAS = gateDocument.getAnnotations("SentenceDetection"); AnnotationSet defaultAs = gateDocument.getAnnotations(""); for (Iterator<Annotation> it = sDetectionAS.iterator(); it.hasNext(); ) { Annotation currentSentenceAnnotation = it.next(); // Add the Sentence to the Annotation Set defaultAs.add( currentSentenceAnnotation.getStartNode(), currentSentenceAnnotation.getEndNode(), "Sentence", null); } gateDocument.removeAnnotationSet("SentenceDetection"); }