/**
 * Removes from the CAS indexes every {@link TermComponentAnnotation} that the
 * component index's subiterator reports beneath a component of the given
 * single-word term.
 *
 * <p>Candidates are gathered first and only removed once both iterations are
 * finished, so the index is never modified while it is being traversed.
 *
 * @param cas the CAS whose {@code TermComponentAnnotation} index is inspected
 * @param annotation the single-word term whose components are examined
 */
private void clean(JCas cas, SingleWordTermAnnotation annotation) {
    AnnotationIndex<Annotation> components = cas.getAnnotationIndex(TermComponentAnnotation.type);
    Set<TermComponentAnnotation> nested = new HashSet<TermComponentAnnotation>();

    // Outer pass: each component found under the term annotation.
    for (FSIterator<Annotation> outer = components.subiterator(annotation); outer.hasNext();) {
        TermComponentAnnotation parent = (TermComponentAnnotation) outer.next();

        // Inner pass: whatever the subiterator yields under that component is marked for removal.
        for (FSIterator<Annotation> inner = components.subiterator(parent); inner.hasNext();) {
            TermComponentAnnotation child = (TermComponentAnnotation) inner.next();
            nested.add(child);
        }
    }

    // Deferred removal, performed after all iterators are exhausted.
    for (TermComponentAnnotation obsolete : nested) {
        obsolete.removeFromIndexes();
    }
}
/**
 * Emits trigrams for an input annotation.
 *
 * <p>Walks the {@code Token} annotations returned by the token index's
 * subiterator for {@code annotation}, keeping a window of the two tokens seen
 * before the current one. Whenever both earlier tokens pass
 * {@code tokenIsInAnnotation}, an {@link NGram} spanning the three tokens is
 * created and added to the indexes.
 *
 * @param annotation the annotation whose tokens are grouped into trigrams
 * @param aJCas the CAS that supplies the tokens and receives the new n-grams
 */
public void extractNgramsFromAnnotation(Annotation annotation, JCas aJCas) {
    AnnotationIndex<Annotation> tokenIndex = aJCas.getAnnotationIndex(Token.type);
    Iterator<Annotation> cursor = tokenIndex.subiterator(annotation);

    // The two tokens preceding the current one; both stay null until enough tokens were seen.
    Annotation first = null;
    Annotation second = null;

    while (cursor.hasNext()) {
        Annotation third = cursor.next();

        // NOTE(review): relies on tokenIsInAnnotation coping with a null token
        // during the first two iterations - confirm against its definition.
        boolean windowIsComplete =
                tokenIsInAnnotation(annotation, second) && tokenIsInAnnotation(annotation, first);

        if (windowIsComplete) {
            FSArray members = new FSArray(aJCas, 3);
            members.set(0, first);
            members.set(1, second);
            members.set(2, third);

            NGram trigram = new NGram(aJCas);
            trigram.setBegin(first.getBegin());
            trigram.setEnd(third.getEnd());
            trigram.setElements(members);
            trigram.setElementType("edu.cmu.deiis.types.Token");
            trigram.setConfidence(1D);
            trigram.setCasProcessorId(PROCESSOR_ID);
            trigram.addToIndexes();
        }

        // Slide the window forward by one token.
        first = second;
        second = third;
    }
}