/**
 * Collects n-gram counts from the given CAS and hands them to {@code add(cfd)}.
 *
 * <p>For every annotation of {@code sentenceType} and every entry of {@code inputPaths}, the
 * path is split at its first {@code '/'} into at most two segments. The first segment is used
 * as a type name (resolved through {@code getInputType}); the annotations of that type covered
 * by the current sentence annotation are converted to strings by {@code createStringList},
 * which also receives the full segment array. For each length from {@code minNgramLength} to
 * {@code maxNgramLength} (inclusive) the items produced by a {@code NGramStringIterable} over
 * those strings are counted under that length in a conditional frequency distribution.
 *
 * @param jcas the JCas whose underlying CAS is read.
 * @param inputPaths paths of the form {@code typeName} or {@code typeName/rest}.
 * @param sentenceType the annotation type whose instances delimit the units to be processed.
 * @throws IOException if {@code createStringList} fails with an
 *         {@code AnalysisEngineProcessException}; that exception is attached as the cause.
 */
public void add(JCas jcas, Set<String> inputPaths, Type sentenceType)
    throws IOException
{
    final ConditionalFrequencyDistribution<Integer, String> ngramCounts =
            new ConditionalFrequencyDistribution<Integer, String>();
    final CAS cas = jcas.getCas();

    for (AnnotationFS sentence : CasUtil.select(cas, sentenceType)) {
        for (String inputPath : inputPaths) {
            // At most two parts: the type name and whatever follows the first '/'.
            final String[] pathParts = inputPath.split("/", 2);
            final Type coveredType = getInputType(cas, pathParts[0]);
            final List<AnnotationFS> covered = CasUtil.selectCovered(cas, coveredType, sentence);

            final List<String> strings;
            try {
                strings = createStringList(covered, pathParts);
            }
            catch (AnalysisEngineProcessException cause) {
                // Callers only expect IOException; keep the original failure as the cause.
                throw new IOException(cause);
            }

            // One pass per n-gram length; the length doubles as the condition key.
            for (int n = minNgramLength; n <= maxNgramLength; n++) {
                ngramCounts.incAll(n, new NGramStringIterable(strings, n, n));
            }
        }
    }

    add(ngramCounts);
}
/**
 * Builds a token sequence from the annotations of {@code tokenType} covered by the given
 * annotation.
 *
 * <p>Each covered annotation is passed to {@code getTokensFromAnnotation} together with the
 * {@code useLemma} and {@code minTokenLength} settings; every string it yields is appended to
 * the result in iteration order.
 *
 * @param annotation an annotation representing a document segment, e.g. {@link Sentence}.
 * @param tokenType the type to use for representing tokens, usually {@link Token}, but could
 *        also be any other type.
 * @return a new {@code TokenSequence} holding the collected token strings; it is empty if
 *         nothing was yielded.
 */
private TokenSequence generateTokenSequence(AnnotationFS annotation, Type tokenType)
{
    final TokenSequence sequence = new TokenSequence();

    for (AnnotationFS covered : CasUtil.selectCovered(tokenType, annotation)) {
        // A single covered annotation may contribute zero, one or several strings.
        for (String text : getTokensFromAnnotation(covered, useLemma, minTokenLength)) {
            sequence.add(text);
        }
    }

    return sequence;
}
/**
 * Returns the single annotation of the given type that {@code CasUtil.selectCovered} finds
 * for the given annotation.
 *
 * <p>If the lookup yields anything other than exactly one annotation (none, or more than
 * one), a warning is logged and {@code null} is returned.
 *
 * @param type the type of the annotation to look up.
 * @param annotation the covering annotation; its CAS is the one searched.
 * @return the one matching annotation, or {@code null} if there is not exactly one.
 */
private AnnotationFS getAnnotation(Type type, AnnotationFS annotation)
{
    final List<AnnotationFS> candidates =
            CasUtil.selectCovered(annotation.getCAS(), type, annotation);

    if (candidates.size() == 1) {
        return candidates.get(0);
    }

    // Zero or several candidates: the match is ambiguous or missing, so report and give up.
    getLogger().warn("Could not find matching annotation of type " + type
            + " for annotation: " + annotation.getCoveredText());
    return null;
}