/**
 * Counts n-grams found inside every annotation of the given context type and hands the
 * resulting counts to {@code add(cfd)}.
 * <p>
 * For each context annotation and each input path, the annotations of the path's type that
 * are covered by the context annotation are selected and turned into strings via
 * {@code createStringList}. For every length from {@code minNgramLength} to
 * {@code maxNgramLength} (inclusive), the items produced by a {@code NGramStringIterable}
 * over those strings are added to a conditional frequency distribution under that length.
 *
 * @param jcas
 *            the JCas whose annotations are inspected.
 * @param inputPaths
 *            paths of the form {@code typeName[/rest]}; the part before the first {@code '/'}
 *            is resolved with {@code getInputType}, and the split result is passed on to
 *            {@code createStringList} (presumably the rest is a feature path - confirm
 *            against that helper).
 * @param sentenceType
 *            the type of the context annotations within which n-grams are collected.
 * @throws IOException
 *             if {@code createStringList} fails; the original
 *             {@code AnalysisEngineProcessException} is attached as the cause.
 */
public void add(JCas jcas, Set<String> inputPaths, Type sentenceType) throws IOException {
    CAS cas = jcas.getCas();
    ConditionalFrequencyDistribution<Integer, String> ngramCounts =
            new ConditionalFrequencyDistribution<Integer, String>();

    for (AnnotationFS context : CasUtil.select(cas, sentenceType)) {
        for (String inputPath : inputPaths) {
            // Split only on the first '/': element 0 is the type name, any remainder stays
            // intact in element 1.
            String[] pathParts = inputPath.split("/", 2);
            Type coveredType = getInputType(cas, pathParts[0]);
            List<AnnotationFS> covered = CasUtil.selectCovered(cas, coveredType, context);

            List<String> strings;
            try {
                strings = createStringList(covered, pathParts);
            }
            catch (AnalysisEngineProcessException cause) {
                // This method only declares IOException, so wrap the UIMA exception.
                throw new IOException(cause);
            }

            // One pass per n-gram length; the length doubles as the condition key.
            for (int n = minNgramLength; n <= maxNgramLength; n++) {
                NGramStringIterable ngrams = new NGramStringIterable(strings, n, n);
                ngramCounts.incAll(n, ngrams);
            }
        }
    }

    add(ngramCounts);
}
/**
 * Feeds the given JCas to the converter, using the type named by {@code contextType} as the
 * context annotation type and {@code inputPaths} as the input paths.
 *
 * @param jcas
 *            the JCas to process.
 * @throws AnalysisEngineProcessException
 *             if {@code contextType} does not name a type in the CAS type system, or if the
 *             converter throws an {@code IOException} (attached as the cause).
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
    // TypeSystem.getType(String) returns null for a name that is not part of the type
    // system. Fail here with a descriptive error instead of handing null to the converter.
    if (jcas.getCas().getTypeSystem().getType(contextType) == null) {
        throw new AnalysisEngineProcessException(new IllegalArgumentException(
                "Context type [" + contextType + "] is not defined in the type system"));
    }

    try {
        converter.add(jcas, inputPaths, jcas.getCas().getTypeSystem().getType(contextType));
    }
    catch (IOException e) {
        // process() may only throw AnalysisEngineProcessException; keep the cause.
        throw new AnalysisEngineProcessException(e);
    }
}