示例#1
0
  /**
   * Counts the n-grams found inside every annotation of {@code sentenceType} and hands the
   * resulting distribution to {@code add(ConditionalFrequencyDistribution)}.
   *
   * <p>For each such annotation and each entry of {@code inputPaths}, the annotations of the
   * type named by the path's first segment that are covered by the sentence annotation are
   * selected and converted to strings via {@code createStringList}. N-grams of every length
   * from {@code minNgramLength} to {@code maxNgramLength} (inclusive) are then counted, keyed
   * by their length.
   *
   * @param jcas the JCas whose underlying CAS is read
   * @param inputPaths paths of the form {@code typeName} or {@code typeName/rest}; the part
   *     before the first {@code '/'} is resolved through {@code getInputType}
   * @param sentenceType the annotation type that delimits the spans n-grams are drawn from
   * @throws IOException if {@code createStringList} fails with an
   *     {@code AnalysisEngineProcessException} (kept as the cause), or if a callee declares it
   */
  public void add(JCas jcas, Set<String> inputPaths, Type sentenceType) throws IOException {
    CAS cas = jcas.getCas();
    ConditionalFrequencyDistribution<Integer, String> ngramCounts =
        new ConditionalFrequencyDistribution<Integer, String>();

    for (AnnotationFS sentence : CasUtil.select(cas, sentenceType)) {
      for (String inputPath : inputPaths) {
        // Split at the first '/' only: element 0 is the type name, the optional
        // element 1 is whatever follows and is interpreted by createStringList.
        String[] pathParts = inputPath.split("/", 2);
        Type coveredType = getInputType(cas, pathParts[0]);
        List<AnnotationFS> covered = CasUtil.selectCovered(cas, coveredType, sentence);

        List<String> strings;
        try {
          strings = createStringList(covered, pathParts);
        } catch (AnalysisEngineProcessException ex) {
          // Re-throw as the checked type this method declares, keeping the cause.
          throw new IOException(ex);
        }

        // One pass per n-gram length; min == max in the iterable restricts it to that length.
        int n = minNgramLength;
        while (n <= maxNgramLength) {
          ngramCounts.incAll(n, new NGramStringIterable(strings, n, n));
          n++;
        }
      }
    }

    add(ngramCounts);
  }