예제 #1
0
  /**
   * Counts n-grams found in the given CAS and passes the counts to {@code add(cfd)}.
   *
   * <p>For every annotation of {@code sentenceType} and every entry of {@code inputPaths}, the
   * annotations covered by that annotation are selected, converted to strings with
   * {@code createStringList}, and their n-grams are added to a conditional frequency distribution
   * keyed by n-gram length, for each length from {@code minNgramLength} to
   * {@code maxNgramLength} inclusive.
   *
   * @param jcas the JCas whose underlying CAS is read
   * @param inputPaths paths whose part before the first {@code "/"} is used as the type name
   *     handed to {@code getInputType}; the split path is also passed to {@code createStringList}
   * @param sentenceType the annotation type whose instances delimit the spans to be counted
   * @throws IOException wrapping any {@link AnalysisEngineProcessException} raised while building
   *     the string list
   */
  public void add(JCas jcas, Set<String> inputPaths, Type sentenceType) throws IOException {
    final ConditionalFrequencyDistribution<Integer, String> counts =
        new ConditionalFrequencyDistribution<Integer, String>();
    final CAS cas = jcas.getCas();

    for (AnnotationFS context : CasUtil.select(cas, sentenceType)) {
      for (String inputPath : inputPaths) {
        // At most two parts: the text before the first "/" and everything after it.
        final String[] pathParts = inputPath.split("/", 2);
        final Type coveredType = getInputType(cas, pathParts[0]);
        final List<AnnotationFS> covered = CasUtil.selectCovered(cas, coveredType, context);

        final List<String> strings;
        try {
          strings = createStringList(covered, pathParts);
        } catch (AnalysisEngineProcessException cause) {
          // Callers of this method only see IOException; keep the original as the cause.
          throw new IOException(cause);
        }

        // One pass per n-gram length; min and max length are both set to n for each pass.
        int n = minNgramLength;
        while (n <= maxNgramLength) {
          counts.incAll(n, new NGramStringIterable(strings, n, n));
          n++;
        }
      }
    }

    add(counts);
  }
예제 #2
0
  /**
   * Feeds the given JCas to {@code converter}, using the type named by {@code contextType}
   * (looked up in the CAS type system) as the context annotation type.
   *
   * @param jcas the JCas to hand to the converter
   * @throws AnalysisEngineProcessException wrapping any {@link IOException} thrown by the
   *     converter
   */
  @Override
  public void process(JCas jcas) throws AnalysisEngineProcessException {
    try {
      converter.add(
          jcas,
          inputPaths,
          jcas.getCas().getTypeSystem().getType(contextType));
    } catch (IOException ioFailure) {
      // Re-throw as the exception type this method declares, keeping the original as the cause.
      throw new AnalysisEngineProcessException(ioFailure);
    }
  }