/**
   * Counts the n-grams of every segment in the CAS and passes the counts to the overloaded
   * {@code add} method.
   *
   * <p>For each annotation of {@code sentenceType} and each entry of {@code inputPaths}, the path
   * is split at its first {@code '/'}: the leading part is resolved to a type through
   * {@code getInputType}, the annotations of that type covered by the segment are selected, and
   * {@code createStringList} turns them (together with the split path) into strings. Those
   * strings are then fed to an {@code NGramStringIterable} once per length from
   * {@code minNgramLength} to {@code maxNgramLength} (both inclusive), and the result is counted
   * under that length.
   *
   * @param jcas the JCas whose CAS is read
   * @param inputPaths paths whose part before the first {@code '/'} names the annotation type to
   *     read; the whole split path is forwarded to {@code createStringList}
   * @param sentenceType the type of the annotations used as segments
   * @throws IOException if {@code createStringList} fails with an
   *     {@code AnalysisEngineProcessException}, which is kept as the cause
   */
  public void add(JCas jcas, Set<String> inputPaths, Type sentenceType) throws IOException {
    CAS cas = jcas.getCas();
    ConditionalFrequencyDistribution<Integer, String> ngramCounts =
        new ConditionalFrequencyDistribution<Integer, String>();

    for (AnnotationFS segment : CasUtil.select(cas, sentenceType)) {
      for (String inputPath : inputPaths) {
        // At most two parts: the type name, and whatever follows the first '/'.
        String[] pathParts = inputPath.split("/", 2);
        Type coveredType = getInputType(cas, pathParts[0]);
        List<AnnotationFS> covered = CasUtil.selectCovered(cas, coveredType, segment);

        List<String> strings;
        try {
          strings = createStringList(covered, pathParts);
        } catch (AnalysisEngineProcessException cause) {
          // The method only declares IOException, so wrap and keep the original as cause.
          throw new IOException(cause);
        }

        // One pass per n-gram length; the length doubles as the condition of the distribution.
        for (int n = minNgramLength; n <= maxNgramLength; n++) {
          ngramCounts.incAll(n, new NGramStringIterable(strings, n, n));
        }
      }
    }

    add(ngramCounts);
  }
  /**
   * Builds a token sequence from the annotations of the given token type that are covered by the
   * given annotation.
   *
   * <p>Every covered annotation is handed to {@code getTokensFromAnnotation} together with the
   * {@code useLemma} and {@code minTokenLength} settings, and each string it yields is appended
   * to the sequence in iteration order.
   *
   * @param annotation an annotation representing a document segment, e.g. {@link Sentence}.
   * @param tokenType the type to use for representing tokens, usually {@link Token}, but could
   *     also be any other type.
   * @return a newly created token sequence holding the appended strings; never {@code null}
   */
  private TokenSequence generateTokenSequence(AnnotationFS annotation, Type tokenType) {
    final TokenSequence sequence = new TokenSequence();

    for (AnnotationFS coveredToken : CasUtil.selectCovered(tokenType, annotation)) {
      // A single annotation may contribute zero or more strings.
      for (String text : getTokensFromAnnotation(coveredToken, useLemma, minTokenLength)) {
        sequence.add(text);
      }
    }

    return sequence;
  }
  // Example #3
  // 0
  /**
   * Looks up the single annotation of the given type that is covered by the given annotation.
   *
   * <p>If the number of covered annotations of that type is anything other than one, a warning is
   * logged and {@code null} is returned.
   *
   * @param type the type of the annotation to look up
   * @param annotation the annotation whose span and CAS are searched
   * @return the only covered annotation of {@code type}, or {@code null} if there is none or more
   *     than one
   */
  private AnnotationFS getAnnotation(Type type, AnnotationFS annotation) {
    List<AnnotationFS> candidates = CasUtil.selectCovered(annotation.getCAS(), type, annotation);

    // Exactly one candidate is the only unambiguous outcome.
    if (candidates.size() == 1) {
      return candidates.get(0);
    }

    getLogger()
        .warn(
            "Could not find matching annotation of type "
                + type
                + " for annotation: "
                + annotation.getCoveredText());
    return null;
  }