Пример #1
0
 /**
  * Stores a lemma for {@code word} in {@code map} under the annotation key {@code ann}.
  *
  * <p>When {@code tag} is empty the stored value is {@code morpha.stem(word)}. Otherwise the
  * phrasal-verb form returned by {@code phrasalVerb} is stored if it is non-null, and
  * {@code morpha.lemma(word, tag)} is stored if it is null.
  *
  * @param morpha the morphology instance used for stemming and lemmatizing
  * @param ann the annotation key under which the result is stored
  * @param map the map that receives the result
  * @param word the token text
  * @param tag the tag of the token; must not be null, may be empty
  */
 private static void addLemma(
     Morphology morpha,
     Class<? extends CoreAnnotation<String>> ann,
     CoreMap map,
     String word,
     String tag) {
   // No tag available: only the tag-free stem can be computed.
   if (tag.isEmpty()) {
     map.set(ann, morpha.stem(word));
     return;
   }

   // Prefer the phrasal-verb form; fall back to the ordinary lemma when there is none.
   String lemma = phrasalVerb(morpha, word, tag);
   if (lemma == null) {
     lemma = morpha.lemma(word, tag);
   }
   map.set(ann, lemma);
 }
Пример #2
0
  /**
   * If a token is a phrasal verb with exactly one underscore between a verb and a particle,
   * returns the phrasal verb with its verb part lemmatized. Otherwise returns null.
   *
   * <p>The token is accepted only when the tag starts with {@code "VB"}, the word has the form
   * {@code base_particle} with non-empty text on both sides of a single underscore, and the
   * particle is contained in {@code particles} (declared outside this method).
   *
   * @param morpha the morphology instance used to lemmatize the verb part
   * @param word the token text; must not be null
   * @param tag the tag of the token; must not be null
   * @return {@code morpha.lemma(base, tag) + '_' + particle}, or null if the token does not
   *     qualify
   */
  private static String phrasalVerb(Morphology morpha, String word, String tag) {
    assert (word != null);
    assert (tag != null);

    // must be a verb
    if (!tag.startsWith("VB")) {
      return null;
    }

    // Must contain exactly one underscore, with text on both sides of it. Scanning with
    // indexOf/lastIndexOf instead of split("_") matters: split discards trailing empty
    // strings, so "give_up_" would look like a clean two-part token, and it keeps a leading
    // empty string, so "_up" would be lemmatized with an empty verb.
    int sep = word.indexOf('_');
    if (sep <= 0 || sep != word.lastIndexOf('_') || sep == word.length() - 1) {
      return null;
    }

    // check whether the last part is a particle
    String particle = word.substring(sep + 1);
    if (!particles.contains(particle)) {
      return null;
    }

    String base = word.substring(0, sep);
    return morpha.lemma(base, tag) + '_' + particle;
  }