/**
 * Stores a lemma for {@code word} in {@code map} under the annotation key {@code ann}.
 *
 * <p>When {@code tag} is empty, the value stored is {@code morpha.stem(word)}. Otherwise the
 * word is first checked with {@link #phrasalVerb}; if that yields a non-null result it is
 * stored, and if not, {@code morpha.lemma(word, tag)} is stored instead.
 *
 * @param morpha the morphology instance used for stemming / lemmatizing
 * @param ann the annotation key under which the result is written
 * @param map the map that receives the result
 * @param word the surface form of the token
 * @param tag the part-of-speech tag; an empty string selects the tag-less stemming path
 */
private static void addLemma(
    Morphology morpha,
    Class<? extends CoreAnnotation<String>> ann,
    CoreMap map,
    String word,
    String tag) {
  // No tag available: fall back to tag-independent stemming.
  if (tag.isEmpty()) {
    map.set(ann, morpha.stem(word));
    return;
  }

  // Phrasal verbs take precedence over the plain lemma.
  String result = phrasalVerb(morpha, word, tag);
  if (result == null) {
    result = morpha.lemma(word, tag);
  }
  map.set(ann, result);
}
/** * If a token is a phrasal verb with an underscore between a verb and a particle, return the * phrasal verb lemmatized. If not, return null */ private static String phrasalVerb(Morphology morpha, String word, String tag) { // must be a verb and contain an underscore assert (word != null); assert (tag != null); if (!tag.startsWith("VB") || !word.contains("_")) return null; // check whether the last part is a particle String[] verb = word.split("_"); if (verb.length != 2) return null; String particle = verb[1]; if (particles.contains(particle)) { String base = verb[0]; String lemma = morpha.lemma(base, tag); return lemma + '_' + particle; } return null; }