/** * Lemmatise a phrase or word. If a phrase, only lemmatise the most RHS word. * * @param value * @return */ public String normalise(String value) { if (value.indexOf(" ") == -1 || value.endsWith(" s") || value.endsWith( "'s")) // if string is a single word, or it is in "XYZ's" form where the ' char has been // removed return lemmatizer.lemmatize(value, 1).trim(); String part1 = value.substring(0, value.lastIndexOf(" ")); String part2 = lemmatizer.lemmatize(value.substring(value.lastIndexOf(" ") + 1), 1); return part1 + " " + part2.trim(); }
/**
 * Returns the lemma of a word, using its part-of-speech tag when the tag resolves to a
 * non-zero code in {@code tagLookUp}.
 *
 * <p>If the tag resolves to {@code 0}, or to no code at all, the word is lemmatised with
 * the untagged {@code lemmatize(value)} call.
 *
 * @param value original word
 * @param pos the part of speech of the last word
 * @return the lemma of original word
 */
public String getLemma(String value, String pos) {
    // Keep the lookup result boxed: unboxing straight into an int would throw a
    // NullPointerException if the lookup yields null for a tag it does not know
    // (assumes tagLookUp has Map-like semantics; its declaration is not shown here).
    Integer posCode = tagLookUp.get(pos);
    if (posCode == null || posCode.intValue() == 0) {
        return lemmatizer.lemmatize(value);
    }
    return lemmatizer.lemmatize(value, posCode.intValue());
}