/** * Infer new bases using the given lexicon and transforms * * @param lex the lexicon * @param transform the learned transforms * @return a Set of inferred words */ private Collection<Word> inferBases(Lexicon lex, Transform transform) { // Go over each unmodeled word with affix2 of the transform. If // its hypothesized base is not word, infer it Set<Word> newWords = new THashSet<Word>(); for (Word w : transform.getAffix2().getWordSet()) { // Skip anything not unmodeled if (w.getSet() != WordSet.UNMODELED) { continue; } // Hypothesize the base String baseText = Transform.inferBase(w, transform); // If the base does not exist, try to infer it if (lex.getWord(baseText) == null) { // If it was already inferred, add it to the lexicon if (inferredBases.contains(baseText)) { // Create a new word using the token count of the word // that ended up promoting it Word newWord = new Word(baseText, w.getCount(), false); newWords.add(newWord); } else { // Otherwise, infer it inferredBases.add(baseText); } } } return newWords; }