Пример #1
0
  /**
   * Proposes base forms for the unmodeled words attached to the second affix
   * of a transform.
   *
   * <p>Each word in the word set of {@code transform.getAffix2()} whose set is
   * {@link WordSet#UNMODELED} has a base hypothesized for it via
   * {@link Transform#inferBase}. When the lexicon has no entry for that base,
   * one of two things happens:
   * <ul>
   *   <li>the base text is not yet in {@code inferredBases}: it is recorded
   *       there and nothing is returned for it on this call;</li>
   *   <li>the base text is already in {@code inferredBases}: a new
   *       {@link Word} is built from it and placed in the result.</li>
   * </ul>
   * This method only queries {@code lex}; the new words are handed back to
   * the caller rather than inserted here.
   *
   * @param lex the lexicon consulted to see whether a base already exists
   * @param transform the transform whose second affix supplies the candidates
   * @return the newly created base words; empty when none were promoted
   */
  private Collection<Word> inferBases(Lexicon lex, Transform transform) {
    Set<Word> promoted = new THashSet<Word>();

    for (Word candidate : transform.getAffix2().getWordSet()) {
      // Only unmodeled words take part in base inference
      if (candidate.getSet() != WordSet.UNMODELED) {
        continue;
      }

      String hypothesis = Transform.inferBase(candidate, transform);

      // A base the lexicon already knows needs no inference
      if (lex.getWord(hypothesis) != null) {
        continue;
      }

      // First sighting: just remember the hypothesis for later
      if (!inferredBases.contains(hypothesis)) {
        inferredBases.add(hypothesis);
        continue;
      }

      // Seen before: promote it, carrying over the token count of the
      // word that triggered the promotion
      promoted.add(new Word(hypothesis, candidate.getCount(), false));
    }

    return promoted;
  }