/**
 * For a string of 1 or more surface words, return all of the lexical entries for each word as a
 * list of sign hashes. Tokenization is performed using the configured tokenizer.
 *
 * @param s the words in string format
 * @return a list of sign hashes
 * @exception LexException thrown if a word is not found
 */
public List<SignHash> getEntriesFromWords(String s) throws LexException {
    List<SignHash> entries = new ArrayList<SignHash>();
    List<Word> words = tokenizer.tokenize(s);
    for (Word w : words) {
        SignHash signs = getSignsFromWord(w);
        if (signs.size() == 0) {
            throw new LexException("Word not in lexicon: \"" + w + "\"");
        }
        entries.add(signs);
    }
    return entries;
}
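/*
 * Usage sketch (illustrative only, not part of the original source): looking up every word of a
 * short string. It assumes the method is called on an already-loaded lexicon object, referred to
 * here as `lexicon`; the sample sentence is arbitrary.
 *
 *   try {
 *       // one SignHash per surface word, in tokenization order
 *       List<SignHash> entries = lexicon.getEntriesFromWords("the dog barks");
 *       for (SignHash signs : entries) {
 *           System.out.println("found " + signs.size() + " lexical entries");
 *       }
 *   } catch (LexException e) {
 *       // thrown as soon as any word in the string has no lexical entry
 *       System.err.println("lookup failed: " + e.getMessage());
 *   }
 */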
/**
 * For a given word, return all of the lexical entries for its surface word. If the word is not
 * listed in the lexicon, the tokenizer is consulted to see if it is a special token (date, time,
 * etc.); otherwise an exception is thrown. If the word has coarticulations, all applicable
 * coarticulation entries are applied to the base word, in an arbitrary order.
 *
 * @param w the word
 * @return a sign hash
 * @exception LexException thrown if the word is not found
 */
public SignHash getSignsFromWord(Word w) throws LexException {
    // reduce word to its core, removing coart attrs if any
    Word surfaceWord = Word.createSurfaceWord(w);
    Word coreWord = (surfaceWord.attrsIntersect(_coartAttrs))
        ? Word.createCoreSurfaceWord(surfaceWord, _coartAttrs)
        : surfaceWord;
    // look up core word
    SignHash result = getSignsFromWord(coreWord, null, null, null);
    if (result.size() == 0) {
        throw new LexException(coreWord + " not found in lexicon");
    }
    // return signs if no coart attrs
    if (coreWord == surfaceWord) return result;
    // otherwise apply coarts for word
    applyCoarts(surfaceWord, result);
    return result;
}
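/*
 * Usage sketch (illustrative only, not part of the original source): looking up a single word.
 * It assumes a Word obtained from the same tokenizer configured above and an already-loaded
 * lexicon object, referred to here as `lexicon`. If the word carries coarticulation attributes
 * (e.g. a pitch accent), the method strips them for the core lookup and then re-applies the
 * matching coarticulation entries to the returned signs.
 *
 *   Word w = tokenizer.tokenize("dogs").get(0);
 *   try {
 *       SignHash signs = lexicon.getSignsFromWord(w);
 *       System.out.println("\"" + w + "\" has " + signs.size() + " signs");
 *   } catch (LexException e) {
 *       // thrown when neither the core word nor a special-token reading is found
 *       System.err.println(e.getMessage());
 *   }
 */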