Beispiel #1
0
 /**
  * For a string of 1 or more surface words, return all of the lexical entries for each word as a
  * list of sign hashes. Tokenization is performed using the configured tokenizer.
  *
  * @param s the words in string format
  * @return a list of sign hashes, one per tokenized word, in token order
  * @exception LexException thrown if a word is not found in the lexicon
  */
 public List<SignHash> getEntriesFromWords(String s) throws LexException {
   List<SignHash> entries = new ArrayList<SignHash>();
   // enhanced for loop: the iterator was only used for traversal, never removal
   for (Word w : tokenizer.tokenize(s)) {
     SignHash signs = getSignsFromWord(w);
     // an empty sign hash means the word has no lexical entries at all
     if (signs.size() == 0) {
       throw new LexException("Word not in lexicon: \"" + w + "\"");
     }
     entries.add(signs);
   }
   return entries;
 }
Beispiel #2
0
 /**
  * For a given word, return all of its surface word's lexical entries. If the word is not listed
  * in the lexicon, the tokenizer is consulted to see if it is a special token (date, time, etc.);
  * otherwise an exception is thrown. If the word has coarticulations, all applicable
  * coarticulation entries are applied to the base word, in an arbitrary order.
  *
  * @param w the word
  * @return a sign hash
  * @exception LexException thrown if word not found
  */
 public SignHash getSignsFromWord(Word w) throws LexException {
   // reduce the word to its surface form, then strip coarticulation
   // attributes (if any) to obtain the core word used for lookup
   Word surfaceWord = Word.createSurfaceWord(w);
   boolean hasCoarts = surfaceWord.attrsIntersect(_coartAttrs);
   Word coreWord = hasCoarts ? Word.createCoreSurfaceWord(surfaceWord, _coartAttrs) : surfaceWord;
   // look up the core word; an empty result means it is unknown
   SignHash result = getSignsFromWord(coreWord, null, null, null);
   if (result.size() == 0) {
     throw new LexException(coreWord + " not found in lexicon");
   }
   // when coarticulation attrs were stripped, apply the matching
   // coarticulation entries back onto the looked-up signs
   if (hasCoarts) {
     applyCoarts(surfaceWord, result);
   }
   return result;
 }