// get signs with additional args for a known special token const, target pred and target rel private SignHash getSignsFromWord( Word w, String specialTokenConst, String targetPred, String targetRel) throws LexException { Collection<MorphItem> morphItems = (specialTokenConst == null) ? (Collection<MorphItem>) _words.get(w) : null; if (morphItems == null) { // check for special tokens if (specialTokenConst == null) { specialTokenConst = tokenizer.getSpecialTokenConstant(tokenizer.isSpecialToken(w.getForm())); targetPred = w.getForm(); } if (specialTokenConst != null) { Word key = Word.createSurfaceWord(w, specialTokenConst); morphItems = (Collection<MorphItem>) _words.get(key); } // otherwise throw lex exception if (morphItems == null) throw new LexException(w + " not in lexicon"); } SignHash result = new SignHash(); for (Iterator<MorphItem> MI = morphItems.iterator(); MI.hasNext(); ) { getWithMorphItem(w, MI.next(), targetPred, targetRel, result); } return result; }
// get signs using an additional arg for a target rel private Collection<Sign> getSignsFromPredAndTargetRel(String pred, String targetRel) { Collection<Word> words = (Collection<Word>) _predToWords.get(pred); String specialTokenConst = null; // for robustness, when using supertagger, add words for pred sans sense index int dotIndex = -1; if (_supertagger != null && !Character.isDigit(pred.charAt(0)) && // skip numbers (dotIndex = pred.lastIndexOf('.')) > 0 && pred.length() > dotIndex + 1 && pred.charAt(dotIndex + 1) != '_') // skip titles, eg Mr._Smith { String barePred = pred.substring(0, dotIndex); Collection<Word> barePredWords = (Collection<Word>) _predToWords.get(barePred); if (words == null) words = barePredWords; else if (barePredWords != null) { Set<Word> unionWords = new HashSet<Word>(words); unionWords.addAll(barePredWords); words = unionWords; } } if (words == null) { specialTokenConst = tokenizer.getSpecialTokenConstant(tokenizer.isSpecialToken(pred)); if (specialTokenConst == null) return null; // lookup words with pred = special token const Collection<Word> specialTokenWords = (Collection<Word>) _predToWords.get(specialTokenConst); // replace special token const with pred if (specialTokenWords == null) return null; words = new ArrayList<Word>(specialTokenWords.size()); for (Iterator<Word> it = specialTokenWords.iterator(); it.hasNext(); ) { Word stw = it.next(); Word w = Word.createSurfaceWord(stw, pred); words.add(w); } } List<Sign> retval = new ArrayList<Sign>(); for (Iterator<Word> it = words.iterator(); it.hasNext(); ) { Word w = it.next(); try { SignHash signs = getSignsFromWord(w, specialTokenConst, pred, targetRel); retval.addAll(signs.asSignSet()); } // shouldn't happen catch (LexException exc) { System.err.println("Unexpected lex exception for word " + w + ": " + exc); } } return retval; }
/** * For a given word, return all of its surface word's lexical entries. If the word is not listed * in the lexicon, the tokenizer is consulted to see if it is a special token (date, time, etc.); * otherwise an exception is thrown. If the word has coarticulations, all applicable * coarticulation entries are applied to the base word, in an arbitrary order. * * @param w the word * @return a sign hash * @exception LexException thrown if word not found */ public SignHash getSignsFromWord(Word w) throws LexException { // reduce word to its core, removing coart attrs if any Word surfaceWord = Word.createSurfaceWord(w); Word coreWord = (surfaceWord.attrsIntersect(_coartAttrs)) ? Word.createCoreSurfaceWord(surfaceWord, _coartAttrs) : surfaceWord; // lookup core word SignHash result = getSignsFromWord(coreWord, null, null, null); if (result.size() == 0) { throw new LexException(coreWord + " not found in lexicon"); } // return signs if no coart attrs if (coreWord == surfaceWord) return result; // otherwise apply coarts for word applyCoarts(surfaceWord, result); return result; }