// look up and apply coarts for w to each sign in result @SuppressWarnings("unchecked") private void applyCoarts(Word w, SignHash result) throws LexException { List<Sign> inputSigns = new ArrayList<Sign>(result.asSignSet()); result.clear(); List<Sign> outputSigns = new ArrayList<Sign>(inputSigns.size()); // for each surface attr, lookup coarts and apply to input signs, storing results in output // signs for (Iterator<Pair<String, String>> it = w.getSurfaceAttrValPairs(); it.hasNext(); ) { Pair<String, String> p = it.next(); String attr = (String) p.a; if (!_indexedCoartAttrs.contains(attr)) continue; String val = (String) p.b; Word coartWord = Word.createWord(attr, val); SignHash coartResult = getSignsFromWord(coartWord, null, null, null); for (Iterator<Sign> it2 = coartResult.iterator(); it2.hasNext(); ) { Sign coartSign = it2.next(); // apply to each input for (int j = 0; j < inputSigns.size(); j++) { Sign sign = inputSigns.get(j); grammar.rules.applyCoart(sign, coartSign, outputSigns); } } // switch output to input for next iteration inputSigns.clear(); inputSigns.addAll(outputSigns); outputSigns.clear(); } // add results back result.addAll(inputSigns); }
// get signs using an additional arg for a target rel private Collection<Sign> getSignsFromPredAndTargetRel(String pred, String targetRel) { Collection<Word> words = (Collection<Word>) _predToWords.get(pred); String specialTokenConst = null; // for robustness, when using supertagger, add words for pred sans sense index int dotIndex = -1; if (_supertagger != null && !Character.isDigit(pred.charAt(0)) && // skip numbers (dotIndex = pred.lastIndexOf('.')) > 0 && pred.length() > dotIndex + 1 && pred.charAt(dotIndex + 1) != '_') // skip titles, eg Mr._Smith { String barePred = pred.substring(0, dotIndex); Collection<Word> barePredWords = (Collection<Word>) _predToWords.get(barePred); if (words == null) words = barePredWords; else if (barePredWords != null) { Set<Word> unionWords = new HashSet<Word>(words); unionWords.addAll(barePredWords); words = unionWords; } } if (words == null) { specialTokenConst = tokenizer.getSpecialTokenConstant(tokenizer.isSpecialToken(pred)); if (specialTokenConst == null) return null; // lookup words with pred = special token const Collection<Word> specialTokenWords = (Collection<Word>) _predToWords.get(specialTokenConst); // replace special token const with pred if (specialTokenWords == null) return null; words = new ArrayList<Word>(specialTokenWords.size()); for (Iterator<Word> it = specialTokenWords.iterator(); it.hasNext(); ) { Word stw = it.next(); Word w = Word.createSurfaceWord(stw, pred); words.add(w); } } List<Sign> retval = new ArrayList<Sign>(); for (Iterator<Word> it = words.iterator(); it.hasNext(); ) { Word w = it.next(); try { SignHash signs = getSignsFromWord(w, specialTokenConst, pred, targetRel); retval.addAll(signs.asSignSet()); } // shouldn't happen catch (LexException exc) { System.err.println("Unexpected lex exception for word " + w + ": " + exc); } } return retval; }
/** * For a string of 1 or more surface words, return all of the lexical entries for each word as a * list of sign hashes. Tokenization is performed using the configured tokenizer. * * @param w the words in string format * @return a list of sign hashes * @exception LexException thrown if word not found */ public List<SignHash> getEntriesFromWords(String s) throws LexException { List<SignHash> entries = new ArrayList<SignHash>(); List<Word> words = tokenizer.tokenize(s); for (Iterator<Word> it = words.iterator(); it.hasNext(); ) { Word w = it.next(); SignHash signs = getSignsFromWord(w); if (signs.size() == 0) { throw new LexException("Word not in lexicon: \"" + w + "\""); } entries.add(signs); } return entries; }
/** * For a given word, return all of its surface word's lexical entries. If the word is not listed * in the lexicon, the tokenizer is consulted to see if it is a special token (date, time, etc.); * otherwise an exception is thrown. If the word has coarticulations, all applicable * coarticulation entries are applied to the base word, in an arbitrary order. * * @param w the word * @return a sign hash * @exception LexException thrown if word not found */ public SignHash getSignsFromWord(Word w) throws LexException { // reduce word to its core, removing coart attrs if any Word surfaceWord = Word.createSurfaceWord(w); Word coreWord = (surfaceWord.attrsIntersect(_coartAttrs)) ? Word.createCoreSurfaceWord(surfaceWord, _coartAttrs) : surfaceWord; // lookup core word SignHash result = getSignsFromWord(coreWord, null, null, null); if (result.size() == 0) { throw new LexException(coreWord + " not found in lexicon"); } // return signs if no coart attrs if (coreWord == surfaceWord) return result; // otherwise apply coarts for word applyCoarts(surfaceWord, result); return result; }
// given EntriesItem private void getWithEntriesItem( Word w, MorphItem mi, String stem, String pred, String targetPred, String targetRel, EntriesItem item, MacroAdder macAdder, Map<String, Double> supertags, Set<String> supertagsFound, SignHash result) { // ensure apropos if (targetPred != null && !targetPred.equals(pred)) return; if (targetRel != null && !targetRel.equals(item.getIndexRel()) && !targetRel.equals(item.getCoartRel())) return; if (!item.getActive().booleanValue()) return; if (mi.excluded(item)) return; try { // copy and add macros Category cat = item.getCat().copy(); macAdder.addMacros(cat); // replace DEFAULT_VAL with pred, after first // unifying type of associated nom var(s) with sem class unifySemClass(cat, mi.getWord().getSemClass()); REPLACEMENT = pred; cat.deepMap(defaultReplacer); // check supertag // TODO: think about earlier checks for efficiency, for grammars where macros and preds don't // matter // Double lexprob = null; // nb: skipping lex log probs, don't seem to be helpful if (supertags != null) { // skip if not found String stag = cat.getSupertag(); if (!supertags.containsKey(stag)) return; // otherwise update found supertags supertagsFound.add(stag); // get lex prob // lexprob = supertags.get(stag); } // propagate types of nom vars propagateTypes(cat); // handle distrib attrs and inherits-from propagateDistributiveAttrs(cat); expandInheritsFrom(cat); // merge stem, pos, sem class from morph item, plus supertag from cat Word word = Word.createFullWord(w, mi.getWord(), cat.getSupertag()); // set origin and lexprob Sign sign = new Sign(word, cat); sign.setOrigin(); // if (lexprob != null) { // sign.addData(new SupertaggerAdapter.LexLogProb((float) Math.log10(lexprob))); // } // return sign result.insert(sign); } catch (RuntimeException exc) { System.err.println( "Warning: ignoring entry: " + item.getName() + " of family: " + item.getFamilyName() + " for stem: " + stem + " b/c: " + exc.toString()); } }