// get licensing features, with appropriate defaults @SuppressWarnings("unchecked") private void loadLicensingFeatures(Element licensingElt) { List<LicensingFeature> licensingFeats = new ArrayList<LicensingFeature>(); boolean containsLexFeat = false; if (licensingElt != null) { for (Iterator<Element> it = licensingElt.getChildren("feat").iterator(); it.hasNext(); ) { Element featElt = it.next(); String attr = featElt.getAttributeValue("attr"); if (attr.equals("lex")) containsLexFeat = true; String val = featElt.getAttributeValue("val"); List<String> alsoLicensedBy = null; String alsoVals = featElt.getAttributeValue("also-licensed-by"); if (alsoVals != null) { alsoLicensedBy = Arrays.asList(alsoVals.split("\\s+")); } boolean licenseEmptyCats = true; boolean licenseMarkedCats = false; boolean instantiate = true; byte loc = LicensingFeature.BOTH; String lmc = featElt.getAttributeValue("license-marked-cats"); if (lmc != null) { licenseMarkedCats = Boolean.valueOf(lmc).booleanValue(); // change defaults licenseEmptyCats = false; loc = LicensingFeature.TARGET_ONLY; instantiate = false; } String lec = featElt.getAttributeValue("license-empty-cats"); if (lec != null) { licenseEmptyCats = Boolean.valueOf(lec).booleanValue(); } String inst = featElt.getAttributeValue("instantiate"); if (inst != null) { instantiate = Boolean.valueOf(inst).booleanValue(); } String locStr = featElt.getAttributeValue("location"); if (locStr != null) { if (locStr.equals("target-only")) loc = LicensingFeature.TARGET_ONLY; if (locStr.equals("args-only")) loc = LicensingFeature.ARGS_ONLY; if (locStr.equals("both")) loc = LicensingFeature.BOTH; } licensingFeats.add( new LicensingFeature( attr, val, alsoLicensedBy, licenseEmptyCats, licenseMarkedCats, instantiate, loc)); } } if (!containsLexFeat) { licensingFeats.add(LicensingFeature.defaultLexFeature); } _licensingFeatures = new LicensingFeature[licensingFeats.size()]; licensingFeats.toArray(_licensingFeatures); }
// returns a macro adder for the given morph item private MacroAdder getMacAdder(MorphItem mi) { // check map MacroAdder retval = macAdderMap.get(mi); if (retval != null) return retval; // set up macro adder IntHashSetMap macrosFromLex = new IntHashSetMap(); String[] newMacroNames = mi.getMacros(); List<MacroItem> macroItems = new ArrayList<MacroItem>(); for (int i = 0; i < newMacroNames.length; i++) { Set<FeatureStructure> featStrucs = (Set<FeatureStructure>) _macros.get(newMacroNames[i]); if (featStrucs != null) { for (Iterator<FeatureStructure> fsIt = featStrucs.iterator(); fsIt.hasNext(); ) { FeatureStructure fs = fsIt.next(); macrosFromLex.put(fs.getIndex(), fs); } } MacroItem macroItem = _macroItems.get(newMacroNames[i]); if (macroItem != null) { macroItems.add(macroItem); } else { // should be checked earlier too System.err.println( "Warning: macro " + newMacroNames[i] + " not found for word '" + mi.getWord() + "'"); } } retval = new MacroAdder(macrosFromLex, macroItems); // update map and return macAdderMap.put(mi, retval); return retval; }
public void handleElement(Element e) { // create morph item if (e.getName().equals("entry")) { try { morphItems.add(new MorphItem(e)); } catch (RuntimeException exc) { System.err.println("Skipping morph item: " + e.getAttributeValue("word")); System.err.println(exc.toString()); } } // create macro item else if (e.getName().equals("macro")) { try { macroItems.add(new MacroItem(e)); } catch (RuntimeException exc) { System.err.println("Skipping macro item: " + e.getAttributeValue("name")); System.err.println(exc.toString()); } } }
/** * For a string of 1 or more surface words, return all of the lexical entries for each word as a * list of sign hashes. Tokenization is performed using the configured tokenizer. * * @param w the words in string format * @return a list of sign hashes * @exception LexException thrown if word not found */ public List<SignHash> getEntriesFromWords(String s) throws LexException { List<SignHash> entries = new ArrayList<SignHash>(); List<Word> words = tokenizer.tokenize(s); for (Iterator<Word> it = words.iterator(); it.hasNext(); ) { Word w = it.next(); SignHash signs = getSignsFromWord(w); if (signs.size() == 0) { throw new LexException("Word not in lexicon: \"" + w + "\""); } entries.add(signs); } return entries; }
public void handleElement(Element e) { // create family if (e.getName().equals("family")) { try { lexicon.add(new Family(e)); } catch (RuntimeException exc) { System.err.println("Skipping family: " + e.getAttributeValue("name")); System.err.println(exc.toString()); } } // save distributive attributes else if (e.getName().equals("distributive-features")) distrElt = e; // save licensing features else if (e.getName().equals("licensing-features")) licensingElt = e; // save relation sort order else if (e.getName().equals("relation-sorting")) relationSortingElt = e; }
/** Loads the lexicon and morph files. */ public void init(URL lexiconUrl, URL morphUrl) throws IOException { List<Family> lexicon = null; List<MorphItem> morph = null; List<MacroItem> macroModel = null; // load category families (lexicon), morph forms and macros lexicon = getLexicon(lexiconUrl); Pair<List<MorphItem>, List<MacroItem>> morphInfo = getMorph(morphUrl); morph = morphInfo.a; macroModel = morphInfo.b; // index words; also index stems to words, as default preds // store indexed coarticulation attrs too _words = new GroupMap<Word, MorphItem>(); _predToWords = new GroupMap<String, Word>(); _coartAttrs = new HashSet<String>(); _indexedCoartAttrs = new HashSet<String>(); for (MorphItem morphItem : morph) { Word surfaceWord = morphItem.getSurfaceWord(); _words.put(surfaceWord, morphItem); _predToWords.put(morphItem.getWord().getStem(), surfaceWord); if (morphItem.isCoart()) { Word indexingWord = morphItem.getCoartIndexingWord(); _words.put(indexingWord, morphItem); Pair<String, String> first = indexingWord.getSurfaceAttrValPairs().next(); _indexedCoartAttrs.add(first.a); for (Iterator<Pair<String, String>> it = surfaceWord.getSurfaceAttrValPairs(); it.hasNext(); ) { Pair<String, String> p = it.next(); _coartAttrs.add(p.a); } } } // index entries based on stem+pos _stems = new GroupMap<String, Object>(); _posToEntries = new GroupMap<String, EntriesItem[]>(); // index entries by supertag+pos, for supertagging _stagToEntries = new GroupMap<String, EntriesItem>(); // also index rels and coart rels to preds _relsToPreds = new GroupMap<String, String>(); _coartRelsToPreds = new GroupMap<String, String>(); // and gather list of attributes used per atomic category type _catsToAttrs = new GroupMap<String, String>(); _lfAttrs = new HashSet<String>(); // and remember family and ent, names, for checking excluded list on morph items HashSet<String> familyAndEntryNames = new HashSet<String>(); // index each family for (Family family : lexicon) { familyAndEntryNames.add(family.getName()); EntriesItem[] entries = family.getEntries(); DataItem[] data = family.getData(); // for generic use when we get an unknown stem // from the morphological analyzer if (!family.isClosed()) { _posToEntries.put(family.getPOS(), entries); } // scan through entries for (int j = 0; j < entries.length; j++) { // index EntriesItem eItem = entries[j]; _stagToEntries.put(eItem.getSupertag() + family.getPOS(), eItem); if (eItem.getStem().length() > 0) { _stems.put(eItem.getStem() + family.getPOS(), eItem); } try { // gather features eItem.getCat().forall(gatherAttrs); // record names familyAndEntryNames.add(eItem.getName()); familyAndEntryNames.add(eItem.getQualifiedName()); } catch (RuntimeException exc) { System.err.println("exception for: " + family.getName() + ": " + exc); } } // scan through data for (int j = 0; j < data.length; j++) { DataItem dItem = data[j]; _stems.put( dItem.getStem() + family.getPOS(), new Pair<DataItem, EntriesItem[]>(dItem, entries)); // index non-default preds to words if (!dItem.getStem().equals(dItem.getPred())) { Collection<Word> words = (Collection<Word>) _predToWords.get(dItem.getStem()); if (words == null) { if (!openlex) { System.out.print("Warning: couldn't find words for pred '"); System.out.println(dItem.getPred() + "' with stem '" + dItem.getStem() + "'"); } } else { for (Iterator<Word> it = words.iterator(); it.hasNext(); ) { _predToWords.put(dItem.getPred(), it.next()); } } } } // index rels to preds // nb: this covers relational (eg @x<GenRel>e) and featural (eg @e<tense>past) // elementary predications List<String> indexRels = new ArrayList<String>(3); String familyIndexRel = family.getIndexRel(); if (familyIndexRel.length() > 0) { indexRels.add(familyIndexRel); } for (int j = 0; j < entries.length; j++) { EntriesItem eItem = entries[j]; String indexRel = eItem.getIndexRel(); if (indexRel.length() > 0 && !indexRel.equals(familyIndexRel)) { indexRels.add(indexRel); } } for (Iterator<String> it = indexRels.iterator(); it.hasNext(); ) { String indexRel = it.next(); // nb: not indexing on entries items, b/c some stems are still defaults for (int j = 0; j < data.length; j++) { DataItem dItem = data[j]; _relsToPreds.put(indexRel, dItem.getPred()); } } // index coart rels (features, really) to preds String coartRel = family.getCoartRel(); if (coartRel.length() > 0) { for (int j = 0; j < data.length; j++) { _coartRelsToPreds.put(coartRel, data[j].getPred()); } } } // index the macros _macros = new GroupMap<String, FeatureStructure>(); // nb: could just index MacroItem objects for feature structures too; // this might be a bit cleaner, but life is short _macroItems = new HashMap<String, MacroItem>(); for (MacroItem mi : macroModel) { String macName = mi.getName(); FeatureStructure[] specs = mi.getFeatureStructures(); for (int j = 0; j < specs.length; j++) { _macros.put(macName, specs[j]); } // this is for handling LF part of macros _macroItems.put(macName, mi); } // with morph items, check POS, macro names, excluded list for xref for (MorphItem morphItem : morph) { Word w = morphItem.getWord(); if (!openlex && !_stems.containsKey(w.getStem() + w.getPOS()) && !_posToEntries.containsKey(w.getPOS())) { System.err.println( "Warning: no entries for stem '" + w.getStem() + "' and POS '" + w.getPOS() + "' found for word '" + w + "'"); } String[] macroNames = morphItem.getMacros(); for (int j = 0; j < macroNames.length; j++) { if (!_macroItems.containsKey(macroNames[j])) { System.err.println( "Warning: macro " + macroNames[j] + " not found for word '" + morphItem.getWord() + "'"); } } String[] excludedNames = morphItem.getExcluded(); for (int j = 0; j < excludedNames.length; j++) { if (!familyAndEntryNames.contains(excludedNames[j])) { System.err.println( "Warning: excluded family or entry '" + excludedNames[j] + "' not found for word '" + morphItem.getWord() + "'"); } } } }