/**
 * Uses recursion to get the depth of the input synset (in 'getSynsetDepth()') from a <root>.
 * A synset may have multiple parents, so we return each possible depth and 'home hierarchy' <root>.
 * Thus, we may reach the same <root> at different depths in the WordNet hierarchy.
 */
private void treecreeper(
    int depth,
    HashSet<ISynsetID> synsets,
    TreeMap<Integer, HashSet<ISynsetID>> depths,
    ArrayList<Integer> roots) {
  depth++;
  ISynset synset = null;
  HashSet<ISynsetID> hypernyms = new HashSet<ISynsetID>(); // next 'level' (inverse of 'depth')
  for (ISynsetID s : synsets) {
    synset = dict.getSynset(s);
    hypernyms.addAll(synset.getRelatedSynsets(Pointer.HYPERNYM)); // the <hypernyms>
    hypernyms.addAll(
        synset.getRelatedSynsets(Pointer.HYPERNYM_INSTANCE)); // the <hypernyms> (instances)
  }
  if (!hypernyms.isEmpty()) {
    for (ISynsetID h : hypernyms) {
      int offset = h.getOffset();
      if (roots.contains(offset)) { // reached a <root>: record it at the current depth
        if (depths.containsKey(depth)) {
          depths.get(depth).add(h);
        } else {
          HashSet<ISynsetID> deep = new HashSet<ISynsetID>();
          deep.add(h);
          depths.put(depth, deep);
        }
      }
    }
    treecreeper(depth, hypernyms, depths, roots);
  }
}
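/*
 * Illustrative seeding of treecreeper() (a sketch, not from the original source): the map is
 * filled with depth -> the set of <root> synsets reachable at that depth, so lastKey() is the
 * longest root path and firstKey() the shortest. Assumes a noun synset and a loaded 'nounroots'.
 */
private TreeMap<Integer, HashSet<ISynsetID>> exampleDepthsFor(ISynset synset) {
  TreeMap<Integer, HashSet<ISynsetID>> depths = new TreeMap<Integer, HashSet<ISynsetID>>();
  HashSet<ISynsetID> seed = new HashSet<ISynsetID>();
  seed.add(synset.getID());
  treecreeper(0, seed, depths, nounroots);
  return depths;
}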
public double getSynsetDepth(ISynset synsetX, String pos) {
  // pick the set of <root> hierarchies for the requested part of speech
  ArrayList<Integer> homehierarchies = null;
  if (pos.equalsIgnoreCase("n")) {
    homehierarchies = nounroots;
  }
  if (pos.equalsIgnoreCase("v")) {
    homehierarchies = verbroots;
  }
  // get a score
  TreeMap<Integer, HashSet<ISynsetID>> depths = new TreeMap<Integer, HashSet<ISynsetID>>();
  HashSet<ISynsetID> synsets = new HashSet<ISynsetID>();
  synsets.add(synsetX.getID());
  treecreeper(0, synsets, depths, homehierarchies);
  if (depths.isEmpty()) {
    return 0.0; // i.e. synsetX is a <root>: there is nothing 'above' it
  }
  // node counting, so the start and end nodes are included in the path length
  return (double) (depths.lastKey() + 2.0);
}
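/*
 * Usage sketch (illustrative, not from the original source): assumes this class holds an open
 * JWI 'dict' and that 'nounroots' contains the offsets of the noun <root> synsets; "dog" is
 * just an example lemma. Shows how the node-counted depth of a first sense could be obtained.
 */
private void exampleSynsetDepth() {
  IIndexWord idx = dict.getIndexWord("dog", POS.NOUN);
  ISynset first = dict.getWord(idx.getWordIDs().get(0)).getSynset();
  double depth = getSynsetDepth(first, "n"); // depth from a <root>, node counting
  System.out.println("depth(dog#1) = " + depth);
}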
/*
 * Helper for getLCSbyDepth(): get the hypernyms for both synsets, take the joins
 * (i.e. the intersection of the hypernyms), then keep the join(s) with the greatest
 * depth in WordNet using getSynsetDepth().
 */
private void getHypernyms(HashSet<ISynsetID> synsets, HashSet<ISynsetID> allhypernyms) {
  HashSet<ISynsetID> hypernyms = new HashSet<ISynsetID>();
  for (ISynsetID s : synsets) {
    ISynset synset = dict.getSynset(s);
    hypernyms.addAll(synset.getRelatedSynsets(Pointer.HYPERNYM)); // the <hypernyms>, if any
    hypernyms.addAll(
        synset.getRelatedSynsets(Pointer.HYPERNYM_INSTANCE)); // the <hypernyms> (instances), if any
  }
  if (!hypernyms.isEmpty()) {
    allhypernyms.addAll(hypernyms);
    getHypernyms(hypernyms, allhypernyms);
  }
}
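/*
 * Usage sketch (illustrative): collects the full hypernym closure of a single synset.
 * Assumes 'dict' is open; the lemma "car" is just an example.
 */
private void exampleHypernymClosure() {
  IIndexWord idx = dict.getIndexWord("car", POS.NOUN);
  ISynset synset = dict.getWord(idx.getWordIDs().get(0)).getSynset();
  HashSet<ISynsetID> seed = new HashSet<ISynsetID>();
  seed.add(synset.getID());
  HashSet<ISynsetID> closure = new HashSet<ISynsetID>();
  getHypernyms(seed, closure); // fills 'closure' with every synset above "car"
  System.out.println("car#1 has " + closure.size() + " hypernyms (transitive)");
}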
public List getSynonyms(String phrase) {
  List synonymsList = new ArrayList();
  if (dict == null) {
    System.out.println("Dictionary is null");
    System.exit(0);
  }
  IIndexWord idxWord = dict.getIndexWord(phrase, POS.NOUN);
  if (idxWord != null) {
    IWordID wordID = (IWordID) idxWord.getWordIDs().get(0); // first sense only
    IWord word = dict.getWord(wordID);
    ISynset synset = word.getSynset();
    List<IWord> words = new ArrayList<IWord>();
    for (Iterator iterator = synset.getWords().iterator(); iterator.hasNext(); ) {
      words.add((IWord) iterator.next());
    }
    synonymsList = keepUniqueTerms(words);
  }
  return synonymsList;
}
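/*
 * Usage sketch (illustrative): prints the synonyms of the first noun sense of "automobile".
 * The raw List return type is kept from the method above; the exact element type depends on
 * keepUniqueTerms().
 */
private void exampleSynonyms() {
  List synonyms = getSynonyms("automobile");
  for (Object synonym : synonyms) {
    System.out.println(synonym);
  }
}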
public double getSynsetDepth(String word, int senseno, String pos) {
  IIndexWord word1 = null; // the WordNet index word for the requested POS
  ArrayList<Integer> homehierarchies = null;
  try {
    if (pos.equalsIgnoreCase("n")) {
      word1 = dict.getIndexWord(word, POS.NOUN);
      homehierarchies = nounroots;
    }
    if (pos.equalsIgnoreCase("v")) {
      word1 = dict.getIndexWord(word, POS.VERB);
      homehierarchies = verbroots;
    }
    IWordID word1ID = word1.getWordIDs().get(senseno - 1); // get the right sense of the word
    ISynset synset1 = dict.getWord(word1ID).getSynset();
    // get a score
    TreeMap<Integer, HashSet<ISynsetID>> depths = new TreeMap<Integer, HashSet<ISynsetID>>();
    HashSet<ISynsetID> synsets = new HashSet<ISynsetID>();
    synsets.add(synset1.getID());
    treecreeper(0, synsets, depths, homehierarchies);
    if (depths.isEmpty()) {
      return 0.0; // i.e. the synset is a <root>: nothing 'above' it
    }
    // node counting, so the start and end nodes are included in the path length
    return (double) (depths.lastKey() + 2.0);
  } catch (Exception ex) {
    // unknown word or missing sense: fall through and report a depth of 0.0
  }
  return 0.0;
}
/*
 * Breadth-first descent through the <hyponyms>: returns the number of levels below the
 * input set of synsets (0 if none of them has a hyponym).
 */
private int treediver(HashSet<ISynsetID> set) {
  int depth = 0;
  ArrayList<ISynsetID> queue = new ArrayList<ISynsetID>();
  queue.addAll(set);
  boolean search = true;
  while (search) {
    HashSet<ISynsetID> hyponyms = new HashSet<ISynsetID>();
    while (!queue.isEmpty()) {
      ISynset synset = dict.getSynset(queue.remove(0));
      hyponyms.addAll(synset.getRelatedSynsets(Pointer.HYPONYM)); // the <hyponyms>, if any
      hyponyms.addAll(
          synset.getRelatedSynsets(Pointer.HYPONYM_INSTANCE)); // the <hyponyms> (instances), if any
    }
    if (hyponyms.isEmpty()) {
      search = false;
    } else {
      depth++;
      queue.addAll(hyponyms);
    }
  }
  return depth;
}
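/*
 * Usage sketch (illustrative): measures how many <hyponym> levels lie below the first noun
 * sense of "vehicle". Assumes 'dict' is open; the lemma is just an example.
 */
private void exampleTreediver() {
  IIndexWord idx = dict.getIndexWord("vehicle", POS.NOUN);
  ISynset synset = dict.getWord(idx.getWordIDs().get(0)).getSynset();
  HashSet<ISynsetID> seed = new HashSet<ISynsetID>();
  seed.add(synset.getID());
  System.out.println("levels below vehicle#1: " + treediver(seed));
}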
public List getHypernym(String phrase) {
  List hypernymsList = new ArrayList();
  if (dict == null) {
    System.out.println("Dictionary is null");
    System.exit(0);
  }
  IIndexWord idxWord = dict.getIndexWord(phrase, POS.NOUN);
  if (idxWord != null) {
    IWordID wordID = (IWordID) idxWord.getWordIDs().get(0); // first sense only
    IWord word = dict.getWord(wordID);
    ISynset synset = word.getSynset();
    List hypernyms = synset.getRelatedSynsets(Pointer.HYPERNYM);
    for (Iterator iterator = hypernyms.iterator(); iterator.hasNext(); ) {
      ISynsetID sid = (ISynsetID) iterator.next();
      hypernymsList.addAll(dict.getSynset(sid).getWords()); // the words of each direct hypernym
    }
    hypernymsList = keepUniqueTerms(hypernymsList);
  }
  return hypernymsList;
}
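/*
 * Usage sketch (illustrative): prints the words of the direct hypernyms of the first noun
 * sense of "dog".
 */
private void exampleHypernym() {
  for (Object hypernym : getHypernym("dog")) {
    System.out.println(hypernym);
  }
}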
private Duple<CrownOperations.Reason, ISynset> getEstimatedSynonym(
    String targetLemma, Set<String> synonyms, POS pos, String gloss) {

  Counter<ISynset> synsetCounts = new ObjectCounter<ISynset>();

  List<String> lemmasInWn = new ArrayList<String>();
  for (String lemma : synonyms) {
    // Get the WordNet synsets if they exist
    Set<ISynset> senses = WordNetUtils.getSynsets(dict, lemma, pos);
    if (senses.isEmpty()) continue;
    lemmasInWn.add(lemma);
    synsetCounts.countAll(senses);

    // Get the hypernyms of the synset and count their occurrences too
    for (ISynset synset : senses) {
      // Sanity check that avoids attaching this Entry if its lemma appears anywhere near the
      // synonyms. This check potentially has some false positives since we might avoid putting
      // the lemma somewhere valid (in which case it would have more than one valid location),
      // but it is used to avoid noisy integration
      if (WordNetUtils.isAlreadyInWordNet(dict, targetLemma, pos, synset)) {
        return null;
      }
      for (ISynsetID hyper : synset.getRelatedSynsets(Pointer.HYPERNYM)) {
        ISynset hyperSyn = dict.getSynset(hyper);
        if (WordNetUtils.isAlreadyInWordNet(dict, targetLemma, pos, hyperSyn)) {
          return null;
        }
        synsetCounts.count(hyperSyn);
      }
    }
  }

  // Return null if we couldn't find any of the lemma's synonyms or hypernyms in WordNet
  if (synsetCounts.items().isEmpty()) return null;

  // If only one lemma in this list was in WordNet, try comparing the glosses for just that
  // word to find a match
  if (lemmasInWn.size() == 1) {
    double maxScore = 0;
    ISynset best = null;
    String bestGloss = null;
    Set<ISynset> candidateSynonymSynsets = WordNetUtils.getSynsets(dict, lemmasInWn.get(0), pos);
    for (ISynset candidate : candidateSynonymSynsets) {
      String wnExtendedGloss = WordNetUtils.getGlossWithoutExamples(candidate);
      double score = simFunc.compare(gloss, wnExtendedGloss);
      if (maxScore < score) {
        maxScore = score;
        best = candidate;
        bestGloss = wnExtendedGloss;
      }
    }

    CrownOperations.Reason r = new CrownOperations.Reason(getClass());
    r.set("relation_type", "synonym");
    r.set("heuristic", "single-synonym");
    r.set("max_score", maxScore);
    return new Duple<CrownOperations.Reason, ISynset>(r, best);

  } else {
    // Check for whether there were ties in the maximum count
    ISynset mostFreq = synsetCounts.max();
    int mostFreqCount = synsetCounts.getCount(mostFreq);
    List<ISynset> ties = new ArrayList<ISynset>();
    for (ISynset syn : synsetCounts.items()) {
      int c = synsetCounts.getCount(syn);
      if (c == mostFreqCount) ties.add(syn);
    }

    // If only one synset had the maximum count, report it
    if (ties.size() == 1) {
      CrownOperations.Reason r = new CrownOperations.Reason(getClass());
      r.set("relation_type", "synonym");
      r.set("heuristic", "unambiguous-max");
      r.set("count", mostFreqCount);
      return new Duple<CrownOperations.Reason, ISynset>(r, mostFreq);
    }
    // Otherwise, try breaking ties between the synsets using gloss similarity
    else {
      double maxScore = 0;
      ISynset best = null;
      String bestGloss = null;
      for (ISynset candidate : ties) {
        String wnExtendedGloss = WordNetUtils.getGlossWithoutExamples(candidate);
        double score = simFunc.compare(gloss, wnExtendedGloss);
        if (maxScore < score) {
          maxScore = score;
          best = candidate;
          bestGloss = wnExtendedGloss;
        }
      }

      CrownOperations.Reason r = new CrownOperations.Reason(getClass());
      r.set("relation_type", "synonym");
      r.set("heuristic", "tied-synonyms");
      r.set("max_score", maxScore);
      return new Duple<CrownOperations.Reason, ISynset>(r, best);
    }
  }
}
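/*
 * Illustrative sketch (not part of the original source) of the counting heuristic used above,
 * written with plain java.util collections instead of the project's Counter class: each
 * candidate synset gets one vote per synonym sense (and per hypernym of a sense), and the
 * synset(s) tied at the highest vote count become the attachment candidates. Assumes java.util
 * is imported as in the rest of the file.
 */
private List<ISynset> exampleMostVotedSynsets(Map<ISynset, Integer> votes) {
  int max = 0;
  for (int c : votes.values()) {
    if (c > max) max = c;
  }
  List<ISynset> ties = new ArrayList<ISynset>();
  for (Map.Entry<ISynset, Integer> e : votes.entrySet()) {
    if (e.getValue() == max) ties.add(e.getKey()); // keep every synset tied at the maximum
  }
  return ties;
}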
/*
 * Given two input synsets, finds the least common subsumer (LCS) of them. If there are
 * multiple candidates for the LCS (due to multiple inheritance in WordNet), the LCS with the
 * greatest depth is chosen (i.e., the candidate whose shortest path to the root is the longest).
 */
public HashSet<ISynsetID> getLCSbyDepth(ISynset synset1, ISynset synset2, String pos) {
  HashSet<ISynsetID> lcs = new HashSet<ISynsetID>();

  if (synset1.equals(synset2)) { // identical synsets subsume themselves
    HashSet<ISynsetID> identity = new HashSet<ISynsetID>();
    identity.add(synset1.getID());
    return identity;
  }

  // !!! both could be <roots>, in which case there is no subsumer !!!
  double d1 = getSynsetDepth(synset1, pos);
  double d2 = getSynsetDepth(synset2, pos);
  if (d1 == 0.0 && d2 == 0.0) {
    return lcs; // !!! return the empty set !!!
  }
  // !!! *one* of them could be a <root>, in which case that <root> is returned as the subsumer !!!
  if (d1 == 0.0 || d2 == 0.0) {
    if (d1 == 0.0) {
      lcs.add(synset1.getID());
    }
    if (d2 == 0.0) {
      lcs.add(synset2.getID());
    }
    return lcs;
  }

  TreeMap<Integer, HashSet<ISynsetID>> map = new TreeMap<Integer, HashSet<ISynsetID>>();

  // synset 1 <hypernyms>
  HashSet<ISynsetID> s1 = new HashSet<ISynsetID>();
  s1.add(synset1.getID());
  HashSet<ISynsetID> h1 = new HashSet<ISynsetID>();
  getHypernyms(s1, h1); // i.e. fill 'h1' with the <hypernyms> of synset1

  // synset 2 <hypernyms>
  HashSet<ISynsetID> s2 = new HashSet<ISynsetID>();
  s2.add(synset2.getID());
  HashSet<ISynsetID> h2 = new HashSet<ISynsetID>();
  getHypernyms(s2, h2); // i.e. fill 'h2' with the <hypernyms> of synset2

  h1.retainAll(h2); // the joins: hypernyms shared by both synsets

  for (ISynsetID h : h1) {
    // overload (not shown here) of getSynsetDepth() keyed by synset offset, returning the
    // full depth -> <roots> map
    TreeMap<Integer, HashSet<ISynsetID>> set = getSynsetDepth(h.getOffset(), pos);
    for (Integer i : set.keySet()) {
      if (map.containsKey(i)) {
        map.get(i).add(h);
      } else {
        HashSet<ISynsetID> store = new HashSet<ISynsetID>();
        store.add(h);
        map.put(i, store);
      }
    }
  }

  int key = map.lastKey(); // greatest depth among the joins
  lcs = map.get(key);
  return lcs;
}
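/*
 * Usage sketch (illustrative): finds the deepest common subsumer(s) of "cat" and "dog"
 * (first noun senses). Assumes 'dict' is open and the noun <roots> have been loaded.
 */
private void exampleLCS() {
  ISynset cat = dict.getWord(dict.getIndexWord("cat", POS.NOUN).getWordIDs().get(0)).getSynset();
  ISynset dog = dict.getWord(dict.getIndexWord("dog", POS.NOUN).getWordIDs().get(0)).getSynset();
  for (ISynsetID lcs : getLCSbyDepth(cat, dog, "n")) {
    System.out.println("LCS: " + dict.getSynset(lcs));
  }
}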