/** * Uses recurson to get the depth of the input synset (in 'getSynsetDepth()') from a <root> A * synset may have multiple parents, thus we returneach possible depth and 'home hierarchy' <root> * Thus, we may have the same <root> at different depths in the WordNet hierarchy */ private void treecreeper( int depth, HashSet<ISynsetID> synsets, TreeMap<Integer, HashSet<ISynsetID>> depths, ArrayList<Integer> roots) { depth++; ISynset synset = null; HashSet<ISynsetID> hypernyms = new HashSet<ISynsetID>(); // next 'level'(inverse of 'depth') for (ISynsetID s : synsets) { synset = dict.getSynset(s); hypernyms.addAll(synset.getRelatedSynsets(Pointer.HYPERNYM)); // get the <hypernyms> hypernyms.addAll( synset.getRelatedSynsets(Pointer.HYPERNYM_INSTANCE)); // get the <hypernyms> (instances) } if (!hypernyms.isEmpty()) { for (ISynsetID h : hypernyms) { int offset = h.getOffset(); if (roots.contains(offset)) { if (depths.containsKey(depth)) { HashSet<ISynsetID> deep = depths.get(depth); deep.add(h); depths.put(depth, deep); } else { HashSet<ISynsetID> deep = new HashSet<ISynsetID>(); deep.add(h); depths.put(depth, deep); } } } treecreeper(depth, hypernyms, depths, roots); } return; }
// getHypernyms for both synsets // get joins -- i.e. the intersection of the hypernyms // get the join(s) with greatest depth in WordNet using getSynsetDepth() private void getHypernyms(HashSet<ISynsetID> synsets, HashSet<ISynsetID> allhypernms) { HashSet<ISynsetID> hypernyms = new HashSet<ISynsetID>(); for (ISynsetID s : synsets) { ISynset synset = dict.getSynset(s); hypernyms.addAll( synset.getRelatedSynsets(Pointer.HYPERNYM)); // get the <hypernyms> if there are any hypernyms.addAll( synset.getRelatedSynsets( Pointer.HYPERNYM_INSTANCE)); // get the <hypernyms> (instances) if there are any } if (!hypernyms.isEmpty()) { allhypernms.addAll(hypernyms); getHypernyms(hypernyms, allhypernms); } return; }
private int treediver(HashSet<ISynsetID> set) { int depth = 0; ArrayList<ISynsetID> queue = new ArrayList<ISynsetID>(); queue.addAll(set); boolean search = true; while (search) { HashSet<ISynsetID> hyponyms = new HashSet<ISynsetID>(); while (!queue.isEmpty()) { ISynset synset = dict.getSynset(queue.remove(0)); hyponyms.addAll( synset.getRelatedSynsets(Pointer.HYPONYM)); // get the <hyponyms> if there are any hyponyms.addAll( synset.getRelatedSynsets( Pointer.HYPONYM_INSTANCE)); // get the <hyponyms> (instances) if there are any } if (hyponyms.isEmpty()) { search = false; } else { depth++; queue.addAll(hyponyms); } } return (depth); }
private Duple<CrownOperations.Reason, ISynset> getEstimatedSynonym( String targetLemma, Set<String> synonyms, POS pos, String gloss) { Counter<ISynset> synsetCounts = new ObjectCounter<ISynset>(); List<String> lemmasInWn = new ArrayList<String>(); for (String lemma : synonyms) { // Get the WordNet sysnet if it exists Set<ISynset> senses = WordNetUtils.getSynsets(dict, lemma, pos); if (senses.isEmpty()) continue; lemmasInWn.add(lemma); synsetCounts.countAll(senses); // Get the hypernyms of the synset and count their occurrence too for (ISynset synset : senses) { // Do a sanity check that avoids attaching this Entry if its // lemma appears anywhere near the synonoyms. This check // potentially has some false positives since we might avoid // putting the lemma somewhere valid (in which case it would // have more than would valid location) but is used to avoid // noisy integration if (WordNetUtils.isAlreadyInWordNet(dict, targetLemma, pos, synset)) { return null; } for (ISynsetID hyper : synset.getRelatedSynsets(Pointer.HYPERNYM)) { ISynset hyperSyn = dict.getSynset(hyper); if (WordNetUtils.isAlreadyInWordNet(dict, targetLemma, pos, hyperSyn)) { return null; } synsetCounts.count(hyperSyn); } } } // Return null if we couldn't find any of the lemma's synonyms or // hyponyms in WordNet if (synsetCounts.items().isEmpty()) return null; // If there was only one lemma in this list in WordNet, try comparing // the glosses for just that word to find a match if (lemmasInWn.size() == 1) { double maxScore = 0; ISynset best = null; String bestGloss = null; Set<ISynset> candidateSynonymSynsets = WordNetUtils.getSynsets(dict, lemmasInWn.get(0), pos); for (ISynset candidate : candidateSynonymSynsets) { String wnExtendedGloss = WordNetUtils.getGlossWithoutExamples(candidate); double score = simFunc.compare(gloss, wnExtendedGloss); if (maxScore < score) { maxScore = score; best = candidate; bestGloss = wnExtendedGloss; } } CrownOperations.Reason r = new CrownOperations.Reason(getClass()); r.set("relation_type", "synonym"); r.set("heuristic", "single-synonym"); r.set("max_score", maxScore); return new Duple<CrownOperations.Reason, ISynset>(r, best); } else { // Check for whether there were ties in the max ISynset mostFreq = synsetCounts.max(); int mostFreqCount = synsetCounts.getCount(mostFreq); List<ISynset> ties = new ArrayList<ISynset>(); for (ISynset syn : synsetCounts.items()) { int c = synsetCounts.getCount(syn); if (c == mostFreqCount) ties.add(syn); } // If there was only one synset that had the maximum count, then we // report this if (ties.size() == 1) { CrownOperations.Reason r = new CrownOperations.Reason(getClass()); r.set("relation_type", "synonym"); r.set("heuristic", "unambiguous-max"); r.set("count", mostFreqCount); return new Duple<CrownOperations.Reason, ISynset>(r, mostFreq); } // Otherwise, we try breaking ties between the synsets using gloss // similarity else { double maxScore = 0; ISynset best = null; String bestGloss = null; for (ISynset candidate : ties) { String wnExtendedGloss = WordNetUtils.getGlossWithoutExamples(candidate); double score = simFunc.compare(gloss, wnExtendedGloss); if (maxScore < score) { maxScore = score; best = candidate; bestGloss = wnExtendedGloss; } } CrownOperations.Reason r = new CrownOperations.Reason(getClass()); r.set("relation_type", "synonym"); r.set("heuristic", "tied-synonyms"); r.set("max_score", maxScore); return new Duple<CrownOperations.Reason, ISynset>(r, best); } } }