private Duple<CrownOperations.Reason, ISynset> getEstimatedSynonym( String targetLemma, Set<String> synonyms, POS pos, String gloss) { Counter<ISynset> synsetCounts = new ObjectCounter<ISynset>(); List<String> lemmasInWn = new ArrayList<String>(); for (String lemma : synonyms) { // Get the WordNet sysnet if it exists Set<ISynset> senses = WordNetUtils.getSynsets(dict, lemma, pos); if (senses.isEmpty()) continue; lemmasInWn.add(lemma); synsetCounts.countAll(senses); // Get the hypernyms of the synset and count their occurrence too for (ISynset synset : senses) { // Do a sanity check that avoids attaching this Entry if its // lemma appears anywhere near the synonoyms. This check // potentially has some false positives since we might avoid // putting the lemma somewhere valid (in which case it would // have more than would valid location) but is used to avoid // noisy integration if (WordNetUtils.isAlreadyInWordNet(dict, targetLemma, pos, synset)) { return null; } for (ISynsetID hyper : synset.getRelatedSynsets(Pointer.HYPERNYM)) { ISynset hyperSyn = dict.getSynset(hyper); if (WordNetUtils.isAlreadyInWordNet(dict, targetLemma, pos, hyperSyn)) { return null; } synsetCounts.count(hyperSyn); } } } // Return null if we couldn't find any of the lemma's synonyms or // hyponyms in WordNet if (synsetCounts.items().isEmpty()) return null; // If there was only one lemma in this list in WordNet, try comparing // the glosses for just that word to find a match if (lemmasInWn.size() == 1) { double maxScore = 0; ISynset best = null; String bestGloss = null; Set<ISynset> candidateSynonymSynsets = WordNetUtils.getSynsets(dict, lemmasInWn.get(0), pos); for (ISynset candidate : candidateSynonymSynsets) { String wnExtendedGloss = WordNetUtils.getGlossWithoutExamples(candidate); double score = simFunc.compare(gloss, wnExtendedGloss); if (maxScore < score) { maxScore = score; best = candidate; bestGloss = wnExtendedGloss; } } CrownOperations.Reason r = new CrownOperations.Reason(getClass()); r.set("relation_type", "synonym"); r.set("heuristic", "single-synonym"); r.set("max_score", maxScore); return new Duple<CrownOperations.Reason, ISynset>(r, best); } else { // Check for whether there were ties in the max ISynset mostFreq = synsetCounts.max(); int mostFreqCount = synsetCounts.getCount(mostFreq); List<ISynset> ties = new ArrayList<ISynset>(); for (ISynset syn : synsetCounts.items()) { int c = synsetCounts.getCount(syn); if (c == mostFreqCount) ties.add(syn); } // If there was only one synset that had the maximum count, then we // report this if (ties.size() == 1) { CrownOperations.Reason r = new CrownOperations.Reason(getClass()); r.set("relation_type", "synonym"); r.set("heuristic", "unambiguous-max"); r.set("count", mostFreqCount); return new Duple<CrownOperations.Reason, ISynset>(r, mostFreq); } // Otherwise, we try breaking ties between the synsets using gloss // similarity else { double maxScore = 0; ISynset best = null; String bestGloss = null; for (ISynset candidate : ties) { String wnExtendedGloss = WordNetUtils.getGlossWithoutExamples(candidate); double score = simFunc.compare(gloss, wnExtendedGloss); if (maxScore < score) { maxScore = score; best = candidate; bestGloss = wnExtendedGloss; } } CrownOperations.Reason r = new CrownOperations.Reason(getClass()); r.set("relation_type", "synonym"); r.set("heuristic", "tied-synonyms"); r.set("max_score", maxScore); return new Duple<CrownOperations.Reason, ISynset>(r, best); } } }