/**
   * Uses recursion to compute the depth of the input synset (in 'getSynsetDepth()') from a <root>.
   * A synset may have multiple parents, so we return each possible depth together with its 'home
   * hierarchy' <root>; the same <root> may therefore be reached at different depths in the WordNet
   * hierarchy. (A hedged usage sketch follows 'getSynsetDepth()' below.)
   */
  private void treecreeper(
      int depth,
      HashSet<ISynsetID> synsets,
      TreeMap<Integer, HashSet<ISynsetID>> depths,
      ArrayList<Integer> roots) {
    depth++;
    ISynset synset = null;
    HashSet<ISynsetID> hypernyms = new HashSet<ISynsetID>(); // the next 'level' up the hierarchy
    for (ISynsetID s : synsets) {
      synset = dict.getSynset(s);
      hypernyms.addAll(synset.getRelatedSynsets(Pointer.HYPERNYM)); // get the <hypernyms>
      hypernyms.addAll(
          synset.getRelatedSynsets(Pointer.HYPERNYM_INSTANCE)); // get the <hypernyms> (instances)
    }

    if (!hypernyms.isEmpty()) {
      for (ISynsetID h : hypernyms) {
        if (roots.contains(h.getOffset())) { // reached a <root> of a home hierarchy at this depth
          HashSet<ISynsetID> deep = depths.get(depth);
          if (deep == null) {
            deep = new HashSet<ISynsetID>();
            depths.put(depth, deep);
          }
          deep.add(h);
        }
      }
      treecreeper(depth, hypernyms, depths, roots);
    }
  }
  public double getSynsetDepth(ISynset synsetX, String pos) {
    // pick the <root> offsets ('home hierarchies') for the requested part of speech
    ArrayList<Integer> homehierarchies = null;
    if (pos.equalsIgnoreCase("n")) {
      homehierarchies = nounroots;
    }
    if (pos.equalsIgnoreCase("v")) {
      homehierarchies = verbroots;
    }

    // climb the hypernym tree, recording at which depths a <root> is reached
    TreeMap<Integer, HashSet<ISynsetID>> depths = new TreeMap<Integer, HashSet<ISynsetID>>();
    HashSet<ISynsetID> synsets = new HashSet<ISynsetID>();
    synsets.add(synsetX.getID());
    treecreeper(0, synsets, depths, homehierarchies);
    if (depths.isEmpty()) {
      return 0.0; // i.e. synsetX is itself a <root>, nothing 'above' it
    }
    // node counting, so the start and end nodes are included in the path length
    return depths.lastKey() + 2.0;
  }
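  // Hedged usage sketch (assumptions: 'dict' is an open JWI IDictionary, 'nounroots'
  // already holds the noun root-synset offsets; the method name and the example word
  // 'dog' are illustrative only, not part of the original class).
  private void demoSynsetDepth() {
    IIndexWord idx = dict.getIndexWord("dog", POS.NOUN);
    if (idx == null) {
      return; // word not in WordNet
    }
    ISynset dog = dict.getWord(idx.getWordIDs().get(0)).getSynset(); // first sense of 'dog'
    double depth = getSynsetDepth(dog, "n"); // node count from 'dog' up to a noun <root>
    System.out.println("depth(dog#n#1) = " + depth);
  }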
 // getHypernyms for both synsets
 // get the joins -- i.e. the intersection of the two hypernym sets
 // get the join(s) with the greatest depth in WordNet using getSynsetDepth()
 // (a hedged sketch of this recipe follows the method below)
 private void getHypernyms(HashSet<ISynsetID> synsets, HashSet<ISynsetID> allHypernyms) {
   HashSet<ISynsetID> hypernyms = new HashSet<ISynsetID>();
   for (ISynsetID s : synsets) {
     ISynset synset = dict.getSynset(s);
     hypernyms.addAll(synset.getRelatedSynsets(Pointer.HYPERNYM)); // direct <hypernyms>, if any
     hypernyms.addAll(
         synset.getRelatedSynsets(Pointer.HYPERNYM_INSTANCE)); // instance <hypernyms>, if any
   }
   if (!hypernyms.isEmpty()) {
     allHypernyms.addAll(hypernyms);
     getHypernyms(hypernyms, allHypernyms);
   }
 }
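 // Hedged sketch of the recipe described above: collect every hypernym of two synsets
 // and intersect the sets to obtain the candidate joins ('demoJoins' is an illustrative
 // name, not part of the original class).
 private HashSet<ISynsetID> demoJoins(ISynset a, ISynset b) {
   HashSet<ISynsetID> sa = new HashSet<ISynsetID>();
   sa.add(a.getID());
   HashSet<ISynsetID> ha = new HashSet<ISynsetID>();
   getHypernyms(sa, ha); // all hypernyms of 'a'
   HashSet<ISynsetID> sb = new HashSet<ISynsetID>();
   sb.add(b.getID());
   HashSet<ISynsetID> hb = new HashSet<ISynsetID>();
   getHypernyms(sb, hb); // all hypernyms of 'b'
   ha.retainAll(hb); // the joins: synsets that subsume both 'a' and 'b'
   return ha;
 }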
Example #4
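  // Returns the words in the first (most frequent) noun synset of 'phrase',
  // i.e. its WordNet synonyms for that sense.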
  public List getSynonyms(String phrase) {
    List synonymsList = new ArrayList();
    if (dict == null) {
      System.out.println("Dictionary is null");
      System.exit(0);
    }
    IIndexWord idxWord = dict.getIndexWord(phrase, POS.NOUN);
    if (idxWord != null) {
      IWordID wordID = (IWordID) idxWord.getWordIDs().get(0); // first (most frequent) sense
      IWord word = dict.getWord(wordID);
      ISynset synset = word.getSynset();
      List words = new ArrayList();
      for (IWord w : synset.getWords()) {
        words.add(w);
      }
      synonymsList = keepUniqueTerms(words);
    }
    return synonymsList;
  }
  public double getSynsetDepth(String word, int senseno, String pos) {
    // NOTE: only the noun hierarchy is handled here; the verb branch was left disabled
    IIndexWord word1 = null;
    ArrayList<Integer> homehierarchies = null;
    try {
      word1 = dict.getIndexWord(word, POS.NOUN);
      homehierarchies = nounroots;

      IWordID word1ID = word1.getWordIDs().get(senseno - 1); // get the requested sense of the word
      ISynset synset1 = dict.getWord(word1ID).getSynset();

      // climb the hypernym tree, recording at which depths a <root> is reached
      TreeMap<Integer, HashSet<ISynsetID>> depths = new TreeMap<Integer, HashSet<ISynsetID>>();
      HashSet<ISynsetID> synsets = new HashSet<ISynsetID>();
      synsets.add(synset1.getID());
      treecreeper(0, synsets, depths, homehierarchies);
      if (depths.isEmpty()) {
        return 0.0; // i.e. is a <root>, nothing 'above' it
      }
      // node counting, so the start and end nodes are included in the path length
      return depths.lastKey() + 2.0;
    } catch (Exception ex) {
      // word not in WordNet (or sense number out of range) -- fall through and report depth 0.0
    }
    return 0.0;
  }
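 // Counts how many hyponym 'levels' lie below the given synsets by walking down
 // breadth-first; returns 0 if none of them has hyponyms.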
 private int treediver(HashSet<ISynsetID> set) {
   int depth = 0;
   ArrayList<ISynsetID> queue = new ArrayList<ISynsetID>();
   queue.addAll(set);
   boolean search = true;
   while (search) {
     HashSet<ISynsetID> hyponyms = new HashSet<ISynsetID>();
     while (!queue.isEmpty()) {
       ISynset synset = dict.getSynset(queue.remove(0));
       hyponyms.addAll(
           synset.getRelatedSynsets(Pointer.HYPONYM)); // get the <hyponyms> if there are any
       hyponyms.addAll(
           synset.getRelatedSynsets(
               Pointer.HYPONYM_INSTANCE)); // get the <hyponyms> (instances) if there are any
     }
     if (hyponyms.isEmpty()) {
       search = false;
     } else {
       depth++;
       queue.addAll(hyponyms);
     }
   }
   return (depth);
 }
Example #7
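  // Returns the words of the direct hypernym synsets of the first (most frequent)
  // noun sense of 'phrase'.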
  public List getHypernym(String phrase) {
    List hypernymsList = new ArrayList();
    if (dict == null) {
      System.out.println("Dictionary is null");
      System.exit(0);
    }
    IIndexWord idxWord = dict.getIndexWord(phrase, POS.NOUN);
    if (idxWord != null) {
      IWordID wordID = (IWordID) idxWord.getWordIDs().get(0); // first (most frequent) sense
      IWord word = dict.getWord(wordID);
      ISynset synset = word.getSynset();
      List hypernyms = synset.getRelatedSynsets(Pointer.HYPERNYM);
      for (Object o : hypernyms) {
        ISynsetID sid = (ISynsetID) o;
        hypernymsList.addAll(dict.getSynset(sid).getWords()); // words of each direct hypernym
      }
      hypernymsList = keepUniqueTerms(hypernymsList);
    }
    return hypernymsList;
  }
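  /**
   * Attempts to attach a new lemma to WordNet via its synonyms: each candidate synset of the
   * synonyms (and its hypernyms) is counted, then the method returns either the best gloss match
   * when only one synonym is in WordNet, the unambiguous most frequent synset, or the
   * gloss-similarity winner among tied synsets. Returns null if the target lemma already appears
   * to be in WordNet near these synsets, or if no synonym is in WordNet.
   */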
  private Duple<CrownOperations.Reason, ISynset> getEstimatedSynonym(
      String targetLemma, Set<String> synonyms, POS pos, String gloss) {

    Counter<ISynset> synsetCounts = new ObjectCounter<ISynset>();

    List<String> lemmasInWn = new ArrayList<String>();
    for (String lemma : synonyms) {
      // Get the WordNet synsets for this lemma, if any exist
      Set<ISynset> senses = WordNetUtils.getSynsets(dict, lemma, pos);
      if (senses.isEmpty()) continue;

      lemmasInWn.add(lemma);
      synsetCounts.countAll(senses);

      // Get the hypernyms of the synset and count their occurrence too
      for (ISynset synset : senses) {
        // Sanity check: skip attaching this Entry if its lemma already appears
        // anywhere near the synonyms.  This check can produce false positives,
        // since we might avoid placing the lemma somewhere valid (in which case
        // it would have more than one valid location), but it helps avoid noisy
        // integration.
        if (WordNetUtils.isAlreadyInWordNet(dict, targetLemma, pos, synset)) {
          return null;
        }

        for (ISynsetID hyper : synset.getRelatedSynsets(Pointer.HYPERNYM)) {
          ISynset hyperSyn = dict.getSynset(hyper);
          if (WordNetUtils.isAlreadyInWordNet(dict, targetLemma, pos, hyperSyn)) {
            return null;
          }
          synsetCounts.count(hyperSyn);
        }
      }
    }

    // Return null if we couldn't find any of the lemma's synonyms or their
    // hypernyms in WordNet
    if (synsetCounts.items().isEmpty()) return null;

    // If there was only one lemma in this list in WordNet, try comparing
    // the glosses for just that word to find a match
    if (lemmasInWn.size() == 1) {
      double maxScore = 0;
      ISynset best = null;
      String bestGloss = null;
      Set<ISynset> candidateSynonymSynsets = WordNetUtils.getSynsets(dict, lemmasInWn.get(0), pos);
      for (ISynset candidate : candidateSynonymSynsets) {

        String wnExtendedGloss = WordNetUtils.getGlossWithoutExamples(candidate);
        double score = simFunc.compare(gloss, wnExtendedGloss);
        if (maxScore < score) {
          maxScore = score;
          best = candidate;
          bestGloss = wnExtendedGloss;
        }
      }

      CrownOperations.Reason r = new CrownOperations.Reason(getClass());
      r.set("relation_type", "synonym");
      r.set("heuristic", "single-synonym");
      r.set("max_score", maxScore);
      return new Duple<CrownOperations.Reason, ISynset>(r, best);
    } else {
      // Check for whether there were ties in the max
      ISynset mostFreq = synsetCounts.max();
      int mostFreqCount = synsetCounts.getCount(mostFreq);
      List<ISynset> ties = new ArrayList<ISynset>();
      for (ISynset syn : synsetCounts.items()) {
        int c = synsetCounts.getCount(syn);
        if (c == mostFreqCount) ties.add(syn);
      }

      // If there was only one synset that had the maximum count, then we
      // report this
      if (ties.size() == 1) {

        CrownOperations.Reason r = new CrownOperations.Reason(getClass());
        r.set("relation_type", "synonym");
        r.set("heuristic", "unambiguous-max");
        r.set("count", mostFreqCount);
        return new Duple<CrownOperations.Reason, ISynset>(r, mostFreq);
      }
      // Otherwise, we try breaking ties between the synsets using gloss
      // similarity
      else {

        double maxScore = 0;
        ISynset best = null;
        String bestGloss = null;
        for (ISynset candidate : ties) {
          String wnExtendedGloss = WordNetUtils.getGlossWithoutExamples(candidate);
          double score = simFunc.compare(gloss, wnExtendedGloss);
          if (maxScore < score) {
            maxScore = score;
            best = candidate;
            bestGloss = wnExtendedGloss;
          }
        }

        CrownOperations.Reason r = new CrownOperations.Reason(getClass());
        r.set("relation_type", "synonym");
        r.set("heuristic", "tied-synonyms");
        r.set("max_score", maxScore);
        return new Duple<CrownOperations.Reason, ISynset>(r, best);
      }
    }
  }
  // Given two input synsets, finds the least common subsumer (LCS) of them.
  // If there are multiple candidates for the LCS (due to multiple inheritance in WordNet),
  // the LCS with the greatest depth is chosen (i.e., the candidate whose shortest path to
  // the root is the longest).
  public HashSet<ISynsetID> getLCSbyDepth(ISynset synset1, ISynset synset2, String pos) {
    HashSet<ISynsetID> lcs = new HashSet<ISynsetID>();

    if (synset1.equals(synset2)) {
      HashSet<ISynsetID> identity = new HashSet<ISynsetID>();
      identity.add(synset1.getID());
      return (identity);
    }
    // !!! could be <roots>, in which case there is no subsumer !!!
    double d1 = getSynsetDepth(synset1, pos);
    double d2 = getSynsetDepth(synset2, pos);
    if (d1 == 0.0 && d2 == 0.0) {
      return (lcs); // !!! return empty set !!!
    }
    // !!! *1* of them could be a <root>, in which case that root itself is returned as the subsumer !!!
    if (d1 == 0.0 || d2 == 0.0) {
      if (d1 == 0.0) {
        lcs.add(synset1.getID());
      }
      if (d2 == 0.0) {
        lcs.add(synset2.getID());
      }
      return (lcs); // !!! return !!!
    }
    TreeMap<Integer, HashSet<ISynsetID>> map = new TreeMap<Integer, HashSet<ISynsetID>>();
    // synset 1 <hypernyms>
    HashSet<ISynsetID> s1 = new HashSet<ISynsetID>();
    s1.add(synset1.getID());
    HashSet<ISynsetID> h1 = new HashSet<ISynsetID>();
    getHypernyms(s1, h1); // i.e. fill 'h1' with <hypernyms> of synset1
    // synset 2 <hypernyms>
    HashSet<ISynsetID> s2 = new HashSet<ISynsetID>();
    s2.add(synset2.getID());
    HashSet<ISynsetID> h2 = new HashSet<ISynsetID>();
    getHypernyms(s2, h2); // i.e. fill 'h2' with <hypernyms> of synset2
    h1.retainAll(h2); // the joins: hypernyms common to both synsets
    for (ISynsetID h : h1) {
      // depths at which this join reaches a <root> (overload of getSynsetDepth taking an offset)
      TreeMap<Integer, HashSet<ISynsetID>> set = getSynsetDepth(h.getOffset(), pos);
      for (Integer i : set.keySet()) {
        if (map.containsKey(i)) {
          HashSet<ISynsetID> store = map.get(i);
          store.add(h);
          map.put(i, store);
        } else {
          HashSet<ISynsetID> store = new HashSet<ISynsetID>();
          store.add(h);
          map.put(i, store);
        }
      }
    }
    if (map.isEmpty()) {
      lcs.addAll(h1); // no joins, or the only joins are <roots> with no recorded depth
      return (lcs);
    }
    int key = map.lastKey(); // the greatest depth at which any join reaches a <root>
    lcs = map.get(key);
    return (lcs);
  }
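  // Hedged usage sketch: given two noun synsets, report the LCS candidate(s) and their depth
  // (assumes 'dict' is open and the root-offset lists are initialised; 'demoLCS' and the
  // parameter names are illustrative only).
  private void demoLCS(ISynset cat, ISynset dog) {
    HashSet<ISynsetID> lcs = getLCSbyDepth(cat, dog, "n");
    for (ISynsetID id : lcs) {
      ISynset subsumer = dict.getSynset(id);
      System.out.println("LCS candidate: " + subsumer + " at depth " + getSynsetDepth(subsumer, "n"));
    }
  }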