예제 #1
0
 /**
  * Counts the words in the semantic space whose cosine similarity to {@code target} is strictly
  * greater than the cosine similarity between {@code target} and {@code other}. The count is the
  * ranking <i>k</i> such that {@code other} is the k<sup>th</sup> most similar word to
  * {@code target}.
  *
  * @param sspace the semantic space supplying the word list and the word vectors
  * @param target the word whose neighbors are being ranked
  * @param other the word whose rank relative to {@code target} is wanted
  * @return the number of words that are more similar to {@code target} than {@code other} is
  */
 private int findRank(SemanticSpace sspace, String target, String other) {
   Vector targetVector = sspace.getVector(target);
   // Similarity of the pair under test; it is the threshold every word is compared against
   double threshold = Similarity.cosineSimilarity(targetVector, sspace.getVector(other));

   int closerWords = 0;
   for (String candidate : sspace.getWords()) {
     double candidateSim = Similarity.cosineSimilarity(targetVector, sspace.getVector(candidate));
     // Only strictly greater similarities count, so ties do not push the rank up
     if (candidateSim > threshold) {
       closerWords++;
     }
   }
   return closerWords;
 }
예제 #2
0
  /**
   * Builds a {@link BasisMapping} from the words of the semantic space and serializes it to
   * {@code outputFile} using Java object serialization.
   *
   * @param sspace the semantic space whose words are recorded in the mapping
   * @param outputFile the file that the serialized mapping is written to
   * @throws IOException if the file cannot be opened or the mapping cannot be written
   */
  protected void saveSSpace(SemanticSpace sspace, File outputFile) throws IOException {
    BasisMapping<String, String> savedTerms = new StringBasisMapping();
    // Requesting a dimension for each word presumably registers it in the mapping
    // NOTE(review): confirm against StringBasisMapping
    for (String term : sspace.getWords()) {
      savedTerms.getDimension(term);
    }

    // Open the file stream on its own so that it is closed even when wrapping it or writing to
    // it fails; try/finally is used rather than try-with-resources to stay source-compatible
    // with older Java versions.
    FileOutputStream fileStream = new FileOutputStream(outputFile);
    try {
      ObjectOutputStream ouStream = new ObjectOutputStream(fileStream);
      ouStream.writeObject(savedTerms);
      // Push any data buffered by the object stream into the file before it is closed
      ouStream.flush();
    } finally {
      fileStream.close();
    }
  }
예제 #3
0
  /**
   * Returns the association of the two words on a scale of 0 to 1, computed as 2 divided by the
   * sum of the rank of each word with respect to the other.
   *
   * @param sspace the semantic space containing the word vectors
   * @param word1 the first word
   * @param word2 the second word
   * @return the association, or {@code null} if either {@code word1} or {@code word2} is not in
   *     the semantic space
   */
  protected Double computeAssociation(SemanticSpace sspace, String word1, String word2) {
    Vector v1 = sspace.getVector(word1);
    Vector v2 = sspace.getVector(word2);
    if (v1 == null || v2 == null) {
      return null;
    }

    // Find the ranks of each of the two words to each other
    int rank1 = findRank(sspace, word1, word2);
    int rank2 = findRank(sspace, word2, word1);
    int rankSum = rank1 + rank2;

    // A rank of 0 means no word is strictly more similar than the other word (for example when
    // both words are the same). Without this guard the division would yield Infinity (sum 0) or
    // 2.0 (sum 1), both outside the documented 0-to-1 scale, so report the maximum association.
    if (rankSum < 2) {
      return 1d;
    }
    return 2d / rankSum;
  }