/** * Returns the ranking <i>k</i> such that {@code other} is the k<sup>th</th> most similar word to * {@code target} in the semantic space. */ private int findRank(SemanticSpace sspace, String target, String other) { Vector v1 = sspace.getVector(target); Vector v2 = sspace.getVector(other); // Compute the base similarity between the two words double baseSim = Similarity.cosineSimilarity(v1, v2); int rank = 0; // Next, count how many words are more similar to the target than the // other word is for (String word : sspace.getWords()) { Vector v = sspace.getVector(word); double sim = Similarity.cosineSimilarity(v1, v); if (sim > baseSim) rank++; } return rank; }
/**
 * Serializes the words of {@code sspace} to {@code outputFile} as a {@link BasisMapping}.
 *
 * <p>A new {@link StringBasisMapping} is populated by calling {@code getDimension} once for
 * each word returned by {@code sspace.getWords()}; this is relied on to register the word in
 * the mapping (NOTE(review): confirm against {@code StringBasisMapping}). The mapping is then
 * written to {@code outputFile} using Java object serialization.
 *
 * @param sspace the semantic space whose words are saved
 * @param outputFile the file that receives the serialized mapping
 * @throws IOException if the file cannot be opened or the mapping cannot be written
 */
protected void saveSSpace(SemanticSpace sspace, File outputFile) throws IOException {
    BasisMapping<String, String> savedTerms = new StringBasisMapping();
    for (String term : sspace.getWords()) {
        savedTerms.getDimension(term);
    }

    FileOutputStream fileStream = new FileOutputStream(outputFile);
    try {
        ObjectOutputStream ouStream = new ObjectOutputStream(fileStream);
        ouStream.writeObject(savedTerms);
        // Closing the object stream flushes its buffered data to the file.
        ouStream.close();
    } finally {
        // Always release the file handle, even when serialization fails part-way.
        // Closing an already-closed stream has no effect.
        fileStream.close();
    }
}
/** * Returns the association of the two words on a scale of 0 to 1. * * @return the assocation or {@code null} if either {@code word1} or {@code word2} are not in the * semantic space */ protected Double computeAssociation(SemanticSpace sspace, String word1, String word2) { Vector v1 = sspace.getVector(word1); Vector v2 = sspace.getVector(word2); if (v1 == null || v2 == null) return null; // Find the ranks of each of the two words to each other double rank1 = findRank(sspace, word1, word2); double rank2 = findRank(sspace, word2, word1); return 2d / (rank1 + rank2); }