/** * Returns the ranking <i>k</i> such that {@code other} is the k<sup>th</th> most similar word to * {@code target} in the semantic space. */ private int findRank(SemanticSpace sspace, String target, String other) { Vector v1 = sspace.getVector(target); Vector v2 = sspace.getVector(other); // Compute the base similarity between the two words double baseSim = Similarity.cosineSimilarity(v1, v2); int rank = 0; // Next, count how many words are more similar to the target than the // other word is for (String word : sspace.getWords()) { Vector v = sspace.getVector(word); double sim = Similarity.cosineSimilarity(v1, v); if (sim > baseSim) rank++; } return rank; }
/** * Computes the similarity of the two edges as the Jaccard index of the neighbors of two impost * nodes. The impost nodes are the two nodes the edges do not have in common. Subclasses may * override this method to define a new method for computing edge similarity. * * <p><i>Implementation Note</i>: Subclasses that wish to override this behavior should be aware * that this method is likely to be called by multiple threads and therefor should make provisions * to be thread safe. In addition, this method may be called more than once per edge pair if the * similarity matrix is being computed on-the-fly. * * @param sm a matrix containing the connections between edges. A non-zero value in location (i,j) * indicates a node <i>i</i> is connected to node <i>j</i> by an edge. * @param e1 an edge to be compared with {@code e2} * @param e2 an edge to be compared with {@code e1} * @return the similarity of the edges.a */ protected double getEdgeSimilarity(SparseMatrix sm, Edge e1, Edge e2) { // Determing the keystone (shared) node by the edges and the other two // impost (unshared) nodes. int keystone = -1; int impost1 = -1; int impost2 = -1; if (e1.from == e2.from) { keystone = e1.from; impost1 = e1.to; impost2 = e2.to; } else if (e1.from == e2.to) { keystone = e1.from; impost1 = e1.to; impost2 = e2.from; } else if (e2.to == e1.from) { keystone = e1.from; impost1 = e1.to; impost2 = e2.from; } else if (e1.to == e2.to) { keystone = e1.to; impost1 = e1.from; impost2 = e2.from; } else return 0d; // Determine the overlap between the neighbors of the impost nodes int[] impost1edges = getImpostNeighbors(sm, impost1); int[] impost2edges = getImpostNeighbors(sm, impost2); double similarity = Similarity.jaccardIndex(impost1edges, impost2edges); return similarity; }
/**
 * Finds the rows of {@code m} that are most similar to the given row, using the similarity
 * measure identified by {@code similarityType}.
 *
 * <p>This is a convenience overload: it resolves {@code similarityType} through
 * {@code Similarity.getSimilarityFunction} and forwards all other arguments unchanged to the
 * {@code getMostSimilar} overload that accepts the resolved function.
 *
 * @param m The {@link Matrix} containing data points to be compared
 * @param row The current row in {@code m} to be compared against all other rows
 * @param kNearestRows The number of most similar rows to retain
 * @param similarityType The similarity method to use when comparing rows
 * @return a mapping from the similarity to the {@code kNearestRows} most similar rows
 */
public SortedMultiMap<Double, Integer> getMostSimilar(
        Matrix m,
        int row,
        int kNearestRows,
        Similarity.SimType similarityType) {
    return this.getMostSimilar(
        m,
        row,
        kNearestRows,
        Similarity.getSimilarityFunction(similarityType));
}