Пример #1
0
 /**
  * Determines where {@code other} ranks among the words of the semantic space when they are
  * ordered by cosine similarity to {@code target}.
  *
  * <p>The rank is the count of words from {@code sspace.getWords()} whose similarity to
  * {@code target} is strictly greater than the similarity between {@code target} and
  * {@code other}. No word is excluded from the scan, so {@code target} itself takes part in the
  * comparison when it is among the space's words.
  *
  * @param sspace the semantic space supplying the words and their vectors
  * @param target the word against which all similarities are measured
  * @param other the word whose rank is wanted
  * @return the number of words that are more similar to {@code target} than {@code other} is
  */
 private int findRank(SemanticSpace sspace, String target, String other) {
   Vector targetVector = sspace.getVector(target);
   Vector otherVector = sspace.getVector(other);
   // The similarity that every candidate word has to beat.
   double threshold = Similarity.cosineSimilarity(targetVector, otherVector);

   int moreSimilar = 0;
   for (String candidate : sspace.getWords()) {
     double candidateSim =
         Similarity.cosineSimilarity(targetVector, sspace.getVector(candidate));
     // Ties do not count: only strictly closer words push the rank up.
     if (candidateSim > threshold) {
       moreSimilar++;
     }
   }
   return moreSimilar;
 }
Пример #2
0
  /**
   * Computes the similarity of the two edges as the Jaccard index of the neighbors of two impost
   * nodes. The impost nodes are the two nodes the edges do not have in common; the node the edges
   * do share is the keystone. Edges that share no node have a similarity of {@code 0}. Subclasses
   * may override this method to define a new method for computing edge similarity.
   *
   * <p><i>Implementation Note</i>: Subclasses that wish to override this behavior should be aware
   * that this method is likely to be called by multiple threads and therefore should make
   * provisions to be thread safe. In addition, this method may be called more than once per edge
   * pair if the similarity matrix is being computed on-the-fly.
   *
   * @param sm a matrix containing the connections between edges. A non-zero value in location (i,j)
   *     indicates a node <i>i</i> is connected to node <i>j</i> by an edge.
   * @param e1 an edge to be compared with {@code e2}
   * @param e2 an edge to be compared with {@code e1}
   * @return the similarity of the edges, or {@code 0} if the edges have no node in common
   */
  protected double getEdgeSimilarity(SparseMatrix sm, Edge e1, Edge e2) {
    // Determine the impost (unshared) node of each edge by checking all four ways in which the
    // two edges can meet at a keystone (shared) node. The keystone itself is not needed for the
    // similarity computation, so only the imposts are recorded.
    int impost1;
    int impost2;
    if (e1.from == e2.from) {
      impost1 = e1.to;
      impost2 = e2.to;
    } else if (e1.from == e2.to) {
      impost1 = e1.to;
      impost2 = e2.from;
    } else if (e1.to == e2.from) {
      // Head of e1 meets tail of e2: the keystone is e1.to.
      impost1 = e1.from;
      impost2 = e2.to;
    } else if (e1.to == e2.to) {
      impost1 = e1.from;
      impost2 = e2.from;
    } else {
      // The edges share no node, so they are not comparable.
      return 0d;
    }

    // Determine the overlap between the neighbors of the impost nodes
    int[] impost1edges = getImpostNeighbors(sm, impost1);
    int[] impost2edges = getImpostNeighbors(sm, impost2);
    return Similarity.jaccardIndex(impost1edges, impost2edges);
  }
Пример #3
0
 /**
  * Finds the rows of a matrix that are most similar to one chosen row, using a similarity measure
  * selected by its {@link Similarity.SimType}.
  *
  * <p>This overload only resolves {@code similarityType} through
  * {@code Similarity.getSimilarityFunction} and hands the result, together with the other
  * arguments, to the {@code getMostSimilar} overload that accepts a similarity function.
  *
  * @param m The {@link Matrix} containing data points to be compared
  * @param row The current row in {@code m} to be compared against all other rows
  * @param kNearestRows The number of most similar rows to retain
  * @param similarityType The similarity method to use when comparing rows
  * @return a mapping from the similarity to the {@code kNearestRows} most similar rows
  */
 public SortedMultiMap<Double, Integer> getMostSimilar(
     Matrix m, int row, int kNearestRows, Similarity.SimType similarityType) {
   // Translate the enum constant into its similarity function and delegate.
   return this.getMostSimilar(
       m,
       row,
       kNearestRows,
       Similarity.getSimilarityFunction(similarityType));
 }