예제 #1
0
  /**
   * Greedily aligns the topics of two LDA models and returns the divergence of each aligned pair.
   *
   * <p>The symmetrized KL divergence between the word distributions of every topic of
   * {@code lda0} and every topic of {@code lda1} is computed, the resulting pairs are sorted with
   * {@code ScoredObject.comparator()}, and pairs are then accepted in sorted order whenever
   * neither of their two topics has already been matched.
   *
   * <p>Topic indices carry no meaning across two separately estimated models, so pairs whose
   * indices happen to be equal are candidates like any other. Considering all
   * {@code numTopics * numTopics} pairs also guarantees that every topic is matched, so every
   * slot of the result holds a computed divergence rather than a default {@code 0.0}.
   *
   * <p>Only topics {@code 0 .. lda0.numTopics() - 1} of {@code lda1} are examined.
   * NOTE(review): presumably both models are expected to have the same number of topics;
   * confirm against callers.
   *
   * @param lda0 First model; its topic count determines the length of the result.
   * @param lda1 Second model, whose topics are matched against those of {@code lda0}.
   * @return Array of length {@code lda0.numTopics()} holding the divergences of the matched
   *     pairs, in the order in which the pairs were accepted.
   */
  static double[] similarity(LatentDirichletAllocation lda0, LatentDirichletAllocation lda1) {

    int numTopics = lda0.numTopics();

    // Score every (topic of lda0, topic of lda1) combination, equal indices included.
    int numPairs = numTopics * numTopics;
    @SuppressWarnings({"unchecked", "rawtypes"}) // ok given use w. erasure
    ScoredObject<int[]>[] pairs = (ScoredObject<int[]>[]) new ScoredObject[numPairs];
    int pos = 0;
    for (int i = 0; i < numTopics; ++i) {
      for (int j = 0; j < numTopics; ++j) {
        double divergence =
            Statistics.symmetrizedKlDivergence(
                lda0.wordProbabilities(i), lda1.wordProbabilities(j));
        pairs[pos++] = new ScoredObject<int[]>(new int[] {i, j}, divergence);
      }
    }
    Arrays.sort(pairs, ScoredObject.comparator());

    // Freshly allocated boolean arrays are already all false; no explicit fill is needed.
    boolean[] taken0 = new boolean[numTopics];
    boolean[] taken1 = new boolean[numTopics];
    double[] scores = new double[numTopics];
    int scorePos = 0;
    // Walk the sorted pairs and accept a pair only if both of its topics are still unmatched.
    for (pos = 0; pos < numPairs && scorePos < numTopics; ++pos) {
      int[] pair = pairs[pos].getObject();
      if (!taken0[pair[0]] && !taken1[pair[1]]) {
        taken0[pair[0]] = true;
        taken1[pair[1]] = true;
        scores[scorePos++] = pairs[pos].score();
      }
    }
    return scores;
  }
 /**
  * Builds a list containing one scored object per entry of this map, using the entry's key as
  * the object and the entry's value as its score, and sorts the list with
  * {@code ScoredObject.reverseComparator()} so that it is in decreasing order of value.
  *
  * @return Newly allocated list of scored objects in decreasing order of value.
  */
 public List<ScoredObject<E>> scoredObjectsOrderedByValueList() {
   Set<Map.Entry<E, Double>> entries = entrySet();
   // Presize the list: exactly one scored object is produced per entry.
   List<ScoredObject<E>> scoredObjects = new ArrayList<ScoredObject<E>>(entries.size());
   for (Map.Entry<E, Double> e : entries) {
     scoredObjects.add(new ScoredObject<E>(e.getKey(), e.getValue().doubleValue()));
   }
   Collections.sort(scoredObjects, ScoredObject.reverseComparator());
   return scoredObjects;
 }