Ejemplo n.º 1
0
  static double[] similarity(LatentDirichletAllocation lda0, LatentDirichletAllocation lda1) {

    int numTopics = lda0.numTopics();

    int numPairs = numTopics * (numTopics - 1);
    @SuppressWarnings({"unchecked", "rawtypes"}) // ok given use w. erasure
    ScoredObject<int[]>[] pairs = (ScoredObject<int[]>[]) new ScoredObject[numPairs];
    int pos = 0;
    for (int i = 0; i < numTopics; ++i) {
      for (int j = 0; j < numTopics; ++j) {
        if (i == j) continue;
        double divergence =
            Statistics.symmetrizedKlDivergence(
                lda0.wordProbabilities(i), lda1.wordProbabilities(j));
        pairs[pos++] = new ScoredObject<int[]>(new int[] {i, j}, divergence);
      }
    }
    Arrays.sort(pairs, ScoredObject.comparator());
    boolean[] taken0 = new boolean[numTopics];
    Arrays.fill(taken0, false);
    boolean[] taken1 = new boolean[numTopics];
    Arrays.fill(taken1, false);
    double[] scores = new double[numTopics];
    int scorePos = 0;
    for (pos = 0; pos < numPairs && scorePos < numTopics; ++pos) {
      int[] pair = pairs[pos].getObject();
      if (!taken0[pair[0]] && !taken1[pair[1]]) {
        taken0[pair[0]] = true;
        taken1[pair[1]] = true;
        scores[scorePos++] = pairs[pos].score();
      }
    }
    return scores;
  }