/**
 * Weighted Jensen-Shannon divergence between one training instance and one
 * cluster of a partition.
 *
 * <p>The result is {@code pi1 * kl1 + pi2 * kl2}, where {@code kl1} sums
 * {@code p * log(p / (pi1 * p + pi2 * q))} over the values stored in the
 * instance and {@code kl2} sums {@code q * log(q / (pi1 * p + pi2 * q))}
 * over all {@code m_numAttributes} attributes. Here {@code p} is the entry
 * of {@code input.Py_x} for the instance and {@code q} is the entry of
 * {@code T.Py_t} for the cluster. Zero-valued terms are skipped.
 *
 * <p>If the smaller of the two weights is zero or negative, a warning is
 * printed to standard output and 0 is returned.
 *
 * @param instIdx index of the instance in the training data
 * @param input statistics of the input data (its {@code Py_x} matrix is read)
 * @param T the whole partition (its {@code Py_t} matrix is read)
 * @param t index of the cluster
 * @param pi1 weight applied to the instance distribution
 * @param pi2 weight applied to the cluster distribution
 * @return the JS divergence, or 0 when a weight is not positive
 */
private double JS(int instIdx, Input input, Partition T, int t, double pi1, double pi2) {
  // Keep the Math.min form: it decides how NaN weights are treated.
  if (Math.min(pi1, pi2) <= 0) {
    System.out.format(
      "Warning: zero or negative weights in JS calculation! (pi1 %s, pi2 %s)\n",
      pi1, pi2);
    return 0;
  }

  Instance inst = m_data.instance(instIdx);

  // Instance side: only the values actually stored in the instance are visited
  // (presumably the non-zero entries of a sparse instance -- confirm).
  double instanceKl = 0.0;
  for (int k = 0; k < inst.numValues(); k++) {
    int attr = inst.index(k);
    double p = input.Py_x.get(attr, instIdx);
    if (p == 0) {
      continue; // a zero term contributes nothing
    }
    double mixture = p * pi1 + pi2 * T.Py_t.get(attr, t);
    instanceKl += p * Math.log(p / mixture);
  }

  // Cluster side: every attribute is visited.
  double clusterKl = 0.0;
  for (int attr = 0; attr < m_numAttributes; attr++) {
    double q = T.Py_t.get(attr, t);
    if (q == 0) {
      continue; // a zero term contributes nothing
    }
    double mixture = input.Py_x.get(attr, instIdx) * pi1 + pi2 * q;
    clusterKl += q * Math.log(q / mixture);
  }

  return pi1 * instanceKl + pi2 * clusterKl;
}
/** * Put an instance into a new cluster and update. * * @param instIdx instance to be updated * @param newt index of the new cluster this instance has been assigned to * @param T the current working partition * @param Px an array of prior probabilities of the instances */ private void updateAssignment(int instIdx, int newt, Partition T, double Px, Matrix Py_x) { T.Pt_x[instIdx] = newt; // update probability of attributes in the cluster double mass = Px + T.Pt[newt]; double pi1 = Px / mass; double pi2 = T.Pt[newt] / mass; for (int i = 0; i < m_numAttributes; i++) { T.Py_t.set(i, newt, pi1 * Py_x.get(i, instIdx) + pi2 * T.Py_t.get(i, newt)); } T.Pt[newt] = mass; }