/**
 * Compute the JS divergence between an instance and a cluster, used for
 * training data.
 * <p>
 * The result is <code>pi1 * kl1 + pi2 * kl2</code>, where <code>kl1</code> is
 * accumulated over the instance's stored (sparse) attributes and
 * <code>kl2</code> over all <code>m_numAttributes</code> attributes of the
 * cluster. Both terms are measured against the mixture
 * <code>pi1 * Py_x + pi2 * Py_t</code>.
 *
 * @param instIdx index of the instance
 * @param input statistics of the input data
 * @param T the whole partition
 * @param t index of the cluster
 * @param pi1 weight applied to the instance term; must be positive
 * @param pi2 weight applied to the cluster term; must be positive
 * @return the JS divergence, or 0 (after printing a warning) when the smaller
 *         of the two weights is not positive
 */
private double JS(int instIdx, Input input, Partition T, int t, double pi1, double pi2) {
  // Guard: both mixture weights have to be strictly positive.
  if (Math.min(pi1, pi2) <= 0) {
    System.out.format(
      "Warning: zero or negative weights in JS calculation! (pi1 %s, pi2 %s)\n", pi1, pi2);
    return 0;
  }

  Instance inst = m_data.instance(instIdx);

  // Instance side: only the attributes actually stored in the instance are visited.
  double instanceTerm = 0.0;
  int storedValues = inst.numValues();
  for (int k = 0; k < storedValues; k++) {
    int attr = inst.index(k);
    double pInst = input.Py_x.get(attr, instIdx);
    if (pInst == 0) {
      continue; // a zero probability contributes nothing to the sum
    }
    double mixture = pInst * pi1 + pi2 * T.Py_t.get(attr, t);
    instanceTerm += pInst * Math.log(pInst / mixture);
  }

  // Cluster side: every attribute is visited.
  double clusterTerm = 0.0;
  for (int attr = 0; attr < m_numAttributes; attr++) {
    double pClust = T.Py_t.get(attr, t);
    if (pClust == 0) {
      continue; // a zero probability contributes nothing to the sum
    }
    double mixture = input.Py_x.get(attr, instIdx) * pi1 + pi2 * pClust;
    clusterTerm += pClust * Math.log(pClust / mixture);
  }

  return pi1 * instanceTerm + pi2 * clusterTerm;
}
/** * Process the input and compute the statistics of the training data * * @return an Input object which holds the statistics about the training data */ private Input sIB_ProcessInput() { double valSum = 0.0; for (int i = 0; i < m_numInstances; i++) { valSum = 0.0; for (int v = 0; v < m_data.instance(i).numValues(); v++) { valSum += m_data.instance(i).valueSparse(v); } if (valSum <= 0) { if (m_verbose) { System.out.format("Instance %s sum of value = %s <= 0, removed.\n", i, valSum); } m_data.delete(i); m_numInstances--; } } // get the term-document matrix Input input = new Input(); input.Py_x = getTransposedNormedMatrix(m_data); if (m_uniformPrior) { input.Pyx = input.Py_x.copy(); normalizePrior(m_data); } else { input.Pyx = getTransposedMatrix(m_data); } input.sumVals = getTotalSum(m_data); input.Pyx.timesEquals(1 / input.sumVals); // prior probability of documents, ie. sum the columns from the Pyx matrix input.Px = new double[m_numInstances]; for (int i = 0; i < m_numInstances; i++) { for (int j = 0; j < m_numAttributes; j++) { input.Px[i] += input.Pyx.get(j, i); } } // prior probability of terms, ie. sum the rows from the Pyx matrix input.Py = new double[m_numAttributes]; for (int i = 0; i < input.Pyx.getRowDimension(); i++) { for (int j = 0; j < input.Pyx.getColumnDimension(); j++) { input.Py[i] += input.Pyx.get(i, j); } } MI(input.Pyx, input); return input; }