예제 #1
0
 /**
  * Computes the Jensen-Shannon (JS) divergence between one training instance and one cluster.
  *
  * <p>The result is {@code pi1 * kl1 + pi2 * kl2}, where both terms are measured against the
  * mixture {@code pi1 * Py_x + pi2 * Py_t}: {@code kl1} sums over the attributes stored in the
  * instance, {@code kl2} sums over all {@code m_numAttributes} attributes of the cluster column.
  * Entries equal to zero contribute nothing to either sum.
  *
  * @param instIdx index of the instance
  * @param input statistics of the input data (its {@code Py_x} matrix is read)
  * @param T the whole partition (its {@code Py_t} matrix is read)
  * @param t index of the cluster
  * @param pi1 weight applied to the instance distribution
  * @param pi2 weight applied to the cluster distribution
  * @return the JS divergence, or 0 (after printing a warning) when the smaller weight is &lt;= 0
  */
 private double JS(int instIdx, Input input, Partition T, int t, double pi1, double pi2) {
   double smallerWeight = Math.min(pi1, pi2);
   if (smallerWeight <= 0) {
     System.out.format(
         "Warning: zero or negative weights in JS calculation! (pi1 %s, pi2 %s)\n", pi1, pi2);
     return 0;
   }

   Instance inst = m_data.instance(instIdx);

   // Instance side: only the attributes actually stored in the instance are visited.
   double instanceTerm = 0.0;
   for (int pos = 0; pos < inst.numValues(); pos++) {
     int attr = inst.index(pos);
     double pInst = input.Py_x.get(attr, instIdx);
     if (pInst == 0) {
       continue; // a zero entry adds nothing to the sum
     }
     double mixture = pInst * pi1 + pi2 * T.Py_t.get(attr, t);
     instanceTerm += pInst * Math.log(pInst / mixture);
   }

   // Cluster side: every attribute is visited.
   double clusterTerm = 0.0;
   for (int attr = 0; attr < m_numAttributes; attr++) {
     double pCluster = T.Py_t.get(attr, t);
     if (pCluster == 0) {
       continue; // a zero entry adds nothing to the sum
     }
     double mixture = input.Py_x.get(attr, instIdx) * pi1 + pi2 * pCluster;
     clusterTerm += pCluster * Math.log(pCluster / mixture);
   }

   return pi1 * instanceTerm + pi2 * clusterTerm;
 }
예제 #2
0
  /**
   * Process the input and compute the statistics of the training data.
   *
   * <p>Steps, in order:
   * <ol>
   *   <li>Delete from {@code m_data} every instance whose stored values sum to zero or less,
   *       decrementing {@code m_numInstances} for each one removed.</li>
   *   <li>Build {@code Py_x} via {@code getTransposedNormedMatrix} and {@code Pyx} (a copy of
   *       {@code Py_x} when {@code m_uniformPrior} is set, otherwise via
   *       {@code getTransposedMatrix}), then scale {@code Pyx} by {@code 1 / sumVals}.</li>
   *   <li>Fill {@code Px} with the column sums of {@code Pyx} and {@code Py} with its row sums.</li>
   *   <li>Call {@code MI(input.Pyx, input)}.</li>
   * </ol>
   *
   * @return an Input object which holds the statistics about the training data
   */
  private Input sIB_ProcessInput() {
    // Scan from the last instance to the first: deleting index i only shifts instances at
    // indices > i, which have already been examined. A forward scan would skip the instance
    // that slides into slot i after a deletion, leaving non-positive-sum instances in the data.
    for (int i = m_numInstances - 1; i >= 0; i--) {
      double valSum = 0.0;
      for (int v = 0; v < m_data.instance(i).numValues(); v++) {
        valSum += m_data.instance(i).valueSparse(v);
      }
      if (valSum <= 0) {
        if (m_verbose) {
          System.out.format("Instance %s sum of value = %s <= 0, removed.\n", i, valSum);
        }
        m_data.delete(i);
        m_numInstances--;
      }
    }

    // get the term-document matrix
    Input input = new Input();
    input.Py_x = getTransposedNormedMatrix(m_data);
    if (m_uniformPrior) {
      // Pyx is copied from Py_x before normalizePrior(m_data) runs; keep this order.
      input.Pyx = input.Py_x.copy();
      normalizePrior(m_data);
    } else {
      input.Pyx = getTransposedMatrix(m_data);
    }
    // sumVals is taken after the optional normalizePrior call above.
    input.sumVals = getTotalSum(m_data);
    input.Pyx.timesEquals(1 / input.sumVals);

    // prior probability of documents, ie. sum the columns from the Pyx matrix
    input.Px = new double[m_numInstances];
    for (int i = 0; i < m_numInstances; i++) {
      for (int j = 0; j < m_numAttributes; j++) {
        input.Px[i] += input.Pyx.get(j, i);
      }
    }

    // prior probability of terms, ie. sum the rows from the Pyx matrix
    input.Py = new double[m_numAttributes];
    for (int i = 0; i < input.Pyx.getRowDimension(); i++) {
      for (int j = 0; j < input.Pyx.getColumnDimension(); j++) {
        input.Py[i] += input.Pyx.get(i, j);
      }
    }

    MI(input.Pyx, input);
    return input;
  }