예제 #1
0
  /**
   * Returns ln(n!) for an int argument, answering from a lazily grown lookup table whenever
   * possible.
   *
   * <p>Negative arguments are not cached; they are forwarded to
   * {@code weka.core.SpecialFunctions.lnFactorial}.
   *
   * <p>For a non-negative {@code n} that is already covered by the table the result is a plain
   * array read. Otherwise a new table of exactly {@code n + 1} entries is allocated, the existing
   * entries are copied over, and the remaining ones are filled with the recurrence
   * {@code ln(i!) = ln((i - 1)!) + ln(i)} before the table field is replaced.
   *
   * <p>NOTE(review): extending the table reads the entry just below the old length, so this
   * assumes the cache field is initialised with at least the entry for 0! -- confirm at the field
   * declaration. No synchronization is visible in this method -- confirm how callers share the
   * instance.
   *
   * @param n the integer whose log-factorial is requested
   * @return ln(n!)
   */
  public double lnFactorial(int n) {
    if (n < 0) {
      return weka.core.SpecialFunctions.lnFactorial(n);
    }

    final int cachedCount = m_lnFactorialCache.length;
    if (n < cachedCount) {
      // Common case: the value has already been computed.
      return m_lnFactorialCache[n];
    }

    // Grow the table to cover n, keeping everything computed so far.
    final double[] extended = new double[n + 1];
    System.arraycopy(m_lnFactorialCache, 0, extended, 0, cachedCount);
    for (int k = cachedCount; k <= n; k++) {
      extended[k] = extended[k - 1] + Math.log(k);
    }
    m_lnFactorialCache = extended;

    return extended[n];
  }
예제 #2
0
  /**
   * Decides whether a split is acceptable using Kononenko's MDL criterion.
   *
   * <p>The coding cost before the split is the sum of {@code SpecialFunctions.log2Binomial} (for
   * the class distribution) and {@code SpecialFunctions.log2Multinomial} (for the instances),
   * both computed from {@code priorCounts} and {@code numInstances}. The cost after the split
   * adds up the same two terms for every row of {@code bestCounts}, using the row sum as the
   * instance total, plus {@code Utils.log2(numCutPoints)}. The split is accepted only when the
   * cost before is strictly greater than the cost after.
   *
   * @param priorCounts class counts prior to the split; entries greater than zero determine how
   *     many classes are considered to occur in the set
   * @param bestCounts class counts after the split, one row per resulting subset (presumably one
   *     row per interval of the best split -- confirm against the caller)
   * @param numInstances total used when encoding the distribution and instances prior to the
   *     split
   * @param numCutPoints value whose base-2 logarithm is added to the cost after the split
   *     (presumably the number of candidate cut points -- confirm against the caller)
   * @return true if the split is acceptable
   */
  private boolean KononenkosMDL(
      double[] priorCounts, double[][] bestCounts, double numInstances, int numCutPoints) {

    // Number of classes occurring in the set
    int occurringClasses = 0;
    for (int c = 0; c < priorCounts.length; c++) {
      if (priorCounts[c] > 0) {
        occurringClasses++;
      }
    }

    // Coding cost before split: distribution first, then instances.
    final double priorDistributionCost =
        SpecialFunctions.log2Binomial(numInstances + occurringClasses - 1, occurringClasses - 1);
    final double priorInstanceCost = SpecialFunctions.log2Multinomial(numInstances, priorCounts);
    final double costBefore = priorInstanceCost + priorDistributionCost;

    // Accumulate distribution and instance costs over every subset after the split.
    double splitDistributionCost = 0;
    double splitInstanceCost = 0;
    for (int s = 0; s < bestCounts.length; s++) {
      final double[] subsetCounts = bestCounts[s];
      final double subsetTotal = Utils.sum(subsetCounts);
      splitDistributionCost +=
          SpecialFunctions.log2Binomial(subsetTotal + occurringClasses - 1, occurringClasses - 1);
      splitInstanceCost += SpecialFunctions.log2Multinomial(subsetTotal, subsetCounts);
    }

    // Coding cost after split
    final double costAfter =
        Utils.log2(numCutPoints) + splitDistributionCost + splitInstanceCost;

    // Accept the split only if it makes the encoding strictly cheaper.
    return costAfter < costBefore;
  }