/**
 * Fast computation of ln(n!) for non-negative ints.
 *
 * <p>Negative ints are passed on to the general gamma-function based version in
 * weka.core.SpecialFunctions.
 *
 * <p>If the current n value is higher than any previous one, the cache is extended and filled to
 * cover it.
 *
 * <p>The common case is reduced to a simple array lookup.
 *
 * @param n the integer
 * @return ln(n!)
 */
public double lnFactorial(int n) {
  if (n < 0) {
    return weka.core.SpecialFunctions.lnFactorial(n);
  }

  if (m_lnFactorialCache.length <= n) {
    // Grow the cache to exactly n + 1 entries, keeping everything computed so far.
    double[] extended = new double[n + 1];
    System.arraycopy(m_lnFactorialCache, 0, extended, 0, m_lnFactorialCache.length);

    // Each new entry builds on its predecessor: ln(i!) = ln((i - 1)!) + ln(i).
    // Entry 0 is ln(0!) = 0, which a freshly allocated array already holds, so the fill never
    // starts below index 1; that keeps extended[i - 1] in bounds even if the cache was empty.
    for (int i = Math.max(1, m_lnFactorialCache.length); i < extended.length; i++) {
      extended[i] = extended[i - 1] + Math.log(i);
    }
    m_lnFactorialCache = extended;
  }
  return m_lnFactorialCache[n];
}
/** * Test using Kononenko's MDL criterion. * * @param priorCounts * @param bestCounts * @param numInstances * @param numCutPoints * @return true if the split is acceptable */ private boolean KononenkosMDL( double[] priorCounts, double[][] bestCounts, double numInstances, int numCutPoints) { double distPrior, instPrior, distAfter = 0, sum, instAfter = 0; double before, after; int numClassesTotal; // Number of classes occuring in the set numClassesTotal = 0; for (double priorCount : priorCounts) { if (priorCount > 0) { numClassesTotal++; } } // Encode distribution prior to split distPrior = SpecialFunctions.log2Binomial(numInstances + numClassesTotal - 1, numClassesTotal - 1); // Encode instances prior to split. instPrior = SpecialFunctions.log2Multinomial(numInstances, priorCounts); before = instPrior + distPrior; // Encode distributions and instances after split. for (double[] bestCount : bestCounts) { sum = Utils.sum(bestCount); distAfter += SpecialFunctions.log2Binomial(sum + numClassesTotal - 1, numClassesTotal - 1); instAfter += SpecialFunctions.log2Multinomial(sum, bestCount); } // Coding cost after split after = Utils.log2(numCutPoints) + distAfter + instAfter; // Check if split is to be accepted return (before > after); }