/**
 * Computes the minimum description length (MDL) cost of this node.
 *
 * The result is the sum of three terms:
 * <ol>
 *   <li>the node size multiplied by the base entropy reported by the gain ratio,</li>
 *   <li><code>(k - 1) * ln(size / 2) / (2 * GainRatio.log2)</code>, where
 *       <code>k</code> is the size of the target alphabet,</li>
 *   <li><code>ln(pi^(k/2) / gamma(k/2)) / GainRatio.log2</code>.</li>
 * </ol>
 *
 * NOTE(review): GainRatio.log2 is presumably ln(2), so that the divisions
 * convert natural logarithms to base 2 -- confirm against GainRatio.
 *
 * @return the MDL cost of this node
 */
public double getMDL() {
    final int classCount = m_ilist.getTargetAlphabet().size();
    final double halfClassCount = classCount / 2.0;

    // Term 1: node size weighted by the base entropy.
    final double entropyCost = getSize() * getGainRatio().getBaseEntropy();

    // Term 2: grows with the number of classes and the log of the node size.
    final double classCountCost =
            ((classCount - 1) * Math.log(getSize() / 2.0)) / (2 * GainRatio.log2);

    // Term 3: log of pi^(k/2) / gamma(k/2).
    final double piPower = Math.pow(Math.PI, halfClassCount);
    final double gammaValue = Maths.gamma(halfClassCount);
    final double gammaCost = Math.log(piPower / gammaValue) / GainRatio.log2;

    // Summed left to right, matching the accumulation order of the terms.
    return entropyCost + classCountCost + gammaCost;
}
/**
 * Recursively computes the minimum cost of the subtree rooted at this node,
 * pruning children along the way.
 *
 * For a leaf the cost is <code>getMDL() + 1</code>. For an internal node the
 * cost is the smaller of that leaf cost and the cost of keeping the split:
 * <code>log(numSplitPoints) / GainRatio.log2 + 1 + cost(left) + cost(right)</code>.
 * When the minimum is (almost) equal to the leaf cost, both children are
 * discarded so that this node becomes a leaf.
 *
 * NOTE(review): the constant 1 presumably accounts for one bit encoding the
 * node type (leaf vs. internal) -- confirm against the pruning scheme in use.
 *
 * @return the minimum cost of the (possibly pruned) subtree rooted here
 */
public double computeCostAndPrune() {
    final double costAsLeaf = getMDL() + 1;
    if (isLeaf()) {
        return costAsLeaf;
    }

    // Children are costed (and pruned) first, left before right.
    final double leftSubtreeCost = getLeftChild().computeCostAndPrune();
    final double rightSubtreeCost = getRightChild().computeCostAndPrune();

    // Cost of describing which split point was chosen for the best feature.
    final double splitPointCost =
            Math.log(m_gainRatio.getNumSplitPointsForBestFeature()) / GainRatio.log2;
    final double costAsSplit = splitPointCost + 1 + leftSubtreeCost + rightSubtreeCost;

    final double cheapest = Math.min(costAsLeaf, costAsSplit);

    // Collapse to a leaf when keeping the split is not cheaper.
    if (Maths.almostEquals(cheapest, costAsLeaf)) {
        m_rightChild = null;
        m_leftChild = null;
    }
    return cheapest;
}