示例#1
0
文件: C45.java 项目: alei76/tctm
 /**
  * Computes the minimum description length (MDL) of this node, i.e., the length of the binary
  * encoding that describes the feature and the split value used at this node.
  *
  * <p>The result is the sum of three terms, added in this order:
  * <ol>
  *   <li>the node size multiplied by the base entropy reported by the gain ratio;</li>
  *   <li>{@code (k - 1) / 2 * log(size / 2)}, where {@code k} is the size of the target
  *       alphabet;</li>
  *   <li>{@code log(pi^(k/2) / gamma(k/2))}.</li>
  * </ol>
  * The natural logarithms in the last two terms are divided by {@code GainRatio.log2}
  * (presumably ln 2, which would express them in bits -- confirm against {@code GainRatio}).
  *
  * @return the MDL value of this node
  */
 public double getMDL() {
   final int classCount = m_ilist.getTargetAlphabet().size();
   final double halfClassCount = classCount / 2.0;

   // Term 1: cost of the instances at this node under the base entropy.
   final double entropyTerm = getSize() * getGainRatio().getBaseEntropy();

   // Term 2: penalty that grows with the number of classes and the log of the node size.
   final double sizeTerm =
       ((classCount - 1) * Math.log(getSize() / 2.0)) / (2 * GainRatio.log2);

   // Term 3: log of pi^(k/2) / gamma(k/2).
   final double piPower = Math.pow(Math.PI, halfClassCount);
   final double gammaValue = Maths.gamma(halfClassCount);
   final double gammaTerm = Math.log(piPower / gammaValue) / GainRatio.log2;

   // Keep the left-to-right summation order so floating-point rounding is unchanged.
   return (entropyTerm + sizeTerm) + gammaTerm;
 }
示例#2
0
文件: C45.java 项目: alei76/tctm
    /**
     * Recursively computes the cost of the subtree rooted at this node and prunes the node's
     * children when keeping them is not cheaper than treating this node as a leaf.
     *
     * <p>The cost of this node as a leaf is {@code getMDL() + 1}. For an internal node, the cost
     * of keeping the split is the log of the number of candidate split points for the best
     * feature (divided by {@code GainRatio.log2}), plus 1, plus the costs returned by both
     * children. The smaller of the two costs is returned. If that minimum is (almost) equal to
     * the leaf cost, both child references are set to {@code null}.
     *
     * <p>Children are processed before this node, so pruning proceeds bottom-up.
     *
     * @return the minimum cost of the subtree rooted at this node
     */
    public double computeCostAndPrune() {
      final double leafCost = getMDL() + 1;

      if (isLeaf()) {
        return leafCost;
      }

      // Recurse first: the children may prune themselves before this node is evaluated.
      final double leftCost = getLeftChild().computeCostAndPrune();
      final double rightCost = getRightChild().computeCostAndPrune();

      final double splitCost =
          Math.log(m_gainRatio.getNumSplitPointsForBestFeature()) / GainRatio.log2;
      final double subtreeCost = splitCost + 1 + leftCost + rightCost;
      final double bestCost = Math.min(leafCost, subtreeCost);

      // The leaf cost won (or tied, per Maths.almostEquals): drop both children.
      if (Maths.almostEquals(bestCost, leafCost)) {
        m_rightChild = null;
        m_leftChild = null;
      }

      return bestCost;
    }