/**
 * Calculates the minimum description length of this node, i.e., the length of the
 * binary encoding that describes the feature and the split value used at this node.
 *
 * <p>The result is the sum of three terms, with {@code N = getSize()} and
 * {@code k} the size of the target alphabet of this node's instance list:
 * <ol>
 *   <li>{@code N * baseEntropy}, where the base entropy is read from
 *       {@code getGainRatio()};</li>
 *   <li>{@code (k - 1) * ln(N / 2) / (2 * GainRatio.log2)};</li>
 *   <li>{@code ln(pi^(k/2) / gamma(k/2)) / GainRatio.log2}.</li>
 * </ol>
 *
 * <p>NOTE(review): {@code GainRatio.log2} is presumably {@code ln 2}, making the
 * divisions a conversion from natural log to bits -- confirm against GainRatio.
 * {@code Maths.gamma} is presumably the gamma function itself rather than its
 * logarithm -- confirm against Maths.
 *
 * @return the description length computed from the three terms above
 */
public double getMDL() {
    final int classCount = m_ilist.getTargetAlphabet().size();
    final double halfClassCount = classCount / 2.0;

    // Term 1: node size weighted by the base entropy of the instances.
    double descriptionLength = getSize() * getGainRatio().getBaseEntropy();

    // Term 2: grows with the number of classes and the log of half the node size.
    descriptionLength +=
            ((classCount - 1) * Math.log(getSize() / 2.0)) / (2 * GainRatio.log2);

    // Term 3: log of the ratio pi^(k/2) / gamma(k/2).
    descriptionLength +=
            Math.log(Math.pow(Math.PI, halfClassCount) / Maths.gamma(halfClassCount))
                    / GainRatio.log2;

    return descriptionLength;
}
/**
 * Initializes this model's training and test sets by splitting a complete
 * instance list at a given position.
 *
 * <p>Fresh {@code training} and {@code test} lists are created, replacing any
 * previously held ones; both share the data and target alphabets of
 * {@code documents}. Instances at indices {@code [0, testStartIndex)} are added
 * to {@code training}; instances at indices
 * {@code [testStartIndex, documents.size())} are added to {@code test}. The
 * original order is kept within each list.
 *
 * @param documents the complete list of instances to split
 * @param testStartIndex index of the first instance that goes into the test
 *        list; no range check is done here, so an out-of-range value surfaces
 *        through {@code documents.get(int)}
 */
public void divideDocuments(InstanceList documents, int testStartIndex) {
    final Alphabet features = documents.getDataAlphabet();
    final Alphabet labels = documents.getTargetAlphabet();

    this.training = new InstanceList(features, labels);
    this.test = new InstanceList(features, labels);

    // Leading portion of the list becomes the training set.
    int trainPos = 0;
    while (trainPos < testStartIndex) {
        training.add(documents.get(trainPos));
        trainPos++;
    }

    // Everything from testStartIndex onward becomes the test set.
    int testPos = testStartIndex;
    while (testPos < documents.size()) {
        test.add(documents.get(testPos));
        testPos++;
    }
}