/** * Recreate a tree node content with the specified data based on the tree node's existing content. * * @param first The start(inclusive) index of the train data used for creating the tree node * content. * @param last The end(exclusive) index of the train data used for creating the tree node content. * @param node The tree node whose content need to be recreated. * @return The recreated tree node content. */ private TreeNodeContent createContent(int first, int last, TreeNode node) { // Compute the total weight and its class distribution of [first // last) prune cases float totalWeight = 0; AttributeDelegate classAttributeDelegate = this.attributeDelegates[dataSet.getClassAttributeIndex()]; float[] totalClassDistri = new float[this.dataSet.getClassCount()]; Arrays.fill(totalClassDistri, 0); for (int i = first; i < last; i++) { int classLabel = classAttributeDelegate.getClassBranch(this.cases[i]); totalClassDistri[classLabel] += this.weight[cases[i]]; } // Find the original classification of the tree node String nodeClassification = node.getContent().getClassification(); String[] classValues = this.dataSet.getClassValues(); int maxClassIndex = indexOf(nodeClassification, classValues); // Find the most probable classification of the prune data on // the current tree node for (int i = 0; i < totalClassDistri.length; i++) { totalWeight += totalClassDistri[i]; if (totalClassDistri[i] > totalClassDistri[maxClassIndex]) maxClassIndex = i; } String classification = classValues[maxClassIndex]; // Estimate the leafError of the tree node with the [first last) // prune data float basicLeafError = totalWeight - totalClassDistri[maxClassIndex]; float extraLeafError = Estimator.getExtraError(totalWeight, basicLeafError); float estimatedLeafError = basicLeafError + extraLeafError; return new TreeNodeContent(totalWeight, totalClassDistri, classification, estimatedLeafError); }
/** Initialize a tree pruner which prunes the built decision tree */ TreePruner( DataSet dataSet, AttributeDelegate[] attributeDelegates, TreeNode root, DecisionTree dt) { this.dataSet = dataSet; this.attributeDelegates = attributeDelegates; this.root = root; this.dt = dt; // ReInitialize the data sequence and their weight int caseCount = dataSet.getCaseCount(); this.cases = new int[caseCount]; for (int i = 0; i < this.cases.length; i++) this.cases[i] = i; this.weight = new float[caseCount]; Arrays.fill(this.weight, 1.0f); // Reset the cases and weight array of all attributes delegate // objects for (AttributeDelegate attributeDelegate : attributeDelegates) { attributeDelegate.setCasesWeight(this.cases, this.weight); } /* float errorAfterPrune = */ ebpPrune(this.root, 0, caseCount, true); }