/**
 * Makes a copy of the tree under this node.
 *
 * <p>Scalar fields and the {@code lm} reference are assigned directly; {@code unsmoothed},
 * {@code smoothed}, {@code errors} and {@code sf} are duplicated through their own
 * {@code copy()} methods; {@code itemsets} is rebuilt through the ranged {@code MyDataset}
 * constructor; and the left and right subtrees are copied recursively with the new node as
 * their parent.
 *
 * @param up the parent node of the new node
 * @return a copy of the tree under this node
 * @exception Exception if something goes wrong
 */
public final M5TreeNode copy(M5TreeNode up) throws Exception {
    // NOTE(review): the constructor is handed this node's own parent (upNode), not `up`.
    // The parent link is overwritten with `up` below; confirm the constructor does not derive
    // any other state from its parent argument before changing this.
    M5TreeNode node = new M5TreeNode(itemsets, upNode);

    node.type = type;
    node.splitAttr = splitAttr;
    node.splitValue = splitValue;
    node.unsmoothed = unsmoothed.copy();
    node.smoothed = smoothed.copy();
    node.valueNode = valueNode;
    node.upNode = up;

    if (errors == null) {
        node.errors = null;
    } else {
        node.errors = errors.copy();
    }

    // Take the value from this node. The previous code assigned node.numParameters to itself,
    // so the copy kept whatever the constructor had set.
    node.numParameters = numParameters;

    if (sf == null) {
        node.sf = null;
    } else {
        node.sf = sf.copy();
    }

    node.itemsets = new MyDataset(itemsets, 0, itemsets.numItemsets());
    node.lm = lm;
    node.model = model;
    node.pruningFactor = pruningFactor;
    node.deviation = deviation;

    // Recurse into the children, making the new node their parent.
    if (leftNode != null) {
        node.leftNode = leftNode.copy(node);
    } else {
        node.leftNode = null;
    }
    if (rightNode != null) {
        node.rightNode = rightNode.copy(node);
    } else {
        node.rightNode = null;
    }

    return node;
}
/**
 * Splits the node recursively, unless there are few itemsets or the itemsets have similar values
 * of the class attribute.
 *
 * <p>The node is marked as a leaf ({@code type == false}) when it holds fewer than
 * {@code SPLIT_NUM} itemsets, when {@code M5.stdDev} of the class attribute is below
 * {@code deviation * 0.05}, or when the best candidate split has no valid attribute or position.
 * Otherwise the itemsets are partitioned on the chosen attribute and value, itemsets whose
 * split-attribute value is missing are handed to one side according to their class value, and
 * both children are created and split recursively. Leaves finish by calling {@code leafNode()}
 * and computing {@code errors} from the unsmoothed function.
 *
 * @param inst itemsets
 * @exception Exception if something goes wrong
 */
public final void split(MyDataset inst) throws Exception {
    SplitInfo s, sMax;
    int j, partition; // NOTE(review): `partition` is never used in this method.
    MyDataset leftInst, rightInst;

    itemsets = inst;
    if (itemsets.numItemsets() < SPLIT_NUM
            || M5.stdDev(itemsets.getClassIndex(), itemsets) < deviation * 0.05) {
        // Too few itemsets, or class values too similar: do not split.
        type = false;
    } else {
        // Evaluate every non-class attribute and keep the candidate whose maxImpurity exceeds
        // the current best by more than 1e-6.
        sMax = new SplitInfo(0, itemsets.numItemsets() - 1, -1);
        s = new SplitInfo(0, itemsets.numItemsets() - 1, -1);
        for (j = 0; j < itemsets.numAttributes(); j++) {
            if (j != itemsets.getClassIndex()) {
                itemsets.sort(itemsets.getAttribute(j));
                s.attrSplit(j, itemsets);
                if ((Math.abs(s.maxImpurity - sMax.maxImpurity) > 1.e-6)
                        && (s.maxImpurity > sMax.maxImpurity + 1.e-6)) {
                    sMax = s.copy();
                }
            }
        }
        // No attribute selected, or the split position falls outside 1..numItemsets-1.
        if (sMax.splitAttr < 0 || sMax.position < 1 || sMax.position > itemsets.numItemsets() - 1) {
            type = false;
        }
        if (type == true) {
            sf = sMax;
            splitAttr = sMax.splitAttr; // split attribute
            splitValue = sMax.splitValue; // split value
            unsmoothed = new Function(splitAttr); // unsmoothed function
            leftInst = new MyDataset(itemsets, itemsets.numItemsets());
            rightInst = new MyDataset(itemsets, itemsets.numItemsets());
            // Partition on splitValue (<= goes left); remember the indices of itemsets whose
            // split-attribute value is missing so they can be assigned afterwards.
            int nmissings = 0, missings[] = new int[itemsets.numItemsets()];
            for (int i = 0; i < itemsets.numItemsets(); i++) {
                if (!itemsets.isMissing(i, splitAttr)) {
                    if (itemsets.itemset(i).getValue(splitAttr) <= splitValue) {
                        leftInst.addItemset(itemsets.itemset(i));
                    } else {
                        rightInst.addItemset(itemsets.itemset(i));
                    }
                } else {
                    missings[nmissings] = i;
                    nmissings++;
                }
            }
            // Missing values treatment
            if (nmissings > 0) {
                // Calculate the average class value
                double avgRight = 0.0, avgLeft = 0.0;
                if (itemsets.getAttribute(splitAttr).isEnumerate()) {
                    avgRight = rightInst.averageClassValue();
                    avgLeft = leftInst.averageClassValue();
                } else {
                    // Non-enumerated attribute: with more than three itemsets on a side, only
                    // three of them (taken after sorting by the split attribute) are used.
                    // NOTE(review): the sum of three class values is divided by the full
                    // itemset count n, not by 3 - confirm this is intended.
                    // NOTE(review): presumably the sort is ascending, which would make these
                    // the three itemsets farthest from the split on each side - verify.
                    if (rightInst.numItemsets() > 3) {
                        rightInst.sort(splitAttr);
                        int n = rightInst.numItemsets();
                        double sum = rightInst.itemset(n - 1).getClassValue()
                                + rightInst.itemset(n - 2).getClassValue()
                                + rightInst.itemset(n - 3).getClassValue();
                        avgRight = sum / ((double) n);
                    } else {
                        avgRight = rightInst.averageClassValue();
                    }
                    if (leftInst.numItemsets() > 3) {
                        leftInst.sort(splitAttr);
                        int n = leftInst.numItemsets();
                        double sum = leftInst.itemset(0).getClassValue()
                                + leftInst.itemset(1).getClassValue()
                                + leftInst.itemset(2).getClassValue();
                        avgLeft = sum / ((double) n);
                    } else {
                        avgLeft = leftInst.averageClassValue();
                    }
                }
                double avgClassValue = (avgRight + avgLeft) / 2.0;
                // Give out the missing instances: a class value at or below the midpoint goes
                // to the side with the lower average, anything above it to the side with the
                // higher average.
                for (int i = 0; i < nmissings; i++) {
                    if (itemsets.itemset(missings[i]).getClassValue() <= avgClassValue) {
                        if (avgRight <= avgLeft) {
                            rightInst.addItemset(itemsets.itemset(missings[i]));
                        } else {
                            leftInst.addItemset(itemsets.itemset(missings[i]));
                        }
                    } else {
                        if (avgRight > avgLeft) {
                            rightInst.addItemset(itemsets.itemset(missings[i]));
                        } else {
                            leftInst.addItemset(itemsets.itemset(missings[i]));
                        }
                    }
                }
            }
            leftInst.compactify();
            rightInst.compactify();
            leftNode = new M5TreeNode(leftInst, this);
            leftNode.split(leftInst); // split left node
            rightNode = new M5TreeNode(rightInst, this);
            rightNode.split(rightInst); // split right node
            // Give the missing values the average value for the splitting attribute
            // (done only after both children have been built and split).
            if (nmissings > 0) {
                double avgAtt = itemsets.averageValue(splitAttr);
                for (int i = 0; i < nmissings; i++)
                    itemsets.itemset(missings[i]).setValue(splitAttr, avgAtt);
            }
            this.valueNode(); // function of the constant value
            if (model != REGRESSION_TREE) {
                unsmoothed = Function.combine(unsmoothed, leftNode.unsmoothed); // passes up the attributes found under the left node
                unsmoothed = Function.combine(unsmoothed, rightNode.unsmoothed); // passes up the attributes found under the right node
            } else {
                // Regression trees start from an empty function instead.
                unsmoothed = new Function();
            }
        }
    }
    if (type == false) { // a leaf node
        this.leafNode();
        errors = unsmoothed.errors(itemsets);
    }
}