/**
 * Finalizes a model by calling {@code calcMean()} on the clustering statistic of every node
 * produced by a {@code CompleteTreeIterator} started at the given model.
 *
 * @param model the model to finalize; it is cast to {@code ClusNode} and used as the
 *     iteration root
 * @throws IOException declared by the method signature
 */
public void terminate(ClusModel model) throws IOException {
    ClusNode root = (ClusNode) model;
    for (CompleteTreeIterator nodes = new CompleteTreeIterator(root); nodes.hasMoreNodes();) {
        ClusNode current = (ClusNode) nodes.getNextNode();
        current.getClusteringStat().calcMean();
    }
}
/**
 * Prepares a model by giving every node produced by a {@code CompleteTreeIterator} its own
 * statistic cloned from {@code m_Clone}, and setting that statistic's SDataSize to 1.
 *
 * @param model the model to prepare; it is cast to {@code ClusNode} and used as the
 *     iteration root
 * @param schema not used by this implementation
 */
public void initialize(ClusModel model, ClusSchema schema) {
    ClusNode root = (ClusNode) model;
    for (CompleteTreeIterator nodes = new CompleteTreeIterator(root); nodes.hasMoreNodes();) {
        ClusNode current = (ClusNode) nodes.getNextNode();
        ClusStatistic fresh = m_Clone.cloneStat();
        // Attach first, then configure, in the same order as before.
        current.setClusteringStat(fresh);
        fresh.setSDataSize(1);
    }
}
/**
 * Writes a textual rendering of this node and its descendants to {@code writer}.
 *
 * <p>First, for every fold whose node has no best test, the fold id and the node's total
 * weight are printed as a comma-separated list, followed by the path of this node. Then each
 * child split is printed as a "G&lt;index&gt;" header with its folds and test, and each of the
 * split's children is printed recursively: the first under a "+-yes: " label, every later one
 * under a "+-no: " label. The writer is flushed at the end.
 *
 * @param writer destination for the rendering
 * @param prefix text printed in front of continuation lines belonging to this node
 */
public final void printTree(PrintWriter writer, String prefix) {
    // Part 1: list the folds whose node has no best test.
    int printed = 0;
    for (int f = 0; f < m_Folds.length; f++) {
        ClusNode foldNode = m_Nodes[f];
        if (foldNode.hasBestTest()) {
            continue;
        }
        if (printed != 0) {
            writer.print(", ");
        }
        writer.print(m_Folds[f] + ": ");
        writer.print(ClusFormat.ONE_AFTER_DOT.format(foldNode.getTotWeight()));
        printed++;
    }
    boolean anyPrinted = printed > 0;
    if (anyPrinted) {
        writer.print(" ");
        showPath(getPath(), writer);
    }

    // Part 2: finish the current line; with splits to follow, only break the line when
    // something was actually printed on it.
    int splitCount = getNbChildren();
    if (splitCount <= 0) {
        writer.println();
    } else if (anyPrinted) {
        writer.println();
        writer.print(prefix);
    }

    // Part 3: one group per child split, each followed by its sub-nodes.
    for (int g = 0; g < splitCount; g++) {
        OptXValSplit group = (OptXValSplit) getChild(g);
        if (g > 0) {
            writer.println(prefix + "| ");
            writer.print(prefix);
        }
        writer.print("G" + g + " ");
        writer.print(MyIntArray.print(group.getFolds()));
        writer.print(" - ");
        writer.print(group.getTest().getString());
        writer.println();

        int branchCount = group.getNbChildren();
        boolean lastGroup = (g == splitCount - 1);
        // Every group except the last keeps a vertical bar running down to its sibling.
        String groupIndent = prefix + (lastGroup ? " " : "| ");
        for (int b = 0; b < branchCount; b++) {
            OptXValNode branch = (OptXValNode) group.getChild(b);
            String branchIndent;
            if (b == branchCount - 1) {
                branchIndent = " ";
            } else {
                branchIndent = "| ";
            }
            if (b != 0) {
                writer.println(groupIndent + "|");
                writer.print(groupIndent + "+-no: ");
            } else {
                writer.print(groupIndent + "+-yes: ");
            }
            branch.printTree(writer, groupIndent + branchIndent);
        }
    }
    writer.flush();
}
public void prune(ClusNode node) { RegressionStat stat = (RegressionStat) node.getClusteringStat(); m_GlobalDeviation = Math.sqrt(stat.getSVarS(m_ClusteringWeights) / stat.getTotalWeight()); pruneRecursive(node); // System.out.println("Performing test of M5 pruning"); // TestM5PruningRuleNode.performTest(orig, node, m_GlobalDeviation, m_TargetWeights, // m_TrainingData); }
/**
 * Returns the tree node associated with the given fold, attaching its children first when
 * the node has a best test and is at the bottom level.
 *
 * <p>The fold is located in {@code m_Folds} with {@link Arrays#binarySearch(int[], int)} and
 * the node at the same index of {@code m_Nodes} is selected. If that node has a best test and
 * is at the bottom level, the first child split containing the fold is looked up and the
 * node's children are set to the trees obtained recursively from that split's sub-nodes.
 *
 * <p>NOTE(review): binary search assumes {@code m_Folds} is sorted ascending; confirm against
 * the code that fills it.
 *
 * @param fold identifier of the fold whose tree is requested
 * @return the node for {@code fold}, with children attached where applicable
 * @throws IllegalArgumentException if {@code fold} is not found in {@code m_Folds}
 * @throws IllegalStateException if children must be attached but no child split contains
 *     {@code fold}
 */
public final ClusNode getTree(int fold) {
    int idx = Arrays.binarySearch(m_Folds, fold);
    if (idx < 0) {
        // binarySearch reports "not found" as a negative value; using it as an index would
        // only fail with an uninformative ArrayIndexOutOfBoundsException.
        throw new IllegalArgumentException("Fold " + fold + " is not handled by this node");
    }
    ClusNode node = m_Nodes[idx];
    if (node.hasBestTest() && node.atBottomLevel()) {
        OptXValSplit split = null;
        int nb = getNbChildren();
        for (int i = 0; i < nb; i++) {
            OptXValSplit msplit = (OptXValSplit) getChild(i);
            if (msplit.contains(fold)) {
                split = msplit;
                break;
            }
        }
        int arity = node.updateArity();
        if (arity > 0 && split == null) {
            // Without a matching split there is nothing to take the children from.
            throw new IllegalStateException(
                "No child split contains fold " + fold + " although the node expects "
                    + arity + " children");
        }
        for (int i = 0; i < arity; i++) {
            OptXValNode subnode = (OptXValNode) split.getChild(i);
            node.setChild(subnode.getTree(fold), i);
        }
    }
    return node;
}
public void pruneRecursive(ClusNode node) { if (node.atBottomLevel()) { return; } for (int i = 0; i < node.getNbChildren(); i++) { ClusNode child = (ClusNode) node.getChild(i); pruneRecursive(child); } RegressionStat stat = (RegressionStat) node.getClusteringStat(); double rmsLeaf = stat.getRMSE(m_ClusteringWeights); double adjustedErrorLeaf = rmsLeaf * pruningFactor(stat.getTotalWeight(), 1); double rmsSubTree = Math.sqrt(node.estimateClusteringSS(m_ClusteringWeights) / stat.getTotalWeight()); double adjustedErrorTree = rmsSubTree * pruningFactor(stat.getTotalWeight(), node.getModelSize()); // System.out.println("C leaf: "+rmsLeaf+" tree: "+rmsSubTree); // System.out.println("C leafadj: "+adjustedErrorLeaf +" treeadj: "+rmsSubTree); if ((adjustedErrorLeaf <= adjustedErrorTree) || (adjustedErrorLeaf < (m_GlobalDeviation * 0.00001))) { node.makeLeaf(); } }
/**
 * Updates the clustering statistic of the given model with one tuple by calling
 * {@code updateWeighted(tuple, 0)} on it.
 *
 * @param tuple the data tuple passed to the statistic
 * @param model the model to update; it is cast to {@code ClusNode}
 */
public void modelUpdate(DataTuple tuple, ClusModel model) {
    // NOTE(review): the meaning of the literal 0 passed as the second argument is not
    // visible from this method; check updateWeighted's declaration.
    ((ClusNode) model).getClusteringStat().updateWeighted(tuple, 0);
}