@Override protected void attemptToSplit(ActiveLearningNode node, SplitNode parent, int parentIndex) { if (!node.observedClassDistributionIsPure()) { SplitCriterion splitCriterion = (SplitCriterion) getPreparedClassOption(this.splitCriterionOption); AttributeSplitSuggestion[] bestSplitSuggestions = node.getBestSplitSuggestions(splitCriterion, this); Arrays.sort(bestSplitSuggestions); boolean shouldSplit = false; if (bestSplitSuggestions.length < 2) { shouldSplit = bestSplitSuggestions.length > 0; } else { double hoeffdingBound = computeHoeffdingBound( splitCriterion.getRangeOfMerit(node.getObservedClassDistribution()), this.splitConfidenceOption.getValue(), node.getWeightSeen()); AttributeSplitSuggestion bestSuggestion = bestSplitSuggestions[bestSplitSuggestions.length - 1]; AttributeSplitSuggestion secondBestSuggestion = bestSplitSuggestions[bestSplitSuggestions.length - 2]; if ((bestSuggestion.merit - secondBestSuggestion.merit > hoeffdingBound) || (hoeffdingBound < this.tieThresholdOption.getValue())) { shouldSplit = true; } // } if ((this.removePoorAttsOption != null) && this.removePoorAttsOption.isSet()) { Set<Integer> poorAtts = new HashSet<Integer>(); // scan 1 - add any poor to set for (int i = 0; i < bestSplitSuggestions.length; i++) { if (bestSplitSuggestions[i].splitTest != null) { int[] splitAtts = bestSplitSuggestions[i].splitTest.getAttsTestDependsOn(); if (splitAtts.length == 1) { if (bestSuggestion.merit - bestSplitSuggestions[i].merit > hoeffdingBound) { poorAtts.add(new Integer(splitAtts[0])); } } } } // scan 2 - remove good ones from set for (int i = 0; i < bestSplitSuggestions.length; i++) { if (bestSplitSuggestions[i].splitTest != null) { int[] splitAtts = bestSplitSuggestions[i].splitTest.getAttsTestDependsOn(); if (splitAtts.length == 1) { if (bestSuggestion.merit - bestSplitSuggestions[i].merit < hoeffdingBound) { poorAtts.remove(new Integer(splitAtts[0])); } } } } for (int poorAtt : poorAtts) { node.disableAttribute(poorAtt); } } } if (shouldSplit) { AttributeSplitSuggestion splitDecision = bestSplitSuggestions[bestSplitSuggestions.length - 1]; if (splitDecision.splitTest == null) { // preprune - null wins deactivateLearningNode(node, parent, parentIndex); } else { SplitNode newSplit = newSplitNode(splitDecision.splitTest, node.getObservedClassDistribution()); for (int i = 0; i < splitDecision.numSplits(); i++) { // Unique Change of HoeffdingTree Node newChild = newLearningNode( splitDecision.resultingClassDistributionFromSplit(i), ((LearningNodeClassifier) node).getClassifier()); newSplit.setChild(i, newChild); } this.activeLeafNodeCount--; this.decisionNodeCount++; this.activeLeafNodeCount += splitDecision.numSplits(); if (parent == null) { this.treeRoot = newSplit; } else { parent.setChild(parentIndex, newSplit); } } // manage memory enforceTrackerLimit(); } } }