public boolean tryToExpand(double splitConfidence, double tieThreshold) { // splitConfidence. Hoeffding Bound test parameter. // tieThreshold. Hoeffding Bound test parameter. // SplitCriterion splitCriterion = new SDRSplitCriterionAMRules(); // SplitCriterion splitCriterion = new SDRSplitCriterionAMRulesNode();//JD for assessing only // best branch AMRulesSplitCriterion splitCriterion = (AMRulesSplitCriterion) ((AMRulesSplitCriterion) ((AMRulesRegressorOld) this.amRules) .splitCriterionOption.getPreMaterializedObject()) .copy(); // Using this criterion, find the best split per attribute and rank the results AttributeSplitSuggestion[] bestSplitSuggestions = this.getBestSplitSuggestions(splitCriterion); Arrays.sort(bestSplitSuggestions); // Declare a variable to determine if any of the splits should be performed boolean shouldSplit = false; // If only one split was returned, use it if (bestSplitSuggestions.length < 2) { shouldSplit = ((bestSplitSuggestions.length > 0) && (bestSplitSuggestions[0].merit > 0)); bestSuggestion = bestSplitSuggestions[bestSplitSuggestions.length - 1]; } // Otherwise, consider which of the splits proposed may be worth trying else { // Determine the hoeffding bound value, used to select how many instances should be used to // make a test decision // to feel reasonably confident that the test chosen by this sample is the same as what would // be chosen using infinite examples double hoeffdingBound = computeHoeffdingBound(1, splitConfidence, getWeightSeen()); debug("Hoeffding bound " + hoeffdingBound, 4); // Determine the top two ranked splitting suggestions bestSuggestion = bestSplitSuggestions[bestSplitSuggestions.length - 1]; AttributeSplitSuggestion secondBestSuggestion = bestSplitSuggestions[bestSplitSuggestions.length - 2]; debug("Merits: " + secondBestSuggestion.merit + " " + bestSuggestion.merit, 4); // If the upper bound of the sample mean for the ratio of SDR(best suggestion) to SDR(second // best suggestion), // as determined using the hoeffding bound, is less than 1, then the true mean is also less // than 1, and thus at this // particular moment of observation the bestSuggestion is indeed the best split option with // confidence 1-delta, and // splitting should occur. // Alternatively, if two or more splits are very similar or identical in terms of their // splits, then a threshold limit // (default 0.05) is applied to the hoeffding bound; if the hoeffding bound is smaller than // this limit then the two // competing attributes are equally good, and the split will be made on the one with the // higher SDR value. if (bestSuggestion.merit > 0) { // if ((((secondBestSuggestion.merit / bestSuggestion.merit) + hoeffdingBound) < 1) //ratio if ((((bestSuggestion.merit - secondBestSuggestion.merit)) > hoeffdingBound) // if normalized || (hoeffdingBound < tieThreshold)) { debug("Expanded ", 5); shouldSplit = true; } } } if (shouldSplit == true) { AttributeSplitSuggestion splitDecision = bestSplitSuggestions[bestSplitSuggestions.length - 1]; double minValue = Double.MAX_VALUE; double[] branchMerits = splitCriterion.computeBranchSplitMerits(bestSuggestion.resultingClassDistributions); for (int i = 0; i < bestSuggestion.numSplits(); i++) { double value = branchMerits[i]; if (value < minValue) { minValue = value; splitIndex = i; statisticsNewRuleActiveLearningNode = bestSuggestion.resultingClassDistributionFromSplit(i); } } statisticsBranchSplit = splitDecision.resultingClassDistributionFromSplit(splitIndex); statisticsOtherBranchSplit = bestSuggestion.resultingClassDistributionFromSplit(splitIndex == 0 ? 1 : 0); } return shouldSplit; }
@Override protected void attemptToSplit(ActiveLearningNode node, SplitNode parent, int parentIndex) { if (!node.observedClassDistributionIsPure()) { SplitCriterion splitCriterion = (SplitCriterion) getPreparedClassOption(this.splitCriterionOption); AttributeSplitSuggestion[] bestSplitSuggestions = node.getBestSplitSuggestions(splitCriterion, this); Arrays.sort(bestSplitSuggestions); boolean shouldSplit = false; if (bestSplitSuggestions.length < 2) { shouldSplit = bestSplitSuggestions.length > 0; } else { double hoeffdingBound = computeHoeffdingBound( splitCriterion.getRangeOfMerit(node.getObservedClassDistribution()), this.splitConfidenceOption.getValue(), node.getWeightSeen()); AttributeSplitSuggestion bestSuggestion = bestSplitSuggestions[bestSplitSuggestions.length - 1]; AttributeSplitSuggestion secondBestSuggestion = bestSplitSuggestions[bestSplitSuggestions.length - 2]; if ((bestSuggestion.merit - secondBestSuggestion.merit > hoeffdingBound) || (hoeffdingBound < this.tieThresholdOption.getValue())) { shouldSplit = true; } // } if ((this.removePoorAttsOption != null) && this.removePoorAttsOption.isSet()) { Set<Integer> poorAtts = new HashSet<Integer>(); // scan 1 - add any poor to set for (int i = 0; i < bestSplitSuggestions.length; i++) { if (bestSplitSuggestions[i].splitTest != null) { int[] splitAtts = bestSplitSuggestions[i].splitTest.getAttsTestDependsOn(); if (splitAtts.length == 1) { if (bestSuggestion.merit - bestSplitSuggestions[i].merit > hoeffdingBound) { poorAtts.add(new Integer(splitAtts[0])); } } } } // scan 2 - remove good ones from set for (int i = 0; i < bestSplitSuggestions.length; i++) { if (bestSplitSuggestions[i].splitTest != null) { int[] splitAtts = bestSplitSuggestions[i].splitTest.getAttsTestDependsOn(); if (splitAtts.length == 1) { if (bestSuggestion.merit - bestSplitSuggestions[i].merit < hoeffdingBound) { poorAtts.remove(new Integer(splitAtts[0])); } } } } for (int poorAtt : poorAtts) { node.disableAttribute(poorAtt); } } } if (shouldSplit) { AttributeSplitSuggestion splitDecision = bestSplitSuggestions[bestSplitSuggestions.length - 1]; if (splitDecision.splitTest == null) { // preprune - null wins deactivateLearningNode(node, parent, parentIndex); } else { SplitNode newSplit = newSplitNode(splitDecision.splitTest, node.getObservedClassDistribution()); for (int i = 0; i < splitDecision.numSplits(); i++) { // Unique Change of HoeffdingTree Node newChild = newLearningNode( splitDecision.resultingClassDistributionFromSplit(i), ((LearningNodeClassifier) node).getClassifier()); newSplit.setChild(i, newChild); } this.activeLeafNodeCount--; this.decisionNodeCount++; this.activeLeafNodeCount += splitDecision.numSplits(); if (parent == null) { this.treeRoot = newSplit; } else { parent.setChild(parentIndex, newSplit); } } // manage memory enforceTrackerLimit(); } } }