コード例 #1
0
  /**
   * Decides whether this node should be expanded and, if so, records which branch of the best
   * split to follow.
   *
   * <p>The split criterion is a copy of the one configured on the owning {@code
   * AMRulesRegressorOld}. The candidate splits are sorted ascending, so the last element is the
   * best one. With a single candidate the split is accepted when its merit is positive. With two
   * or more candidates the split is accepted when the best merit is positive and either the merit
   * gap between the two best candidates exceeds the Hoeffding bound, or the bound has dropped below
   * {@code tieThreshold} (the candidates are then treated as tied).
   *
   * <p>When the split is accepted, the branch with the smallest branch merit is selected and
   * {@code splitIndex}, {@code statisticsNewRuleActiveLearningNode}, {@code statisticsBranchSplit}
   * and {@code statisticsOtherBranchSplit} are updated. {@code bestSuggestion} is updated whenever
   * at least one candidate exists.
   *
   * @param splitConfidence Hoeffding bound test parameter (passed as the confidence argument)
   * @param tieThreshold Hoeffding bound test parameter; a bound below this value forces the split
   * @return {@code true} if the node should be expanded, {@code false} otherwise (including when
   *     no split candidate is available)
   */
  public boolean tryToExpand(double splitConfidence, double tieThreshold) {
    AMRulesSplitCriterion splitCriterion =
        (AMRulesSplitCriterion)
            ((AMRulesSplitCriterion)
                    ((AMRulesRegressorOld) this.amRules)
                        .splitCriterionOption.getPreMaterializedObject())
                .copy();

    // Using this criterion, find the best split per attribute and rank the results.
    AttributeSplitSuggestion[] bestSplitSuggestions = this.getBestSplitSuggestions(splitCriterion);
    Arrays.sort(bestSplitSuggestions);

    // No candidate at all: nothing to expand. Previously this case indexed element -1 and threw
    // ArrayIndexOutOfBoundsException. bestSuggestion is deliberately left untouched here.
    if (bestSplitSuggestions.length == 0) {
      return false;
    }

    boolean shouldSplit = false;

    if (bestSplitSuggestions.length == 1) {
      // A single candidate is used as long as it has positive merit.
      bestSuggestion = bestSplitSuggestions[0];
      shouldSplit = bestSuggestion.merit > 0;
    } else {
      // The Hoeffding bound is computed with a range of 1 over the weight seen so far.
      double hoeffdingBound = computeHoeffdingBound(1, splitConfidence, getWeightSeen());
      debug("Hoeffding bound " + hoeffdingBound, 4);

      // Top two ranked suggestions (the array is sorted ascending).
      bestSuggestion = bestSplitSuggestions[bestSplitSuggestions.length - 1];
      AttributeSplitSuggestion secondBestSuggestion =
          bestSplitSuggestions[bestSplitSuggestions.length - 2];

      debug("Merits: " + secondBestSuggestion.merit + " " + bestSuggestion.merit, 4);

      // Difference test on the merits (the original author notes this form applies "if
      // normalized"): split when the best candidate beats the runner-up by more than the bound,
      // or when the bound is already below the tie threshold.
      if (bestSuggestion.merit > 0) {
        boolean clearWinner = (bestSuggestion.merit - secondBestSuggestion.merit) > hoeffdingBound;
        boolean tie = hoeffdingBound < tieThreshold;
        if (clearWinner || tie) {
          debug("Expanded ", 5);
          shouldSplit = true;
        }
      }
    }

    if (shouldSplit) {
      double minValue = Double.MAX_VALUE;
      double[] branchMerits =
          splitCriterion.computeBranchSplitMerits(bestSuggestion.resultingClassDistributions);

      // Pick the branch with the smallest branch merit.
      for (int i = 0; i < bestSuggestion.numSplits(); i++) {
        double value = branchMerits[i];
        if (value < minValue) {
          minValue = value;
          splitIndex = i;
          statisticsNewRuleActiveLearningNode =
              bestSuggestion.resultingClassDistributionFromSplit(i);
        }
      }
      statisticsBranchSplit = bestSuggestion.resultingClassDistributionFromSplit(splitIndex);
      // NOTE(review): the "other" branch is taken as index 0 or 1, which presumes a binary
      // split - confirm that every split test used here has exactly two branches.
      statisticsOtherBranchSplit =
          bestSuggestion.resultingClassDistributionFromSplit(splitIndex == 0 ? 1 : 0);
    }
    return shouldSplit;
  }
コード例 #2
0
 /**
  * Attempts to replace an active learning node with a split node.
  *
  * <p>Nothing happens when the node's observed class distribution is pure. Otherwise the split
  * suggestions are sorted ascending (best last). A single suggestion is always accepted; with two
  * or more, the best one is accepted when its merit exceeds the runner-up's by more than the
  * Hoeffding bound, or when the bound is below the tie threshold option. If the remove-poor-
  * attributes option is set, single-attribute tests that trail the best merit by more than the
  * bound have their attribute disabled on the node, unless another suggestion on the same
  * attribute is within the bound.
  *
  * <p>When a split is accepted and the winning suggestion has no split test, the node is
  * deactivated instead (pre-pruning). Otherwise a new split node is created whose children are
  * learning nodes built from the resulting class distributions and the classifier of {@code node}
  * (which is cast to {@code LearningNodeClassifier}).
  *
  * @param node the active learning node being considered for a split
  * @param parent the parent split node, or {@code null} if {@code node} is the tree root
  * @param parentIndex the child index of {@code node} within {@code parent}
  */
 @Override
 protected void attemptToSplit(ActiveLearningNode node, SplitNode parent, int parentIndex) {
   if (node.observedClassDistributionIsPure()) {
     return;
   }
   SplitCriterion splitCriterion =
       (SplitCriterion) getPreparedClassOption(this.splitCriterionOption);
   AttributeSplitSuggestion[] bestSplitSuggestions =
       node.getBestSplitSuggestions(splitCriterion, this);
   Arrays.sort(bestSplitSuggestions);
   boolean shouldSplit = false;
   if (bestSplitSuggestions.length < 2) {
     // Zero suggestions: no split. Exactly one: take it unconditionally.
     shouldSplit = bestSplitSuggestions.length > 0;
   } else {
     double hoeffdingBound =
         computeHoeffdingBound(
             splitCriterion.getRangeOfMerit(node.getObservedClassDistribution()),
             this.splitConfidenceOption.getValue(),
             node.getWeightSeen());
     AttributeSplitSuggestion bestSuggestion =
         bestSplitSuggestions[bestSplitSuggestions.length - 1];
     AttributeSplitSuggestion secondBestSuggestion =
         bestSplitSuggestions[bestSplitSuggestions.length - 2];
     if ((bestSuggestion.merit - secondBestSuggestion.merit > hoeffdingBound)
         || (hoeffdingBound < this.tieThresholdOption.getValue())) {
       shouldSplit = true;
     }
     if ((this.removePoorAttsOption != null) && this.removePoorAttsOption.isSet()) {
       Set<Integer> poorAtts = new HashSet<Integer>();
       // scan 1 - add any attribute whose single-attribute test trails the best by more than
       // the bound
       for (AttributeSplitSuggestion suggestion : bestSplitSuggestions) {
         if (suggestion.splitTest == null) {
           continue;
         }
         int[] splitAtts = suggestion.splitTest.getAttsTestDependsOn();
         if ((splitAtts.length == 1)
             && (bestSuggestion.merit - suggestion.merit > hoeffdingBound)) {
           poorAtts.add(Integer.valueOf(splitAtts[0]));
         }
       }
       // scan 2 - remove attributes that also have a suggestion within the bound
       for (AttributeSplitSuggestion suggestion : bestSplitSuggestions) {
         if (suggestion.splitTest == null) {
           continue;
         }
         int[] splitAtts = suggestion.splitTest.getAttsTestDependsOn();
         if ((splitAtts.length == 1)
             && (bestSuggestion.merit - suggestion.merit < hoeffdingBound)) {
           poorAtts.remove(Integer.valueOf(splitAtts[0]));
         }
       }
       for (int poorAtt : poorAtts) {
         node.disableAttribute(poorAtt);
       }
     }
   }
   if (!shouldSplit) {
     return;
   }
   AttributeSplitSuggestion splitDecision =
       bestSplitSuggestions[bestSplitSuggestions.length - 1];
   if (splitDecision.splitTest == null) {
     // preprune - null wins
     deactivateLearningNode(node, parent, parentIndex);
   } else {
     SplitNode newSplit =
         newSplitNode(splitDecision.splitTest, node.getObservedClassDistribution());
     int branchCount = splitDecision.numSplits();
     for (int i = 0; i < branchCount; i++) {
       // Unique change relative to HoeffdingTree: each child receives the parent's classifier.
       Node newChild =
           newLearningNode(
               splitDecision.resultingClassDistributionFromSplit(i),
               ((LearningNodeClassifier) node).getClassifier());
       newSplit.setChild(i, newChild);
     }
     // One active leaf is replaced by a decision node with one new leaf per branch.
     this.activeLeafNodeCount--;
     this.decisionNodeCount++;
     this.activeLeafNodeCount += splitDecision.numSplits();
     if (parent == null) {
       this.treeRoot = newSplit;
     } else {
       parent.setChild(parentIndex, newSplit);
     }
   }
   // manage memory
   enforceTrackerLimit();
 }