Beispiel #1
0
  /**
   * Builds replacement content for a tree node from the cases referenced by {@code
   * cases[first..last)}.
   *
   * <p>The per-class weight of those cases is accumulated, and the class with the largest weight
   * becomes the new classification. The search is seeded with the node's current classification,
   * so that label is kept unless some other class has a strictly greater weight; among other
   * classes with equal weight, the lowest class index wins.
   *
   * @param first The start (inclusive) position in the case index array.
   * @param last The end (exclusive) position in the case index array.
   * @param node The tree node whose current classification seeds the majority search.
   * @return New content holding the total weight, the class weight distribution, the chosen
   *     classification and the estimated leaf error.
   */
  private TreeNodeContent createContent(int first, int last, TreeNode node) {
    AttributeDelegate classDelegate = this.attributeDelegates[dataSet.getClassAttributeIndex()];

    // A freshly allocated float[] is already all zeros, so no explicit fill is needed.
    float[] classWeights = new float[this.dataSet.getClassCount()];
    for (int pos = first; pos < last; pos++) {
      int label = classDelegate.getClassBranch(this.cases[pos]);
      classWeights[label] += this.weight[cases[pos]];
    }

    // Seed the majority search with the node's existing label.
    // NOTE(review): assumes indexOf yields a valid index into classValues - confirm.
    String currentLabel = node.getContent().getClassification();
    String[] labels = this.dataSet.getClassValues();
    int best = indexOf(currentLabel, labels);

    // Single pass: sum the overall weight and move to any class that strictly beats the
    // current best.
    float weightSum = 0;
    for (int c = 0; c < classWeights.length; c++) {
      weightSum += classWeights[c];
      if (classWeights[c] > classWeights[best]) {
        best = c;
      }
    }
    String majorityLabel = labels[best];

    // Leaf error = weight not covered by the majority class, plus the extra error term
    // supplied by Estimator.getExtraError.
    float misclassified = weightSum - classWeights[best];
    float leafError = misclassified + Estimator.getExtraError(weightSum, misclassified);

    return new TreeNodeContent(weightSum, classWeights, majorityLabel, leafError);
  }
Beispiel #2
0
  /**
   * Creates a pruner for an already built decision tree and immediately prunes it from the root.
   *
   * <p>The case order is reset to the identity sequence {@code 0..caseCount-1}, every case is
   * given weight {@code 1.0}, and both arrays are handed to every attribute delegate before
   * pruning starts.
   *
   * @param dataSet The data set supplying the case count.
   * @param attributeDelegates The attribute delegates that receive the reset case and weight
   *     arrays.
   * @param root The root of the tree to prune.
   * @param dt The decision tree the pruner is attached to.
   */
  TreePruner(
      DataSet dataSet, AttributeDelegate[] attributeDelegates, TreeNode root, DecisionTree dt) {
    this.dataSet = dataSet;
    this.attributeDelegates = attributeDelegates;
    this.root = root;
    this.dt = dt;

    final int total = dataSet.getCaseCount();

    // Identity ordering: position i refers to case i.
    int[] identityOrder = new int[total];
    for (int idx = 0; idx < total; idx++) {
      identityOrder[idx] = idx;
    }
    this.cases = identityOrder;

    // Every case starts with unit weight.
    float[] unitWeights = new float[total];
    Arrays.fill(unitWeights, 1.0f);
    this.weight = unitWeights;

    // Give each delegate the same freshly reset arrays used by this pruner.
    for (int d = 0; d < attributeDelegates.length; d++) {
      attributeDelegates[d].setCasesWeight(this.cases, this.weight);
    }

    // Prune over the full case range [0, total); any value returned by ebpPrune is
    // intentionally ignored here.
    ebpPrune(this.root, 0, total, true);
  }