示例#1
0
  /**
   * Makes a copy of the tree under this node.
   *
   * <p>The {@code unsmoothed} and {@code smoothed} functions, the {@code errors} and the split
   * information {@code sf} are duplicated through their own {@code copy()} methods (the latter two
   * only when non-null). The itemsets are duplicated into a new {@code MyDataset} spanning all of
   * this node's itemsets. {@code valueNode} and {@code lm} are assigned directly, not copied. Both
   * children, when present, are copied recursively with the new node as their parent.
   *
   * @param up the parent node of the new node
   * @return a copy of the tree under this node
   * @exception Exception if something goes wrong
   */
  public final M5TreeNode copy(M5TreeNode up) throws Exception {

    // The constructor is handed this node's own parent; the parent link of the
    // new node is redirected to `up` a few lines below.
    M5TreeNode node = new M5TreeNode(itemsets, upNode);

    node.type = type;
    node.splitAttr = splitAttr;
    node.splitValue = splitValue;
    node.unsmoothed = unsmoothed.copy();
    node.smoothed = smoothed.copy();
    node.valueNode = valueNode;
    node.upNode = up;
    node.errors = (errors == null) ? null : errors.copy();
    // Take the value from this node; the previous self-assignment
    // (node.numParameters = node.numParameters) never transferred it.
    node.numParameters = numParameters;
    node.sf = (sf == null) ? null : sf.copy();
    node.itemsets = new MyDataset(itemsets, 0, itemsets.numItemsets());
    node.lm = lm;

    node.model = model;
    node.pruningFactor = pruningFactor;
    node.deviation = deviation;

    // Recurse into the children, attaching each copy to the new node.
    node.leftNode = (leftNode != null) ? leftNode.copy(node) : null;
    node.rightNode = (rightNode != null) ? rightNode.copy(node) : null;

    return node;
  }
示例#2
0
  /**
   * Splits the node recursively, unless there are few itemsets or the itemsets have similar values
   * of the class attribute.
   *
   * <p>The given itemsets become this node's data. The node is turned into a leaf when it holds
   * fewer than {@code SPLIT_NUM} itemsets, when {@code M5.stdDev} of the class attribute is below
   * 5% of {@code deviation}, or when no attribute yields a usable split position. Otherwise the
   * attribute with the largest {@code maxImpurity} is chosen, the itemsets are divided between a
   * left child (value {@code <= splitValue}) and a right child, and both children are split in
   * turn.
   *
   * <p>Itemsets whose split attribute is missing are assigned to a child by comparing their class
   * value with the mean of the two children's reference class averages; after the children have
   * been built, their missing value is replaced in this node's itemsets by the attribute average.
   *
   * @param inst itemsets
   * @exception Exception if something goes wrong
   */
  public final void split(MyDataset inst) throws Exception {
    SplitInfo s, sMax;
    int j;
    MyDataset leftInst, rightInst;

    itemsets = inst;
    if (itemsets.numItemsets() < SPLIT_NUM
        || M5.stdDev(itemsets.getClassIndex(), itemsets) < deviation * 0.05) {
      type = false;
    } else {
      // Evaluate every non-class attribute and keep the split with the largest impurity value.
      sMax = new SplitInfo(0, itemsets.numItemsets() - 1, -1);
      s = new SplitInfo(0, itemsets.numItemsets() - 1, -1);
      for (j = 0; j < itemsets.numAttributes(); j++) {
        if (j != itemsets.getClassIndex()) {
          // attrSplit is run on the itemsets sorted by the candidate attribute
          itemsets.sort(itemsets.getAttribute(j));
          s.attrSplit(j, itemsets);
          if ((Math.abs(s.maxImpurity - sMax.maxImpurity) > 1.e-6)
              && (s.maxImpurity > sMax.maxImpurity + 1.e-6)) {
            sMax = s.copy();
          }
        }
      }

      // No attribute selected, or the split position leaves one side empty: make this a leaf.
      if (sMax.splitAttr < 0 || sMax.position < 1 || sMax.position > itemsets.numItemsets() - 1) {
        type = false;
      }
      if (type) {
        sf = sMax;
        splitAttr = sMax.splitAttr; // split attribute
        splitValue = sMax.splitValue; // split value
        unsmoothed = new Function(splitAttr); // unsmoothed function

        leftInst = new MyDataset(itemsets, itemsets.numItemsets());
        rightInst = new MyDataset(itemsets, itemsets.numItemsets());
        int nmissings = 0;
        int[] missings = new int[itemsets.numItemsets()];
        // Distribute the itemsets with a known split-attribute value; remember the others.
        for (int i = 0; i < itemsets.numItemsets(); i++) {
          if (!itemsets.isMissing(i, splitAttr)) {
            if (itemsets.itemset(i).getValue(splitAttr) <= splitValue) {
              leftInst.addItemset(itemsets.itemset(i));
            } else {
              rightInst.addItemset(itemsets.itemset(i));
            }
          } else {
            missings[nmissings] = i;
            nmissings++;
          }
        }

        // Missing values treatment
        if (nmissings > 0) {
          // Calculate the average class value
          double avgRight = 0.0, avgLeft = 0.0;
          if (itemsets.getAttribute(splitAttr).isEnumerate()) {
            avgRight = rightInst.averageClassValue();
            avgLeft = leftInst.averageClassValue();
          } else {
            // Non-enumerated attribute: with more than three itemsets on a side, use the mean
            // class value of three itemsets at the end of the sorted subset (last three on the
            // right, first three on the left); otherwise use the whole subset's average.
            if (rightInst.numItemsets() > 3) {
              rightInst.sort(splitAttr);
              int n = rightInst.numItemsets();
              double sum =
                  rightInst.itemset(n - 1).getClassValue()
                      + rightInst.itemset(n - 2).getClassValue()
                      + rightInst.itemset(n - 3).getClassValue();
              // Three values were summed, so the mean divides by 3 (not by the subset size).
              avgRight = sum / 3.0;
            } else {
              avgRight = rightInst.averageClassValue();
            }
            if (leftInst.numItemsets() > 3) {
              leftInst.sort(splitAttr);
              double sum =
                  leftInst.itemset(0).getClassValue()
                      + leftInst.itemset(1).getClassValue()
                      + leftInst.itemset(2).getClassValue();
              // Three values were summed, so the mean divides by 3 (not by the subset size).
              avgLeft = sum / 3.0;
            } else {
              avgLeft = leftInst.averageClassValue();
            }
          }
          double avgClassValue = (avgRight + avgLeft) / 2.0;
          // Give out the missing instances: a class value at or below the threshold goes to the
          // side with the smaller average (right on ties), a larger one to the side with the
          // greater average.
          for (int i = 0; i < nmissings; i++) {
            if (itemsets.itemset(missings[i]).getClassValue() <= avgClassValue) {
              if (avgRight <= avgLeft) {
                rightInst.addItemset(itemsets.itemset(missings[i]));
              } else {
                leftInst.addItemset(itemsets.itemset(missings[i]));
              }
            } else {
              if (avgRight > avgLeft) {
                rightInst.addItemset(itemsets.itemset(missings[i]));
              } else {
                leftInst.addItemset(itemsets.itemset(missings[i]));
              }
            }
          }
        }

        leftInst.compactify();
        rightInst.compactify();

        leftNode = new M5TreeNode(leftInst, this);
        leftNode.split(leftInst); // split left node

        rightNode = new M5TreeNode(rightInst, this);
        rightNode.split(rightInst); // split right node

        // Give the missing values the average value for the splitting attribute
        // (done only after both subtrees have been built)
        if (nmissings > 0) {
          double avgAtt = itemsets.averageValue(splitAttr);
          for (int i = 0; i < nmissings; i++) {
            itemsets.itemset(missings[i]).setValue(splitAttr, avgAtt);
          }
        }

        this.valueNode(); // function of the constant value

        if (model != REGRESSION_TREE) {
          unsmoothed = Function.combine(unsmoothed, leftNode.unsmoothed);
          // passes up the attributes found under the left node
          unsmoothed = Function.combine(unsmoothed, rightNode.unsmoothed);
          // passes up the attributes found under the right node
        } else {
          unsmoothed = new Function();
        }
      }
    }

    if (!type) { // a leaf node
      this.leafNode();
      errors = unsmoothed.errors(itemsets);
    }
  }