コード例 #1
0
ファイル: M5TreeNode.java プロジェクト: micyee/granada
  /**
   * Renders the subtree rooted at this node as indented text.
   *
   * <p>An interior node produces two branch lines, each indented by four spaces per tree level.
   * A branch line shows the split attribute followed by a threshold test ({@code <=} for the left
   * child, {@code >} for the right child) or, when the attribute name starts with {@code '['}, by
   * {@code false} / {@code true}. The rendering of the corresponding child is appended right after
   * each branch line. A leaf produces {@code " THEN LM<n>"} followed by a newline.
   *
   * @param treeLevel the depth of this node; the root of a tree should have treeLevel = 0
   * @param deviation the global deviation of the class column; it is only passed on to the children
   * @return the textual form of the subtree
   */
  public final String treeToString(int treeLevel, double deviation) {

    // Leaf: only the linear-model label is printed (the per-leaf error summary is disabled).
    if (!type) {
      return " THEN LM" + lm + "\n";
    }

    // One indentation unit per level, shared by both branch lines.
    StringBuilder indent = new StringBuilder();
    for (int level = 0; level < treeLevel; level++) {
      indent.append("    ");
    }

    String attrName = itemsets.getAttribute(splitAttr).name();
    boolean printsThreshold = attrName.charAt(0) != '[';
    StringBuilder out = new StringBuilder();

    // Left branch.
    out.append("\n").append(indent);
    if (printsThreshold) {
      out.append(attrName + " <= " + M5.doubleToStringG(splitValue, 1, 3) + " ");
    } else {
      out.append(attrName + " false : ");
    }
    out.append(leftNode.treeToString(treeLevel + 1, deviation));

    // Right branch.
    out.append(indent);
    if (printsThreshold) {
      out.append(attrName + " >  " + M5.doubleToStringG(splitValue, 1, 3) + " ");
    } else {
      out.append(attrName + " true : ");
    }
    out.append(rightNode.treeToString(treeLevel + 1, deviation));

    return out.toString();
  }
コード例 #2
0
ファイル: M5TreeNode.java プロジェクト: micyee/granada
  /**
   * Formats the performance measures as a report with an unsmoothed and a smoothed section.
   *
   * <p>Each section optionally lists the individual predictions (see {@code
   * predictionsToString}) before the summary produced by the corresponding {@code Measures} entry.
   * Predictions are listed only when {@code verbosity >= 2} or a specific model is requested
   * ({@code lmNo != 0}), and only for the evaluation types {@code "T"} and {@code "F"}.
   *
   * @param measures contains both the unsmoothed (index 0) and smoothed (index 1) measures
   * @param inst the itemsets
   * @param lmNo lists the predictions of all linear models if lmNo=0, or of the one linear model
   *     specified by lmNo
   * @param verbosity the verbosity level
   * @param str the type of evaluation, one of "t" for training, "T" for testing, "f" for fold
   *     training, "F" for fold testing, "x" for cross-validation
   * @return the formatted report
   * @exception Exception if something goes wrong
   */
  public final String measuresToString(
      Measures measures[], MyDataset inst, int lmNo, int verbosity, String str) throws Exception {

    double absDev = M5.absDev(inst.getClassIndex(), inst);
    double sd = M5.stdDev(inst.getClassIndex(), inst);

    // The same condition guards the prediction listing of both sections.
    boolean listPredictions =
        (verbosity >= 2 || lmNo != 0) && (str.equals("T") || str.equals("F"));

    StringBuilder report = new StringBuilder();

    report.append("  Without smoothing:\n\n");
    if (listPredictions) {
      report.append(predictionsToString(inst, lmNo, false));
    }
    report.append(measures[0].toString(absDev, sd, str, "u") + "\n\n");

    report.append("  With smoothing:\n\n");
    if (listPredictions) {
      report.append(this.predictionsToString(inst, lmNo, true));
    }
    report.append(measures[1].toString(absDev, sd, str, "s") + "\n\n");

    return report.toString();
  }
コード例 #3
0
ファイル: M5TreeNode.java プロジェクト: micyee/granada
  /**
   * Computes performance measures of a tree.
   *
   * <p>The error statistics are recomputed via {@code errors(inst, smooth)} and stored in the
   * {@code errors} field of this node. Predicted and actual class values are then paired up for
   * the correlation; itemsets whose class value is missing are left out of that pairing, because
   * the paired arrays only have room for {@code errors.numItemsets - errors.missingItemsets}
   * entries.
   *
   * <p>The resulting {@code type} is 0 when the class column has a positive standard deviation, 1
   * when it has not but at least one itemset was evaluated, and 2 (with both errors set to zero)
   * when nothing could be evaluated.
   *
   * @param inst itemsets
   * @param smooth =true uses the smoothed models; otherwise uses the unsmoothed models
   * @return the performance measures
   * @exception Exception if something goes wrong
   */
  public final Measures measures(MyDataset inst, boolean smooth) throws Exception {

    int i, numItemsets, count;
    double sd, y1[], y2[];
    Measures measures = new Measures();

    errors = this.errors(inst, smooth);
    // NOTE(review): missingItemsets is presumed to count the itemsets with a missing class value;
    // confirm against errors().
    numItemsets = errors.numItemsets - errors.missingItemsets;
    y1 = new double[numItemsets];
    y2 = new double[numItemsets];
    count = 0;
    for (i = 0; i < inst.numItemsets(); i++) {
      // Skip itemsets without a class value: they have no slot in y1/y2, and writing them
      // anyway would run past the end of the arrays.
      if (!inst.itemset(i).classIsMissing()) {
        y1[count] = this.predict(inst.itemset(i), smooth);
        y2[count] = inst.itemset(i).getClassValue();
        count++;
      }
    }

    measures.correlation = M5.correlation(y1, y2, numItemsets);

    sd = M5.stdDev(inst.getClassIndex(), inst);
    if (sd > 0.0) {
      measures.meanAbsErr = errors.meanAbsErr;
      measures.meanSqrErr = errors.meanSqrErr;
      measures.type = 0;
    } else if (numItemsets >= 1) {
      measures.type = 1;
      measures.meanAbsErr = errors.meanAbsErr;
      measures.meanSqrErr = errors.meanSqrErr;
    } else {
      measures.type = 2;
      measures.meanAbsErr = 0.0;
      measures.meanSqrErr = 0.0;
    }

    return measures;
  }
コード例 #4
0
ファイル: M5TreeNode.java プロジェクト: micyee/granada
  /**
   * Lists the predictions made by the tree under this node, one entry per itemset.
   *
   * <p>After a header naming the class attribute and its 1-based column, every itemset that falls
   * into the requested linear model is printed together with its predicted value. When the actual
   * class value is present, the actual value and the absolute error are printed as well.
   *
   * @param inst itemsets
   * @param smooth =true using the smoothed models; otherwise, the unsmoothed
   * @param lmNo the number of the linear model to report; 0 reports the itemsets of all models
   * @return the formatted listing
   * @exception Exception if something goes wrong
   */
  public final String predictionsToString(MyDataset inst, int lmNo, boolean smooth)
      throws Exception {
    StringBuilder listing = new StringBuilder();

    String header =
        "    Predicting test itemsets ("
            + inst.getAttribute(inst.getClassIndex()).name()
            + ", column "
            + (inst.getClassIndex() + 1)
            + ")\n\n";
    listing.append(header);

    for (int row = 0; row < inst.numItemsets(); row++) {
      int leaf = this.leafNum(inst.itemset(row));
      if (lmNo != 0 && lmNo != leaf) {
        continue; // this itemset belongs to a linear model that was not requested
      }

      listing.append("      Predicting " + row + " (LM" + leaf + "):  ");
      listing.append(inst.itemset(row).toString() + "\n");

      double predicted = this.predict(inst.itemset(row), smooth);
      if (inst.itemset(row).classIsMissing()) {
        listing.append(
            "      Actual value:   missing    Prediction: "
                + M5.doubleToStringG(predicted, 9, 4)
                + "    Abs. Error: undefined\n\n");
      } else {
        double actual = inst.itemset(row).getClassValue();
        listing.append(
            "      Actual value: "
                + M5.doubleToStringG(actual, 9, 4)
                + "    Prediction: "
                + M5.doubleToStringG(predicted, 9, 4)
                + "    Abs. error: "
                + M5.doubleToStringG(Math.abs(actual - predicted), 9, 4)
                + "\n\n");
      }
    }

    return listing.toString();
  }
コード例 #5
0
ファイル: M5TreeNode.java プロジェクト: micyee/granada
  /**
   * Finds the appropriate order of the unsmoothed linear model at this node.
   *
   * <p>If the model has at least one term, it is fitted by regression. Unless the tree is built
   * in {@code LINEAR_REGRESSION} mode, the term reported by {@code insignificant} is then dropped
   * repeatedly and the reduced model refitted. The reduction stops when no such term is reported,
   * when the reduced model's error estimate (root mean squared error scaled by {@code factor})
   * is worse than the current one and exceeds {@code deviation * 0.00001}, or when no terms are
   * left. A model without terms is finally replaced through {@code valueNode()}.
   *
   * @exception Exception if something goes wrong
   */
  public final void function() throws Exception {

    Function best = unsmoothed;

    if (best.terms[0] != 0) {
      double classSd = M5.stdDev(itemsets.getClassIndex(), itemsets);
      this.regression(best);
      valueNode = false;

      if (model != LINEAR_REGRESSION) {
        double bestErr =
            best.errors(itemsets).rootMeanSqrErr
                * this.factor(itemsets.numItemsets(), best.terms[0] + 1, pruningFactor);

        while (true) {
          int weakest = best.insignificant(classSd, itemsets);
          if (weakest == -1) {
            break; // nothing left that may be dropped
          }

          Function candidate = best.remove(weakest);
          this.regression(candidate);
          double candidateErr =
              candidate.errors(itemsets).rootMeanSqrErr
                  * this.factor(itemsets.numItemsets(), candidate.terms[0] + 1, pruningFactor);

          // Compare the estimated error with and without the dropped term.
          if (candidateErr > bestErr && candidateErr > deviation * 0.00001) {
            break; // dropping the term makes the estimate worse: keep the current model
          }

          best = candidate;
          bestErr = candidateErr;
          if (best.terms[0] == 0) {
            break; // no terms left
          }
        }
      }
      unsmoothed = best;
    }

    if (unsmoothed.terms[0] == 0) { // constant function without attributes
      this.valueNode();
    }
  }
コード例 #6
0
ファイル: M5StaticUtils.java プロジェクト: micyee/granada
  /**
   * Creates a new instance of an object given its class name and (optional) arguments to pass to
   * its setOptions method. If the created object is an {@code M5} instance and the options
   * parameter is non-null, the object will have its options set, and the options array is then
   * checked for leftovers via {@code checkForRemainingOptions}. Example use:
   *
   * <pre><code>
   * String classifierName = M5StaticUtils.getOption('W', options);
   * Classifier c = (Classifier)M5StaticUtils.forName(Classifier.class,
   *                                          classifierName,
   *                                          options);
   * setClassifier(c);
   * </code></pre>
   *
   * @param classType the class that the instantiated object should be assignable to -- an exception
   *     is thrown if this is not the case
   * @param className the fully qualified class name of the object
   * @param options an array of options suitable for passing to setOptions. May be null. Any options
   *     accepted by the object will be removed from the array.
   * @return the newly created object, ready for use.
   * @exception Exception if the class name is invalid (the original failure is attached as the
   *     cause), or if the class is not assignable to the desired class type, or the options
   *     supplied are not acceptable to the object
   */
  public static Object forName(Class classType, String className, String[] options)
      throws Exception {

    Class c = null;
    try {
      c = Class.forName(className);
    } catch (Exception ex) {
      // Chain the original exception so the real reason for the load failure is not lost.
      throw new Exception("Can't find class called: " + className, ex);
    }
    if (!classType.isAssignableFrom(c)) {
      throw new Exception(classType.getName() + " is not assignable from " + className);
    }
    Object o = c.newInstance();
    if ((o instanceof M5) && (options != null)) {
      ((M5) o).setOptions(options);
      M5StaticUtils.checkForRemainingOptions(options);
    }
    return o;
  }
コード例 #7
0
ファイル: M5TreeNode.java プロジェクト: micyee/granada
  /**
   * Recursively smoothens the linear model at this node with the unsmoothed linear models at the
   * nodes above it.
   *
   * <p>The smoothed model of this node is combined with the unsmoothed model of {@code
   * current.upNode}. Every coefficient of the combined model is obtained through {@code
   * M5.smoothenValue}, with 0.0 standing in on the side that does not contain the term. Both term
   * lists are walked from their last entry towards the first, in step with the combined list. The
   * method then repeats with {@code current.upNode} for as long as that node has a parent itself.
   *
   * @param current the unsmoothed linear model at the up node of the 'current' will be used for
   *     smoothening
   */
  public final void smoothenFormula(M5TreeNode current) {

    final int smoothingConstant = 15;

    // Cursors into the two term lists; index 0 holds the number of terms, so both start at the
    // last term.
    int own = smoothed.terms[0];
    Function parentModel = current.upNode.unsmoothed;
    int parent = parentModel.terms[0];

    Function merged = Function.combine(smoothed, parentModel);
    int weight = current.itemsets.numItemsets();

    merged.coeffs[0] =
        M5.smoothenValue(smoothed.coeffs[0], parentModel.coeffs[0], weight, smoothingConstant);

    for (int k = merged.terms[0]; k >= 1; k--) {
      if (own < 1 && parent < 1) {
        break; // both term lists are used up
      }

      if (own >= 1 && parent >= 1) {
        boolean inOwn = merged.terms[k] == smoothed.terms[own];
        boolean inParent = merged.terms[k] == parentModel.terms[parent];

        if (inOwn && inParent) {
          merged.coeffs[k] =
              M5.smoothenValue(
                  smoothed.coeffs[own], parentModel.coeffs[parent], weight, smoothingConstant);
          own--;
          parent--;
        } else if (inOwn) {
          merged.coeffs[k] =
              M5.smoothenValue(smoothed.coeffs[own], 0.0, weight, smoothingConstant);
          own--;
        } else if (inParent) {
          merged.coeffs[k] =
              M5.smoothenValue(0.0, parentModel.coeffs[parent], weight, smoothingConstant);
          parent--;
        } else {
          M5.errorMsg("wrong terms value in smoothing_formula().");
        }
        continue;
      }

      // Exactly one list still has terms: the remaining combined terms all come from it.
      if (parent >= 1) {
        for (int l = k; l >= 1; l--) {
          merged.coeffs[l] =
              M5.smoothenValue(0.0, parentModel.coeffs[parent--], weight, smoothingConstant);
        }
      } else {
        for (int l = k; l >= 1; l--) {
          merged.coeffs[l] =
              M5.smoothenValue(smoothed.coeffs[own--], 0.0, weight, smoothingConstant);
        }
      }
      break;
    }

    smoothed = merged;
    if (current.upNode.upNode != null) {
      this.smoothenFormula(current.upNode);
    }
  }
コード例 #8
0
ファイル: M5TreeNode.java プロジェクト: micyee/granada
  /**
   * Splits the node recursively, unless there are few itemsets or itemsets have similar values of
   * the class attribute.
   *
   * <p>The node becomes a leaf ({@code type = false}) when it holds fewer than {@code SPLIT_NUM}
   * itemsets, when the standard deviation of the class values is below 5% of {@code deviation},
   * or when no usable split position is found. Otherwise the best split over all non-class
   * attributes is chosen, the itemsets are distributed to a left ({@code <= splitValue}) and a
   * right child, and both children are split in turn. Itemsets whose split attribute is missing
   * are assigned to a child based on their class value.
   *
   * <p>Side effects visible in this method: {@code itemsets} is sorted in place while candidate
   * attributes are evaluated, and missing values of the split attribute are overwritten with the
   * attribute's average once the children have been built.
   *
   * @param inst itemsets
   * @exception Exception if something goes wrong
   */
  public final void split(MyDataset inst) throws Exception {
    SplitInfo s, sMax;
    int j, partition;
    MyDataset leftInst, rightInst;

    itemsets = inst;
    // Stop criteria: too few itemsets, or class values that are already nearly constant.
    if (itemsets.numItemsets() < SPLIT_NUM
        || M5.stdDev(itemsets.getClassIndex(), itemsets) < deviation * 0.05) {
      type = false;
    } else {
      sMax = new SplitInfo(0, itemsets.numItemsets() - 1, -1);
      s = new SplitInfo(0, itemsets.numItemsets() - 1, -1);
      // Evaluate every non-class attribute and remember the split with the largest impurity
      // value; a candidate must beat the current best by more than 1e-6 to replace it.
      for (j = 0; j < itemsets.numAttributes(); j++) {
        if (j != itemsets.getClassIndex()) {
          itemsets.sort(itemsets.getAttribute(j));
          s.attrSplit(j, itemsets);
          if ((Math.abs(s.maxImpurity - sMax.maxImpurity) > 1.e-6)
              && (s.maxImpurity > sMax.maxImpurity + 1.e-6)) {
            sMax = s.copy();
          }
        }
      }

      // No attribute selected, or a split position that would leave one side empty: make a leaf.
      if (sMax.splitAttr < 0 || sMax.position < 1 || sMax.position > itemsets.numItemsets() - 1) {
        type = false;
      }
      if (type == true) {
        sf = sMax;
        splitAttr = sMax.splitAttr; // split attribute
        splitValue = sMax.splitValue; // split value
        unsmoothed = new Function(splitAttr); // unsmoothed function

        leftInst = new MyDataset(itemsets, itemsets.numItemsets());
        rightInst = new MyDataset(itemsets, itemsets.numItemsets());
        // Indices of the itemsets whose split attribute is missing; they are assigned below.
        int nmissings = 0, missings[] = new int[itemsets.numItemsets()];
        for (int i = 0; i < itemsets.numItemsets(); i++) {
          if (!itemsets.isMissing(i, splitAttr)) {
            if (itemsets.itemset(i).getValue(splitAttr) <= splitValue) {
              leftInst.addItemset(itemsets.itemset(i));
            } else {
              rightInst.addItemset(itemsets.itemset(i));
            }
          } else {
            missings[nmissings] = i;
            nmissings++;
          }
        }

        // Missing values treatment
        if (nmissings > 0) {
          // Calculate the average class value
          double avgRight = 0.0, avgLeft = 0.0;
          if (itemsets.getAttribute(splitAttr).isEnumerate()) {
            avgRight = rightInst.averageClassValue();
            avgLeft = leftInst.averageClassValue();
          } else {
            // With more than three itemsets on a side, only three of them are used: the last
            // three of the right side and the first three of the left side, after sorting by
            // the split attribute.
            if (rightInst.numItemsets() > 3) {
              rightInst.sort(splitAttr);
              int n = rightInst.numItemsets();
              double sum =
                  rightInst.itemset(n - 1).getClassValue()
                      + rightInst.itemset(n - 2).getClassValue()
                      + rightInst.itemset(n - 3).getClassValue();
              // NOTE(review): a sum of three values is divided by n, not by 3 -- confirm whether
              // this is intended.
              avgRight = sum / ((double) n);
            } else {
              avgRight = rightInst.averageClassValue();
            }
            if (leftInst.numItemsets() > 3) {
              leftInst.sort(splitAttr);
              int n = leftInst.numItemsets();
              double sum =
                  leftInst.itemset(0).getClassValue()
                      + leftInst.itemset(1).getClassValue()
                      + leftInst.itemset(2).getClassValue();
              // NOTE(review): same division by n instead of 3 as on the right side.
              avgLeft = sum / ((double) n);
            } else {
              avgLeft = leftInst.averageClassValue();
            }
          }
          double avgClassValue = (avgRight + avgLeft) / 2.0;
          // Give out the missing instances
          // A class value at or below the midpoint goes to the side with the lower average (the
          // right side on a tie); a class value above it goes to the side with the higher
          // average (the left side on a tie).
          for (int i = 0; i < nmissings; i++) {
            if (itemsets.itemset(missings[i]).getClassValue() <= avgClassValue) {
              if (avgRight <= avgLeft) {
                rightInst.addItemset(itemsets.itemset(missings[i]));
              } else {
                leftInst.addItemset(itemsets.itemset(missings[i]));
              }
            } else {
              if (avgRight > avgLeft) {
                rightInst.addItemset(itemsets.itemset(missings[i]));
              } else {
                leftInst.addItemset(itemsets.itemset(missings[i]));
              }
            }
          }
        }

        leftInst.compactify();
        rightInst.compactify();

        leftNode = new M5TreeNode(leftInst, this);
        leftNode.split(leftInst); // split left node

        rightNode = new M5TreeNode(rightInst, this);
        rightNode.split(rightInst); // split right node

        // Give the missing values the average value for the splitting attribute
        // NOTE(review): this runs after both children were built and split; whether the children
        // see the replaced values depends on addItemset sharing or copying the itemsets -- confirm.
        if (nmissings > 0) {
          double avgAtt = itemsets.averageValue(splitAttr);
          for (int i = 0; i < nmissings; i++)
            itemsets.itemset(missings[i]).setValue(splitAttr, avgAtt);
        }

        this.valueNode(); // function of the constant value

        if (model != REGRESSION_TREE) {
          unsmoothed = Function.combine(unsmoothed, leftNode.unsmoothed);
          // passes up the attributes found under the left node
          unsmoothed = Function.combine(unsmoothed, rightNode.unsmoothed);
          // passes up the attributes found under the right node
        } else {
          unsmoothed = new Function();
        }
      }
    }

    if (type == false) { // a leaf node
      this.leafNode();
      errors = unsmoothed.errors(itemsets);
    }
  }