예제 #1
0
  /**
   * Bagging method. Trains every member of {@code m_Classifiers} on its own random resample of
   * the training data and, when out-of-bag calculation is enabled, accumulates a weighted error
   * estimate from the instances each member did not see during training.
   *
   * <p>For a numeric class the accumulated error is the weighted absolute difference between the
   * prediction and the actual class value; otherwise it is the weight of each misclassified
   * instance. The sum is divided by the total weight of all out-of-bag instances and stored in
   * {@code m_OutOfBagError}. When out-of-bag calculation is disabled the stored error is reset to
   * 0 rather than the NaN that {@code 0.0 / 0.0} would produce.
   *
   * @param data the training data to be used for generating the bagged classifier.
   * @exception Exception if the classifier could not be built successfully
   * @exception IllegalArgumentException if out-of-bag error is requested while the bag size is
   *     not 100%
   */
  @Override
  public void buildClassifier(Instances data) throws Exception {

    // NOTE(review): presumably the superclass prepares m_Classifiers here - confirm against parent.
    super.buildClassifier(data);

    if (m_CalcOutOfBag && (m_BagSizePercent != 100)) {
      throw new IllegalArgumentException(
          "Bag size needs to be 100% if " + "out-of-bag error is to be calculated!");
    }
    double outOfBagCount = 0.0;
    double errorSum = 0.0;

    // Multiply in long so that numInstances * percent cannot overflow int before the division.
    int bagSize = (int) ((long) data.numInstances() * m_BagSizePercent / 100);
    Random random = new Random(m_Seed);
    for (int j = 0; j < m_Classifiers.length; j++) {
      Instances bagData = null;
      boolean[] inBag = null;
      // create the in-bag dataset
      if (m_CalcOutOfBag) {
        // inBag is handed to the resampling helper, which is expected to flag the sampled instances
        inBag = new boolean[data.numInstances()];
        bagData = resampleWithWeights(data, random, inBag);
      } else {
        bagData = data.resampleWithWeights(random);
        if (bagSize < data.numInstances()) {
          // shuffle, then keep only the first bagSize instances of the resample
          bagData.randomize(random);
          bagData = new Instances(bagData, 0, bagSize);
        }
      }
      // Test the very object that is cast, so the guard always protects the cast below.
      if (m_Classifiers[j] instanceof Randomizable) {
        ((Randomizable) m_Classifiers[j]).setSeed(random.nextInt());
      }
      // build the classifier
      m_Classifiers[j].buildClassifier(bagData);
      if (m_CalcOutOfBag) {
        // calculate out of bag error
        for (int i = 0; i < inBag.length; i++) {
          if (!inBag[i]) {
            Instance outOfBagInst = data.instance(i);
            double weight = outOfBagInst.weight();
            outOfBagCount += weight;
            double prediction = m_Classifiers[j].classifyInstance(outOfBagInst);
            if (data.classAttribute().isNumeric()) {
              // numeric class: weighted absolute error
              errorSum += weight * Math.abs(prediction - outOfBagInst.classValue());
            } else if (prediction != outOfBagInst.classValue()) {
              // otherwise: weight of every misclassified instance
              errorSum += weight;
            }
          }
        }
      }
    }
    // Only publish an estimate that was actually accumulated; dividing the untouched
    // accumulators would store 0.0 / 0.0 = NaN when out-of-bag calculation is switched off.
    if (m_CalcOutOfBag) {
      m_OutOfBagError = errorSum / outOfBagCount;
    } else {
      m_OutOfBagError = 0;
    }
  }
예제 #2
0
  /**
   * Parses a given list of options. Valid options are:
   *
   * <p>-W classname <br>
   * Specify the full class name of a weak classifier as the basis for bagging (required).
   *
   * <p>-I num <br>
   * Set the number of bagging iterations (default 10).
   *
   * <p>-S seed <br>
   * Random number seed for resampling (default 1).
   *
   * <p>-P num <br>
   * Size of each bag, as a percentage of the training size (default 100).
   *
   * <p>-O <br>
   * Compute out of bag error.
   *
   * <p>Options after -- are passed to the designated classifier.
   *
   * <p>Only -P and -O are consumed by this method itself; the option array is then handed to the
   * superclass implementation.
   *
   * @param options the list of options as an array of strings
   * @exception Exception if an option is not supported
   */
  @Override
  public void setOptions(String[] options) throws Exception {

    // -P: bag size percentage; an absent option falls back to 100
    final String percentOption = Utils.getOption('P', options);
    final int percent = (percentOption.length() == 0) ? 100 : Integer.parseInt(percentOption);
    setBagSizePercent(percent);

    // -O: flag that switches the out-of-bag error calculation on
    final boolean outOfBagRequested = Utils.getFlag('O', options);
    setCalcOutOfBag(outOfBagRequested);

    // everything else is left to the superclass
    super.setOptions(options);
  }