/** * Bagging method. * * @param data the training data to be used for generating the bagged classifier. * @exception Exception if the classifier could not be built successfully */ @Override public void buildClassifier(Instances data) throws Exception { super.buildClassifier(data); if (m_CalcOutOfBag && (m_BagSizePercent != 100)) { throw new IllegalArgumentException( "Bag size needs to be 100% if " + "out-of-bag error is to be calculated!"); } double outOfBagCount = 0.0; double errorSum = 0.0; int bagSize = data.numInstances() * m_BagSizePercent / 100; Random random = new Random(m_Seed); for (int j = 0; j < m_Classifiers.length; j++) { Instances bagData = null; boolean[] inBag = null; // create the in-bag dataset if (m_CalcOutOfBag) { inBag = new boolean[data.numInstances()]; bagData = resampleWithWeights(data, random, inBag); } else { bagData = data.resampleWithWeights(random); if (bagSize < data.numInstances()) { bagData.randomize(random); Instances newBagData = new Instances(bagData, 0, bagSize); bagData = newBagData; } } if (m_Classifier instanceof Randomizable) { ((Randomizable) m_Classifiers[j]).setSeed(random.nextInt()); } // build the classifier m_Classifiers[j].buildClassifier(bagData); if (m_CalcOutOfBag) { // calculate out of bag error for (int i = 0; i < inBag.length; i++) { if (!inBag[i]) { Instance outOfBagInst = data.instance(i); outOfBagCount += outOfBagInst.weight(); if (data.classAttribute().isNumeric()) { errorSum += outOfBagInst.weight() * Math.abs( m_Classifiers[j].classifyInstance(outOfBagInst) - outOfBagInst.classValue()); } else { if (m_Classifiers[j].classifyInstance(outOfBagInst) != outOfBagInst.classValue()) { errorSum += outOfBagInst.weight(); } } } } } } m_OutOfBagError = errorSum / outOfBagCount; }
/**
 * Parses a given list of options. Valid options are:
 *
 * <p>-W classname <br>
 * Specify the full class name of a weak classifier as the basis for bagging
 * (required).
 *
 * <p>-I num <br>
 * Set the number of bagging iterations (default 10).
 *
 * <p>-S seed <br>
 * Random number seed for resampling (default 1).
 *
 * <p>-P num <br>
 * Size of each bag, as a percentage of the training size (default 100).
 *
 * <p>-O <br>
 * Compute out of bag error.
 *
 * <p>Options after -- are passed to the designated classifier.
 *
 * <p>Only -P and -O are handled directly by this method; the option array is
 * then handed to the superclass.
 *
 * @param options the list of options as an array of strings
 * @exception Exception if an option is not supported
 */
@Override
public void setOptions(String[] options) throws Exception {

  // -P: bag size percentage; falls back to 100 when the option is absent
  String percentArg = Utils.getOption('P', options);
  int percent = 100;
  if (percentArg.length() > 0) {
    percent = Integer.parseInt(percentArg);
  }
  setBagSizePercent(percent);

  // -O: flag requesting the out-of-bag error
  boolean calcOutOfBag = Utils.getFlag('O', options);
  setCalcOutOfBag(calcOutOfBag);

  // everything else is delegated to the superclass
  super.setOptions(options);
}