/** * Parses a given list of options. * * <p> * <!-- options-start --> * Valid options are: * * <p> * * <pre> * -S * Specify shrinkage rate. (default = 1.0, ie. no shrinkage) * </pre> * * <pre> * -I <num> * Number of iterations. * (default 10) * </pre> * * <pre> * -D * If set, classifier is run in debug mode and * may output additional info to the console * </pre> * * <pre> * -W * Full name of base classifier. * (default: weka.classifiers.trees.DecisionStump) * </pre> * * <pre> * Options specific to classifier weka.classifiers.trees.DecisionStump: * </pre> * * <pre> * -D * If set, classifier is run in debug mode and * may output additional info to the console * </pre> * * <!-- options-end --> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { String optionString = Utils.getOption('S', options); if (optionString.length() != 0) { Double temp = Double.valueOf(optionString); setShrinkage(temp.doubleValue()); } // ++ LEF ++ optionString = Utils.getOption('P', options); if (optionString.length() != 0) { Double temp = Double.valueOf(optionString); setPercentage(temp.doubleValue()); } super.setOptions(options); }
/** * Build the classifier on the supplied data * * @param data the training data * @throws Exception if the classifier could not be built successfully */ public void buildClassifier(Instances data) throws Exception { super.buildClassifier(data); // can classifier handle the data? getCapabilities().testWithFail(data); // remove instances with missing class Instances newData = new Instances(data); newData.deleteWithMissingClass(); double sum = 0; double temp_sum = 0; // Add the model for the mean first m_zeroR = new ZeroR(); m_zeroR.buildClassifier(newData); // only class? -> use only ZeroR model if (newData.numAttributes() == 1) { System.err.println( "Cannot build model (only class attribute present in data!), " + "using ZeroR model instead!"); m_SuitableData = false; return; } else { m_SuitableData = true; } newData = residualReplace(newData, m_zeroR, false); for (int i = 0; i < newData.numInstances(); i++) { sum += newData.instance(i).weight() * newData.instance(i).classValue() * newData.instance(i).classValue(); } if (m_Debug) { System.err.println("Sum of squared residuals " + "(predicting the mean) : " + sum); } m_NumIterationsPerformed = 0; do { temp_sum = sum; // +++++ CHANGES FROM LEFMAN START ++++++++ Resample resample = new Resample(); resample.setRandomSeed(m_NumIterationsPerformed); resample.setNoReplacement(true); resample.setSampleSizePercent(getPercentage()); resample.setInputFormat(newData); Instances sampledData = Filter.useFilter(newData, resample); // Build the classifier // m_Classifiers[m_NumIterationsPerformed].buildClassifier(newData); m_Classifiers[m_NumIterationsPerformed].buildClassifier(sampledData); // output the number of nodes in the tree! double numNodes = ((REPTree) m_Classifiers[m_NumIterationsPerformed]).getMeasure("measureTreeSize"); if (m_Debug) { System.err.println("It#: " + m_NumIterationsPerformed + " #nodes: " + numNodes); } // +++++ CHANGES FROM LEFMAN END ++++++++ newData = residualReplace(newData, m_Classifiers[m_NumIterationsPerformed], true); sum = 0; for (int i = 0; i < newData.numInstances(); i++) { sum += newData.instance(i).weight() * newData.instance(i).classValue() * newData.instance(i).classValue(); } if (m_Debug) { System.err.println("Sum of squared residuals : " + sum); } m_NumIterationsPerformed++; } while (((temp_sum - sum) > Utils.SMALL) && (m_NumIterationsPerformed < m_Classifiers.length)); }