/** * resets the classifier, i.e., the best model, last known restart and iteration, etc. * * @see #m_BestModel * @see #m_LastIteration * @see #m_LastRestart */ @Override public void reset() { super.reset(); m_BestModel = null; m_LastIteration = -1; m_LastRestart = -1; m_LastRMSTrain = 1.0; m_LastRMSTest = 1.0; m_LastRMSTestOriginal = 1.0; m_LastRMS = 1.0; m_RMSTrain = 1.0; m_RMSTest = 1.0; m_RMSTestOriginal = 1.0; m_RMS = 1.0; m_AccTrain = 0.0; m_AccTestOriginal = 0.0; m_LastAccTrain = 0.0; // for logging m_LogEntries.clear(); m_LogEntries.addFilename("RMS", createFilename("-rms.csv")); m_LogEntries.addFilename("RMSTrain", createFilename("-rms_train.csv")); m_LogEntries.addFilename("RMSTest", createFilename("-rms_test.csv")); m_LogEntries.addFilename("RMSTestOriginal", createFilename("-rms_test-original.csv")); m_LogEntries.addFilename("AccTrain", createFilename("-acc_train.csv")); m_LogEntries.addFilename("AccTestOriginal", createFilename("-acc_test-original.csv")); m_LogEntries.addFilename("FlippedLabels", createFilename("-flipped.csv")); }
/**
 * Builds the necessary CollectiveInstances from the given Instances, creates
 * a combined dataset (a copy of the training set followed by every test
 * instance) and sets up the flip history on that combined dataset.
 *
 * @throws Exception if anything goes wrong
 */
@Override
protected void generateSets() throws Exception {
  super.generateSets();

  // helper that gets the seed and the flipping algorithm of this classifier
  m_CollectiveInstances = new CollectiveInstances();
  m_CollectiveInstances.setSeed(getSeed());
  m_CollectiveInstances.setFlipper(m_Flipper);

  // training data first, test data appended afterwards
  m_TrainsetNew = new Instances(m_Trainset);
  final int numTest = m_Testset.numInstances();
  for (int idx = 0; idx < numTest; idx++) {
    m_TrainsetNew.add(m_Testset.instance(idx));
  }

  m_FlipHistory = new FlipHistory(m_TrainsetNew);
}
/**
 * Parses a given list of options.
 *
 * <p>
 * <!-- options-start -->
 * Valid options are:
 *
 * <p>
 *
 * <pre> -I &lt;num&gt;
 *  Number of iterations.
 *  (default 10)</pre>
 *
 * <pre> -R &lt;num&gt;
 *  Number of restarts.
 *  (default 10)</pre>
 *
 * <pre> -log
 *  Creates logs in the tmp directory for all kinds of internal data.
 *  Use only for debugging purposes!
 * </pre>
 *
 * <pre> -U
 *  Updates also the labels of the training set.
 * </pre>
 *
 * <pre> -eval &lt;num&gt;
 *  The type of evaluation to use (0 = Randomwalk/Last model used for
 *  prediction, 1=Randomwalk/Best model used for prediction,
 *  2=Hillclimbing).
 * </pre>
 *
 * <pre> -compare &lt;num&gt;
 *  The type of comparison used for comparing models.
 *  (0=overall RMS, 1=RMS on train set, 2=RMS on test set,
 *  3=Accuracy on train set)
 * </pre>
 *
 * <pre> -flipper "&lt;classname [parameters]&gt;"
 *  The flipping algorithm (and optional parameters) to use for
 *  flipping labels.
 * </pre>
 *
 * <pre> -folds &lt;folds&gt;
 *  The number of folds for splitting the training set into
 *  train and test set. The first fold is always the training
 *  set. With '-V' you can invert this, i.e., instead of 20/80
 *  for 5 folds you'll get 80/20.
 *  (default 5)</pre>
 *
 * <pre> -V
 *  Inverts the fold selection, i.e., instead of using the first
 *  fold for the training set it is used for test set and the
 *  remaining folds for training.</pre>
 *
 * <pre> -verbose
 *  Whether to print some more information during building the
 *  classifier.
 *  (default is off)</pre>
 *
 * <pre> -S &lt;num&gt;
 *  Random number seed.
 *  (default 1)</pre>
 *
 * <pre> -D
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console</pre>
 *
 * <pre> -W
 *  Full name of base classifier.
 *  (default: weka.classifiers.trees.J48)</pre>
 *
 * <pre>
 * Options specific to classifier weka.classifiers.trees.J48:
 * </pre>
 *
 * <pre> -U
 *  Use unpruned tree.</pre>
 *
 * <pre> -C &lt;pruning confidence&gt;
 *  Set confidence threshold for pruning.
 *  (default 0.25)</pre>
 *
 * <pre> -M &lt;minimum number of instances&gt;
 *  Set minimum number of instances per leaf.
 *  (default 2)</pre>
 *
 * <pre> -R
 *  Use reduced error pruning.</pre>
 *
 * <pre> -N &lt;number of folds&gt;
 *  Set number of folds for reduced error
 *  pruning. One fold is used as pruning set.
 *  (default 3)</pre>
 *
 * <pre> -B
 *  Use binary splits only.</pre>
 *
 * <pre> -S
 *  Don't perform subtree raising.</pre>
 *
 * <pre> -L
 *  Do not clean up after the tree has been built.</pre>
 *
 * <pre> -A
 *  Laplace smoothing for predicted probabilities.</pre>
 *
 * <pre> -Q &lt;seed&gt;
 *  Seed for random data shuffling (default 1).</pre>
 *
 * <!-- options-end -->
 * Options after -- are passed to the designated classifier.
 *
 * <p>
 * When an option is absent, the following fallbacks are applied by this
 * method: 10 iterations, 10 restarts, evaluation type
 * {@code CollectiveInstances.EVAL_RANDOMWALK_LAST}, comparison type
 * {@code CollectiveInstances.COMPARE_RMS} and a {@code TriangleFlipper}.
 * All remaining options are handed to the superclass.
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 * @throws IllegalArgumentException if the -flipper specification contains
 *           no class name (e.g., consists of whitespace only)
 */
@Override
public void setOptions(String[] options) throws Exception {
  String value;
  String[] flipperSpec;

  value = Utils.getOption('I', options);
  if (value.length() != 0) {
    setNumIterations(Integer.parseInt(value));
  } else {
    setNumIterations(10);
  }

  value = Utils.getOption('R', options);
  if (value.length() != 0) {
    setNumRestarts(Integer.parseInt(value));
  } else {
    setNumRestarts(10);
  }

  setLog(Utils.getFlag("log", options));

  setUpdateTraining(Utils.getFlag('U', options));

  // NOTE(review): the fallbacks for -eval and -compare used here
  // (EVAL_RANDOMWALK_LAST / COMPARE_RMS) differ from the values assigned in
  // initializeMembers() (EVAL_RANDOMWALK_BEST / COMPARE_RMS_TRAIN) - confirm
  // which pair is the intended default.
  value = Utils.getOption("eval", options);
  if (value.length() != 0) {
    setEvaluationType(
        new SelectedTag(Integer.parseInt(value), CollectiveInstances.EVAL_TAGS));
  } else {
    setEvaluationType(
        new SelectedTag(CollectiveInstances.EVAL_RANDOMWALK_LAST, CollectiveInstances.EVAL_TAGS));
  }

  value = Utils.getOption("compare", options);
  if (value.length() != 0) {
    setComparisonType(
        new SelectedTag(Integer.parseInt(value), CollectiveInstances.COMPARE_TAGS));
  } else {
    setComparisonType(
        new SelectedTag(CollectiveInstances.COMPARE_RMS, CollectiveInstances.COMPARE_TAGS));
  }

  value = Utils.getOption("flipper", options);
  if (value.length() != 0) {
    flipperSpec = Utils.splitOptions(value);
    // a blank specification yields no tokens; fail with a clear message
    // instead of an ArrayIndexOutOfBoundsException on element 0
    if (flipperSpec.length == 0) {
      throw new IllegalArgumentException(
          "Invalid flipper specification string: '" + value + "'");
    }
    // first token is the class name, the rest are the flipper's own options
    value = flipperSpec[0];
    flipperSpec[0] = "";
    setFlipper(Flipper.forName(value, flipperSpec));
  } else {
    setFlipper(new TriangleFlipper());
  }

  // must come last: the superclass handles the remaining options
  super.setOptions(options);
}
/**
 * Performs the initialization of the members: iteration/restart settings,
 * RMS and accuracy statistics, evaluation and comparison type, the base
 * classifier, the flipper, the log and the names of the additional measures.
 */
@Override
protected void initializeMembers() {
  super.initializeMembers();

  // search settings
  m_NumRestarts = 10;
  m_NumIterations = 10;
  m_UpdateTraining = false;
  m_EvaluationType = CollectiveInstances.EVAL_RANDOMWALK_BEST;
  m_ComparisonType = CollectiveInstances.COMPARE_RMS_TRAIN;
  m_Log = false;

  // nothing built yet
  m_BestModel = null;
  m_CollectiveInstances = null;
  m_TrainsetNew = null;
  m_FlipHistory = null;
  m_LastRestart = -1;
  m_LastIteration = -1;

  // statistics of the current run
  m_RMS = 1.0;
  m_RMSTrain = 1.0;
  m_RMSTest = 1.0;
  m_RMSTestOriginal = 1.0;
  m_AccTrain = 0.0;
  m_AccTestOriginal = 0.0;

  // statistics remembered from the last run
  m_LastRMS = 1.0;
  m_LastRMSTrain = 1.0;
  m_LastRMSTest = 1.0;
  m_LastRMSTestOriginal = 1.0;
  m_LastAccTrain = 0.0;

  // helper objects
  m_Classifier = new weka.classifiers.trees.J48();
  m_Flipper = new TriangleFlipper();
  m_LogEntries = new CollectiveLog();

  // names of the additional measures, added in this order
  final String[] measures = {
    "measureLastRestart",
    "measureLastIteration",
    "measureLastRMSTrain",
    "measureLastRMSTest",
    "measureLastRMSTestOriginal",
    "measureLastRMS",
    "measureLastAccTrain",
  };
  for (String measure : measures) {
    m_AdditionalMeasures.add(measure);
  }
}