/** * Returns class probabilities for an instance. * * @param instance the instance to compute the distribution for * @return the class probabilities * @throws Exception if distribution can't be computed successfully */ public double[] distributionForInstance(Instance instance) throws Exception { // replace missing values m_replaceMissing.input(instance); instance = m_replaceMissing.output(); // possibly convert nominal attributes if (m_convertNominal) { m_nominalToBinary.input(instance); instance = m_nominalToBinary.output(); } return m_tree.distributionForInstance(instance); }
/** * Classifies the given instance using the linear regression function. * * @param instance the test instance * @return the classification * @throws Exception if classification can't be done successfully */ public double classifyInstance(Instance instance) throws Exception { // Transform the input instance Instance transformedInstance = instance; if (!m_checksTurnedOff) { m_TransformFilter.input(transformedInstance); m_TransformFilter.batchFinished(); transformedInstance = m_TransformFilter.output(); m_MissingFilter.input(transformedInstance); m_MissingFilter.batchFinished(); transformedInstance = m_MissingFilter.output(); } // Calculate the dependent variable from the regression model return regressionPrediction(transformedInstance, m_SelectedAttributes, m_Coefficients); }
/** * Builds the classifier. * * @param data the data to train with * @throws Exception if classifier can't be built successfully */ public void buildClassifier(Instances data) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(data); // remove instances with missing class Instances filteredData = new Instances(data); filteredData.deleteWithMissingClass(); // replace missing values m_replaceMissing = new ReplaceMissingValues(); m_replaceMissing.setInputFormat(filteredData); filteredData = Filter.useFilter(filteredData, m_replaceMissing); // possibly convert nominal attributes globally if (m_convertNominal) { m_nominalToBinary = new NominalToBinary(); m_nominalToBinary.setInputFormat(filteredData); filteredData = Filter.useFilter(filteredData, m_nominalToBinary); } int minNumInstances = 2; // create a FT tree root if (m_modelType == 0) m_tree = new FTNode( m_errorOnProbabilities, m_numBoostingIterations, m_minNumInstances, m_weightTrimBeta, m_useAIC); // create a FTLeaves tree root if (m_modelType == 1) { m_tree = new FTLeavesNode( m_errorOnProbabilities, m_numBoostingIterations, m_minNumInstances, m_weightTrimBeta, m_useAIC); } // create a FTInner tree root if (m_modelType == 2) m_tree = new FTInnerNode( m_errorOnProbabilities, m_numBoostingIterations, m_minNumInstances, m_weightTrimBeta, m_useAIC); // build tree m_tree.buildClassifier(filteredData); // prune tree m_tree.prune(); m_tree.assignIDs(0); m_tree.cleanup(); }
/** * Builds a regression model for the given data. * * @param data the training data to be used for generating the linear regression function * @throws Exception if the classifier could not be built successfully */ public void buildClassifier(Instances data) throws Exception { if (!m_checksTurnedOff) { // can classifier handle the data? getCapabilities().testWithFail(data); // remove instances with missing class data = new Instances(data); data.deleteWithMissingClass(); } // Preprocess instances if (!m_checksTurnedOff) { m_TransformFilter = new NominalToBinary(); m_TransformFilter.setInputFormat(data); data = Filter.useFilter(data, m_TransformFilter); m_MissingFilter = new ReplaceMissingValues(); m_MissingFilter.setInputFormat(data); data = Filter.useFilter(data, m_MissingFilter); data.deleteWithMissingClass(); } else { m_TransformFilter = null; m_MissingFilter = null; } m_ClassIndex = data.classIndex(); m_TransformedData = data; // Turn all attributes on for a start m_SelectedAttributes = new boolean[data.numAttributes()]; for (int i = 0; i < data.numAttributes(); i++) { if (i != m_ClassIndex) { m_SelectedAttributes[i] = true; } } m_Coefficients = null; // Compute means and standard deviations m_Means = new double[data.numAttributes()]; m_StdDevs = new double[data.numAttributes()]; for (int j = 0; j < data.numAttributes(); j++) { if (j != data.classIndex()) { m_Means[j] = data.meanOrMode(j); m_StdDevs[j] = Math.sqrt(data.variance(j)); if (m_StdDevs[j] == 0) { m_SelectedAttributes[j] = false; } } } m_ClassStdDev = Math.sqrt(data.variance(m_TransformedData.classIndex())); m_ClassMean = data.meanOrMode(m_TransformedData.classIndex()); // Perform the regression findBestModel(); // Save memory m_TransformedData = new Instances(data, 0); }